Newer
Older
#include <linux/ceph/ceph_debug.h>
#include <linux/slab.h>
#include <linux/ceph/libceph.h>
#include <linux/ceph/osdmap.h>
#include <linux/ceph/decode.h>
#include <linux/crush/hash.h>
#include <linux/crush/mapper.h>
char *ceph_osdmap_state_str(char *str, int len, int state)
{
if (!len)
if ((state & CEPH_OSD_EXISTS) && (state & CEPH_OSD_UP))
snprintf(str, len, "exists, up");
else if (state & CEPH_OSD_EXISTS)
snprintf(str, len, "exists");
else if (state & CEPH_OSD_UP)
snprintf(str, len, "up");
else
static int calc_bits_of(unsigned int t)
{
int b = 0;
while (t) {
t = t >> 1;
b++;
}
return b;
}
/*
* the foo_mask is the smallest value 2^n-1 that is >= foo.
*/
static void calc_pg_masks(struct ceph_pg_pool_info *pi)
{
pi->pg_num_mask = (1 << calc_bits_of(pi->pg_num-1)) - 1;
pi->pgp_num_mask = (1 << calc_bits_of(pi->pgp_num-1)) - 1;
}
/*
* decode crush map
*/
static int crush_decode_uniform_bucket(void **p, void *end,
struct crush_bucket_uniform *b)
{
dout("crush_decode_uniform_bucket %p to %p\n", *p, end);
ceph_decode_need(p, end, (1+b->h.size) * sizeof(u32), bad);
b->item_weight = ceph_decode_32(p);
return 0;
bad:
return -EINVAL;
}
static int crush_decode_list_bucket(void **p, void *end,
struct crush_bucket_list *b)
{
int j;
dout("crush_decode_list_bucket %p to %p\n", *p, end);
b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
if (b->item_weights == NULL)
return -ENOMEM;
b->sum_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
if (b->sum_weights == NULL)
return -ENOMEM;
ceph_decode_need(p, end, 2 * b->h.size * sizeof(u32), bad);
for (j = 0; j < b->h.size; j++) {
b->item_weights[j] = ceph_decode_32(p);
b->sum_weights[j] = ceph_decode_32(p);
}
return 0;
bad:
return -EINVAL;
}
static int crush_decode_tree_bucket(void **p, void *end,
struct crush_bucket_tree *b)
{
int j;
dout("crush_decode_tree_bucket %p to %p\n", *p, end);
ceph_decode_32_safe(p, end, b->num_nodes, bad);
b->node_weights = kcalloc(b->num_nodes, sizeof(u32), GFP_NOFS);
if (b->node_weights == NULL)
return -ENOMEM;
ceph_decode_need(p, end, b->num_nodes * sizeof(u32), bad);
for (j = 0; j < b->num_nodes; j++)
b->node_weights[j] = ceph_decode_32(p);
return 0;
bad:
return -EINVAL;
}
static int crush_decode_straw_bucket(void **p, void *end,
struct crush_bucket_straw *b)
{
int j;
dout("crush_decode_straw_bucket %p to %p\n", *p, end);
b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
if (b->item_weights == NULL)
return -ENOMEM;
b->straws = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
if (b->straws == NULL)
return -ENOMEM;
ceph_decode_need(p, end, 2 * b->h.size * sizeof(u32), bad);
for (j = 0; j < b->h.size; j++) {
b->item_weights[j] = ceph_decode_32(p);
b->straws[j] = ceph_decode_32(p);
static int skip_name_map(void **p, void *end)
{
int len;
ceph_decode_32_safe(p, end, len ,bad);
while (len--) {
int strlen;
*p += sizeof(u32);
ceph_decode_32_safe(p, end, strlen, bad);
*p += strlen;
}
return 0;
bad:
return -EINVAL;
}
static struct crush_map *crush_decode(void *pbyval, void *end)
{
struct crush_map *c;
int err = -EINVAL;
int i, j;
void **p = &pbyval;
void *start = pbyval;
u32 magic;
dout("crush_decode %p to %p len %d\n", *p, end, (int)(end - *p));
c = kzalloc(sizeof(*c), GFP_NOFS);
if (c == NULL)
return ERR_PTR(-ENOMEM);
/* set tunables to default values */
c->choose_local_tries = 2;
c->choose_local_fallback_tries = 5;
c->choose_total_tries = 19;
Jim Schutt
committed
c->chooseleaf_descend_once = 0;
if (magic != CRUSH_MAGIC) {
pr_err("crush_decode magic %x != current %x\n",
(unsigned int)magic, (unsigned int)CRUSH_MAGIC);
c->max_buckets = ceph_decode_32(p);
c->max_rules = ceph_decode_32(p);
c->max_devices = ceph_decode_32(p);
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
c->buckets = kcalloc(c->max_buckets, sizeof(*c->buckets), GFP_NOFS);
if (c->buckets == NULL)
goto badmem;
c->rules = kcalloc(c->max_rules, sizeof(*c->rules), GFP_NOFS);
if (c->rules == NULL)
goto badmem;
/* buckets */
for (i = 0; i < c->max_buckets; i++) {
int size = 0;
u32 alg;
struct crush_bucket *b;
ceph_decode_32_safe(p, end, alg, bad);
if (alg == 0) {
c->buckets[i] = NULL;
continue;
}
dout("crush_decode bucket %d off %x %p to %p\n",
i, (int)(*p-start), *p, end);
switch (alg) {
case CRUSH_BUCKET_UNIFORM:
size = sizeof(struct crush_bucket_uniform);
break;
case CRUSH_BUCKET_LIST:
size = sizeof(struct crush_bucket_list);
break;
case CRUSH_BUCKET_TREE:
size = sizeof(struct crush_bucket_tree);
break;
case CRUSH_BUCKET_STRAW:
size = sizeof(struct crush_bucket_straw);
break;
default:
goto bad;
}
BUG_ON(size == 0);
b = c->buckets[i] = kzalloc(size, GFP_NOFS);
if (b == NULL)
goto badmem;
ceph_decode_need(p, end, 4*sizeof(u32), bad);
b->id = ceph_decode_32(p);
b->type = ceph_decode_16(p);
b->alg = ceph_decode_8(p);
b->hash = ceph_decode_8(p);
b->weight = ceph_decode_32(p);
b->size = ceph_decode_32(p);
dout("crush_decode bucket size %d off %x %p to %p\n",
b->size, (int)(*p-start), *p, end);
b->items = kcalloc(b->size, sizeof(__s32), GFP_NOFS);
if (b->items == NULL)
goto badmem;
b->perm = kcalloc(b->size, sizeof(u32), GFP_NOFS);
if (b->perm == NULL)
goto badmem;
b->perm_n = 0;
ceph_decode_need(p, end, b->size*sizeof(u32), bad);
for (j = 0; j < b->size; j++)
b->items[j] = ceph_decode_32(p);
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
switch (b->alg) {
case CRUSH_BUCKET_UNIFORM:
err = crush_decode_uniform_bucket(p, end,
(struct crush_bucket_uniform *)b);
if (err < 0)
goto bad;
break;
case CRUSH_BUCKET_LIST:
err = crush_decode_list_bucket(p, end,
(struct crush_bucket_list *)b);
if (err < 0)
goto bad;
break;
case CRUSH_BUCKET_TREE:
err = crush_decode_tree_bucket(p, end,
(struct crush_bucket_tree *)b);
if (err < 0)
goto bad;
break;
case CRUSH_BUCKET_STRAW:
err = crush_decode_straw_bucket(p, end,
(struct crush_bucket_straw *)b);
if (err < 0)
goto bad;
break;
}
}
/* rules */
dout("rule vec is %p\n", c->rules);
for (i = 0; i < c->max_rules; i++) {
u32 yes;
struct crush_rule *r;
ceph_decode_32_safe(p, end, yes, bad);
if (!yes) {
dout("crush_decode NO rule %d off %x %p to %p\n",
i, (int)(*p-start), *p, end);
c->rules[i] = NULL;
continue;
}
dout("crush_decode rule %d off %x %p to %p\n",
i, (int)(*p-start), *p, end);
/* len */
ceph_decode_32_safe(p, end, yes, bad);
#if BITS_PER_LONG == 32
if (yes > (ULONG_MAX - sizeof(*r))
/ sizeof(struct crush_rule_step))
goto bad;
#endif
r = c->rules[i] = kmalloc(sizeof(*r) +
yes*sizeof(struct crush_rule_step),
GFP_NOFS);
if (r == NULL)
goto badmem;
dout(" rule %d is at %p\n", i, r);
r->len = yes;
ceph_decode_copy_safe(p, end, &r->mask, 4, bad); /* 4 u8's */
ceph_decode_need(p, end, r->len*3*sizeof(u32), bad);
for (j = 0; j < r->len; j++) {
r->steps[j].op = ceph_decode_32(p);
r->steps[j].arg1 = ceph_decode_32(p);
r->steps[j].arg2 = ceph_decode_32(p);
for (num_name_maps = 0; num_name_maps < 3; num_name_maps++) {
err = skip_name_map(p, end);
if (err < 0)
goto done;
}
/* tunables */
ceph_decode_need(p, end, 3*sizeof(u32), done);
c->choose_local_tries = ceph_decode_32(p);
c->choose_local_fallback_tries = ceph_decode_32(p);
c->choose_total_tries = ceph_decode_32(p);
dout("crush decode tunable choose_local_tries = %d",
c->choose_local_tries);
dout("crush decode tunable choose_local_fallback_tries = %d",
c->choose_local_fallback_tries);
dout("crush decode tunable choose_total_tries = %d",
c->choose_total_tries);
Jim Schutt
committed
ceph_decode_need(p, end, sizeof(u32), done);
c->chooseleaf_descend_once = ceph_decode_32(p);
dout("crush decode tunable chooseleaf_descend_once = %d",
c->chooseleaf_descend_once);
dout("crush_decode success\n");
return c;
badmem:
err = -ENOMEM;
bad:
dout("crush_decode fail %d\n", err);
crush_destroy(c);
return ERR_PTR(err);
}
/*
* rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid
* to a set of osds)
static int pgid_cmp(struct ceph_pg l, struct ceph_pg r)
if (l.pool < r.pool)
return -1;
if (l.pool > r.pool)
return 1;
if (l.seed < r.seed)
static int __insert_pg_mapping(struct ceph_pg_mapping *new,
struct rb_root *root)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
struct ceph_pg_mapping *pg = NULL;
dout("__insert_pg_mapping %llx %p\n", *(u64 *)&new->pgid, new);
while (*p) {
parent = *p;
pg = rb_entry(parent, struct ceph_pg_mapping, node);
c = pgid_cmp(new->pgid, pg->pgid);
if (c < 0)
return -EEXIST;
}
rb_link_node(&new->node, parent, p);
rb_insert_color(&new->node, root);
static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root,
{
struct rb_node *n = root->rb_node;
struct ceph_pg_mapping *pg;
int c;
while (n) {
pg = rb_entry(n, struct ceph_pg_mapping, node);
c = pgid_cmp(pgid, pg->pgid);
n = n->rb_left;
n = n->rb_right;
dout("__lookup_pg_mapping %lld.%x got %p\n",
pgid.pool, pgid.seed, pg);
return pg;
}
return NULL;
}
static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg pgid)
{
struct ceph_pg_mapping *pg = __lookup_pg_mapping(root, pgid);
if (pg) {
dout("__remove_pg_mapping %lld.%x %p\n", pgid.pool, pgid.seed,
pg);
rb_erase(&pg->node, root);
kfree(pg);
return 0;
}
dout("__remove_pg_mapping %lld.%x dne\n", pgid.pool, pgid.seed);
/*
* rbtree of pg pool info
*/
static int __insert_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *new)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
struct ceph_pg_pool_info *pi = NULL;
while (*p) {
parent = *p;
pi = rb_entry(parent, struct ceph_pg_pool_info, node);
if (new->id < pi->id)
p = &(*p)->rb_left;
else if (new->id > pi->id)
p = &(*p)->rb_right;
else
return -EEXIST;
}
rb_link_node(&new->node, parent, p);
rb_insert_color(&new->node, root);
return 0;
}
static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, u64 id)
{
struct ceph_pg_pool_info *pi;
struct rb_node *n = root->rb_node;
while (n) {
pi = rb_entry(n, struct ceph_pg_pool_info, node);
if (id < pi->id)
n = n->rb_left;
else if (id > pi->id)
n = n->rb_right;
else
return pi;
}
return NULL;
}
struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map, u64 id)
{
return __lookup_pg_pool(&map->pg_pools, id);
}
const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id)
{
struct ceph_pg_pool_info *pi;
if (id == CEPH_NOPOOL)
return NULL;
if (WARN_ON_ONCE(id > (u64) INT_MAX))
return NULL;
pi = __lookup_pg_pool(&map->pg_pools, (int) id);
return pi ? pi->name : NULL;
}
EXPORT_SYMBOL(ceph_pg_pool_name_by_id);
int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name)
{
struct rb_node *rbp;
for (rbp = rb_first(&map->pg_pools); rbp; rbp = rb_next(rbp)) {
struct ceph_pg_pool_info *pi =
rb_entry(rbp, struct ceph_pg_pool_info, node);
if (pi->name && strcmp(pi->name, name) == 0)
return pi->id;
}
return -ENOENT;
}
EXPORT_SYMBOL(ceph_pg_poolid_by_name);
static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
{
rb_erase(&pi->node, root);
kfree(pi->name);
kfree(pi);
}
static int decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi)
u8 ev, cv;
unsigned len, num;
void *pool_end;
ceph_decode_need(p, end, 2 + 4, bad);
ev = ceph_decode_8(p); /* encoding version */
cv = ceph_decode_8(p); /* compat version */
if (ev < 5) {
pr_warning("got v %d < 5 cv %d of ceph_pg_pool\n", ev, cv);
return -EINVAL;
}
if (cv > 9) {
pr_warning("got v %d cv %d > 9 of ceph_pg_pool\n", ev, cv);
return -EINVAL;
}
len = ceph_decode_32(p);
ceph_decode_need(p, end, len, bad);
pool_end = *p + len;
pi->type = ceph_decode_8(p);
pi->size = ceph_decode_8(p);
pi->crush_ruleset = ceph_decode_8(p);
pi->object_hash = ceph_decode_8(p);
pi->pg_num = ceph_decode_32(p);
pi->pgp_num = ceph_decode_32(p);
*p += 4 + 4; /* skip lpg* */
*p += 4; /* skip last_change */
*p += 8 + 4; /* skip snap_seq, snap_epoch */
/* skip snaps */
num = ceph_decode_32(p);
while (num--) {
*p += 8; /* snapid key */
*p += 1 + 1; /* versions */
len = ceph_decode_32(p);
*p += len;
/* skip removed_snaps */
num = ceph_decode_32(p);
*p += num * (8 + 8);
*p += 8; /* skip auid */
pi->flags = ceph_decode_64(p);
*p += 4; /* skip crash_replay_interval */
if (ev >= 7)
*p += 1; /* skip min_size */
if (ev >= 8)
*p += 8 + 8; /* skip quota_max_* */
if (ev >= 9) {
/* skip tiers */
num = ceph_decode_32(p);
*p += num * 8;
*p += 8; /* skip tier_of */
*p += 1; /* skip cache_mode */
pi->read_tier = ceph_decode_64(p);
pi->write_tier = ceph_decode_64(p);
} else {
pi->read_tier = -1;
pi->write_tier = -1;
}
/* ignore the rest */
*p = pool_end;
calc_pg_masks(pi);
return 0;
bad:
return -EINVAL;
static int decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
{
struct ceph_pg_pool_info *pi;
u32 num, len;
u64 pool;
ceph_decode_32_safe(p, end, num, bad);
dout(" %d pool names\n", num);
while (num--) {
ceph_decode_64_safe(p, end, pool, bad);
ceph_decode_32_safe(p, end, len, bad);
dout(" pool %llu len %d\n", pool, len);
ceph_decode_need(p, end, len, bad);
pi = __lookup_pg_pool(&map->pg_pools, pool);
if (pi) {
char *name = kstrndup(*p, len, GFP_NOFS);
if (!name)
return -ENOMEM;
pi->name = name;
dout(" name is %s\n", pi->name);
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
}
*p += len;
}
return 0;
bad:
return -EINVAL;
}
/*
* osd map
*/
void ceph_osdmap_destroy(struct ceph_osdmap *map)
{
dout("osdmap_destroy %p\n", map);
if (map->crush)
crush_destroy(map->crush);
while (!RB_EMPTY_ROOT(&map->pg_temp)) {
struct ceph_pg_mapping *pg =
rb_entry(rb_first(&map->pg_temp),
struct ceph_pg_mapping, node);
rb_erase(&pg->node, &map->pg_temp);
kfree(pg);
}
while (!RB_EMPTY_ROOT(&map->pg_pools)) {
struct ceph_pg_pool_info *pi =
rb_entry(rb_first(&map->pg_pools),
struct ceph_pg_pool_info, node);
__remove_pg_pool(&map->pg_pools, pi);
}
kfree(map->osd_state);
kfree(map->osd_weight);
kfree(map->osd_addr);
kfree(map);
}
/*
* adjust max osd value. reallocate arrays.
*/
static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
{
u8 *state;
struct ceph_entity_addr *addr;
u32 *weight;
state = kcalloc(max, sizeof(*state), GFP_NOFS);
addr = kcalloc(max, sizeof(*addr), GFP_NOFS);
weight = kcalloc(max, sizeof(*weight), GFP_NOFS);
if (state == NULL || addr == NULL || weight == NULL) {
kfree(state);
kfree(addr);
kfree(weight);
return -ENOMEM;
}
/* copy old? */
if (map->osd_state) {
memcpy(state, map->osd_state, map->max_osd*sizeof(*state));
memcpy(addr, map->osd_addr, map->max_osd*sizeof(*addr));
memcpy(weight, map->osd_weight, map->max_osd*sizeof(*weight));
kfree(map->osd_state);
kfree(map->osd_addr);
kfree(map->osd_weight);
}
map->osd_state = state;
map->osd_weight = weight;
map->osd_addr = addr;
map->max_osd = max;
return 0;
}
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
static int __decode_pools(void **p, void *end, struct ceph_osdmap *map,
bool incremental)
{
u32 n;
ceph_decode_32_safe(p, end, n, e_inval);
while (n--) {
struct ceph_pg_pool_info *pi;
u64 pool;
int ret;
ceph_decode_64_safe(p, end, pool, e_inval);
pi = __lookup_pg_pool(&map->pg_pools, pool);
if (!incremental || !pi) {
pi = kzalloc(sizeof(*pi), GFP_NOFS);
if (!pi)
return -ENOMEM;
pi->id = pool;
ret = __insert_pg_pool(&map->pg_pools, pi);
if (ret) {
kfree(pi);
return ret;
}
}
ret = decode_pool(p, end, pi);
if (ret)
return ret;
}
return 0;
e_inval:
return -EINVAL;
}
static int decode_pools(void **p, void *end, struct ceph_osdmap *map)
{
return __decode_pools(p, end, map, false);
}
static int decode_new_pools(void **p, void *end, struct ceph_osdmap *map)
{
return __decode_pools(p, end, map, true);
}
static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
u32 max;
u32 len, i;
dout("%s %p to %p len %d\n", __func__, *p, end, (int)(end - *p));
ceph_decode_16_safe(p, end, version, e_inval);
if (version > 6) {
pr_warning("got unknown v %d > 6 of osdmap\n", version);
}
if (version < 6) {
pr_warning("got old v %d < 6 of osdmap\n", version);
/* fsid, epoch, created, modified */
ceph_decode_need(p, end, sizeof(map->fsid) + sizeof(u32) +
sizeof(map->created) + sizeof(map->modified), e_inval);
epoch = map->epoch = ceph_decode_32(p);
ceph_decode_copy(p, &map->created, sizeof(map->created));
ceph_decode_copy(p, &map->modified, sizeof(map->modified));
/* pools */
err = decode_pools(p, end, map);
if (err)
goto bad;
/* pool_name */
err = decode_pool_names(p, end, map);
goto bad;
ceph_decode_32_safe(p, end, map->pool_max, e_inval);
ceph_decode_32_safe(p, end, map->flags, e_inval);
/* max_osd */
ceph_decode_32_safe(p, end, max, e_inval);
/* (re)alloc osd arrays */
err = osdmap_set_max_osd(map, max);
/* osd_state, osd_weight, osd_addrs->client_addr */
ceph_decode_need(p, end, 3*sizeof(u32) +
map->max_osd*(1 + sizeof(*map->osd_weight) +
sizeof(*map->osd_addr)), e_inval);
if (ceph_decode_32(p) != map->max_osd)
goto e_inval;
if (ceph_decode_32(p) != map->max_osd)
goto e_inval;
map->osd_weight[i] = ceph_decode_32(p);
if (ceph_decode_32(p) != map->max_osd)
goto e_inval;
ceph_decode_copy(p, map->osd_addr, map->max_osd*sizeof(*map->osd_addr));
for (i = 0; i < map->max_osd; i++)
ceph_decode_addr(&map->osd_addr[i]);
ceph_decode_32_safe(p, end, len, e_inval);
err = ceph_decode_pgid(p, end, &pgid);
ceph_decode_need(p, end, sizeof(u32), e_inval);
if (n > (UINT_MAX - sizeof(*pg)) / sizeof(u32))
goto e_inval;
ceph_decode_need(p, end, n * sizeof(u32), e_inval);
if (!pg) {
err = -ENOMEM;
pg->pgid = pgid;
pg->len = n;
for (j = 0; j < n; j++)
pg->osds[j] = ceph_decode_32(p);
err = __insert_pg_mapping(pg, &map->pg_temp);
if (err)
goto bad;
dout(" added pg_temp %lld.%x len %d\n", pgid.pool, pgid.seed,
len);
ceph_decode_32_safe(p, end, len, e_inval);
map->crush = crush_decode(*p, min(*p + len, end));
if (IS_ERR(map->crush)) {
err = PTR_ERR(map->crush);
map->crush = NULL;
goto bad;
}
/* ignore the rest */
dout("full osdmap epoch %d max_osd %d\n", map->epoch, map->max_osd);
e_inval:
err = -EINVAL;
pr_err("corrupt full osdmap (%d) epoch %d off %d (%p of %p-%p)\n",
err, epoch, (int)(*p - start), *p, start, end);
print_hex_dump(KERN_DEBUG, "osdmap: ",
DUMP_PREFIX_OFFSET, 16, 1,
start, end - start, true);
return err;
}
/*
* Allocate and decode a full map.
*/
struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end)
{
struct ceph_osdmap *map;
int ret;
map = kzalloc(sizeof(*map), GFP_NOFS);
if (!map)
return ERR_PTR(-ENOMEM);
map->pg_temp = RB_ROOT;
mutex_init(&map->crush_scratch_mutex);
ret = osdmap_decode(p, end, map);
if (ret) {
ceph_osdmap_destroy(map);
return ERR_PTR(ret);
}
return map;
}
/*
* decode and apply an incremental map update.
*/
struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
struct ceph_osdmap *map,
struct ceph_messenger *msgr)
{
struct crush_map *newcrush = NULL;
struct ceph_fsid fsid;
u32 epoch = 0;
struct ceph_timespec modified;
s32 len;
u64 pool;
__s64 new_pool_max;
__s32 new_flags, max;
dout("%s %p to %p len %d\n", __func__, *p, end, (int)(end - *p));
ceph_decode_16_safe(p, end, version, e_inval);
if (version != 6) {
pr_warning("got unknown v %d != 6 of inc osdmap\n", version);
goto e_inval;
/* fsid, epoch, modified, new_pool_max, new_flags */
ceph_decode_need(p, end, sizeof(fsid) + sizeof(u32) + sizeof(modified) +
sizeof(u64) + sizeof(u32), e_inval);
BUG_ON(epoch != map->epoch+1);
ceph_decode_copy(p, &modified, sizeof(modified));
new_pool_max = ceph_decode_64(p);
new_flags = ceph_decode_32(p);
ceph_decode_32_safe(p, end, len, e_inval);
if (len > 0) {
dout("apply_incremental full map len %d, %p to %p\n",
len, *p, end);
return ceph_osdmap_decode(p, min(*p+len, end));
ceph_decode_32_safe(p, end, len, e_inval);
if (len > 0) {
newcrush = crush_decode(*p, min(*p+len, end));
if (IS_ERR(newcrush)) {
err = PTR_ERR(newcrush);
newcrush = NULL;
goto bad;
}
}
/* new flags? */
if (new_flags >= 0)
map->flags = new_flags;
if (new_pool_max >= 0)
map->pool_max = new_pool_max;
ceph_decode_32_safe(p, end, max, e_inval);
if (newcrush) {
if (map->crush)
crush_destroy(map->crush);
map->crush = newcrush;
newcrush = NULL;
}
/* new_pools */
err = decode_new_pools(p, end, map);
if (err)
goto bad;
/* new_pool_names */
err = decode_pool_names(p, end, map);
if (err)
goto bad;
ceph_decode_32_safe(p, end, len, e_inval);
while (len--) {
struct ceph_pg_pool_info *pi;
ceph_decode_64_safe(p, end, pool, e_inval);
pi = __lookup_pg_pool(&map->pg_pools, pool);
if (pi)
__remove_pg_pool(&map->pg_pools, pi);
ceph_decode_32_safe(p, end, len, e_inval);
while (len--) {
u32 osd;
struct ceph_entity_addr addr;
ceph_decode_32_safe(p, end, osd, e_inval);
ceph_decode_copy_safe(p, end, &addr, sizeof(addr), e_inval);