Loading fs/btrfs/super.c +26 −0 Original line number Original line Diff line number Diff line Loading @@ -914,6 +914,32 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) return 0; return 0; } } /* Used to sort the devices by max_avail(descending sort) */ static int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2) { if (((struct btrfs_device_info *)dev_info1)->max_avail > ((struct btrfs_device_info *)dev_info2)->max_avail) return -1; else if (((struct btrfs_device_info *)dev_info1)->max_avail < ((struct btrfs_device_info *)dev_info2)->max_avail) return 1; else return 0; } /* * sort the devices by max_avail, in which max free extent size of each device * is stored.(Descending Sort) */ static inline void btrfs_descending_sort_devices( struct btrfs_device_info *devices, size_t nr_devices) { sort(devices, nr_devices, sizeof(struct btrfs_device_info), btrfs_cmp_device_free_bytes, NULL); } /* /* * The helper to calc the free space on the devices that can be used to store * The helper to calc the free space on the devices that can be used to store * file data. * file data. Loading fs/btrfs/volumes.c +174 −319 Original line number Original line Diff line number Diff line Loading @@ -805,10 +805,7 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, /* we don't want to overwrite the superblock on the drive, /* we don't want to overwrite the superblock on the drive, * so we make sure to start at an offset of at least 1MB * so we make sure to start at an offset of at least 1MB */ */ search_start = 1024 * 1024; search_start = max(root->fs_info->alloc_start, 1024ull * 1024); if (root->fs_info->alloc_start + num_bytes <= search_end) search_start = max(root->fs_info->alloc_start, search_start); max_hole_start = search_start; max_hole_start = search_start; max_hole_size = 0; max_hole_size = 0; Loading Loading @@ -2227,275 +2224,204 @@ static int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, return 0; return 0; } } static noinline u64 chunk_bytes_by_type(u64 type, u64 calc_size, /* int num_stripes, int sub_stripes) * sort the devices in descending order by max_avail, total_avail */ static int btrfs_cmp_device_info(const void *a, const void *b) { { if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP)) const struct btrfs_device_info *di_a = a; return calc_size; const struct btrfs_device_info *di_b = b; else if (type & BTRFS_BLOCK_GROUP_RAID10) return calc_size * (num_stripes / sub_stripes); else return calc_size * num_stripes; } /* Used to sort the devices by max_avail(descending sort) */ if (di_a->max_avail > di_b->max_avail) int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2) { if (((struct btrfs_device_info *)dev_info1)->max_avail > ((struct btrfs_device_info *)dev_info2)->max_avail) return -1; return -1; else if (((struct btrfs_device_info *)dev_info1)->max_avail < if (di_a->max_avail < di_b->max_avail) ((struct btrfs_device_info *)dev_info2)->max_avail) return 1; if (di_a->total_avail > di_b->total_avail) return -1; if (di_a->total_avail < di_b->total_avail) return 1; return 1; else return 0; return 0; } } static int __btrfs_calc_nstripes(struct btrfs_fs_devices *fs_devices, u64 type, static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, int *num_stripes, int *min_stripes, struct btrfs_root *extent_root, int *sub_stripes) struct map_lookup **map_ret, u64 *num_bytes_out, u64 *stripe_size_out, u64 start, u64 type) { { *num_stripes = 1; struct btrfs_fs_info *info = extent_root->fs_info; *min_stripes = 1; struct btrfs_fs_devices *fs_devices = info->fs_devices; *sub_stripes = 0; struct list_head *cur; struct map_lookup *map = NULL; struct extent_map_tree *em_tree; struct extent_map *em; struct btrfs_device_info *devices_info = NULL; u64 total_avail; int num_stripes; /* total number of stripes to allocate */ int sub_stripes; /* sub_stripes info for map */ int dev_stripes; /* stripes per dev */ int devs_max; /* max devs to use */ int devs_min; /* min devs needed */ int devs_increment; /* ndevs has to be a multiple of this */ int ncopies; /* how many copies to data has */ int ret; u64 max_stripe_size; u64 max_chunk_size; u64 stripe_size; u64 num_bytes; int ndevs; int i; int j; if (type & (BTRFS_BLOCK_GROUP_RAID0)) { if ((type & BTRFS_BLOCK_GROUP_RAID1) && *num_stripes = fs_devices->rw_devices; (type & BTRFS_BLOCK_GROUP_DUP)) { *min_stripes = 2; WARN_ON(1); } type &= ~BTRFS_BLOCK_GROUP_DUP; if (type & (BTRFS_BLOCK_GROUP_DUP)) { *num_stripes = 2; *min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_RAID1)) { if (fs_devices->rw_devices < 2) return -ENOSPC; *num_stripes = 2; *min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_RAID10)) { *num_stripes = fs_devices->rw_devices; if (*num_stripes < 4) return -ENOSPC; *num_stripes &= ~(u32)1; *sub_stripes = 2; *min_stripes = 4; } } return 0; if (list_empty(&fs_devices->alloc_list)) } return -ENOSPC; static u64 __btrfs_calc_stripe_size(struct btrfs_fs_devices *fs_devices, sub_stripes = 1; u64 proposed_size, u64 type, dev_stripes = 1; int num_stripes, int small_stripe) devs_increment = 1; { ncopies = 1; int min_stripe_size = 1 * 1024 * 1024; devs_max = 0; /* 0 == as many as possible */ u64 calc_size = proposed_size; devs_min = 1; u64 max_chunk_size = calc_size; int ncopies = 1; if (type & (BTRFS_BLOCK_GROUP_RAID1 | /* BTRFS_BLOCK_GROUP_DUP | * define the properties of each RAID type. BTRFS_BLOCK_GROUP_RAID10)) * FIXME: move this to a global table and use it in all RAID * calculation code */ if (type & (BTRFS_BLOCK_GROUP_DUP)) { dev_stripes = 2; ncopies = 2; devs_max = 1; } else if (type & (BTRFS_BLOCK_GROUP_RAID0)) { devs_min = 2; } else if (type & (BTRFS_BLOCK_GROUP_RAID1)) { devs_increment = 2; ncopies = 2; ncopies = 2; devs_max = 2; devs_min = 2; } else if (type & (BTRFS_BLOCK_GROUP_RAID10)) { sub_stripes = 2; devs_increment = 2; ncopies = 2; devs_min = 4; } else { devs_max = 1; } if (type & BTRFS_BLOCK_GROUP_DATA) { if (type & BTRFS_BLOCK_GROUP_DATA) { max_chunk_size = 10 * calc_size; max_stripe_size = 1024 * 1024 * 1024; min_stripe_size = 64 * 1024 * 1024; max_chunk_size = 10 * max_stripe_size; } else if (type & BTRFS_BLOCK_GROUP_METADATA) { } else if (type & BTRFS_BLOCK_GROUP_METADATA) { max_chunk_size = 256 * 1024 * 1024; max_stripe_size = 256 * 1024 * 1024; min_stripe_size = 32 * 1024 * 1024; max_chunk_size = max_stripe_size; } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { calc_size = 8 * 1024 * 1024; max_stripe_size = 8 * 1024 * 1024; max_chunk_size = calc_size * 2; max_chunk_size = 2 * max_stripe_size; min_stripe_size = 1 * 1024 * 1024; } else { printk(KERN_ERR "btrfs: invalid chunk type 0x%llx requested\n", type); BUG_ON(1); } } /* we don't want a chunk larger than 10% of writeable space */ /* we don't want a chunk larger than 10% of writeable space */ max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), max_chunk_size); max_chunk_size); if (calc_size * num_stripes > max_chunk_size * ncopies) { devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices, calc_size = max_chunk_size * ncopies; GFP_NOFS); do_div(calc_size, num_stripes); if (!devices_info) do_div(calc_size, BTRFS_STRIPE_LEN); return -ENOMEM; calc_size *= BTRFS_STRIPE_LEN; } /* we don't want tiny stripes */ cur = fs_devices->alloc_list.next; if (!small_stripe) calc_size = max_t(u64, min_stripe_size, calc_size); /* /* * we're about to do_div by the BTRFS_STRIPE_LEN so lets make sure * in the first pass through the devices list, we gather information * we end up with something bigger than a stripe * about the available holes on each device. */ */ calc_size = max_t(u64, calc_size, BTRFS_STRIPE_LEN); ndevs = 0; while (cur != &fs_devices->alloc_list) { do_div(calc_size, BTRFS_STRIPE_LEN); struct btrfs_device *device; calc_size *= BTRFS_STRIPE_LEN; u64 max_avail; u64 dev_offset; return calc_size; } static struct map_lookup *__shrink_map_lookup_stripes(struct map_lookup *map, int num_stripes) { struct map_lookup *new; size_t len = map_lookup_size(num_stripes); BUG_ON(map->num_stripes < num_stripes); device = list_entry(cur, struct btrfs_device, dev_alloc_list); if (map->num_stripes == num_stripes) cur = cur->next; return map; new = kmalloc(len, GFP_NOFS); if (!device->writeable) { if (!new) { printk(KERN_ERR /* just change map->num_stripes */ "btrfs: read-only device in alloc_list\n"); map->num_stripes = num_stripes; WARN_ON(1); return map; continue; } } memcpy(new, map, len); if (!device->in_fs_metadata) new->num_stripes = num_stripes; continue; kfree(map); return new; } /* if (device->total_bytes > device->bytes_used) * helper to allocate device space from btrfs_device_info, in which we stored total_avail = device->total_bytes - device->bytes_used; * max free space information of every device. It is used when we can not else * allocate chunks by default size. total_avail = 0; * /* avail is off by max(alloc_start, 1MB), but that is the same * By this helper, we can allocate a new chunk as larger as possible. * for all devices, so it doesn't hurt the sorting later on */ */ static int __btrfs_alloc_tiny_space(struct btrfs_trans_handle *trans, struct btrfs_fs_devices *fs_devices, struct btrfs_device_info *devices, int nr_device, u64 type, struct map_lookup **map_lookup, int min_stripes, u64 *stripe_size) { int i, index, sort_again = 0; int min_devices = min_stripes; u64 max_avail, min_free; struct map_lookup *map = *map_lookup; int ret; if (nr_device < min_stripes) ret = find_free_dev_extent(trans, device, return -ENOSPC; max_stripe_size * dev_stripes, &dev_offset, &max_avail); if (ret && ret != -ENOSPC) goto error; btrfs_descending_sort_devices(devices, nr_device); if (ret == 0) max_avail = max_stripe_size * dev_stripes; max_avail = devices[0].max_avail; if (max_avail < BTRFS_STRIPE_LEN * dev_stripes) if (!max_avail) continue; return -ENOSPC; for (i = 0; i < nr_device; i++) { devices_info[ndevs].dev_offset = dev_offset; /* devices_info[ndevs].max_avail = max_avail; * if dev_offset = 0, it means the free space of this device devices_info[ndevs].total_avail = total_avail; * is less than what we need, and we didn't search max avail devices_info[ndevs].dev = device; * extent on this device, so do it now. ++ndevs; */ if (!devices[i].dev_offset) { ret = find_free_dev_extent(trans, devices[i].dev, max_avail, &devices[i].dev_offset, &devices[i].max_avail); if (ret != 0 && ret != -ENOSPC) return ret; sort_again = 1; } } } /* we update the max avail free extent of each devices, sort again */ /* if (sort_again) * now sort the devices by hole size / available space btrfs_descending_sort_devices(devices, nr_device); */ sort(devices_info, ndevs, sizeof(struct btrfs_device_info), if (type & BTRFS_BLOCK_GROUP_DUP) btrfs_cmp_device_info, NULL); min_devices = 1; if (!devices[min_devices - 1].max_avail) return -ENOSPC; max_avail = devices[min_devices - 1].max_avail; if (type & BTRFS_BLOCK_GROUP_DUP) do_div(max_avail, 2); max_avail = __btrfs_calc_stripe_size(fs_devices, max_avail, type, min_stripes, 1); if (type & BTRFS_BLOCK_GROUP_DUP) min_free = max_avail * 2; else min_free = max_avail; if (min_free > devices[min_devices - 1].max_avail) return -ENOSPC; map = __shrink_map_lookup_stripes(map, min_stripes); /* round down to number of usable stripes */ *stripe_size = max_avail; ndevs -= ndevs % devs_increment; index = 0; if (ndevs < devs_increment * sub_stripes || ndevs < devs_min) { for (i = 0; i < min_stripes; i++) { ret = -ENOSPC; map->stripes[i].dev = devices[index].dev; goto error; map->stripes[i].physical = devices[index].dev_offset; if (type & BTRFS_BLOCK_GROUP_DUP) { i++; map->stripes[i].dev = devices[index].dev; map->stripes[i].physical = devices[index].dev_offset + max_avail; } index++; } *map_lookup = map; return 0; } } static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (devs_max && ndevs > devs_max) struct btrfs_root *extent_root, ndevs = devs_max; struct map_lookup **map_ret, /* u64 *num_bytes, u64 *stripe_size, * the primary goal is to maximize the number of stripes, so use as many u64 start, u64 type) * devices as possible, even if the stripes are not maximum sized. { */ struct btrfs_fs_info *info = extent_root->fs_info; stripe_size = devices_info[ndevs-1].max_avail; struct btrfs_device *device = NULL; num_stripes = ndevs * dev_stripes; struct btrfs_fs_devices *fs_devices = info->fs_devices; struct list_head *cur; struct map_lookup *map; struct extent_map_tree *em_tree; struct extent_map *em; struct btrfs_device_info *devices_info; struct list_head private_devs; u64 calc_size = 1024 * 1024 * 1024; u64 min_free; u64 avail; u64 dev_offset; int num_stripes; int min_stripes; int sub_stripes; int min_devices; /* the min number of devices we need */ int i; int ret; int index; if ((type & BTRFS_BLOCK_GROUP_RAID1) && if (stripe_size * num_stripes > max_chunk_size * ncopies) { (type & BTRFS_BLOCK_GROUP_DUP)) { stripe_size = max_chunk_size * ncopies; WARN_ON(1); do_div(stripe_size, num_stripes); type &= ~BTRFS_BLOCK_GROUP_DUP; } } if (list_empty(&fs_devices->alloc_list)) return -ENOSPC; ret = __btrfs_calc_nstripes(fs_devices, type, &num_stripes, &min_stripes, &sub_stripes); if (ret) return ret; devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices, do_div(stripe_size, dev_stripes); GFP_NOFS); do_div(stripe_size, BTRFS_STRIPE_LEN); if (!devices_info) stripe_size *= BTRFS_STRIPE_LEN; return -ENOMEM; map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); if (!map) { if (!map) { Loading @@ -2504,85 +2430,12 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, } } map->num_stripes = num_stripes; map->num_stripes = num_stripes; cur = fs_devices->alloc_list.next; for (i = 0; i < ndevs; ++i) { index = 0; for (j = 0; j < dev_stripes; ++j) { i = 0; int s = i * dev_stripes + j; map->stripes[s].dev = devices_info[i].dev; calc_size = __btrfs_calc_stripe_size(fs_devices, calc_size, type, map->stripes[s].physical = devices_info[i].dev_offset + num_stripes, 0); j * stripe_size; if (type & BTRFS_BLOCK_GROUP_DUP) { min_free = calc_size * 2; min_devices = 1; } else { min_free = calc_size; min_devices = min_stripes; } INIT_LIST_HEAD(&private_devs); while (index < num_stripes) { device = list_entry(cur, struct btrfs_device, dev_alloc_list); BUG_ON(!device->writeable); if (device->total_bytes > device->bytes_used) avail = device->total_bytes - device->bytes_used; else avail = 0; cur = cur->next; if (device->in_fs_metadata && avail >= min_free) { ret = find_free_dev_extent(trans, device, min_free, &devices_info[i].dev_offset, &devices_info[i].max_avail); if (ret == 0) { list_move_tail(&device->dev_alloc_list, &private_devs); map->stripes[index].dev = device; map->stripes[index].physical = devices_info[i].dev_offset; index++; if (type & BTRFS_BLOCK_GROUP_DUP) { map->stripes[index].dev = device; map->stripes[index].physical = devices_info[i].dev_offset + calc_size; index++; } } else if (ret != -ENOSPC) goto error; devices_info[i].dev = device; i++; } else if (device->in_fs_metadata && avail >= BTRFS_STRIPE_LEN) { devices_info[i].dev = device; devices_info[i].max_avail = avail; i++; } if (cur == &fs_devices->alloc_list) break; } list_splice(&private_devs, &fs_devices->alloc_list); if (index < num_stripes) { if (index >= min_stripes) { num_stripes = index; if (type & (BTRFS_BLOCK_GROUP_RAID10)) { num_stripes /= sub_stripes; num_stripes *= sub_stripes; } map = __shrink_map_lookup_stripes(map, num_stripes); } else if (i >= min_devices) { ret = __btrfs_alloc_tiny_space(trans, fs_devices, devices_info, i, type, &map, min_stripes, &calc_size); if (ret) goto error; } else { ret = -ENOSPC; goto error; } } } } map->sector_size = extent_root->sectorsize; map->sector_size = extent_root->sectorsize; Loading @@ -2593,11 +2446,12 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, map->sub_stripes = sub_stripes; map->sub_stripes = sub_stripes; *map_ret = map; *map_ret = map; *stripe_size = calc_size; num_bytes = stripe_size * (num_stripes / ncopies); *num_bytes = chunk_bytes_by_type(type, calc_size, map->num_stripes, sub_stripes); *stripe_size_out = stripe_size; *num_bytes_out = num_bytes; trace_btrfs_chunk_alloc(info->chunk_root, map, start, *num_bytes); trace_btrfs_chunk_alloc(info->chunk_root, map, start, num_bytes); em = alloc_extent_map(); em = alloc_extent_map(); if (!em) { if (!em) { Loading @@ -2606,7 +2460,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, } } em->bdev = (struct block_device *)map; em->bdev = (struct block_device *)map; em->start = start; em->start = start; em->len = *num_bytes; em->len = num_bytes; em->block_start = 0; em->block_start = 0; em->block_len = em->len; em->block_len = em->len; Loading @@ -2619,20 +2473,21 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, ret = btrfs_make_block_group(trans, extent_root, 0, type, ret = btrfs_make_block_group(trans, extent_root, 0, type, BTRFS_FIRST_CHUNK_TREE_OBJECTID, BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, *num_bytes); start, num_bytes); BUG_ON(ret); BUG_ON(ret); index = 0; for (i = 0; i < map->num_stripes; ++i) { while (index < map->num_stripes) { struct btrfs_device *device; device = map->stripes[index].dev; u64 dev_offset; dev_offset = map->stripes[index].physical; device = map->stripes[i].dev; dev_offset = map->stripes[i].physical; ret = btrfs_alloc_dev_extent(trans, device, ret = btrfs_alloc_dev_extent(trans, device, info->chunk_root->root_key.objectid, info->chunk_root->root_key.objectid, BTRFS_FIRST_CHUNK_TREE_OBJECTID, BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, dev_offset, calc_size); start, dev_offset, stripe_size); BUG_ON(ret); BUG_ON(ret); index++; } } kfree(devices_info); kfree(devices_info); Loading fs/btrfs/volumes.h +1 −15 Original line number Original line Diff line number Diff line Loading @@ -144,6 +144,7 @@ struct btrfs_device_info { struct btrfs_device *dev; struct btrfs_device *dev; u64 dev_offset; u64 dev_offset; u64 max_avail; u64 max_avail; u64 total_avail; }; }; struct map_lookup { struct map_lookup { Loading @@ -157,21 +158,6 @@ struct map_lookup { struct btrfs_bio_stripe stripes[]; struct btrfs_bio_stripe stripes[]; }; }; /* Used to sort the devices by max_avail(descending sort) */ int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2); /* * sort the devices by max_avail, in which max free extent size of each device * is stored.(Descending Sort) */ static inline void btrfs_descending_sort_devices( struct btrfs_device_info *devices, size_t nr_devices) { sort(devices, nr_devices, sizeof(struct btrfs_device_info), btrfs_cmp_device_free_bytes, NULL); } int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, u64 end, u64 *length); u64 end, u64 *length); Loading Loading
fs/btrfs/super.c +26 −0 Original line number Original line Diff line number Diff line Loading @@ -914,6 +914,32 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) return 0; return 0; } } /* Used to sort the devices by max_avail(descending sort) */ static int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2) { if (((struct btrfs_device_info *)dev_info1)->max_avail > ((struct btrfs_device_info *)dev_info2)->max_avail) return -1; else if (((struct btrfs_device_info *)dev_info1)->max_avail < ((struct btrfs_device_info *)dev_info2)->max_avail) return 1; else return 0; } /* * sort the devices by max_avail, in which max free extent size of each device * is stored.(Descending Sort) */ static inline void btrfs_descending_sort_devices( struct btrfs_device_info *devices, size_t nr_devices) { sort(devices, nr_devices, sizeof(struct btrfs_device_info), btrfs_cmp_device_free_bytes, NULL); } /* /* * The helper to calc the free space on the devices that can be used to store * The helper to calc the free space on the devices that can be used to store * file data. * file data. Loading
fs/btrfs/volumes.c +174 −319 Original line number Original line Diff line number Diff line Loading @@ -805,10 +805,7 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, /* we don't want to overwrite the superblock on the drive, /* we don't want to overwrite the superblock on the drive, * so we make sure to start at an offset of at least 1MB * so we make sure to start at an offset of at least 1MB */ */ search_start = 1024 * 1024; search_start = max(root->fs_info->alloc_start, 1024ull * 1024); if (root->fs_info->alloc_start + num_bytes <= search_end) search_start = max(root->fs_info->alloc_start, search_start); max_hole_start = search_start; max_hole_start = search_start; max_hole_size = 0; max_hole_size = 0; Loading Loading @@ -2227,275 +2224,204 @@ static int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, return 0; return 0; } } static noinline u64 chunk_bytes_by_type(u64 type, u64 calc_size, /* int num_stripes, int sub_stripes) * sort the devices in descending order by max_avail, total_avail */ static int btrfs_cmp_device_info(const void *a, const void *b) { { if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP)) const struct btrfs_device_info *di_a = a; return calc_size; const struct btrfs_device_info *di_b = b; else if (type & BTRFS_BLOCK_GROUP_RAID10) return calc_size * (num_stripes / sub_stripes); else return calc_size * num_stripes; } /* Used to sort the devices by max_avail(descending sort) */ if (di_a->max_avail > di_b->max_avail) int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2) { if (((struct btrfs_device_info *)dev_info1)->max_avail > ((struct btrfs_device_info *)dev_info2)->max_avail) return -1; return -1; else if (((struct btrfs_device_info *)dev_info1)->max_avail < if (di_a->max_avail < di_b->max_avail) ((struct btrfs_device_info *)dev_info2)->max_avail) return 1; if (di_a->total_avail > di_b->total_avail) return -1; if (di_a->total_avail < di_b->total_avail) return 1; return 1; else return 0; return 0; } } static int __btrfs_calc_nstripes(struct btrfs_fs_devices *fs_devices, u64 type, static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, int *num_stripes, int *min_stripes, struct btrfs_root *extent_root, int *sub_stripes) struct map_lookup **map_ret, u64 *num_bytes_out, u64 *stripe_size_out, u64 start, u64 type) { { *num_stripes = 1; struct btrfs_fs_info *info = extent_root->fs_info; *min_stripes = 1; struct btrfs_fs_devices *fs_devices = info->fs_devices; *sub_stripes = 0; struct list_head *cur; struct map_lookup *map = NULL; struct extent_map_tree *em_tree; struct extent_map *em; struct btrfs_device_info *devices_info = NULL; u64 total_avail; int num_stripes; /* total number of stripes to allocate */ int sub_stripes; /* sub_stripes info for map */ int dev_stripes; /* stripes per dev */ int devs_max; /* max devs to use */ int devs_min; /* min devs needed */ int devs_increment; /* ndevs has to be a multiple of this */ int ncopies; /* how many copies to data has */ int ret; u64 max_stripe_size; u64 max_chunk_size; u64 stripe_size; u64 num_bytes; int ndevs; int i; int j; if (type & (BTRFS_BLOCK_GROUP_RAID0)) { if ((type & BTRFS_BLOCK_GROUP_RAID1) && *num_stripes = fs_devices->rw_devices; (type & BTRFS_BLOCK_GROUP_DUP)) { *min_stripes = 2; WARN_ON(1); } type &= ~BTRFS_BLOCK_GROUP_DUP; if (type & (BTRFS_BLOCK_GROUP_DUP)) { *num_stripes = 2; *min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_RAID1)) { if (fs_devices->rw_devices < 2) return -ENOSPC; *num_stripes = 2; *min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_RAID10)) { *num_stripes = fs_devices->rw_devices; if (*num_stripes < 4) return -ENOSPC; *num_stripes &= ~(u32)1; *sub_stripes = 2; *min_stripes = 4; } } return 0; if (list_empty(&fs_devices->alloc_list)) } return -ENOSPC; static u64 __btrfs_calc_stripe_size(struct btrfs_fs_devices *fs_devices, sub_stripes = 1; u64 proposed_size, u64 type, dev_stripes = 1; int num_stripes, int small_stripe) devs_increment = 1; { ncopies = 1; int min_stripe_size = 1 * 1024 * 1024; devs_max = 0; /* 0 == as many as possible */ u64 calc_size = proposed_size; devs_min = 1; u64 max_chunk_size = calc_size; int ncopies = 1; if (type & (BTRFS_BLOCK_GROUP_RAID1 | /* BTRFS_BLOCK_GROUP_DUP | * define the properties of each RAID type. BTRFS_BLOCK_GROUP_RAID10)) * FIXME: move this to a global table and use it in all RAID * calculation code */ if (type & (BTRFS_BLOCK_GROUP_DUP)) { dev_stripes = 2; ncopies = 2; devs_max = 1; } else if (type & (BTRFS_BLOCK_GROUP_RAID0)) { devs_min = 2; } else if (type & (BTRFS_BLOCK_GROUP_RAID1)) { devs_increment = 2; ncopies = 2; ncopies = 2; devs_max = 2; devs_min = 2; } else if (type & (BTRFS_BLOCK_GROUP_RAID10)) { sub_stripes = 2; devs_increment = 2; ncopies = 2; devs_min = 4; } else { devs_max = 1; } if (type & BTRFS_BLOCK_GROUP_DATA) { if (type & BTRFS_BLOCK_GROUP_DATA) { max_chunk_size = 10 * calc_size; max_stripe_size = 1024 * 1024 * 1024; min_stripe_size = 64 * 1024 * 1024; max_chunk_size = 10 * max_stripe_size; } else if (type & BTRFS_BLOCK_GROUP_METADATA) { } else if (type & BTRFS_BLOCK_GROUP_METADATA) { max_chunk_size = 256 * 1024 * 1024; max_stripe_size = 256 * 1024 * 1024; min_stripe_size = 32 * 1024 * 1024; max_chunk_size = max_stripe_size; } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { calc_size = 8 * 1024 * 1024; max_stripe_size = 8 * 1024 * 1024; max_chunk_size = calc_size * 2; max_chunk_size = 2 * max_stripe_size; min_stripe_size = 1 * 1024 * 1024; } else { printk(KERN_ERR "btrfs: invalid chunk type 0x%llx requested\n", type); BUG_ON(1); } } /* we don't want a chunk larger than 10% of writeable space */ /* we don't want a chunk larger than 10% of writeable space */ max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), max_chunk_size); max_chunk_size); if (calc_size * num_stripes > max_chunk_size * ncopies) { devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices, calc_size = max_chunk_size * ncopies; GFP_NOFS); do_div(calc_size, num_stripes); if (!devices_info) do_div(calc_size, BTRFS_STRIPE_LEN); return -ENOMEM; calc_size *= BTRFS_STRIPE_LEN; } /* we don't want tiny stripes */ cur = fs_devices->alloc_list.next; if (!small_stripe) calc_size = max_t(u64, min_stripe_size, calc_size); /* /* * we're about to do_div by the BTRFS_STRIPE_LEN so lets make sure * in the first pass through the devices list, we gather information * we end up with something bigger than a stripe * about the available holes on each device. */ */ calc_size = max_t(u64, calc_size, BTRFS_STRIPE_LEN); ndevs = 0; while (cur != &fs_devices->alloc_list) { do_div(calc_size, BTRFS_STRIPE_LEN); struct btrfs_device *device; calc_size *= BTRFS_STRIPE_LEN; u64 max_avail; u64 dev_offset; return calc_size; } static struct map_lookup *__shrink_map_lookup_stripes(struct map_lookup *map, int num_stripes) { struct map_lookup *new; size_t len = map_lookup_size(num_stripes); BUG_ON(map->num_stripes < num_stripes); device = list_entry(cur, struct btrfs_device, dev_alloc_list); if (map->num_stripes == num_stripes) cur = cur->next; return map; new = kmalloc(len, GFP_NOFS); if (!device->writeable) { if (!new) { printk(KERN_ERR /* just change map->num_stripes */ "btrfs: read-only device in alloc_list\n"); map->num_stripes = num_stripes; WARN_ON(1); return map; continue; } } memcpy(new, map, len); if (!device->in_fs_metadata) new->num_stripes = num_stripes; continue; kfree(map); return new; } /* if (device->total_bytes > device->bytes_used) * helper to allocate device space from btrfs_device_info, in which we stored total_avail = device->total_bytes - device->bytes_used; * max free space information of every device. It is used when we can not else * allocate chunks by default size. total_avail = 0; * /* avail is off by max(alloc_start, 1MB), but that is the same * By this helper, we can allocate a new chunk as larger as possible. * for all devices, so it doesn't hurt the sorting later on */ */ static int __btrfs_alloc_tiny_space(struct btrfs_trans_handle *trans, struct btrfs_fs_devices *fs_devices, struct btrfs_device_info *devices, int nr_device, u64 type, struct map_lookup **map_lookup, int min_stripes, u64 *stripe_size) { int i, index, sort_again = 0; int min_devices = min_stripes; u64 max_avail, min_free; struct map_lookup *map = *map_lookup; int ret; if (nr_device < min_stripes) ret = find_free_dev_extent(trans, device, return -ENOSPC; max_stripe_size * dev_stripes, &dev_offset, &max_avail); if (ret && ret != -ENOSPC) goto error; btrfs_descending_sort_devices(devices, nr_device); if (ret == 0) max_avail = max_stripe_size * dev_stripes; max_avail = devices[0].max_avail; if (max_avail < BTRFS_STRIPE_LEN * dev_stripes) if (!max_avail) continue; return -ENOSPC; for (i = 0; i < nr_device; i++) { devices_info[ndevs].dev_offset = dev_offset; /* devices_info[ndevs].max_avail = max_avail; * if dev_offset = 0, it means the free space of this device devices_info[ndevs].total_avail = total_avail; * is less than what we need, and we didn't search max avail devices_info[ndevs].dev = device; * extent on this device, so do it now. ++ndevs; */ if (!devices[i].dev_offset) { ret = find_free_dev_extent(trans, devices[i].dev, max_avail, &devices[i].dev_offset, &devices[i].max_avail); if (ret != 0 && ret != -ENOSPC) return ret; sort_again = 1; } } } /* we update the max avail free extent of each devices, sort again */ /* if (sort_again) * now sort the devices by hole size / available space btrfs_descending_sort_devices(devices, nr_device); */ sort(devices_info, ndevs, sizeof(struct btrfs_device_info), if (type & BTRFS_BLOCK_GROUP_DUP) btrfs_cmp_device_info, NULL); min_devices = 1; if (!devices[min_devices - 1].max_avail) return -ENOSPC; max_avail = devices[min_devices - 1].max_avail; if (type & BTRFS_BLOCK_GROUP_DUP) do_div(max_avail, 2); max_avail = __btrfs_calc_stripe_size(fs_devices, max_avail, type, min_stripes, 1); if (type & BTRFS_BLOCK_GROUP_DUP) min_free = max_avail * 2; else min_free = max_avail; if (min_free > devices[min_devices - 1].max_avail) return -ENOSPC; map = __shrink_map_lookup_stripes(map, min_stripes); /* round down to number of usable stripes */ *stripe_size = max_avail; ndevs -= ndevs % devs_increment; index = 0; if (ndevs < devs_increment * sub_stripes || ndevs < devs_min) { for (i = 0; i < min_stripes; i++) { ret = -ENOSPC; map->stripes[i].dev = devices[index].dev; goto error; map->stripes[i].physical = devices[index].dev_offset; if (type & BTRFS_BLOCK_GROUP_DUP) { i++; map->stripes[i].dev = devices[index].dev; map->stripes[i].physical = devices[index].dev_offset + max_avail; } index++; } *map_lookup = map; return 0; } } static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (devs_max && ndevs > devs_max) struct btrfs_root *extent_root, ndevs = devs_max; struct map_lookup **map_ret, /* u64 *num_bytes, u64 *stripe_size, * the primary goal is to maximize the number of stripes, so use as many u64 start, u64 type) * devices as possible, even if the stripes are not maximum sized. { */ struct btrfs_fs_info *info = extent_root->fs_info; stripe_size = devices_info[ndevs-1].max_avail; struct btrfs_device *device = NULL; num_stripes = ndevs * dev_stripes; struct btrfs_fs_devices *fs_devices = info->fs_devices; struct list_head *cur; struct map_lookup *map; struct extent_map_tree *em_tree; struct extent_map *em; struct btrfs_device_info *devices_info; struct list_head private_devs; u64 calc_size = 1024 * 1024 * 1024; u64 min_free; u64 avail; u64 dev_offset; int num_stripes; int min_stripes; int sub_stripes; int min_devices; /* the min number of devices we need */ int i; int ret; int index; if ((type & BTRFS_BLOCK_GROUP_RAID1) && if (stripe_size * num_stripes > max_chunk_size * ncopies) { (type & BTRFS_BLOCK_GROUP_DUP)) { stripe_size = max_chunk_size * ncopies; WARN_ON(1); do_div(stripe_size, num_stripes); type &= ~BTRFS_BLOCK_GROUP_DUP; } } if (list_empty(&fs_devices->alloc_list)) return -ENOSPC; ret = __btrfs_calc_nstripes(fs_devices, type, &num_stripes, &min_stripes, &sub_stripes); if (ret) return ret; devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices, do_div(stripe_size, dev_stripes); GFP_NOFS); do_div(stripe_size, BTRFS_STRIPE_LEN); if (!devices_info) stripe_size *= BTRFS_STRIPE_LEN; return -ENOMEM; map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); if (!map) { if (!map) { Loading @@ -2504,85 +2430,12 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, } } map->num_stripes = num_stripes; map->num_stripes = num_stripes; cur = fs_devices->alloc_list.next; for (i = 0; i < ndevs; ++i) { index = 0; for (j = 0; j < dev_stripes; ++j) { i = 0; int s = i * dev_stripes + j; map->stripes[s].dev = devices_info[i].dev; calc_size = __btrfs_calc_stripe_size(fs_devices, calc_size, type, map->stripes[s].physical = devices_info[i].dev_offset + num_stripes, 0); j * stripe_size; if (type & BTRFS_BLOCK_GROUP_DUP) { min_free = calc_size * 2; min_devices = 1; } else { min_free = calc_size; min_devices = min_stripes; } INIT_LIST_HEAD(&private_devs); while (index < num_stripes) { device = list_entry(cur, struct btrfs_device, dev_alloc_list); BUG_ON(!device->writeable); if (device->total_bytes > device->bytes_used) avail = device->total_bytes - device->bytes_used; else avail = 0; cur = cur->next; if (device->in_fs_metadata && avail >= min_free) { ret = find_free_dev_extent(trans, device, min_free, &devices_info[i].dev_offset, &devices_info[i].max_avail); if (ret == 0) { list_move_tail(&device->dev_alloc_list, &private_devs); map->stripes[index].dev = device; map->stripes[index].physical = devices_info[i].dev_offset; index++; if (type & BTRFS_BLOCK_GROUP_DUP) { map->stripes[index].dev = device; map->stripes[index].physical = devices_info[i].dev_offset + calc_size; index++; } } else if (ret != -ENOSPC) goto error; devices_info[i].dev = device; i++; } else if (device->in_fs_metadata && avail >= BTRFS_STRIPE_LEN) { devices_info[i].dev = device; devices_info[i].max_avail = avail; i++; } if (cur == &fs_devices->alloc_list) break; } list_splice(&private_devs, &fs_devices->alloc_list); if (index < num_stripes) { if (index >= min_stripes) { num_stripes = index; if (type & (BTRFS_BLOCK_GROUP_RAID10)) { num_stripes /= sub_stripes; num_stripes *= sub_stripes; } map = __shrink_map_lookup_stripes(map, num_stripes); } else if (i >= min_devices) { ret = __btrfs_alloc_tiny_space(trans, fs_devices, devices_info, i, type, &map, min_stripes, &calc_size); if (ret) goto error; } else { ret = -ENOSPC; goto error; } } } } map->sector_size = extent_root->sectorsize; map->sector_size = extent_root->sectorsize; Loading @@ -2593,11 +2446,12 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, map->sub_stripes = sub_stripes; map->sub_stripes = sub_stripes; *map_ret = map; *map_ret = map; *stripe_size = calc_size; num_bytes = stripe_size * (num_stripes / ncopies); *num_bytes = chunk_bytes_by_type(type, calc_size, map->num_stripes, sub_stripes); *stripe_size_out = stripe_size; *num_bytes_out = num_bytes; trace_btrfs_chunk_alloc(info->chunk_root, map, start, *num_bytes); trace_btrfs_chunk_alloc(info->chunk_root, map, start, num_bytes); em = alloc_extent_map(); em = alloc_extent_map(); if (!em) { if (!em) { Loading @@ -2606,7 +2460,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, } } em->bdev = (struct block_device *)map; em->bdev = (struct block_device *)map; em->start = start; em->start = start; em->len = *num_bytes; em->len = num_bytes; em->block_start = 0; em->block_start = 0; em->block_len = em->len; em->block_len = em->len; Loading @@ -2619,20 +2473,21 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, ret = btrfs_make_block_group(trans, extent_root, 0, type, ret = btrfs_make_block_group(trans, extent_root, 0, type, BTRFS_FIRST_CHUNK_TREE_OBJECTID, BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, *num_bytes); start, num_bytes); BUG_ON(ret); BUG_ON(ret); index = 0; for (i = 0; i < map->num_stripes; ++i) { while (index < map->num_stripes) { struct btrfs_device *device; device = map->stripes[index].dev; u64 dev_offset; dev_offset = map->stripes[index].physical; device = map->stripes[i].dev; dev_offset = map->stripes[i].physical; ret = btrfs_alloc_dev_extent(trans, device, ret = btrfs_alloc_dev_extent(trans, device, info->chunk_root->root_key.objectid, info->chunk_root->root_key.objectid, BTRFS_FIRST_CHUNK_TREE_OBJECTID, BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, dev_offset, calc_size); start, dev_offset, stripe_size); BUG_ON(ret); BUG_ON(ret); index++; } } kfree(devices_info); kfree(devices_info); Loading
fs/btrfs/volumes.h +1 −15 Original line number Original line Diff line number Diff line Loading @@ -144,6 +144,7 @@ struct btrfs_device_info { struct btrfs_device *dev; struct btrfs_device *dev; u64 dev_offset; u64 dev_offset; u64 max_avail; u64 max_avail; u64 total_avail; }; }; struct map_lookup { struct map_lookup { Loading @@ -157,21 +158,6 @@ struct map_lookup { struct btrfs_bio_stripe stripes[]; struct btrfs_bio_stripe stripes[]; }; }; /* Used to sort the devices by max_avail(descending sort) */ int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2); /* * sort the devices by max_avail, in which max free extent size of each device * is stored.(Descending Sort) */ static inline void btrfs_descending_sort_devices( struct btrfs_device_info *devices, size_t nr_devices) { sort(devices, nr_devices, sizeof(struct btrfs_device_info), btrfs_cmp_device_free_bytes, NULL); } int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, u64 end, u64 *length); u64 end, u64 *length); Loading