From a52d9a8033c454cd9b4697cfafb467509fc1693f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 27 Aug 2007 16:49:44 -0400 Subject: Btrfs: Extent based page cache code. This uses an rbtree of extents and tests instead of buffer heads. Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 1656 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1656 insertions(+) create mode 100644 fs/btrfs/extent_map.c (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c new file mode 100644 index 000000000000..d378edf0964e --- /dev/null +++ b/fs/btrfs/extent_map.c @@ -0,0 +1,1656 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "extent_map.h" + +static struct kmem_cache *extent_map_cache; +static struct kmem_cache *extent_state_cache; + +struct tree_entry { + u64 start; + u64 end; + int in_tree; + struct rb_node rb_node; +}; + +/* bits for the extent state */ +#define EXTENT_DIRTY 1 +#define EXTENT_WRITEBACK (1 << 1) +#define EXTENT_UPTODATE (1 << 2) +#define EXTENT_LOCKED (1 << 3) +#define EXTENT_NEW (1 << 4) +#define EXTENT_DELALLOC (1 << 5) + +#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) + +static LIST_HEAD(all_states); +spinlock_t state_lock = SPIN_LOCK_UNLOCKED; + +void __init extent_map_init(void) +{ + extent_map_cache = kmem_cache_create("extent_map", + sizeof(struct extent_map), 0, + SLAB_RECLAIM_ACCOUNT | + SLAB_DESTROY_BY_RCU, + NULL); + extent_state_cache = kmem_cache_create("extent_state", + sizeof(struct extent_state), 0, + SLAB_RECLAIM_ACCOUNT | + SLAB_DESTROY_BY_RCU, + NULL); +} + +void __exit extent_map_exit(void) +{ + while(!list_empty(&all_states)) { + struct extent_state *state; + struct list_head *cur = all_states.next; + state = list_entry(cur, struct extent_state, list); + printk("found leaked state %Lu %Lu state %d in_tree %d\n", + state->start, state->end, state->state, state->in_tree); + list_del(&state->list); + 
kfree(state); + } + if (extent_map_cache) + kmem_cache_destroy(extent_map_cache); + if (extent_state_cache) + kmem_cache_destroy(extent_state_cache); +} + +void extent_map_tree_init(struct extent_map_tree *tree, + struct address_space *mapping, gfp_t mask) +{ + tree->map.rb_node = NULL; + tree->state.rb_node = NULL; + rwlock_init(&tree->lock); + tree->mapping = mapping; +} +EXPORT_SYMBOL(extent_map_tree_init); + +struct extent_map *alloc_extent_map(gfp_t mask) +{ + struct extent_map *em; + em = kmem_cache_alloc(extent_map_cache, mask); + if (!em || IS_ERR(em)) + return em; + em->in_tree = 0; + atomic_set(&em->refs, 1); + return em; +} +EXPORT_SYMBOL(alloc_extent_map); + +void free_extent_map(struct extent_map *em) +{ + if (atomic_dec_and_test(&em->refs)) { + WARN_ON(em->in_tree); + kmem_cache_free(extent_map_cache, em); + } +} +EXPORT_SYMBOL(free_extent_map); + + +struct extent_state *alloc_extent_state(gfp_t mask) +{ + struct extent_state *state; + state = kmem_cache_alloc(extent_state_cache, mask); + if (!state || IS_ERR(state)) + return state; + state->state = 0; + state->in_tree = 0; + atomic_set(&state->refs, 1); + init_waitqueue_head(&state->wq); + spin_lock_irq(&state_lock); + list_add(&state->list, &all_states); + spin_unlock_irq(&state_lock); + return state; +} +EXPORT_SYMBOL(alloc_extent_state); + +void free_extent_state(struct extent_state *state) +{ + if (atomic_dec_and_test(&state->refs)) { + WARN_ON(state->in_tree); + spin_lock_irq(&state_lock); + list_del_init(&state->list); + spin_unlock_irq(&state_lock); + kmem_cache_free(extent_state_cache, state); + } +} +EXPORT_SYMBOL(free_extent_state); + +static struct rb_node *tree_insert(struct rb_root *root, u64 offset, + struct rb_node *node) +{ + struct rb_node ** p = &root->rb_node; + struct rb_node * parent = NULL; + struct tree_entry *entry; + + while(*p) { + parent = *p; + entry = rb_entry(parent, struct tree_entry, rb_node); + + if (offset < entry->start) + p = &(*p)->rb_left; + else if (offset > 
entry->end) + p = &(*p)->rb_right; + else + return parent; + } + + entry = rb_entry(node, struct tree_entry, rb_node); + entry->in_tree = 1; + rb_link_node(node, parent, p); + rb_insert_color(node, root); + return NULL; +} + +static struct rb_node *__tree_search(struct rb_root *root, u64 offset, + struct rb_node **prev_ret) +{ + struct rb_node * n = root->rb_node; + struct rb_node *prev = NULL; + struct tree_entry *entry; + struct tree_entry *prev_entry = NULL; + + while(n) { + entry = rb_entry(n, struct tree_entry, rb_node); + prev = n; + prev_entry = entry; + + if (offset < entry->start) + n = n->rb_left; + else if (offset > entry->end) + n = n->rb_right; + else + return n; + } + if (!prev_ret) + return NULL; + while(prev && offset > prev_entry->end) { + prev = rb_next(prev); + prev_entry = rb_entry(prev, struct tree_entry, rb_node); + } + *prev_ret = prev; + return NULL; +} + +static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) +{ + struct rb_node *prev; + struct rb_node *ret; + ret = __tree_search(root, offset, &prev); + if (!ret) + return prev; + return ret; +} + +static int tree_delete(struct rb_root *root, u64 offset) +{ + struct rb_node *node; + struct tree_entry *entry; + + node = __tree_search(root, offset, NULL); + if (!node) + return -ENOENT; + entry = rb_entry(node, struct tree_entry, rb_node); + entry->in_tree = 0; + rb_erase(node, root); + return 0; +} + +/* + * add_extent_mapping tries a simple backward merge with existing + * mappings. The extent_map struct passed in will be inserted into + * the tree directly (no copies made, just a reference taken). 
+ */ +int add_extent_mapping(struct extent_map_tree *tree, + struct extent_map *em) +{ + int ret = 0; + struct extent_map *prev = NULL; + struct rb_node *rb; + + write_lock_irq(&tree->lock); + rb = tree_insert(&tree->map, em->end, &em->rb_node); + if (rb) { + prev = rb_entry(rb, struct extent_map, rb_node); + printk("found extent map %Lu %Lu on insert of %Lu %Lu\n", prev->start, prev->end, em->start, em->end); + ret = -EEXIST; + goto out; + } + atomic_inc(&em->refs); + if (em->start != 0) { + rb = rb_prev(&em->rb_node); + if (rb) + prev = rb_entry(rb, struct extent_map, rb_node); + if (prev && prev->end + 1 == em->start && + ((em->block_start == 0 && prev->block_start == 0) || + (em->block_start == prev->block_end + 1))) { + em->start = prev->start; + em->block_start = prev->block_start; + rb_erase(&prev->rb_node, &tree->map); + prev->in_tree = 0; + free_extent_map(prev); + } + } +out: + write_unlock_irq(&tree->lock); + return ret; +} +EXPORT_SYMBOL(add_extent_mapping); + +/* + * lookup_extent_mapping returns the first extent_map struct in the + * tree that intersects the [start, end] (inclusive) range. There may + * be additional objects in the tree that intersect, so check the object + * returned carefully to make sure you don't need additional lookups. + */ +struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, + u64 start, u64 end) +{ + struct extent_map *em; + struct rb_node *rb_node; + + read_lock_irq(&tree->lock); + rb_node = tree_search(&tree->map, start); + if (!rb_node) { + em = NULL; + goto out; + } + if (IS_ERR(rb_node)) { + em = ERR_PTR(PTR_ERR(rb_node)); + goto out; + } + em = rb_entry(rb_node, struct extent_map, rb_node); + if (em->end < start || em->start > end) { + em = NULL; + goto out; + } + atomic_inc(&em->refs); +out: + read_unlock_irq(&tree->lock); + return em; +} +EXPORT_SYMBOL(lookup_extent_mapping); + +/* + * removes an extent_map struct from the tree. 
No reference counts are + * dropped, and no checks are done to see if the range is in use + */ +int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) +{ + int ret; + + write_lock_irq(&tree->lock); + ret = tree_delete(&tree->map, em->end); + write_unlock_irq(&tree->lock); + return ret; +} +EXPORT_SYMBOL(remove_extent_mapping); + +/* + * utility function to look for merge candidates inside a given range. + * Any extents with matching state are merged together into a single + * extent in the tree. Extents with EXTENT_IO in their state field + * are not merged because the end_io handlers need to be able to do + * operations on them without sleeping (or doing allocations/splits). + * + * This should be called with the tree lock held. + */ +static int merge_state(struct extent_map_tree *tree, + struct extent_state *state) +{ + struct extent_state *other; + struct rb_node *other_node; + + if (state->state & EXTENT_IOBITS) + return 0; + + other_node = rb_prev(&state->rb_node); + if (other_node) { + other = rb_entry(other_node, struct extent_state, rb_node); + if (other->end == state->start - 1 && + other->state == state->state) { + state->start = other->start; + other->in_tree = 0; + rb_erase(&other->rb_node, &tree->state); + free_extent_state(other); + } + } + other_node = rb_next(&state->rb_node); + if (other_node) { + other = rb_entry(other_node, struct extent_state, rb_node); + if (other->start == state->end + 1 && + other->state == state->state) { + other->start = state->start; + state->in_tree = 0; + rb_erase(&state->rb_node, &tree->state); + free_extent_state(state); + } + } + return 0; +} + +/* + * insert an extent_state struct into the tree. 'bits' are set on the + * struct before it is inserted. + * + * This may return -EEXIST if the extent is already there, in which case the + * state struct is freed. + * + * The tree lock is not taken internally. 
This is a utility function and + * probably isn't what you want to call (see set/clear_extent_bit). + */ +static int insert_state(struct extent_map_tree *tree, + struct extent_state *state, u64 start, u64 end, + int bits) +{ + struct rb_node *node; + + if (end < start) { + printk("end < start %Lu %Lu\n", end, start); + WARN_ON(1); + } + state->state |= bits; + state->start = start; + state->end = end; + if ((end & 4095) == 0) { + printk("insert state %Lu %Lu strange end\n", start, end); + WARN_ON(1); + } + node = tree_insert(&tree->state, end, &state->rb_node); + if (node) { + struct extent_state *found; + found = rb_entry(node, struct extent_state, rb_node); +printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end); + free_extent_state(state); + return -EEXIST; + } + merge_state(tree, state); + return 0; +} + +/* + * split a given extent state struct in two, inserting the preallocated + * struct 'prealloc' as the newly created second half. 'split' indicates an + * offset inside 'orig' where it should be split. + * + * Before calling, + * the tree has 'orig' at [orig->start, orig->end]. After calling, there + * are two extent state structs in the tree: + * prealloc: [orig->start, split - 1] + * orig: [ split, orig->end ] + * + * The tree locks are not taken by this function. They need to be held + * by the caller. 
+ */ +static int split_state(struct extent_map_tree *tree, struct extent_state *orig, + struct extent_state *prealloc, u64 split) +{ + struct rb_node *node; + prealloc->start = orig->start; + prealloc->end = split - 1; + prealloc->state = orig->state; + orig->start = split; + if ((prealloc->end & 4095) == 0) { + printk("insert state %Lu %Lu strange end\n", prealloc->start, + prealloc->end); + WARN_ON(1); + } + node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node); + if (node) { + struct extent_state *found; + found = rb_entry(node, struct extent_state, rb_node); +printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end); + free_extent_state(prealloc); + return -EEXIST; + } + return 0; +} + +/* + * utility function to clear some bits in an extent state struct. + * it will optionally wake up any one waiting on this state (wake == 1), or + * forcibly remove the state from the tree (delete == 1). + * + * If no bits are set on the state struct after clearing things, the + * struct is freed and removed from the tree + */ +static int clear_state_bit(struct extent_map_tree *tree, + struct extent_state *state, int bits, int wake, + int delete) +{ + int ret = state->state & bits; + state->state &= ~bits; + if (wake) + wake_up(&state->wq); + if (delete || state->state == 0) { + if (state->in_tree) { + rb_erase(&state->rb_node, &tree->state); + state->in_tree = 0; + free_extent_state(state); + } else { + WARN_ON(1); + } + } else { + merge_state(tree, state); + } + return ret; +} + +/* + * clear some bits on a range in the tree. This may require splitting + * or inserting elements in the tree, so the gfp mask is used to + * indicate which allocations or sleeping are allowed. + * + * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove + * the given range from the tree regardless of state (ie for truncate). + * + * the range [start, end] is inclusive. 
+ * + * This takes the tree lock, and returns < 0 on error, > 0 if any of the + * bits were already set, or zero if none of the bits were already set. + */ +int clear_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, + int bits, int wake, int delete, gfp_t mask) +{ + struct extent_state *state; + struct extent_state *prealloc = NULL; + struct rb_node *node; + int err; + int set = 0; + +again: + if (!prealloc && (mask & __GFP_WAIT)) { + prealloc = alloc_extent_state(mask); + if (!prealloc) + return -ENOMEM; + } + + write_lock_irq(&tree->lock); + /* + * this search will find the extents that end after + * our range starts + */ + node = tree_search(&tree->state, start); + if (!node) + goto out; + state = rb_entry(node, struct extent_state, rb_node); + if (state->start > end) + goto out; + WARN_ON(state->end < start); + + /* + * | ---- desired range ---- | + * | state | or + * | ------------- state -------------- | + * + * We need to split the extent we found, and may flip + * bits on second half. + * + * If the extent we found extends past our range, we + * just split and search again. It'll get split again + * the next time though. + * + * If the extent we found is inside our range, we clear + * the desired bit on it. 
+ */ + + if (state->start < start) { + err = split_state(tree, state, prealloc, start); + BUG_ON(err == -EEXIST); + prealloc = NULL; + if (err) + goto out; + if (state->end <= end) { + start = state->end + 1; + set |= clear_state_bit(tree, state, bits, + wake, delete); + } else { + start = state->start; + } + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | + * We need to split the extent, and clear the bit + * on the first half + */ + if (state->start <= end && state->end > end) { + err = split_state(tree, state, prealloc, end + 1); + BUG_ON(err == -EEXIST); + + if (wake) + wake_up(&state->wq); + set |= clear_state_bit(tree, prealloc, bits, + wake, delete); + prealloc = NULL; + goto out; + } + + start = state->end + 1; + set |= clear_state_bit(tree, state, bits, wake, delete); + goto search_again; + +out: + write_unlock_irq(&tree->lock); + if (prealloc) + free_extent_state(prealloc); + + return set; + +search_again: + if (start >= end) + goto out; + write_unlock_irq(&tree->lock); + if (mask & __GFP_WAIT) + cond_resched(); + goto again; +} +EXPORT_SYMBOL(clear_extent_bit); + +static int wait_on_state(struct extent_map_tree *tree, + struct extent_state *state) +{ + DEFINE_WAIT(wait); + prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE); + read_unlock_irq(&tree->lock); + schedule(); + read_lock_irq(&tree->lock); + finish_wait(&state->wq, &wait); + return 0; +} + +/* + * waits for one or more bits to clear on a range in the state tree. + * The range [start, end] is inclusive. 
+ * The tree lock is taken by this function + */ +int wait_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits) +{ + struct extent_state *state; + struct rb_node *node; + + read_lock_irq(&tree->lock); +again: + while (1) { + /* + * this search will find all the extents that end after + * our range starts + */ + node = tree_search(&tree->state, start); + if (!node) + break; + + state = rb_entry(node, struct extent_state, rb_node); + + if (state->start > end) + goto out; + + if (state->state & bits) { + start = state->start; + atomic_inc(&state->refs); + wait_on_state(tree, state); + free_extent_state(state); + goto again; + } + start = state->end + 1; + + if (start > end) + break; + + if (need_resched()) { + read_unlock_irq(&tree->lock); + cond_resched(); + read_lock_irq(&tree->lock); + } + } +out: + read_unlock_irq(&tree->lock); + return 0; +} +EXPORT_SYMBOL(wait_extent_bit); + +/* + * set some bits on a range in the tree. This may require allocations + * or sleeping, so the gfp mask is used to indicate what is allowed. + * + * If 'exclusive' == 1, this will fail with -EEXIST if some part of the + * range already has the desired bits set. The start of the existing + * range is returned in failed_start in this case. + * + * [start, end] is inclusive + * This takes the tree lock. + */ +int set_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits, + int exclusive, u64 *failed_start, gfp_t mask) +{ + struct extent_state *state; + struct extent_state *prealloc = NULL; + struct rb_node *node; + int err = 0; + int set; + u64 last_start; + u64 last_end; +again: + if (!prealloc && (mask & __GFP_WAIT)) { + prealloc = alloc_extent_state(mask); + if (!prealloc) + return -ENOMEM; + } + + write_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. 
+ */ + node = tree_search(&tree->state, start); + if (!node) { + err = insert_state(tree, prealloc, start, end, bits); + prealloc = NULL; + BUG_ON(err == -EEXIST); + goto out; + } + + state = rb_entry(node, struct extent_state, rb_node); + last_start = state->start; + last_end = state->end; + + /* + * | ---- desired range ---- | + * | state | + * + * Just lock what we found and keep going + */ + if (state->start == start && state->end <= end) { + set = state->state & bits; + if (set && exclusive) { + *failed_start = state->start; + err = -EEXIST; + goto out; + } + state->state |= bits; + start = state->end + 1; + merge_state(tree, state); + goto search_again; + } + + /* + * | ---- desired range ---- | + * | state | + * or + * | ------------- state -------------- | + * + * We need to split the extent we found, and may flip bits on + * second half. + * + * If the extent we found extends past our + * range, we just split and search again. It'll get split + * again the next time though. + * + * If the extent we found is inside our range, we set the + * desired bit on it. 
+ */ + if (state->start < start) { + set = state->state & bits; + if (exclusive && set) { + *failed_start = start; + err = -EEXIST; + goto out; + } + err = split_state(tree, state, prealloc, start); + BUG_ON(err == -EEXIST); + prealloc = NULL; + if (err) + goto out; + if (state->end <= end) { + state->state |= bits; + start = state->end + 1; + merge_state(tree, state); + } else { + start = state->start; + } + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | + * We need to split the extent, and set the bit + * on the first half + */ + if (state->start <= end && state->end > end) { + set = state->state & bits; + if (exclusive && set) { + *failed_start = start; + err = -EEXIST; + goto out; + } + err = split_state(tree, state, prealloc, end + 1); + BUG_ON(err == -EEXIST); + + prealloc->state |= bits; + merge_state(tree, prealloc); + prealloc = NULL; + goto out; + } + + /* + * | ---- desired range ---- | + * | state | or | state | + * + * There's a hole, we need to insert something in it and + * ignore the extent we found. 
+ */ + if (state->start > start) { + u64 this_end; + if (end < last_start) + this_end = end; + else + this_end = last_start -1; + err = insert_state(tree, prealloc, start, this_end, + bits); + prealloc = NULL; + BUG_ON(err == -EEXIST); + if (err) + goto out; + start = this_end + 1; + goto search_again; + } + goto search_again; + +out: + write_unlock_irq(&tree->lock); + if (prealloc) + free_extent_state(prealloc); + + return err; + +search_again: + if (start > end) + goto out; + write_unlock_irq(&tree->lock); + if (mask & __GFP_WAIT) + cond_resched(); + goto again; +} +EXPORT_SYMBOL(set_extent_bit); + +/* wrappers around set/clear extent bit */ +int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_dirty); + +int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_DIRTY, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_dirty); + +int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_new); + +int clear_extent_new(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_new); + +int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_uptodate); + +int clear_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_uptodate); + +int set_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, 
start, end, EXTENT_WRITEBACK, + 0, NULL, mask); +} +EXPORT_SYMBOL(set_extent_writeback); + +int clear_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask); +} +EXPORT_SYMBOL(clear_extent_writeback); + +int wait_on_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end) +{ + return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK); +} +EXPORT_SYMBOL(wait_on_extent_writeback); + +/* + * locks a range in ascending order, waiting for any locked regions + * it hits on the way. [start,end] are inclusive, and this will sleep. + */ +int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask) +{ + int err; + u64 failed_start; + while (1) { + err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, + &failed_start, mask); + if (err == -EEXIST && (mask & __GFP_WAIT)) { + wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); + start = failed_start; + } else { + break; + } + WARN_ON(start > end); + } + return err; +} +EXPORT_SYMBOL(lock_extent); + +int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask); +} +EXPORT_SYMBOL(unlock_extent); + +/* + * helper function to set pages and extents in the tree dirty + */ +int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(tree->mapping, index); + BUG_ON(!page); + __set_page_dirty_nobuffers(page); + page_cache_release(page); + index++; + } + set_extent_dirty(tree, start, end, GFP_NOFS); + return 0; +} +EXPORT_SYMBOL(set_range_dirty); + +/* + * helper function to set both pages and extents in the tree writeback + */ +int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end) +{ + unsigned 
long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(tree->mapping, index); + BUG_ON(!page); + set_page_writeback(page); + page_cache_release(page); + index++; + } + set_extent_writeback(tree, start, end, GFP_NOFS); + return 0; +} +EXPORT_SYMBOL(set_range_writeback); + +/* + * helper function to lock both pages and extents in the tree. + * pages must be locked first. + */ +int lock_range(struct extent_map_tree *tree, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + int err; + + while (index <= end_index) { + page = grab_cache_page(tree->mapping, index); + if (!page) { + err = -ENOMEM; + goto failed; + } + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto failed; + } + index++; + } + lock_extent(tree, start, end, GFP_NOFS); + return 0; + +failed: + /* + * we failed above in getting the page at 'index', so we undo here + * up to but not including the page at 'index' + */ + end_index = index; + index = start >> PAGE_CACHE_SHIFT; + while (index < end_index) { + page = find_get_page(tree->mapping, index); + unlock_page(page); + page_cache_release(page); + index++; + } + return err; +} +EXPORT_SYMBOL(lock_range); + +/* + * helper function to unlock both pages and extents in the tree. + */ +int unlock_range(struct extent_map_tree *tree, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(tree->mapping, index); + unlock_page(page); + page_cache_release(page); + index++; + } + unlock_extent(tree, start, end, GFP_NOFS); + return 0; +} +EXPORT_SYMBOL(unlock_range); + +/* + * searches a range in the state tree for a given mask. 
+ * If 'filled' == 1, this returns 1 only if ever extent in the tree + * has the bits set. Otherwise, 1 is returned if any bit in the + * range is found set. + */ +static int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, + int bits, int filled) +{ + struct extent_state *state = NULL; + struct rb_node *node; + int bitset = 0; + + read_lock_irq(&tree->lock); + node = tree_search(&tree->state, start); + while (node && start <= end) { + state = rb_entry(node, struct extent_state, rb_node); + if (state->start > end) + break; + + if (filled && state->start > start) { + bitset = 0; + break; + } + if (state->state & bits) { + bitset = 1; + if (!filled) + break; + } else if (filled) { + bitset = 0; + break; + } + start = state->end + 1; + if (start > end) + break; + node = rb_next(node); + } + read_unlock_irq(&tree->lock); + return bitset; +} + +/* + * helper function to set a given page up to date if all the + * extents in the tree for that page are up to date + */ +static int check_page_uptodate(struct extent_map_tree *tree, + struct page *page) +{ + u64 start = page->index << PAGE_CACHE_SHIFT; + u64 end = start + PAGE_CACHE_SIZE - 1; + if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1)) + SetPageUptodate(page); + return 0; +} + +/* + * helper function to unlock a page if all the extents in the tree + * for that page are unlocked + */ +static int check_page_locked(struct extent_map_tree *tree, + struct page *page) +{ + u64 start = page->index << PAGE_CACHE_SHIFT; + u64 end = start + PAGE_CACHE_SIZE - 1; + if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0)) + unlock_page(page); + return 0; +} + +/* + * helper function to end page writeback if all the extents + * in the tree for that page are done with writeback + */ +static int check_page_writeback(struct extent_map_tree *tree, + struct page *page) +{ + u64 start = page->index << PAGE_CACHE_SHIFT; + u64 end = start + PAGE_CACHE_SIZE - 1; + if (!test_range_bit(tree, start, end, 
EXTENT_WRITEBACK, 0)) + end_page_writeback(page); + return 0; +} + +/* lots and lots of room for performance fixes in the end_bio funcs */ + +/* + * after a writepage IO is done, we need to: + * clear the uptodate bits on error + * clear the writeback bits in the extent tree for this IO + * end_page_writeback if the page has no more pending IO + * + * Scheduling is not allowed, so the extent state tree is expected + * to have one and only one object corresponding to this IO. + */ +static int end_bio_extent_writepage(struct bio *bio, + unsigned int bytes_done, int err) +{ + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct extent_map_tree *tree = bio->bi_private; + u64 start; + u64 end; + int whole_page; + + if (bio->bi_size) + return 1; + + do { + struct page *page = bvec->bv_page; + start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; + end = start + bvec->bv_len - 1; + + if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) + whole_page = 1; + else + whole_page = 0; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + + if (!uptodate) { + clear_extent_uptodate(tree, start, end, GFP_ATOMIC); + ClearPageUptodate(page); + SetPageError(page); + } + clear_extent_writeback(tree, start, end, GFP_ATOMIC); + + if (whole_page) + end_page_writeback(page); + else + check_page_writeback(tree, page); + } while (bvec >= bio->bi_io_vec); + + bio_put(bio); + return 0; +} + +/* + * after a readpage IO is done, we need to: + * clear the uptodate bits on error + * set the uptodate bits if things worked + * set the page up to date if all extents in the tree are uptodate + * clear the lock bit in the extent tree + * unlock the page if there are no other extents locked for it + * + * Scheduling is not allowed, so the extent state tree is expected + * to have one and only one object corresponding to this IO. 
+ */ +static int end_bio_extent_readpage(struct bio *bio, + unsigned int bytes_done, int err) +{ + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct extent_map_tree *tree = bio->bi_private; + u64 start; + u64 end; + int whole_page; + + if (bio->bi_size) + return 1; + + do { + struct page *page = bvec->bv_page; + start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; + end = start + bvec->bv_len - 1; + + if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) + whole_page = 1; + else + whole_page = 0; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + + if (uptodate) { + set_extent_uptodate(tree, start, end, GFP_ATOMIC); + if (whole_page) + SetPageUptodate(page); + else + check_page_uptodate(tree, page); + } else { + ClearPageUptodate(page); + SetPageError(page); + } + + unlock_extent(tree, start, end, GFP_ATOMIC); + + if (whole_page) + unlock_page(page); + else + check_page_locked(tree, page); + } while (bvec >= bio->bi_io_vec); + + bio_put(bio); + return 0; +} + +/* + * IO done from prepare_write is pretty simple, we just unlock + * the structs in the extent tree when done, and set the uptodate bits + * as appropriate. 
+ */ +static int end_bio_extent_preparewrite(struct bio *bio, + unsigned int bytes_done, int err) +{ + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct extent_map_tree *tree = bio->bi_private; + u64 start; + u64 end; + + if (bio->bi_size) + return 1; + + do { + struct page *page = bvec->bv_page; + start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; + end = start + bvec->bv_len - 1; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + + if (uptodate) { + set_extent_uptodate(tree, start, end, GFP_ATOMIC); + } else { + ClearPageUptodate(page); + SetPageError(page); + } + + unlock_extent(tree, start, end, GFP_ATOMIC); + + } while (bvec >= bio->bi_io_vec); + + bio_put(bio); + return 0; +} + +static int submit_extent_page(int rw, struct extent_map_tree *tree, + struct page *page, sector_t sector, + size_t size, unsigned long offset, + struct block_device *bdev, + bio_end_io_t end_io_func) +{ + struct bio *bio; + int ret = 0; + + bio = bio_alloc(GFP_NOIO, 1); + + bio->bi_sector = sector; + bio->bi_bdev = bdev; + bio->bi_io_vec[0].bv_page = page; + bio->bi_io_vec[0].bv_len = size; + bio->bi_io_vec[0].bv_offset = offset; + + bio->bi_vcnt = 1; + bio->bi_idx = 0; + bio->bi_size = size; + + bio->bi_end_io = end_io_func; + bio->bi_private = tree; + + bio_get(bio); + submit_bio(rw, bio); + + if (bio_flagged(bio, BIO_EOPNOTSUPP)) + ret = -EOPNOTSUPP; + + bio_put(bio); + return ret; +} + +/* + * basic readpage implementation. 
Locked extent state structs are inserted + * into the tree that are removed when the IO is done (by the end_io + * handlers) + */ +int extent_read_full_page(struct extent_map_tree *tree, struct page *page, + get_extent_t *get_extent) +{ + struct inode *inode = page->mapping->host; + u64 start = page->index << PAGE_CACHE_SHIFT; + u64 page_end = start + PAGE_CACHE_SIZE - 1; + u64 end; + u64 cur = start; + u64 extent_offset; + u64 last_byte = i_size_read(inode); + u64 block_start; + u64 cur_end; + sector_t sector; + struct extent_map *em; + struct block_device *bdev; + int ret; + int nr = 0; + size_t page_offset = 0; + size_t iosize; + size_t blocksize = inode->i_sb->s_blocksize; + + if (!PagePrivate(page)) { + SetPagePrivate(page); + set_page_private(page, 1); + page_cache_get(page); + } + + end = page_end; + lock_extent(tree, start, end, GFP_NOFS); + + while (cur <= end) { + if (cur >= last_byte) { + iosize = PAGE_CACHE_SIZE - page_offset; + zero_user_page(page, page_offset, iosize, KM_USER0); + set_extent_uptodate(tree, cur, cur + iosize - 1, + GFP_NOFS); + unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); + break; + } + em = get_extent(inode, page, page_offset, cur, end, 0); + if (IS_ERR(em) || !em) { + SetPageError(page); + unlock_extent(tree, cur, end, GFP_NOFS); + break; + } + + extent_offset = cur - em->start; + BUG_ON(em->end < cur); + BUG_ON(end < cur); + + iosize = min(em->end - cur, end - cur) + 1; + cur_end = min(em->end, end); + iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); + sector = (em->block_start + extent_offset) >> 9; + bdev = em->bdev; + block_start = em->block_start; + free_extent_map(em); + em = NULL; + + /* we've found a hole, just zero and go on */ + if (block_start == 0) { + zero_user_page(page, page_offset, iosize, KM_USER0); + set_extent_uptodate(tree, cur, cur + iosize - 1, + GFP_NOFS); + unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); + cur = cur + iosize; + page_offset += iosize; + continue; + } + /* the get_extent 
function already copied into the page */ + if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) { + unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); + cur = cur + iosize; + page_offset += iosize; + continue; + } + + ret = submit_extent_page(READ, tree, page, + sector, iosize, page_offset, bdev, + end_bio_extent_readpage); + if (ret) + SetPageError(page); + cur = cur + iosize; + page_offset += iosize; + nr++; + } + if (!nr) { + if (!PageError(page)) + SetPageUptodate(page); + unlock_page(page); + } + return 0; +} +EXPORT_SYMBOL(extent_read_full_page); + +/* + * the writepage semantics are similar to regular writepage. extent + * records are inserted to lock ranges in the tree, and as dirty areas + * are found, they are marked writeback. Then the lock bits are removed + * and the end_io handler clears the writeback ranges + */ +int extent_write_full_page(struct extent_map_tree *tree, struct page *page, + get_extent_t *get_extent, + struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + u64 start = page->index << PAGE_CACHE_SHIFT; + u64 page_end = start + PAGE_CACHE_SIZE - 1; + u64 end; + u64 cur = start; + u64 extent_offset; + u64 last_byte = i_size_read(inode); + u64 block_start; + sector_t sector; + struct extent_map *em; + struct block_device *bdev; + int ret; + int nr = 0; + size_t page_offset = 0; + size_t iosize; + size_t blocksize; + loff_t i_size = i_size_read(inode); + unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; + + if (page->index > end_index) { + clear_extent_dirty(tree, start, page_end, GFP_NOFS); + unlock_page(page); + return 0; + } + + if (page->index == end_index) { + size_t offset = i_size & (PAGE_CACHE_SIZE - 1); + zero_user_page(page, offset, + PAGE_CACHE_SIZE - offset, KM_USER0); + } + + if (!PagePrivate(page)) { + SetPagePrivate(page); + set_page_private(page, 1); + page_cache_get(page); + } + + end = page_end; + lock_extent(tree, start, page_end, GFP_NOFS); + + if (last_byte <= start) { + 
clear_extent_dirty(tree, start, page_end, GFP_NOFS); + goto done; + } + + set_extent_uptodate(tree, start, page_end, GFP_NOFS); + blocksize = inode->i_sb->s_blocksize; + + while (cur <= end) { + if (cur >= last_byte) { + clear_extent_dirty(tree, cur, page_end, GFP_NOFS); + break; + } + em = get_extent(inode, page, page_offset, cur, end, 1); + if (IS_ERR(em) || !em) { + SetPageError(page); + break; + } + + extent_offset = cur - em->start; + BUG_ON(em->end < cur); + BUG_ON(end < cur); + iosize = min(em->end - cur, end - cur) + 1; + iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); + sector = (em->block_start + extent_offset) >> 9; + bdev = em->bdev; + block_start = em->block_start; + free_extent_map(em); + em = NULL; + + if (block_start == 0 || block_start == EXTENT_MAP_INLINE) { + clear_extent_dirty(tree, cur, + cur + iosize - 1, GFP_NOFS); + cur = cur + iosize; + page_offset += iosize; + continue; + } + + /* leave this out until we have a page_mkwrite call */ + if (0 && !test_range_bit(tree, cur, cur + iosize - 1, + EXTENT_DIRTY, 0)) { + cur = cur + iosize; + page_offset += iosize; + continue; + } + clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); + set_range_writeback(tree, cur, cur + iosize - 1); + ret = submit_extent_page(WRITE, tree, page, + sector, iosize, page_offset, bdev, + end_bio_extent_writepage); + if (ret) + SetPageError(page); + cur = cur + iosize; + page_offset += iosize; + nr++; + } +done: + WARN_ON(test_range_bit(tree, start, page_end, EXTENT_DIRTY, 0)); + unlock_extent(tree, start, page_end, GFP_NOFS); + unlock_page(page); + return 0; +} +EXPORT_SYMBOL(extent_write_full_page); + +/* + * basic invalidatepage code, this waits on any locked or writeback + * ranges corresponding to the page, and then deletes any extent state + * records from the tree + */ +int extent_invalidatepage(struct extent_map_tree *tree, + struct page *page, unsigned long offset) +{ + u64 start = (page->index << PAGE_CACHE_SHIFT); + u64 end = start + 
PAGE_CACHE_SIZE - 1; + size_t blocksize = page->mapping->host->i_sb->s_blocksize; + + start += (offset + blocksize -1) & ~(blocksize - 1); + if (start > end) + return 0; + + lock_extent(tree, start, end, GFP_NOFS); + wait_on_extent_writeback(tree, start, end); + clear_extent_bit(tree, start, end, EXTENT_LOCKED | EXTENT_DIRTY, + 1, 1, GFP_NOFS); + return 0; +} +EXPORT_SYMBOL(extent_invalidatepage); + +/* + * simple commit_write call, set_range_dirty is used to mark both + * the pages and the extent records as dirty + */ +int extent_commit_write(struct extent_map_tree *tree, + struct inode *inode, struct page *page, + unsigned from, unsigned to) +{ + loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + + if (!PagePrivate(page)) { + SetPagePrivate(page); + set_page_private(page, 1); + page_cache_get(page); + } + + set_page_dirty(page); + + if (pos > inode->i_size) { + i_size_write(inode, pos); + mark_inode_dirty(inode); + } + return 0; +} +EXPORT_SYMBOL(extent_commit_write); + +int extent_prepare_write(struct extent_map_tree *tree, + struct inode *inode, struct page *page, + unsigned from, unsigned to, get_extent_t *get_extent) +{ + u64 page_start = page->index << PAGE_CACHE_SHIFT; + u64 page_end = page_start + PAGE_CACHE_SIZE - 1; + u64 block_start; + u64 orig_block_start; + u64 block_end; + u64 cur_end; + struct extent_map *em; + unsigned blocksize = 1 << inode->i_blkbits; + size_t page_offset = 0; + size_t block_off_start; + size_t block_off_end; + int err = 0; + int iocount = 0; + int ret = 0; + int isnew; + + if (!PagePrivate(page)) { + SetPagePrivate(page); + set_page_private(page, 1); + page_cache_get(page); + } + block_start = (page_start + from) & ~((u64)blocksize - 1); + block_end = (page_start + to - 1) | (blocksize - 1); + orig_block_start = block_start; + + lock_extent(tree, page_start, page_end, GFP_NOFS); + while(block_start <= block_end) { + em = get_extent(inode, page, page_offset, block_start, + block_end, 1); + if (IS_ERR(em) || !em) { + 
goto err; + } + cur_end = min(block_end, em->end); + block_off_start = block_start & (PAGE_CACHE_SIZE - 1); + block_off_end = block_off_start + blocksize; + isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS); + + if (!PageUptodate(page) && isnew && + (block_off_end > to || block_off_start < from)) { + void *kaddr; + + kaddr = kmap_atomic(page, KM_USER0); + if (block_off_end > to) + memset(kaddr + to, 0, block_off_end - to); + if (block_off_start < from) + memset(kaddr + block_off_start, 0, + from - block_off_start); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + } + if (!isnew && !PageUptodate(page) && + (block_off_end > to || block_off_start < from) && + !test_range_bit(tree, block_start, cur_end, + EXTENT_UPTODATE, 1)) { + u64 sector; + u64 extent_offset = block_start - em->start; + size_t iosize; + sector = (em->block_start + extent_offset) >> 9; + iosize = (cur_end - block_start + blocksize - 1) & + ~((u64)blocksize - 1); + /* + * we've already got the extent locked, but we + * need to split the state such that our end_bio + * handler can clear the lock. + */ + set_extent_bit(tree, block_start, + block_start + iosize - 1, + EXTENT_LOCKED, 0, NULL, GFP_NOFS); + ret = submit_extent_page(READ, tree, page, + sector, iosize, page_offset, em->bdev, + end_bio_extent_preparewrite); + iocount++; + block_start = block_start + iosize; + } else { + set_extent_uptodate(tree, block_start, cur_end, + GFP_NOFS); + unlock_extent(tree, block_start, cur_end, GFP_NOFS); + block_start = cur_end + 1; + } + page_offset = block_start & (PAGE_CACHE_SIZE - 1); + free_extent_map(em); + } + if (iocount) { + wait_extent_bit(tree, orig_block_start, + block_end, EXTENT_LOCKED); + } + check_page_uptodate(tree, page); +err: + /* FIXME, zero out newly allocated blocks on error */ + return err; +} +EXPORT_SYMBOL(extent_prepare_write); + +/* + * a helper for releasepage. 
As long as there are no locked extents + * in the range corresponding to the page, both state records and extent + * map records are removed + */ +int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page) +{ + struct extent_map *em; + u64 start = page->index << PAGE_CACHE_SHIFT; + u64 end = start + PAGE_CACHE_SIZE - 1; + u64 orig_start = start; + + while (start <= end) { + em = lookup_extent_mapping(tree, start, end); + if (!em || IS_ERR(em)) + break; + if (test_range_bit(tree, em->start, em->end, + EXTENT_LOCKED, 0)) { + free_extent_map(em); + start = em->end + 1; +printk("range still locked %Lu %Lu\n", em->start, em->end); + break; + } + remove_extent_mapping(tree, em); + start = em->end + 1; + /* once for the rb tree */ + free_extent_map(em); + /* once for us */ + free_extent_map(em); + } + WARN_ON(test_range_bit(tree, orig_start, end, EXTENT_WRITEBACK, 0)); + clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE, + 1, 1, GFP_NOFS); + return 1; +} +EXPORT_SYMBOL(try_release_extent_mapping); + -- cgit v1.2.3-70-g09d2 From b888db2bd7b67f190b32934e6a86181f262ac3ec Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 27 Aug 2007 16:49:44 -0400 Subject: Btrfs: Add delayed allocation to the extent based page tree code Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 9 --- fs/btrfs/extent_map.c | 147 +++++++++++++++++++++++++++++++++++++------------ fs/btrfs/extent_map.h | 3 + fs/btrfs/file.c | 50 +++++++---------- fs/btrfs/inode.c | 141 ++++++++++++++++++++++++++--------------------- fs/btrfs/super.c | 1 + 6 files changed, 216 insertions(+), 135 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4049aadbeda9..c31e84d42653 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -39,7 +39,6 @@ static int cache_block_group(struct btrfs_root *root, u64 i; u64 last = 0; u64 hole_size; - u64 limit; int found = 0; root = root->fs_info->extent_root; @@ -62,7 +61,6 @@ 
static int cache_block_group(struct btrfs_root *root, return ret; if (ret && path->slots[0] > 0) path->slots[0]--; - limit = block_group->key.objectid + block_group->key.offset; while(1) { leaf = btrfs_buffer_leaf(path->nodes[0]); slot = path->slots[0]; @@ -982,7 +980,6 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_block_group_cache *block_group; int full_scan = 0; int wrapped = 0; - u64 limit; WARN_ON(num_blocks < 1); ins->flags = 0; @@ -1049,12 +1046,6 @@ check_failed: l = btrfs_buffer_leaf(path->nodes[0]); slot = path->slots[0]; if (slot >= btrfs_header_nritems(&l->header)) { - if (start_found) - limit = last_block + - (block_group->key.offset >> 1); - else - limit = search_start + - (block_group->key.offset >> 1); ret = btrfs_next_leaf(root, path); if (ret == 0) continue; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index d378edf0964e..a9c7419615b9 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -30,9 +30,6 @@ struct tree_entry { #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) -static LIST_HEAD(all_states); -spinlock_t state_lock = SPIN_LOCK_UNLOCKED; - void __init extent_map_init(void) { extent_map_cache = kmem_cache_create("extent_map", @@ -49,15 +46,6 @@ void __init extent_map_init(void) void __exit extent_map_exit(void) { - while(!list_empty(&all_states)) { - struct extent_state *state; - struct list_head *cur = all_states.next; - state = list_entry(cur, struct extent_state, list); - printk("found leaked state %Lu %Lu state %d in_tree %d\n", - state->start, state->end, state->state, state->in_tree); - list_del(&state->list); - kfree(state); - } if (extent_map_cache) kmem_cache_destroy(extent_map_cache); if (extent_state_cache) @@ -69,6 +57,7 @@ void extent_map_tree_init(struct extent_map_tree *tree, { tree->map.rb_node = NULL; tree->state.rb_node = NULL; + tree->fill_delalloc = NULL; rwlock_init(&tree->lock); tree->mapping = mapping; } @@ -106,9 +95,6 @@ struct 
extent_state *alloc_extent_state(gfp_t mask) state->in_tree = 0; atomic_set(&state->refs, 1); init_waitqueue_head(&state->wq); - spin_lock_irq(&state_lock); - list_add(&state->list, &all_states); - spin_unlock_irq(&state_lock); return state; } EXPORT_SYMBOL(alloc_extent_state); @@ -117,9 +103,6 @@ void free_extent_state(struct extent_state *state) { if (atomic_dec_and_test(&state->refs)) { WARN_ON(state->in_tree); - spin_lock_irq(&state_lock); - list_del_init(&state->list); - spin_unlock_irq(&state_lock); kmem_cache_free(extent_state_cache, state); } } @@ -369,7 +352,7 @@ static int insert_state(struct extent_map_tree *tree, if (node) { struct extent_state *found; found = rb_entry(node, struct extent_state, rb_node); -printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end); + printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end); free_extent_state(state); return -EEXIST; } @@ -408,7 +391,7 @@ static int split_state(struct extent_map_tree *tree, struct extent_state *orig, if (node) { struct extent_state *found; found = rb_entry(node, struct extent_state, rb_node); -printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end); + printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end); free_extent_state(prealloc); return -EEXIST; } @@ -792,10 +775,20 @@ int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, } EXPORT_SYMBOL(set_extent_dirty); +int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, + EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_delalloc); + int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask) { - return clear_extent_bit(tree, start, end, EXTENT_DIRTY, 0, 0, mask); + return clear_extent_bit(tree, start, end, + EXTENT_DIRTY | 
EXTENT_DELALLOC, 0, 0, mask); } EXPORT_SYMBOL(clear_extent_dirty); @@ -922,6 +915,62 @@ int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end) } EXPORT_SYMBOL(set_range_writeback); +u64 find_lock_delalloc_range(struct extent_map_tree *tree, + u64 start, u64 lock_start, u64 *end, u64 max_bytes) +{ + struct rb_node *node; + struct extent_state *state; + u64 cur_start = start; + u64 found = 0; + u64 total_bytes = 0; + + write_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ +search_again: + node = tree_search(&tree->state, cur_start); + if (!node || IS_ERR(node)) { + goto out; + } + + while(1) { + state = rb_entry(node, struct extent_state, rb_node); + if (state->start != cur_start) { + goto out; + } + if (!(state->state & EXTENT_DELALLOC)) { + goto out; + } + if (state->start >= lock_start) { + if (state->state & EXTENT_LOCKED) { + DEFINE_WAIT(wait); + atomic_inc(&state->refs); + write_unlock_irq(&tree->lock); + schedule(); + write_lock_irq(&tree->lock); + finish_wait(&state->wq, &wait); + free_extent_state(state); + goto search_again; + } + state->state |= EXTENT_LOCKED; + } + found++; + *end = state->end; + cur_start = state->end + 1; + node = rb_next(node); + if (!node) + break; + total_bytes = state->end - state->start + 1; + if (total_bytes >= max_bytes) + break; + } +out: + write_unlock_irq(&tree->lock); + return found; +} + /* * helper function to lock both pages and extents in the tree. * pages must be locked first. 
@@ -1285,6 +1334,7 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page, if (!PagePrivate(page)) { SetPagePrivate(page); set_page_private(page, 1); + WARN_ON(!page->mapping->a_ops->invalidatepage); page_cache_get(page); } @@ -1384,7 +1434,10 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, size_t blocksize; loff_t i_size = i_size_read(inode); unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; + u64 nr_delalloc; + u64 delalloc_end; + WARN_ON(!PageLocked(page)); if (page->index > end_index) { clear_extent_dirty(tree, start, page_end, GFP_NOFS); unlock_page(page); @@ -1400,11 +1453,34 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, if (!PagePrivate(page)) { SetPagePrivate(page); set_page_private(page, 1); + WARN_ON(!page->mapping->a_ops->invalidatepage); page_cache_get(page); } - end = page_end; lock_extent(tree, start, page_end, GFP_NOFS); + nr_delalloc = find_lock_delalloc_range(tree, start, page_end + 1, + &delalloc_end, + 128 * 1024 * 1024); + if (nr_delalloc) { + tree->fill_delalloc(inode, start, delalloc_end); + if (delalloc_end >= page_end + 1) { + clear_extent_bit(tree, page_end + 1, delalloc_end, + EXTENT_LOCKED | EXTENT_DELALLOC, + 1, 0, GFP_NOFS); + } + clear_extent_bit(tree, start, page_end, EXTENT_DELALLOC, + 0, 0, GFP_NOFS); + if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { + printk("found delalloc bits after clear extent_bit\n"); + } + } else if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { + printk("found delalloc bits after find_delalloc_range returns 0\n"); + } + + end = page_end; + if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { + printk("found delalloc bits after lock_extent\n"); + } if (last_byte <= start) { clear_extent_dirty(tree, start, page_end, GFP_NOFS); @@ -1419,7 +1495,7 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, clear_extent_dirty(tree, cur, page_end, GFP_NOFS); 
break; } - em = get_extent(inode, page, page_offset, cur, end, 1); + em = get_extent(inode, page, page_offset, cur, end, 0); if (IS_ERR(em) || !em) { SetPageError(page); break; @@ -1507,6 +1583,7 @@ int extent_commit_write(struct extent_map_tree *tree, if (!PagePrivate(page)) { SetPagePrivate(page); set_page_private(page, 1); + WARN_ON(!page->mapping->a_ops->invalidatepage); page_cache_get(page); } @@ -1543,6 +1620,7 @@ int extent_prepare_write(struct extent_map_tree *tree, if (!PagePrivate(page)) { SetPagePrivate(page); set_page_private(page, 1); + WARN_ON(!page->mapping->a_ops->invalidatepage); page_cache_get(page); } block_start = (page_start + from) & ~((u64)blocksize - 1); @@ -1628,29 +1706,28 @@ int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page) u64 start = page->index << PAGE_CACHE_SHIFT; u64 end = start + PAGE_CACHE_SIZE - 1; u64 orig_start = start; + int ret = 1; while (start <= end) { em = lookup_extent_mapping(tree, start, end); if (!em || IS_ERR(em)) break; - if (test_range_bit(tree, em->start, em->end, - EXTENT_LOCKED, 0)) { + if (!test_range_bit(tree, em->start, em->end, + EXTENT_LOCKED, 0)) { + remove_extent_mapping(tree, em); + /* once for the rb tree */ free_extent_map(em); - start = em->end + 1; -printk("range still locked %Lu %Lu\n", em->start, em->end); - break; } - remove_extent_mapping(tree, em); start = em->end + 1; - /* once for the rb tree */ - free_extent_map(em); /* once for us */ free_extent_map(em); } - WARN_ON(test_range_bit(tree, orig_start, end, EXTENT_WRITEBACK, 0)); - clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE, - 1, 1, GFP_NOFS); - return 1; + if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0)) + ret = 0; + else + clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE, + 1, 1, GFP_NOFS); + return ret; } EXPORT_SYMBOL(try_release_extent_mapping); diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 108944aab4bd..e91a2e9619ee 100644 --- a/fs/btrfs/extent_map.h +++ 
b/fs/btrfs/extent_map.h @@ -11,6 +11,7 @@ struct extent_map_tree { struct rb_root state; struct address_space *mapping; rwlock_t lock; + int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); }; /* note, this must start with the same fields as fs/extent_map.c:tree_entry */ @@ -74,6 +75,8 @@ int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); +int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask); int extent_invalidatepage(struct extent_map_tree *tree, struct page *page, unsigned long offset); int extent_write_full_page(struct extent_map_tree *tree, struct page *page, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 71a481894ab6..d3d39e4a2797 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -143,7 +143,6 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, struct inode *inode = file->f_path.dentry->d_inode; struct extent_map *em; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - struct btrfs_key ins; u64 hint_block; u64 num_blocks; u64 start_pos; @@ -162,6 +161,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, inode->i_blkbits; end_of_last_block = start_pos + (num_blocks << inode->i_blkbits) - 1; + lock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); if (!trans) { @@ -179,16 +179,6 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, /* FIXME...EIEIO, ENOSPC and more */ - /* step one, delete the existing extents in this range */ - /* FIXME blocksize != pagesize */ - if (start_pos < inode->i_size) { - err = btrfs_drop_extents(trans, root, inode, - start_pos, (pos + write_bytes + root->blocksize -1) & - ~((u64)root->blocksize - 1), &hint_block); - if (err) - goto failed; - } - /* insert any holes we need to create */ if (inode->i_size < 
start_pos) { u64 last_pos_in_file; @@ -213,29 +203,28 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, */ if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { - err = btrfs_alloc_extent(trans, root, inode->i_ino, - num_blocks, 0, hint_block, (u64)-1, - &ins, 1); - BUG_ON(err); - err = btrfs_insert_file_extent(trans, root, inode->i_ino, - start_pos, ins.objectid, ins.offset, - ins.offset); - BUG_ON(err); - em->start = start_pos; - em->end = end_of_last_block; - em->block_start = ins.objectid << inode->i_blkbits; - em->block_end = em->block_start + - (ins.offset << inode->i_blkbits) - 1; - set_extent_dirty(em_tree, start_pos, end_of_last_block, - GFP_NOFS); - err = add_extent_mapping(em_tree, em); + u64 last_end; for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; SetPageUptodate(p); - __set_page_dirty_nobuffers(p); + set_page_dirty(p); } + last_end = pages[num_pages -1]->index << PAGE_CACHE_SHIFT; + last_end += PAGE_CACHE_SIZE - 1; + set_extent_delalloc(em_tree, start_pos, end_of_last_block, + GFP_NOFS); } else { struct page *p = pages[0]; + /* step one, delete the existing extents in this range */ + /* FIXME blocksize != pagesize */ + if (start_pos < inode->i_size) { + err = btrfs_drop_extents(trans, root, inode, start_pos, + (pos + write_bytes + root->blocksize -1) & + ~((u64)root->blocksize - 1), &hint_block); + if (err) + goto failed; + } + err = insert_inline_extent(trans, root, inode, start_pos, end_pos - start_pos, p, 0); BUG_ON(err); @@ -253,6 +242,7 @@ failed: err = btrfs_end_transaction(trans, root); out_unlock: mutex_unlock(&root->fs_info->fs_mutex); + unlock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); free_extent_map(em); return err; } @@ -531,7 +521,7 @@ static int prepare_pages(struct btrfs_root *root, u64 num_blocks; u64 start_pos; - start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); + start_pos = pos & ~((u64)root->blocksize - 
1); num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> inode->i_blkbits; @@ -548,8 +538,10 @@ static int prepare_pages(struct btrfs_root *root, if (!PagePrivate(pages[i])) { SetPagePrivate(pages[i]); set_page_private(pages[i], 1); + WARN_ON(!pages[i]->mapping->a_ops->invalidatepage); page_cache_get(pages[i]); } + WARN_ON(!PageLocked(pages[i])); } return 0; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fd4cb65fcf1f..dc181089aa74 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -70,6 +70,40 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, }; +static int run_delalloc_range(struct inode *inode, u64 start, u64 end) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + struct btrfs_key ins; + u64 alloc_hint = 0; + u64 num_blocks; + int ret; + u64 blocksize = 1 << inode->i_blkbits; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + BUG_ON(!trans); + num_blocks = (end - start + blocksize) & ~(blocksize - 1); + ret = btrfs_drop_extents(trans, root, inode, + start, start + num_blocks, &alloc_hint); + num_blocks = num_blocks >> inode->i_blkbits; + ret = btrfs_alloc_extent(trans, root, inode->i_ino, num_blocks, 0, + alloc_hint, (u64)-1, &ins, 1); + if (ret) { + WARN_ON(1); + goto out; + } + ret = btrfs_insert_file_extent(trans, root, inode->i_ino, + start, ins.objectid, ins.offset, + ins.offset); +out: + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + + void btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path *path; @@ -121,8 +155,7 @@ void btrfs_read_locked_inode(struct inode *inode) switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_mapping->a_ops = &btrfs_aops; - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, - inode->i_mapping, GFP_NOFS); + BTRFS_I(inode)->extent_tree.fill_delalloc = 
run_delalloc_range; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; break; @@ -450,11 +483,17 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, BUG_ON(ret); } if (item_end < inode->i_size) { - if (found_type) { - btrfs_set_key_type(&key, found_type - 1); - continue; + if (found_type == BTRFS_DIR_ITEM_KEY) { + found_type = BTRFS_INODE_ITEM_KEY; + } else if (found_type == BTRFS_EXTENT_ITEM_KEY) { + found_type = BTRFS_CSUM_ITEM_KEY; + } else if (found_type) { + found_type--; + } else { + break; } - break; + btrfs_set_key_type(&key, found_type - 1); + continue; } if (btrfs_disk_key_offset(found_key) >= inode->i_size) del_item = 1; @@ -514,47 +553,34 @@ error: return ret; } -static int btrfs_cow_one_page(struct btrfs_trans_handle *trans, - struct inode *inode, struct page *page, +static int btrfs_cow_one_page(struct inode *inode, struct page *page, size_t zero_start) { char *kaddr; int ret = 0; - struct btrfs_root *root = BTRFS_I(inode)->root; - u64 alloc_hint = 0; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; u64 page_start = page->index << PAGE_CACHE_SHIFT; - struct btrfs_key ins; + u64 page_end = page_start + PAGE_CACHE_SIZE - 1; if (!PagePrivate(page)) { SetPagePrivate(page); set_page_private(page, 1); + WARN_ON(!page->mapping->a_ops->invalidatepage); page_cache_get(page); } - btrfs_set_trans_block_group(trans, inode); - - ret = btrfs_drop_extents(trans, root, inode, - page_start, page_start + PAGE_CACHE_SIZE, - &alloc_hint); - if (ret) - goto out; - ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, 0, - alloc_hint, (u64)-1, &ins, 1); - if (ret) - goto out; - ret = btrfs_insert_file_extent(trans, root, inode->i_ino, - page_start, ins.objectid, 1, 1); - if (ret) - goto out; - SetPageChecked(page); - kaddr = kmap(page); + lock_extent(em_tree, page_start, page_end, GFP_NOFS); + set_extent_delalloc(&BTRFS_I(inode)->extent_tree, page_start, + page_end, GFP_NOFS); if (zero_start != 
PAGE_CACHE_SIZE) { + kaddr = kmap(page); memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start); flush_dcache_page(page); + kunmap(page); } - kunmap(page); + set_page_dirty(page); + unlock_extent(em_tree, page_start, page_end, GFP_NOFS); -out: return ret; } @@ -565,8 +591,6 @@ out: static int btrfs_truncate_page(struct address_space *mapping, loff_t from) { struct inode *inode = mapping->host; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; unsigned blocksize = 1 << inode->i_blkbits; pgoff_t index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); @@ -591,21 +615,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) } page_start = page->index << PAGE_CACHE_SHIFT; - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - ret = btrfs_cow_one_page(trans, inode, page, offset); - if (!ret) { - char *kaddr = kmap(page); - ret = btrfs_csum_file_block(trans, root, inode->i_ino, - page_start, kaddr, PAGE_CACHE_SIZE); - kunmap(page); - } - set_extent_dirty(&BTRFS_I(inode)->extent_tree, - page_start, page_start + PAGE_CACHE_SIZE - 1, - GFP_NOFS); - set_page_dirty(page); - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); + ret = btrfs_cow_one_page(inode, page, offset); unlock_page(page); page_cache_release(page); @@ -757,6 +767,8 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) struct btrfs_iget_args *args = p; inode->i_ino = args->ino; BTRFS_I(inode)->root = args->root; + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + inode->i_mapping, GFP_NOFS); return 0; } @@ -968,7 +980,10 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, if (!inode) return ERR_PTR(-ENOMEM); + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + inode->i_mapping, GFP_NOFS); BTRFS_I(inode)->root = root; + if (mode & S_IFDIR) owner = 0; else @@ -1128,6 +1143,7 @@ static int btrfs_create(struct inode *dir, struct 
dentry *dentry, inode->i_op = &btrfs_file_inode_operations; extent_map_tree_init(&BTRFS_I(inode)->extent_tree, inode->i_mapping, GFP_NOFS); + BTRFS_I(inode)->extent_tree.fill_delalloc = run_delalloc_range; } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); @@ -1344,9 +1360,11 @@ again: extent_end = extent_start + (btrfs_file_extent_num_blocks(item) << inode->i_blkbits); err = 0; - if (start < extent_start || start > extent_end) { + if (start < extent_start || start >= extent_end) { em->start = start; if (start < extent_start) { + if (end < extent_start) + goto not_found; em->end = extent_end - 1; } else { em->end = end; @@ -1375,9 +1393,11 @@ again: size = btrfs_file_extent_inline_len(leaf->items + path->slots[0]); extent_end = extent_start + size; - if (start < extent_start || start > extent_end) { + if (start < extent_start || start >= extent_end) { em->start = start; if (start < extent_start) { + if (end < extent_start) + goto not_found; em->end = extent_end - 1; } else { em->end = end; @@ -1412,8 +1432,7 @@ not_found_em: insert: btrfs_release_path(root, path); if (em->start > start || em->end < start) { - printk("bad extent! %Lu %Lu start %Lu end %Lu\n", em->start, em->end, start, end); - WARN_ON(1); + printk("bad extent! 
em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->end, start, end); err = -EIO; goto out; } @@ -1648,6 +1667,13 @@ int btrfs_readpage(struct file *file, struct page *page) static int btrfs_writepage(struct page *page, struct writeback_control *wbc) { struct extent_map_tree *tree; + + + if (current->flags & PF_MEMALLOC) { + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return 0; + } tree = &BTRFS_I(page->mapping->host)->extent_tree; return extent_write_full_page(tree, page, btrfs_get_extent, wbc); } @@ -1698,8 +1724,6 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) { struct inode *inode = vma->vm_file->f_path.dentry->d_inode; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; unsigned long end; loff_t size; int ret = -EINVAL; @@ -1722,15 +1746,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) else end = PAGE_CACHE_SIZE; - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - ret = btrfs_cow_one_page(trans, inode, page, end); - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - set_extent_dirty(&BTRFS_I(inode)->extent_tree, - page_start, page_start + PAGE_CACHE_SIZE - 1, - GFP_NOFS); - set_page_dirty(page); + ret = btrfs_cow_one_page(inode, page, end); out_unlock: unlock_page(page); @@ -2264,6 +2280,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_file_inode_operations; extent_map_tree_init(&BTRFS_I(inode)->extent_tree, inode->i_mapping, GFP_NOFS); + BTRFS_I(inode)->extent_tree.fill_delalloc = run_delalloc_range; } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 7b7c32c73a2c..7da95538745f 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -83,6 +83,7 @@ static int btrfs_fill_super(struct super_block * sb, void * 
data, int silent) bi->location.offset = 0; bi->location.flags = 0; bi->root = tree_root; + btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY); if (!inode) { -- cgit v1.2.3-70-g09d2 From 07157aacb1ecd394a5494910b5f80d7e7d407d37 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 30 Aug 2007 08:50:51 -0400 Subject: Btrfs: Add file data csums back in via hooks in the extent map code Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 94 ++++++++++++++++++++++++++++++++++++++++++------ fs/btrfs/extent_map.h | 15 +++++++- fs/btrfs/inode.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 193 insertions(+), 14 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index a9c7419615b9..6053f9db0739 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -57,7 +57,7 @@ void extent_map_tree_init(struct extent_map_tree *tree, { tree->map.rb_node = NULL; tree->state.rb_node = NULL; - tree->fill_delalloc = NULL; + tree->ops = NULL; rwlock_init(&tree->lock); tree->mapping = mapping; } @@ -93,6 +93,7 @@ struct extent_state *alloc_extent_state(gfp_t mask) return state; state->state = 0; state->in_tree = 0; + state->private = 0; atomic_set(&state->refs, 1); init_waitqueue_head(&state->wq); return state; @@ -1034,6 +1035,61 @@ int unlock_range(struct extent_map_tree *tree, u64 start, u64 end) } EXPORT_SYMBOL(unlock_range); +int set_state_private(struct extent_map_tree *tree, u64 start, u64 private) +{ + struct rb_node *node; + struct extent_state *state; + int ret = 0; + + write_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. 
+ */ + node = tree_search(&tree->state, start); + if (!node || IS_ERR(node)) { + ret = -ENOENT; + goto out; + } + state = rb_entry(node, struct extent_state, rb_node); + if (state->start != start) { + ret = -ENOENT; + goto out; + } + state->private = private; +out: + write_unlock_irq(&tree->lock); + return ret; + +} + +int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private) +{ + struct rb_node *node; + struct extent_state *state; + int ret = 0; + + read_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(&tree->state, start); + if (!node || IS_ERR(node)) { + ret = -ENOENT; + goto out; + } + state = rb_entry(node, struct extent_state, rb_node); + if (state->start != start) { + ret = -ENOENT; + goto out; + } + *private = state->private; +out: + read_unlock_irq(&tree->lock); + return ret; +} + /* * searches a range in the state tree for a given mask. * If 'filled' == 1, this returns 1 only if ever extent in the tree @@ -1185,12 +1241,13 @@ static int end_bio_extent_writepage(struct bio *bio, static int end_bio_extent_readpage(struct bio *bio, unsigned int bytes_done, int err) { - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; struct extent_map_tree *tree = bio->bi_private; u64 start; u64 end; int whole_page; + int ret; if (bio->bi_size) return 1; @@ -1208,6 +1265,11 @@ static int end_bio_extent_readpage(struct bio *bio, if (--bvec >= bio->bi_io_vec) prefetchw(&bvec->bv_page->flags); + if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { + ret = tree->ops->readpage_end_io_hook(page, start, end); + if (ret) + uptodate = 0; + } if (uptodate) { set_extent_uptodate(tree, start, end, GFP_ATOMIC); if (whole_page) @@ -1388,9 +1450,16 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page, continue; } - ret = 
submit_extent_page(READ, tree, page, - sector, iosize, page_offset, bdev, - end_bio_extent_readpage); + ret = 0; + if (tree->ops && tree->ops->readpage_io_hook) { + ret = tree->ops->readpage_io_hook(page, cur, + cur + iosize - 1); + } + if (!ret) { + ret = submit_extent_page(READ, tree, page, + sector, iosize, page_offset, + bdev, end_bio_extent_readpage); + } if (ret) SetPageError(page); cur = cur + iosize; @@ -1462,7 +1531,7 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, &delalloc_end, 128 * 1024 * 1024); if (nr_delalloc) { - tree->fill_delalloc(inode, start, delalloc_end); + tree->ops->fill_delalloc(inode, start, delalloc_end); if (delalloc_end >= page_end + 1) { clear_extent_bit(tree, page_end + 1, delalloc_end, EXTENT_LOCKED | EXTENT_DELALLOC, @@ -1528,12 +1597,17 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, continue; } clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); - set_range_writeback(tree, cur, cur + iosize - 1); - ret = submit_extent_page(WRITE, tree, page, - sector, iosize, page_offset, bdev, - end_bio_extent_writepage); + ret = tree->ops->writepage_io_hook(page, cur, cur + iosize - 1); if (ret) SetPageError(page); + else { + set_range_writeback(tree, cur, cur + iosize - 1); + ret = submit_extent_page(WRITE, tree, page, sector, + iosize, page_offset, bdev, + end_bio_extent_writepage); + if (ret) + SetPageError(page); + } cur = cur + iosize; page_offset += iosize; nr++; diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index e91a2e9619ee..0eae6c420acb 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -6,12 +6,19 @@ #define EXTENT_MAP_INLINE (u64)-2 #define EXTENT_MAP_DELALLOC (u64)-1 +struct extent_map_ops { + int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); + int (*writepage_io_hook)(struct page *page, u64 start, u64 end); + int (*readpage_io_hook)(struct page *page, u64 start, u64 end); + int (*readpage_end_io_hook)(struct page *page, u64 
start, u64 end); +}; + struct extent_map_tree { struct rb_root map; struct rb_root state; struct address_space *mapping; rwlock_t lock; - int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); + struct extent_map_ops *ops; }; /* note, this must start with the same fields as fs/extent_map.c:tree_entry */ @@ -36,6 +43,10 @@ struct extent_state { wait_queue_head_t wq; atomic_t refs; unsigned long state; + + /* for use by the FS */ + u64 private; + struct list_head list; }; @@ -89,4 +100,6 @@ int extent_commit_write(struct extent_map_tree *tree, struct inode *inode, struct page *page, unsigned from, unsigned to); int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end); +int set_state_private(struct extent_map_tree *tree, u64 start, u64 private); +int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private); #endif diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 483eea69b0a6..3ee6b2fadf58 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -52,6 +52,7 @@ static struct inode_operations btrfs_file_inode_operations; static struct address_space_operations btrfs_aops; static struct address_space_operations btrfs_symlink_aops; static struct file_operations btrfs_dir_file_operations; +static struct extent_map_ops btrfs_extent_map_ops; static struct kmem_cache *btrfs_inode_cachep; struct kmem_cache *btrfs_trans_handle_cachep; @@ -103,6 +104,90 @@ out: return ret; } +int btrfs_writepage_io_hook(struct page *page, u64 start, u64 end) +{ + struct inode *inode = page->mapping->host; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + char *kaddr; + int ret; + u64 page_start = page->index << PAGE_CACHE_SHIFT; + size_t offset = start - page_start; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + kaddr = kmap(page); + btrfs_csum_file_block(trans, root, inode->i_ino, + start, kaddr + offset, end - start + 1); + 
kunmap(page); + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + +int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) +{ + int ret = 0; + struct inode *inode = page->mapping->host; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct btrfs_csum_item *item; + struct btrfs_path *path = NULL; + u64 private; + + mutex_lock(&root->fs_info->fs_mutex); + path = btrfs_alloc_path(); + item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0); + if (IS_ERR(item)) { + ret = PTR_ERR(item); + /* a csum that isn't present is a preallocated region. */ + if (ret == -ENOENT || ret == -EFBIG) + ret = 0; + private = 0; + goto out; + } + memcpy((char *)&private, &item->csum, BTRFS_CRC32_SIZE); + set_state_private(em_tree, start, private); +out: + if (path) + btrfs_free_path(path); + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + +int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) +{ + char csum[BTRFS_CRC32_SIZE]; + size_t offset = start - (page->index << PAGE_CACHE_SHIFT); + struct inode *inode = page->mapping->host; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + char *kaddr; + u64 private; + int ret; + + ret = get_state_private(em_tree, start, &private); + kaddr = kmap_atomic(page, KM_IRQ0); + if (ret) { + goto zeroit; + } + ret = btrfs_csum_data(root, kaddr + offset, end - start + 1, csum); + BUG_ON(ret); + if (memcmp(csum, &private, BTRFS_CRC32_SIZE)) { + goto zeroit; + } + kunmap_atomic(kaddr, KM_IRQ0); + return 0; + +zeroit: + printk("btrfs csum failed ino %lu off %llu\n", + page->mapping->host->i_ino, (unsigned long long)start); + memset(kaddr + offset, 1, end - start + 1); flush_dcache_page(page); + kunmap_atomic(kaddr, KM_IRQ0); + return 0; +} void btrfs_read_locked_inode(struct inode *inode) { @@ -155,7 +240,7 @@ 
void btrfs_read_locked_inode(struct inode *inode) switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_mapping->a_ops = &btrfs_aops; - BTRFS_I(inode)->extent_tree.fill_delalloc = run_delalloc_range; + BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; break; @@ -1148,7 +1233,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_file_inode_operations; extent_map_tree_init(&BTRFS_I(inode)->extent_tree, inode->i_mapping, GFP_NOFS); - BTRFS_I(inode)->extent_tree.fill_delalloc = run_delalloc_range; + BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops; } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); @@ -2286,7 +2371,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_file_inode_operations; extent_map_tree_init(&BTRFS_I(inode)->extent_tree, inode->i_mapping, GFP_NOFS); - BTRFS_I(inode)->extent_tree.fill_delalloc = run_delalloc_range; + BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops; } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); @@ -2362,6 +2447,13 @@ static struct file_operations btrfs_dir_file_operations = { #endif }; +static struct extent_map_ops btrfs_extent_map_ops = { + .fill_delalloc = run_delalloc_range, + .writepage_io_hook = btrfs_writepage_io_hook, + .readpage_io_hook = btrfs_readpage_io_hook, + .readpage_end_io_hook = btrfs_readpage_end_io_hook, +}; + static struct address_space_operations btrfs_aops = { .readpage = btrfs_readpage, .writepage = btrfs_writepage, -- cgit v1.2.3-70-g09d2 From 2bf5a725a3b82efeaf7b292c085e69a9388a89ea Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 30 Aug 2007 11:54:02 -0400 Subject: Btrfs: fsx delalloc fixes Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 9 ++- fs/btrfs/file.c | 26 +++++--- fs/btrfs/inode.c | 177 ++++---------------------------------------------- 3 files changed, 38 
insertions(+), 174 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 6053f9db0739..8ad6f8efc5a0 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -77,6 +77,8 @@ EXPORT_SYMBOL(alloc_extent_map); void free_extent_map(struct extent_map *em) { + if (!em) + return; if (atomic_dec_and_test(&em->refs)) { WARN_ON(em->in_tree); kmem_cache_free(extent_map_cache, em); @@ -102,6 +104,8 @@ EXPORT_SYMBOL(alloc_extent_state); void free_extent_state(struct extent_state *state) { + if (!state) + return; if (atomic_dec_and_test(&state->refs)) { WARN_ON(state->in_tree); kmem_cache_free(extent_state_cache, state); @@ -1395,8 +1399,8 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page, if (!PagePrivate(page)) { SetPagePrivate(page); - set_page_private(page, 1); WARN_ON(!page->mapping->a_ops->invalidatepage); + set_page_private(page, 1); page_cache_get(page); } @@ -1638,7 +1642,8 @@ int extent_invalidatepage(struct extent_map_tree *tree, lock_extent(tree, start, end, GFP_NOFS); wait_on_extent_writeback(tree, start, end); - clear_extent_bit(tree, start, end, EXTENT_LOCKED | EXTENT_DIRTY, + clear_extent_bit(tree, start, end, + EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, 1, 1, GFP_NOFS); return 0; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index d3d39e4a2797..07b121d4bd93 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -186,8 +186,16 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, u64 mask = root->blocksize - 1; last_pos_in_file = (isize + mask) & ~mask; hole_size = (start_pos - last_pos_in_file + mask) & ~mask; - hole_size >>= inode->i_blkbits; + if (last_pos_in_file < start_pos) { + err = btrfs_drop_extents(trans, root, inode, + last_pos_in_file, + last_pos_in_file + hole_size, + &hint_block); + if (err) + goto failed; + + hole_size >>= inode->i_blkbits; err = btrfs_insert_file_extent(trans, root, inode->i_ino, last_pos_in_file, @@ -217,13 
+225,11 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, struct page *p = pages[0]; /* step one, delete the existing extents in this range */ /* FIXME blocksize != pagesize */ - if (start_pos < inode->i_size) { - err = btrfs_drop_extents(trans, root, inode, start_pos, - (pos + write_bytes + root->blocksize -1) & - ~((u64)root->blocksize - 1), &hint_block); - if (err) - goto failed; - } + err = btrfs_drop_extents(trans, root, inode, start_pos, + (pos + write_bytes + root->blocksize -1) & + ~((u64)root->blocksize - 1), &hint_block); + if (err) + goto failed; err = insert_inline_extent(trans, root, inode, start_pos, end_pos - start_pos, p, 0); @@ -400,6 +406,8 @@ next_slot: keep = 1; WARN_ON(start & (root->blocksize - 1)); if (found_extent) { + btrfs_drop_extent_cache(inode, key.offset, + start - 1 ); new_num = (start - key.offset) >> inode->i_blkbits; old_num = btrfs_file_extent_num_blocks(extent); @@ -464,7 +472,7 @@ next_slot: if (ret) { btrfs_print_leaf(root, btrfs_buffer_leaf(path->nodes[0])); - printk("got %d on inserting %Lu %u %Lu start %Lu end %Lu found %Lu %Lu\n", ret , ins.objectid, ins.flags, ins.offset, start, end, key.offset, extent_end); + printk("got %d on inserting %Lu %u %Lu start %Lu end %Lu found %Lu %Lu keep was %d\n", ret , ins.objectid, ins.flags, ins.offset, start, end, key.offset, extent_end, keep); } BUG_ON(ret); extent = btrfs_item_ptr( diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3ee6b2fadf58..64710fa77d01 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -721,25 +721,35 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + u64 mask = root->blocksize - 1; u64 pos = (inode->i_size + mask) & ~mask; + u64 block_end = attr->ia_size | mask; u64 hole_size; + u64 alloc_hint; if 
(attr->ia_size <= pos) goto out; btrfs_truncate_page(inode->i_mapping, inode->i_size); + lock_extent(em_tree, pos, block_end, GFP_NOFS); hole_size = (attr->ia_size - pos + mask) & ~mask; - hole_size >>= inode->i_blkbits; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); + err = btrfs_drop_extents(trans, root, inode, + pos, pos + hole_size, &alloc_hint); + + hole_size >>= inode->i_blkbits; + err = btrfs_insert_file_extent(trans, root, inode->i_ino, pos, 0, 0, hole_size); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); + unlock_extent(em_tree, pos, block_end, GFP_NOFS); if (err) return err; } @@ -1529,13 +1539,13 @@ insert: ret = add_extent_mapping(em_tree, em); if (ret == -EEXIST) { free_extent_map(em); + em = NULL; failed_insert++; if (failed_insert > 5) { printk("failing to insert %Lu %Lu\n", start, end); err = -EIO; goto out; } - em = NULL; goto again; } err = 0; @@ -1555,167 +1565,6 @@ out: return em; } - -/* - * FIBMAP and others want to pass in a fake buffer head. They need to - * use BTRFS_GET_BLOCK_NO_DIRECT to make sure we don't try to memcpy - * any packed file data into the fake bh - */ -#define BTRFS_GET_BLOCK_NO_CREATE 0 -#define BTRFS_GET_BLOCK_CREATE 1 -#define BTRFS_GET_BLOCK_NO_DIRECT 2 - -/* - * FIXME create==1 doe not work. 
- */ -static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, - struct buffer_head *result, int create) -{ - int ret; - int err = 0; - u64 blocknr; - u64 extent_start = 0; - u64 extent_end = 0; - u64 objectid = inode->i_ino; - u32 found_type; - u64 alloc_hint = 0; - struct btrfs_path *path; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_file_extent_item *item; - struct btrfs_leaf *leaf; - struct btrfs_disk_key *found_key; - struct btrfs_trans_handle *trans = NULL; - - path = btrfs_alloc_path(); - BUG_ON(!path); - if (create & BTRFS_GET_BLOCK_CREATE) { - /* - * danger!, this only works if the page is properly up - * to date somehow - */ - trans = btrfs_start_transaction(root, 1); - if (!trans) { - err = -ENOMEM; - goto out; - } - ret = btrfs_drop_extents(trans, root, inode, - iblock << inode->i_blkbits, - (iblock + 1) << inode->i_blkbits, - &alloc_hint); - BUG_ON(ret); - } - - ret = btrfs_lookup_file_extent(NULL, root, path, - objectid, - iblock << inode->i_blkbits, 0); - if (ret < 0) { - err = ret; - goto out; - } - - if (ret != 0) { - if (path->slots[0] == 0) { - btrfs_release_path(root, path); - goto not_found; - } - path->slots[0]--; - } - - item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], - struct btrfs_file_extent_item); - leaf = btrfs_buffer_leaf(path->nodes[0]); - blocknr = btrfs_file_extent_disk_blocknr(item); - blocknr += btrfs_file_extent_offset(item); - - /* are we inside the extent that was found? 
*/ - found_key = &leaf->items[path->slots[0]].key; - found_type = btrfs_disk_key_type(found_key); - if (btrfs_disk_key_objectid(found_key) != objectid || - found_type != BTRFS_EXTENT_DATA_KEY) { - extent_end = 0; - extent_start = 0; - goto not_found; - } - found_type = btrfs_file_extent_type(item); - extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key); - if (found_type == BTRFS_FILE_EXTENT_REG) { - extent_start = extent_start >> inode->i_blkbits; - extent_end = extent_start + btrfs_file_extent_num_blocks(item); - err = 0; - if (btrfs_file_extent_disk_blocknr(item) == 0) - goto out; - if (iblock >= extent_start && iblock < extent_end) { - btrfs_map_bh_to_logical(root, result, blocknr + - iblock - extent_start); - goto out; - } - } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { - char *ptr; - char *map; - u32 size; - - if (create & BTRFS_GET_BLOCK_NO_DIRECT) { - err = -EINVAL; - goto out; - } - size = btrfs_file_extent_inline_len(leaf->items + - path->slots[0]); - extent_end = (extent_start + size) >> inode->i_blkbits; - extent_start >>= inode->i_blkbits; - if (iblock < extent_start || iblock > extent_end) { - goto not_found; - } - ptr = btrfs_file_extent_inline_start(item); - map = kmap(result->b_page); - memcpy(map, ptr, size); - memset(map + size, 0, PAGE_CACHE_SIZE - size); - flush_dcache_page(result->b_page); - kunmap(result->b_page); - set_buffer_uptodate(result); - SetPageChecked(result->b_page); - btrfs_map_bh_to_logical(root, result, 0); - } -not_found: - if (create & BTRFS_GET_BLOCK_CREATE) { - struct btrfs_key ins; - ret = btrfs_alloc_extent(trans, root, inode->i_ino, - 1, 0, alloc_hint, (u64)-1, - &ins, 1); - if (ret) { - err = ret; - goto out; - } - ret = btrfs_insert_file_extent(trans, root, inode->i_ino, - iblock << inode->i_blkbits, - ins.objectid, ins.offset, - ins.offset); - if (ret) { - err = ret; - goto out; - } - btrfs_map_bh_to_logical(root, result, ins.objectid); - } -out: - if (trans) { - ret = 
btrfs_end_transaction(trans, root); - if (!err) - err = ret; - } - btrfs_free_path(path); - return err; -} - -int btrfs_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *result, int create) -{ - int err; - struct btrfs_root *root = BTRFS_I(inode)->root; - mutex_lock(&root->fs_info->fs_mutex); - err = btrfs_get_block_lock(inode, iblock, result, create); - mutex_unlock(&root->fs_info->fs_mutex); - return err; -} - static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock, struct buffer_head *result, int create) { @@ -2469,6 +2318,8 @@ static struct address_space_operations btrfs_aops = { static struct address_space_operations btrfs_symlink_aops = { .readpage = btrfs_readpage, .writepage = btrfs_writepage, + .invalidatepage = btrfs_invalidatepage, + .releasepage = btrfs_releasepage, }; static struct inode_operations btrfs_file_inode_operations = { -- cgit v1.2.3-70-g09d2 From 86479a04eef8f304a13aeb8b64bcc8e506a68268 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 10 Sep 2007 19:58:16 -0400 Subject: Add support for defragging files via btrfsctl -d. Avoid OOM on extent tree defrag. 
Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 4 +++ fs/btrfs/disk-io.h | 2 ++ fs/btrfs/extent_map.c | 16 +++++---- fs/btrfs/inode.c | 93 +++++++++++++++++++++++++++++++++++++++++++------- fs/btrfs/tree-defrag.c | 3 ++ 5 files changed, 100 insertions(+), 18 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 89e282c35ce2..b41f48ade419 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -217,6 +217,9 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, root->fs_info->generation); WARN_ON(1); } + if (buffer_defrag_done(parent)) + return 0; + parent_node = btrfs_buffer_node(parent); parent_nritems = btrfs_header_nritems(&parent_node->header); parent_level = btrfs_header_level(&parent_node->header); @@ -274,6 +277,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, *last_ret = search_start; if (parent_level == 1) clear_buffer_defrag(tmp_bh); + set_buffer_defrag_done(tmp_bh); brelse(tmp_bh); } return err; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index da6bb72750f2..714fa877438b 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -26,9 +26,11 @@ enum btrfs_bh_state_bits { BH_Checked = BH_PrivateStart, BH_Defrag, + BH_DefragDone, }; BUFFER_FNS(Checked, checked); BUFFER_FNS(Defrag, defrag); +BUFFER_FNS(DefragDone, defrag_done); static inline struct btrfs_node *btrfs_buffer_node(struct buffer_head *bh) { diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 8ad6f8efc5a0..33f7a18dddf7 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -10,6 +10,12 @@ #include #include "extent_map.h" +/* temporary define until extent_map moves out of btrfs */ +struct kmem_cache *btrfs_cache_create(const char *name, size_t size, + unsigned long extra_flags, + void (*ctor)(void *, struct kmem_cache *, + unsigned long)); + static struct kmem_cache *extent_map_cache; static struct kmem_cache *extent_state_cache; @@ -32,14 +38,12 @@ struct tree_entry { void __init 
extent_map_init(void) { - extent_map_cache = kmem_cache_create("extent_map", - sizeof(struct extent_map), 0, - SLAB_RECLAIM_ACCOUNT | + extent_map_cache = btrfs_cache_create("extent_map", + sizeof(struct extent_map), SLAB_DESTROY_BY_RCU, NULL); - extent_state_cache = kmem_cache_create("extent_state", - sizeof(struct extent_state), 0, - SLAB_RECLAIM_ACCOUNT | + extent_state_cache = btrfs_cache_create("extent_state", + sizeof(struct extent_state), SLAB_DESTROY_BY_RCU, NULL); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 64710fa77d01..6b3e4404dc6a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1904,6 +1904,70 @@ fail: return ret; } +static unsigned long force_ra(struct address_space *mapping, + struct file_ra_state *ra, struct file *file, + pgoff_t offset, pgoff_t last_index) +{ + pgoff_t req_size; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) + req_size = last_index - offset + 1; + offset = page_cache_readahead(mapping, ra, file, offset, req_size); + return offset; +#else + req_size = min(last_index - offset + 1, (pgoff_t)128); + page_cache_sync_readahead(mapping, ra, file, offset, req_size); + return offset + req_size; +#endif +} + +int btrfs_defrag_file(struct file *file) { + struct inode *inode = file->f_path.dentry->d_inode; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct page *page; + unsigned long last_index; + unsigned long ra_index = 0; + u64 page_start; + u64 page_end; + unsigned long i; + + mutex_lock(&inode->i_mutex); + last_index = inode->i_size >> PAGE_CACHE_SHIFT; + for (i = 0; i <= last_index; i++) { + if (i == ra_index) { + ra_index = force_ra(inode->i_mapping, &file->f_ra, + file, ra_index, last_index); + } + page = grab_cache_page(inode->i_mapping, i); + if (!page) + goto out_unlock; + if (!PageUptodate(page)) { + btrfs_readpage(NULL, page); + lock_page(page); + if (!PageUptodate(page)) { + unlock_page(page); + page_cache_release(page); + goto out_unlock; + } + } + page_start = page->index << 
PAGE_CACHE_SHIFT; + page_end = page_start + PAGE_CACHE_SIZE - 1; + + lock_extent(em_tree, page_start, page_end, GFP_NOFS); + set_extent_delalloc(em_tree, page_start, + page_end, GFP_NOFS); + unlock_extent(em_tree, page_start, page_end, GFP_NOFS); + set_page_dirty(page); + unlock_page(page); + page_cache_release(page); + balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); + } + +out_unlock: + mutex_unlock(&inode->i_mutex); + return 0; +} + int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) { @@ -1948,10 +2012,14 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int break; case BTRFS_IOC_DEFRAG: - mutex_lock(&root->fs_info->fs_mutex); - btrfs_defrag_root(root, 0); - btrfs_defrag_root(root->fs_info->extent_root, 0); - mutex_unlock(&root->fs_info->fs_mutex); + if (S_ISDIR(inode->i_mode)) { + mutex_lock(&root->fs_info->fs_mutex); + btrfs_defrag_root(root, 0); + btrfs_defrag_root(root->fs_info->extent_root, 0); + mutex_unlock(&root->fs_info->fs_mutex); + } else if (S_ISREG(inode->i_mode)) { + btrfs_defrag_file(filp); + } ret = 0; break; default: @@ -2018,7 +2086,7 @@ void btrfs_destroy_cachep(void) kmem_cache_destroy(btrfs_path_cachep); } -static struct kmem_cache *cache_create(const char *name, size_t size, +struct kmem_cache *btrfs_cache_create(const char *name, size_t size, unsigned long extra_flags, void (*ctor)(void *, struct kmem_cache *, unsigned long)) @@ -2033,27 +2101,28 @@ static struct kmem_cache *cache_create(const char *name, size_t size, int btrfs_init_cachep(void) { - btrfs_inode_cachep = cache_create("btrfs_inode_cache", + btrfs_inode_cachep = btrfs_cache_create("btrfs_inode_cache", sizeof(struct btrfs_inode), 0, init_once); if (!btrfs_inode_cachep) goto fail; - btrfs_trans_handle_cachep = cache_create("btrfs_trans_handle_cache", - sizeof(struct btrfs_trans_handle), - 0, NULL); + btrfs_trans_handle_cachep = + btrfs_cache_create("btrfs_trans_handle_cache", + sizeof(struct 
btrfs_trans_handle), + 0, NULL); if (!btrfs_trans_handle_cachep) goto fail; - btrfs_transaction_cachep = cache_create("btrfs_transaction_cache", + btrfs_transaction_cachep = btrfs_cache_create("btrfs_transaction_cache", sizeof(struct btrfs_transaction), 0, NULL); if (!btrfs_transaction_cachep) goto fail; - btrfs_path_cachep = cache_create("btrfs_path_cache", + btrfs_path_cachep = btrfs_cache_create("btrfs_path_cache", sizeof(struct btrfs_transaction), 0, NULL); if (!btrfs_path_cachep) goto fail; - btrfs_bit_radix_cachep = cache_create("btrfs_radix", 256, + btrfs_bit_radix_cachep = btrfs_cache_create("btrfs_radix", 256, SLAB_DESTROY_BY_RCU, NULL); if (!btrfs_bit_radix_cachep) goto fail; diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 35fd20d24645..420597127ed1 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -113,6 +113,8 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, } WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); + clear_buffer_defrag(path->nodes[*level]); + clear_buffer_defrag_done(path->nodes[*level]); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; @@ -143,6 +145,7 @@ static int defrag_walk_up(struct btrfs_trans_handle *trans, return 0; } else { clear_buffer_defrag(path->nodes[*level]); + clear_buffer_defrag_done(path->nodes[*level]); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level = i + 1; -- cgit v1.2.3-70-g09d2 From a8c450b211c010ac55190da23ceb6b39b393f411 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 10 Sep 2007 20:00:27 -0400 Subject: Btrfs: Reorder tests in set_extent_bit to properly find holes Yan Zheng noticed that set_extent_bit was exiting too early when there was a hole in the map. The fix is to reorder the tests to check for the hole first. 
Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 33f7a18dddf7..370ed97e52f5 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -712,28 +712,6 @@ again: } goto search_again; } - /* - * | ---- desired range ---- | - * | state | - * We need to split the extent, and set the bit - * on the first half - */ - if (state->start <= end && state->end > end) { - set = state->state & bits; - if (exclusive && set) { - *failed_start = start; - err = -EEXIST; - goto out; - } - err = split_state(tree, state, prealloc, end + 1); - BUG_ON(err == -EEXIST); - - prealloc->state |= bits; - merge_state(tree, prealloc); - prealloc = NULL; - goto out; - } - /* * | ---- desired range ---- | * | state | or | state | @@ -756,6 +734,28 @@ again: start = this_end + 1; goto search_again; } + /* + * | ---- desired range ---- | + * | state | + * We need to split the extent, and set the bit + * on the first half + */ + if (state->start <= end && state->end > end) { + set = state->state & bits; + if (exclusive && set) { + *failed_start = start; + err = -EEXIST; + goto out; + } + err = split_state(tree, state, prealloc, end + 1); + BUG_ON(err == -EEXIST); + + prealloc->state |= bits; + merge_state(tree, prealloc); + prealloc = NULL; + goto out; + } + goto search_again; out: -- cgit v1.2.3-70-g09d2 From 90f1c19a9fd2a943adc69d2b9b8c83bcc4bba6f9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Sep 2007 20:02:27 -0400 Subject: Btrfs: [PATCH] extent_map: fix locking for bio completion The bio completion handlers can be run in any context, e.g. when using the old ide driver they run in hardirq context with irqs disabled so lockdep rightfully warns about write_lock_irq usage in these handlers.
This patch switches clear_extent_bit and set_extent_bit to write_lock_irqsave to fix this problem. Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 370ed97e52f5..3021564ac62c 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -456,6 +456,7 @@ int clear_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, struct extent_state *state; struct extent_state *prealloc = NULL; struct rb_node *node; + unsigned long flags; int err; int set = 0; @@ -466,7 +467,7 @@ again: return -ENOMEM; } - write_lock_irq(&tree->lock); + write_lock_irqsave(&tree->lock, flags); /* * this search will find the extents that end after * our range starts @@ -533,7 +534,7 @@ again: goto search_again; out: - write_unlock_irq(&tree->lock); + write_unlock_irqrestore(&tree->lock, flags); if (prealloc) free_extent_state(prealloc); @@ -542,7 +543,7 @@ out: search_again: if (start >= end) goto out; - write_unlock_irq(&tree->lock); + write_unlock_irqrestore(&tree->lock, flags); if (mask & __GFP_WAIT) cond_resched(); goto again; @@ -628,6 +629,7 @@ int set_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits, struct extent_state *state; struct extent_state *prealloc = NULL; struct rb_node *node; + unsigned long flags; int err = 0; int set; u64 last_start; @@ -639,7 +641,7 @@ again: return -ENOMEM; } - write_lock_irq(&tree->lock); + write_lock_irqsave(&tree->lock, flags); /* * this search will find all the extents that end after * our range starts. 
@@ -759,7 +761,7 @@ again: goto search_again; out: - write_unlock_irq(&tree->lock); + write_unlock_irqrestore(&tree->lock, flags); if (prealloc) free_extent_state(prealloc); @@ -768,7 +770,7 @@ out: search_again: if (start > end) goto out; - write_unlock_irq(&tree->lock); + write_unlock_irqrestore(&tree->lock, flags); if (mask & __GFP_WAIT) cond_resched(); goto again; -- cgit v1.2.3-70-g09d2 From d396c6f554d182b7bce4e2bb2acb47f80430c05f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Sep 2007 20:02:30 -0400 Subject: Btrfs: [PATCH] extent_map: provide generic bmap generic_bmap is completely trivial, while the extent to bh mapping in btrfs is rather complex. So provide an extent_bmap instead that takes a get_extent callback and can be used by filesystems using the extent_map code. Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 19 +++++++++++++++++++ fs/btrfs/extent_map.h | 2 ++ fs/btrfs/inode.c | 23 ++--------------------- 3 files changed, 23 insertions(+), 21 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 3021564ac62c..ff2b1dd25bf9 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1816,3 +1816,22 @@ int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page) } EXPORT_SYMBOL(try_release_extent_mapping); +sector_t extent_bmap(struct address_space *mapping, sector_t iblock, + get_extent_t *get_extent) +{ + struct inode *inode = mapping->host; + u64 start = iblock << inode->i_blkbits; + u64 end = start + (1 << inode->i_blkbits) - 1; + struct extent_map *em; + + em = get_extent(inode, NULL, 0, start, end, 0); + if (!em || IS_ERR(em)) + return 0; + + // XXX(hch): block 0 is valid in some cases, e.g.
XFS RT device + if (em->block_start == EXTENT_MAP_INLINE || + em->block_start == 0) + return 0; + + return (em->block_start + start - em->start) >> inode->i_blkbits; +} diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 0eae6c420acb..da41aca74e0b 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -99,6 +99,8 @@ int extent_prepare_write(struct extent_map_tree *tree, int extent_commit_write(struct extent_map_tree *tree, struct inode *inode, struct page *page, unsigned from, unsigned to); +sector_t extent_bmap(struct address_space *mapping, sector_t iblock, + get_extent_t *get_extent); int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end); int set_state_private(struct extent_map_tree *tree, u64 start, u64 private); int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1ace6d11e097..39c1fe178f47 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1567,28 +1567,9 @@ out: return em; } -static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock, - struct buffer_head *result, int create) +static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock) { - struct btrfs_root *root = BTRFS_I(inode)->root; - u64 start = iblock << inode->i_blkbits; - u64 end = start + root->blocksize -1; - struct extent_map *em; - - em = btrfs_get_extent(inode, NULL, 0, start, end, 0); - if (em && !IS_ERR(em) && em->block_start != EXTENT_MAP_INLINE && - em->block_start != 0) { - u64 offset; - offset = start - em->start; - start = (em->block_start + offset) >> inode->i_blkbits; - btrfs_map_bh_to_logical(root, result, start); - } - return 0; -} - -static sector_t btrfs_bmap(struct address_space *as, sector_t block) -{ - return generic_block_bmap(as, block, btrfs_get_block_bmap); + return extent_bmap(mapping, iblock, btrfs_get_extent); } static int btrfs_prepare_write(struct file *file, struct page *page, -- cgit v1.2.3-70-g09d2 From 
b06355f0fe04a62d08238438654ec453e05304ba Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Sep 2007 20:02:32 -0400 Subject: Btrfs: [PATCH] extent_map: make the writepage_io hook optional The writepage_io hook is not mandatory, e.g. my port of xfs to the extent_map code does not have one for now. So handle a NULL pointer gracefully here. Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index ff2b1dd25bf9..162766773a32 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1607,7 +1607,12 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, continue; } clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); - ret = tree->ops->writepage_io_hook(page, cur, cur + iosize - 1); + if (tree->ops && tree->ops->writepage_io_hook) { + ret = tree->ops->writepage_io_hook(page, cur, + cur + iosize - 1); + } else { + ret = 0; + } if (ret) SetPageError(page); else { -- cgit v1.2.3-70-g09d2 From 0e2752a72cb37075b24899f01e9bc6a589de3b6c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Sep 2007 20:02:33 -0400 Subject: Btrfs: [PATCH] extent_map: add writepage_end_io hook XFS updates the on-disk inode size only after the data I/O has finished, so it needs a hook when the writepage end_bio handler has finished. Might not be worth applying as-is as the per-page callback is very inefficient. What XFS really wants is a callback when writeout of a whole extent has completed. This delayed i_size update scheme might be worthwhile for btrfs as well, btw.
Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 2 ++ fs/btrfs/extent_map.h | 1 + 2 files changed, 3 insertions(+) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 162766773a32..ab5bde31b687 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1231,6 +1231,8 @@ static int end_bio_extent_writepage(struct bio *bio, end_page_writeback(page); else check_page_writeback(tree, page); + if (tree->ops && tree->ops->writepage_end_io_hook) + tree->ops->writepage_end_io_hook(page, start, end); } while (bvec >= bio->bi_io_vec); bio_put(bio); diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index da41aca74e0b..40b53ee274fb 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -11,6 +11,7 @@ struct extent_map_ops { int (*writepage_io_hook)(struct page *page, u64 start, u64 end); int (*readpage_io_hook)(struct page *page, u64 start, u64 end); int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end); + void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end); }; struct extent_map_tree { -- cgit v1.2.3-70-g09d2 From b3cfa35a493866cffd00952438b6c77364d42d3c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Sep 2007 11:25:58 -0400 Subject: Btrfs: factor page private preparations into a helper Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 40 +++++++++++++++------------------------- fs/btrfs/extent_map.h | 1 + fs/btrfs/file.c | 7 +------ fs/btrfs/inode.c | 7 +------ 4 files changed, 18 insertions(+), 37 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index ab5bde31b687..0ab368e091f9 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1379,6 +1379,16 @@ static int submit_extent_page(int rw, struct extent_map_tree *tree, return ret; } +void set_page_extent_mapped(struct page *page) +{ + if (!PagePrivate(page)) { 
+ SetPagePrivate(page); + WARN_ON(!page->mapping->a_ops->invalidatepage); + set_page_private(page, 1); + page_cache_get(page); + } +} + /* * basic readpage implementation. Locked extent state structs are inserted * into the tree that are removed when the IO is done (by the end_io @@ -1405,12 +1415,7 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page, size_t iosize; size_t blocksize = inode->i_sb->s_blocksize; - if (!PagePrivate(page)) { - SetPagePrivate(page); - WARN_ON(!page->mapping->a_ops->invalidatepage); - set_page_private(page, 1); - page_cache_get(page); - } + set_page_extent_mapped(page); end = page_end; lock_extent(tree, start, end, GFP_NOFS); @@ -1531,12 +1536,7 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, PAGE_CACHE_SIZE - offset, KM_USER0); } - if (!PagePrivate(page)) { - SetPagePrivate(page); - set_page_private(page, 1); - WARN_ON(!page->mapping->a_ops->invalidatepage); - page_cache_get(page); - } + set_page_extent_mapped(page); lock_extent(tree, start, page_end, GFP_NOFS); nr_delalloc = find_lock_delalloc_range(tree, start, page_end + 1, @@ -1672,13 +1672,7 @@ int extent_commit_write(struct extent_map_tree *tree, { loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; - if (!PagePrivate(page)) { - SetPagePrivate(page); - set_page_private(page, 1); - WARN_ON(!page->mapping->a_ops->invalidatepage); - page_cache_get(page); - } - + set_page_extent_mapped(page); set_page_dirty(page); if (pos > inode->i_size) { @@ -1709,12 +1703,8 @@ int extent_prepare_write(struct extent_map_tree *tree, int ret = 0; int isnew; - if (!PagePrivate(page)) { - SetPagePrivate(page); - set_page_private(page, 1); - WARN_ON(!page->mapping->a_ops->invalidatepage); - page_cache_get(page); - } + set_page_extent_mapped(page); + block_start = (page_start + from) & ~((u64)blocksize - 1); block_end = (page_start + to - 1) | (blocksize - 1); orig_block_start = block_start; diff --git a/fs/btrfs/extent_map.h 
b/fs/btrfs/extent_map.h index 40b53ee274fb..d913ce01248d 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -105,4 +105,5 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock, int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end); int set_state_private(struct extent_map_tree *tree, u64 start, u64 private); int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private); +void set_page_extent_mapped(struct page *page); #endif diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 698eaea612f1..4cc459c943ec 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -543,12 +543,7 @@ static int prepare_pages(struct btrfs_root *root, } cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); wait_on_page_writeback(pages[i]); - if (!PagePrivate(pages[i])) { - SetPagePrivate(pages[i]); - set_page_private(pages[i], 1); - WARN_ON(!pages[i]->mapping->a_ops->invalidatepage); - page_cache_get(pages[i]); - } + set_page_extent_mapped(pages[i]); WARN_ON(!PageLocked(pages[i])); } return 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 88b3ef20be69..0df325f89d62 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -652,12 +652,7 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page, u64 page_start = page->index << PAGE_CACHE_SHIFT; u64 page_end = page_start + PAGE_CACHE_SIZE - 1; - if (!PagePrivate(page)) { - SetPagePrivate(page); - set_page_private(page, 1); - WARN_ON(!page->mapping->a_ops->invalidatepage); - page_cache_get(page); - } + set_page_extent_mapped(page); lock_extent(em_tree, page_start, page_end, GFP_NOFS); set_extent_delalloc(&BTRFS_I(inode)->extent_tree, page_start, -- cgit v1.2.3-70-g09d2 From 5f39d397dfbe140a14edecd4e73c34ce23c4f9ee Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:14:19 -0400 Subject: Btrfs: Create extent_buffer interface for large blocksizes Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 + fs/btrfs/ctree.c | 1484 
++++++++++++++++++++++++++---------------------- fs/btrfs/ctree.h | 849 ++++++++++----------------- fs/btrfs/dir-item.c | 102 ++-- fs/btrfs/disk-io.c | 391 ++++++------- fs/btrfs/disk-io.h | 55 +- fs/btrfs/extent-tree.c | 294 +++++----- fs/btrfs/extent_map.c | 672 +++++++++++++++++++++- fs/btrfs/extent_map.h | 75 ++- fs/btrfs/file-item.c | 63 +- fs/btrfs/file.c | 129 +++-- fs/btrfs/inode-item.c | 22 +- fs/btrfs/inode-map.c | 17 +- fs/btrfs/inode.c | 435 ++++++++------ fs/btrfs/print-tree.c | 96 ++-- fs/btrfs/print-tree.h | 4 +- fs/btrfs/root-tree.c | 51 +- fs/btrfs/super.c | 7 +- fs/btrfs/sysfs.c | 10 +- fs/btrfs/transaction.c | 61 +- fs/btrfs/transaction.h | 5 +- fs/btrfs/tree-defrag.c | 58 +- 22 files changed, 2794 insertions(+), 2088 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 6341cf7a9f91..a3b51085d7f0 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -23,4 +23,6 @@ modules_install: clean: $(MAKE) -C $(KERNELDIR) M=`pwd` clean +tester: + $(MAKE) -C $(KERNELDIR) M=`pwd` tree-defrag.o transaction.o sysfs.o super.o root-tree.o inode-map.o inode-item.o inode.o file-item.o file.o extent_map.o disk-io.o ctree.o dir-item.o extent-tree.o endif diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index b41f48ade419..f60920e8a0e0 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -16,21 +16,24 @@ * Boston, MA 021110-1307, USA. 
*/ +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" +#include "print-tree.h" static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level); static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *ins_key, struct btrfs_path *path, int data_size); -static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct buffer_head *dst, struct buffer_head - *src); -static int balance_node_right(struct btrfs_trans_handle *trans, struct - btrfs_root *root, struct buffer_head *dst_buf, - struct buffer_head *src_buf); +static int push_node_left(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *dst, + struct extent_buffer *src); +static int balance_node_right(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *dst_buf, + struct extent_buffer *src_buf); static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot); @@ -62,40 +65,38 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) for (i = 0; i < BTRFS_MAX_LEVEL; i++) { if (!p->nodes[i]) break; - btrfs_block_release(root, p->nodes[i]); + free_extent_buffer(p->nodes[i]); } memset(p, 0, sizeof(*p)); } -static int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct buffer_head *buf, struct buffer_head - *parent, int parent_slot, struct buffer_head - **cow_ret, u64 search_start, u64 empty_size) +static int __btrfs_cow_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf, + struct extent_buffer *parent, int parent_slot, + struct extent_buffer **cow_ret, + u64 search_start, u64 empty_size) { - struct buffer_head *cow; - struct btrfs_node *cow_node; + struct extent_buffer *cow; int ret = 0; int different_trans = 0; WARN_ON(root->ref_cows && trans->transid != 
root->last_trans); - WARN_ON(!buffer_uptodate(buf)); + cow = btrfs_alloc_free_block(trans, root, search_start, empty_size); if (IS_ERR(cow)) return PTR_ERR(cow); - cow_node = btrfs_buffer_node(cow); - if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) + if (buf->len != root->sectorsize || cow->len != root->sectorsize) WARN_ON(1); - memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); - btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow)); - btrfs_set_header_generation(&cow_node->header, trans->transid); - btrfs_set_header_owner(&cow_node->header, root->root_key.objectid); + copy_extent_buffer(cow, buf, 0, 0, cow->len); + btrfs_set_header_blocknr(cow, extent_buffer_blocknr(cow)); + btrfs_set_header_generation(cow, trans->transid); + btrfs_set_header_owner(cow, root->root_key.objectid); - WARN_ON(btrfs_header_generation(btrfs_buffer_header(buf)) > - trans->transid); - if (btrfs_header_generation(btrfs_buffer_header(buf)) != - trans->transid) { + WARN_ON(btrfs_header_generation(buf) > trans->transid); + if (btrfs_header_generation(buf) != trans->transid) { different_trans = 1; ret = btrfs_inc_ref(trans, root, buf); if (ret) @@ -106,29 +107,29 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root if (buf == root->node) { root->node = cow; - get_bh(cow); + extent_buffer_get(cow); if (buf != root->commit_root) { - btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); + btrfs_free_extent(trans, root, + extent_buffer_blocknr(buf), 1, 1); } - btrfs_block_release(root, buf); + free_extent_buffer(buf); } else { - btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, - bh_blocknr(cow)); + btrfs_set_node_blockptr(parent, parent_slot, + extent_buffer_blocknr(cow)); btrfs_mark_buffer_dirty(parent); - WARN_ON(btrfs_header_generation(btrfs_buffer_header(parent)) != - trans->transid); - btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); + WARN_ON(btrfs_header_generation(parent) != trans->transid); + 
btrfs_free_extent(trans, root, extent_buffer_blocknr(buf),1,1); } - btrfs_block_release(root, buf); + free_extent_buffer(buf); btrfs_mark_buffer_dirty(cow); *cow_ret = cow; return 0; } -int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct buffer_head *buf, struct buffer_head - *parent, int parent_slot, struct buffer_head - **cow_ret) +int btrfs_cow_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *buf, + struct extent_buffer *parent, int parent_slot, + struct extent_buffer **cow_ret) { u64 search_start; if (trans->transaction != root->fs_info->running_transaction) { @@ -141,13 +142,12 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root root->fs_info->generation); WARN_ON(1); } - if (btrfs_header_generation(btrfs_buffer_header(buf)) == - trans->transid) { + if (btrfs_header_generation(buf) == trans->transid) { *cow_ret = buf; return 0; } - search_start = bh_blocknr(buf) & ~((u64)65535); + search_start = extent_buffer_blocknr(buf) & ~((u64)65535); return __btrfs_cow_block(trans, root, buf, parent, parent_slot, cow_ret, search_start, 0); } @@ -161,9 +161,11 @@ static int close_blocks(u64 blocknr, u64 other) return 0; } -static int should_defrag_leaf(struct buffer_head *bh) +#if 0 +static int should_defrag_leaf(struct extent_buffer *eb) { - struct btrfs_leaf *leaf = btrfs_buffer_leaf(bh); + return 0; + struct btrfs_leaf *leaf = btrfs_buffer_leaf(eb); struct btrfs_disk_key *key; u32 nritems; @@ -188,14 +190,17 @@ static int should_defrag_leaf(struct buffer_head *bh) } return 0; } +#endif int btrfs_realloc_node(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct buffer_head *parent, + struct btrfs_root *root, struct extent_buffer *parent, int cache_only, u64 *last_ret) { + return 0; +#if 0 struct btrfs_node *parent_node; - struct buffer_head *cur_bh; - struct buffer_head *tmp_bh; + struct extent_buffer *cur_eb; + struct extent_buffer *tmp_eb; u64 blocknr; u64 
search_start = *last_ret; u64 last_block = 0; @@ -281,6 +286,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, brelse(tmp_bh); } return err; +#endif } /* @@ -289,12 +295,12 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, * which is the stop of the leaf data stack */ static inline unsigned int leaf_data_end(struct btrfs_root *root, - struct btrfs_leaf *leaf) + struct extent_buffer *leaf) { - u32 nr = btrfs_header_nritems(&leaf->header); + u32 nr = btrfs_header_nritems(leaf); if (nr == 0) return BTRFS_LEAF_DATA_SIZE(root); - return btrfs_item_offset(leaf->items + nr - 1); + return btrfs_item_offset_nr(leaf, nr - 1); } /* @@ -310,9 +316,9 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) return 1; if (k1.objectid < k2->objectid) return -1; - if (k1.flags > k2->flags) + if (k1.type > k2->type) return 1; - if (k1.flags < k2->flags) + if (k1.type < k2->type) return -1; if (k1.offset > k2->offset) return 1; @@ -324,37 +330,39 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) static int check_node(struct btrfs_root *root, struct btrfs_path *path, int level) { - struct btrfs_node *parent = NULL; - struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]); + struct extent_buffer *parent = NULL; + struct extent_buffer *node = path->nodes[level]; + struct btrfs_disk_key parent_key; + struct btrfs_disk_key node_key; int parent_slot; int slot; struct btrfs_key cpukey; - u32 nritems = btrfs_header_nritems(&node->header); + u32 nritems = btrfs_header_nritems(node); if (path->nodes[level + 1]) - parent = btrfs_buffer_node(path->nodes[level + 1]); + parent = path->nodes[level + 1]; slot = path->slots[level]; - BUG_ON(!buffer_uptodate(path->nodes[level])); BUG_ON(nritems == 0); if (parent) { - struct btrfs_disk_key *parent_key; - parent_slot = path->slots[level + 1]; - parent_key = &parent->ptrs[parent_slot].key; - BUG_ON(memcmp(parent_key, &node->ptrs[0].key, + btrfs_node_key(parent, &parent_key, 
parent_slot); + btrfs_node_key(node, &node_key, 0); + BUG_ON(memcmp(&parent_key, &node_key, sizeof(struct btrfs_disk_key))); BUG_ON(btrfs_node_blockptr(parent, parent_slot) != - btrfs_header_blocknr(&node->header)); + btrfs_header_blocknr(node)); } BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root)); if (slot != 0) { - btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[slot - 1].key); - BUG_ON(comp_keys(&node->ptrs[slot].key, &cpukey) <= 0); + btrfs_node_key_to_cpu(node, &cpukey, slot - 1); + btrfs_node_key(node, &node_key, slot); + BUG_ON(comp_keys(&node_key, &cpukey) <= 0); } if (slot < nritems - 1) { - btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[slot + 1].key); - BUG_ON(comp_keys(&node->ptrs[slot].key, &cpukey) >= 0); + btrfs_node_key_to_cpu(node, &cpukey, slot + 1); + btrfs_node_key(node, &node_key, slot); + BUG_ON(comp_keys(&node_key, &cpukey) >= 0); } return 0; } @@ -362,83 +370,172 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path, static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, int level) { - struct btrfs_leaf *leaf = btrfs_buffer_leaf(path->nodes[level]); - struct btrfs_node *parent = NULL; + struct extent_buffer *leaf = path->nodes[level]; + struct extent_buffer *parent = NULL; int parent_slot; - int slot = path->slots[0]; struct btrfs_key cpukey; + struct btrfs_disk_key parent_key; + struct btrfs_disk_key leaf_key; + int slot = path->slots[0]; - u32 nritems = btrfs_header_nritems(&leaf->header); + u32 nritems = btrfs_header_nritems(leaf); if (path->nodes[level + 1]) - parent = btrfs_buffer_node(path->nodes[level + 1]); - - BUG_ON(btrfs_leaf_free_space(root, leaf) < 0); + parent = path->nodes[level + 1]; if (nritems == 0) return 0; if (parent) { - struct btrfs_disk_key *parent_key; - parent_slot = path->slots[level + 1]; - parent_key = &parent->ptrs[parent_slot].key; + btrfs_node_key(parent, &parent_key, parent_slot); + btrfs_item_key(leaf, &leaf_key, 0); - BUG_ON(memcmp(parent_key, &leaf->items[0].key, + 
BUG_ON(memcmp(&parent_key, &leaf_key, sizeof(struct btrfs_disk_key))); BUG_ON(btrfs_node_blockptr(parent, parent_slot) != - btrfs_header_blocknr(&leaf->header)); + btrfs_header_blocknr(leaf)); + } +#if 0 + for (i = 0; nritems > 1 && i < nritems - 2; i++) { + btrfs_item_key_to_cpu(leaf, &cpukey, i + 1); + btrfs_item_key(leaf, &leaf_key, i); + if (comp_keys(&leaf_key, &cpukey) >= 0) { + btrfs_print_leaf(root, leaf); + printk("slot %d offset bad key\n", i); + BUG_ON(1); + } + if (btrfs_item_offset_nr(leaf, i) != + btrfs_item_end_nr(leaf, i + 1)) { + btrfs_print_leaf(root, leaf); + printk("slot %d offset bad\n", i); + BUG_ON(1); + } + if (i == 0) { + if (btrfs_item_offset_nr(leaf, i) + + btrfs_item_size_nr(leaf, i) != + BTRFS_LEAF_DATA_SIZE(root)) { + btrfs_print_leaf(root, leaf); + printk("slot %d first offset bad\n", i); + BUG_ON(1); + } + } } - if (slot != 0) { - btrfs_disk_key_to_cpu(&cpukey, &leaf->items[slot - 1].key); - BUG_ON(comp_keys(&leaf->items[slot].key, &cpukey) <= 0); - BUG_ON(btrfs_item_offset(leaf->items + slot - 1) != - btrfs_item_end(leaf->items + slot)); + if (nritems > 0) { + if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) { + btrfs_print_leaf(root, leaf); + printk("slot %d bad size \n", nritems - 1); + BUG_ON(1); + } + } +#endif + if (slot != 0 && slot < nritems - 1) { + btrfs_item_key(leaf, &leaf_key, slot); + btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1); + if (comp_keys(&leaf_key, &cpukey) <= 0) { + btrfs_print_leaf(root, leaf); + printk("slot %d offset bad key\n", slot); + BUG_ON(1); + } + if (btrfs_item_offset_nr(leaf, slot - 1) != + btrfs_item_end_nr(leaf, slot)) { + btrfs_print_leaf(root, leaf); + printk("slot %d offset bad\n", slot); + BUG_ON(1); + } } if (slot < nritems - 1) { - btrfs_disk_key_to_cpu(&cpukey, &leaf->items[slot + 1].key); - BUG_ON(comp_keys(&leaf->items[slot].key, &cpukey) >= 0); - BUG_ON(btrfs_item_offset(leaf->items + slot) != - btrfs_item_end(leaf->items + slot + 1)); + btrfs_item_key(leaf, &leaf_key, slot); + 
btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1); + BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0); + if (btrfs_item_offset_nr(leaf, slot) != + btrfs_item_end_nr(leaf, slot + 1)) { + btrfs_print_leaf(root, leaf); + printk("slot %d offset bad\n", slot); + BUG_ON(1); + } } - BUG_ON(btrfs_item_offset(leaf->items) + - btrfs_item_size(leaf->items) != BTRFS_LEAF_DATA_SIZE(root)); + BUG_ON(btrfs_item_offset_nr(leaf, 0) + + btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root)); return 0; } static int check_block(struct btrfs_root *root, struct btrfs_path *path, int level) { - struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]); - if (memcmp(node->header.fsid, root->fs_info->disk_super->fsid, - sizeof(node->header.fsid))) - BUG(); + struct extent_buffer *buf = path->nodes[level]; + char fsid[BTRFS_FSID_SIZE]; + + read_extent_buffer(buf, fsid, (unsigned long)btrfs_header_fsid(buf), + BTRFS_FSID_SIZE); + + if (memcmp(fsid, root->fs_info->fsid, BTRFS_FSID_SIZE)) { + int i = 0; + printk("warning bad block %Lu\n", buf->start); + if (!btrfs_buffer_uptodate(buf)) { + WARN_ON(1); + } + for (i = 0; i < BTRFS_FSID_SIZE; i++) { + printk("%x:%x ", root->fs_info->fsid[i], fsid[i]); + } + printk("\n"); + // BUG(); + } if (level == 0) return check_leaf(root, path, level); return check_node(root, path, level); } /* - * search for key in the array p. items p are item_size apart - * and there are 'max' items in p + * search for key in the extent_buffer. The items start at offset p, + * and they are item_size apart. There are 'max' items in p. + * * the slot in the array is returned via slot, and it points to * the place where you would insert key if it is not found in * the array. 
* * slot may point to max if the key is bigger than all of the keys */ -static int generic_bin_search(char *p, int item_size, struct btrfs_key *key, - int max, int *slot) +static int generic_bin_search(struct extent_buffer *eb, unsigned long p, + int item_size, struct btrfs_key *key, + int max, int *slot) { int low = 0; int high = max; int mid; int ret; struct btrfs_disk_key *tmp; + struct btrfs_disk_key unaligned; + unsigned long offset; + char *map_token = NULL; + char *kaddr = NULL; + unsigned long map_start = 0; + unsigned long map_len = 0; while(low < high) { mid = (low + high) / 2; - tmp = (struct btrfs_disk_key *)(p + mid * item_size); + offset = p + mid * item_size; + + if (!map_token || offset < map_start || + (offset + sizeof(struct btrfs_disk_key)) > + map_start + map_len) { + if (map_token) + unmap_extent_buffer(eb, map_token, KM_USER0); + map_extent_buffer(eb, offset, &map_token, &kaddr, + &map_start, &map_len, KM_USER0); + + } + if (offset + sizeof(struct btrfs_disk_key) > + map_start + map_len) { + unmap_extent_buffer(eb, map_token, KM_USER0); + read_extent_buffer(eb, &unaligned, + offset, sizeof(unaligned)); + map_token = NULL; + tmp = &unaligned; + } else { + tmp = (struct btrfs_disk_key *)(kaddr + offset - + map_start); + } ret = comp_keys(tmp, key); if (ret < 0) @@ -447,10 +544,13 @@ static int generic_bin_search(char *p, int item_size, struct btrfs_key *key, high = mid; else { *slot = mid; + unmap_extent_buffer(eb, map_token, KM_USER0); return 0; } } *slot = low; + if (map_token) + unmap_extent_buffer(eb, map_token, KM_USER0); return 1; } @@ -458,46 +558,42 @@ static int generic_bin_search(char *p, int item_size, struct btrfs_key *key, * simple bin_search frontend that does the right thing for * leaves vs nodes */ -static int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot) +static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, + int level, int *slot) { - if (btrfs_is_leaf(c)) { - struct btrfs_leaf *l = (struct 
btrfs_leaf *)c; - return generic_bin_search((void *)l->items, + if (level == 0) { + return generic_bin_search(eb, + offsetof(struct btrfs_leaf, items), sizeof(struct btrfs_item), - key, btrfs_header_nritems(&c->header), + key, btrfs_header_nritems(eb), slot); } else { - return generic_bin_search((void *)c->ptrs, + return generic_bin_search(eb, + offsetof(struct btrfs_node, ptrs), sizeof(struct btrfs_key_ptr), - key, btrfs_header_nritems(&c->header), + key, btrfs_header_nritems(eb), slot); } return -1; } -static struct buffer_head *read_node_slot(struct btrfs_root *root, - struct buffer_head *parent_buf, - int slot) +static struct extent_buffer *read_node_slot(struct btrfs_root *root, + struct extent_buffer *parent, int slot) { - struct btrfs_node *node = btrfs_buffer_node(parent_buf); if (slot < 0) return NULL; - if (slot >= btrfs_header_nritems(&node->header)) + if (slot >= btrfs_header_nritems(parent)) return NULL; - return read_tree_block(root, btrfs_node_blockptr(node, slot)); + return read_tree_block(root, btrfs_node_blockptr(parent, slot)); } static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { - struct buffer_head *right_buf; - struct buffer_head *mid_buf; - struct buffer_head *left_buf; - struct buffer_head *parent_buf = NULL; - struct btrfs_node *right = NULL; - struct btrfs_node *mid; - struct btrfs_node *left = NULL; - struct btrfs_node *parent = NULL; + struct extent_buffer *right = NULL; + struct extent_buffer *mid; + struct extent_buffer *left = NULL; + struct extent_buffer *parent = NULL; int ret = 0; int wret; int pslot; @@ -508,60 +604,57 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (level == 0) return 0; - mid_buf = path->nodes[level]; - mid = btrfs_buffer_node(mid_buf); + mid = path->nodes[level]; orig_ptr = btrfs_node_blockptr(mid, orig_slot); if (level < BTRFS_MAX_LEVEL - 1) - parent_buf = path->nodes[level + 1]; + parent = 
path->nodes[level + 1]; pslot = path->slots[level + 1]; /* * deal with the case where there is only one pointer in the root * by promoting the node below to a root */ - if (!parent_buf) { - struct buffer_head *child; - u64 blocknr = bh_blocknr(mid_buf); + if (!parent) { + struct extent_buffer *child; + u64 blocknr = extent_buffer_blocknr(mid); - if (btrfs_header_nritems(&mid->header) != 1) + if (btrfs_header_nritems(mid) != 1) return 0; /* promote the child to a root */ - child = read_node_slot(root, mid_buf, 0); + child = read_node_slot(root, mid, 0); BUG_ON(!child); root->node = child; path->nodes[level] = NULL; - clean_tree_block(trans, root, mid_buf); - wait_on_buffer(mid_buf); + clean_tree_block(trans, root, mid); + wait_on_tree_block_writeback(root, mid); /* once for the path */ - btrfs_block_release(root, mid_buf); + free_extent_buffer(mid); /* once for the root ptr */ - btrfs_block_release(root, mid_buf); + free_extent_buffer(mid); return btrfs_free_extent(trans, root, blocknr, 1, 1); } - parent = btrfs_buffer_node(parent_buf); - - if (btrfs_header_nritems(&mid->header) > + if (btrfs_header_nritems(mid) > BTRFS_NODEPTRS_PER_BLOCK(root) / 4) return 0; - if (btrfs_header_nritems(&mid->header) < 2) + if (btrfs_header_nritems(mid) < 2) err_on_enospc = 1; - left_buf = read_node_slot(root, parent_buf, pslot - 1); - if (left_buf) { - wret = btrfs_cow_block(trans, root, left_buf, - parent_buf, pslot - 1, &left_buf); + left = read_node_slot(root, parent, pslot - 1); + if (left) { + wret = btrfs_cow_block(trans, root, left, + parent, pslot - 1, &left); if (wret) { ret = wret; goto enospc; } } - right_buf = read_node_slot(root, parent_buf, pslot + 1); - if (right_buf) { - wret = btrfs_cow_block(trans, root, right_buf, - parent_buf, pslot + 1, &right_buf); + right = read_node_slot(root, parent, pslot + 1); + if (right) { + wret = btrfs_cow_block(trans, root, right, + parent, pslot + 1, &right); if (wret) { ret = wret; goto enospc; @@ -569,30 +662,27 @@ static int 
balance_level(struct btrfs_trans_handle *trans, struct btrfs_root } /* first, try to make some room in the middle buffer */ - if (left_buf) { - left = btrfs_buffer_node(left_buf); - orig_slot += btrfs_header_nritems(&left->header); - wret = push_node_left(trans, root, left_buf, mid_buf); + if (left) { + orig_slot += btrfs_header_nritems(left); + wret = push_node_left(trans, root, left, mid); if (wret < 0) ret = wret; - if (btrfs_header_nritems(&mid->header) < 2) + if (btrfs_header_nritems(mid) < 2) err_on_enospc = 1; } /* * then try to empty the right most buffer into the middle */ - if (right_buf) { - right = btrfs_buffer_node(right_buf); - wret = push_node_left(trans, root, mid_buf, right_buf); + if (right) { + wret = push_node_left(trans, root, mid, right); if (wret < 0 && wret != -ENOSPC) ret = wret; - if (btrfs_header_nritems(&right->header) == 0) { - u64 blocknr = bh_blocknr(right_buf); - clean_tree_block(trans, root, right_buf); - wait_on_buffer(right_buf); - btrfs_block_release(root, right_buf); - right_buf = NULL; + if (btrfs_header_nritems(right) == 0) { + u64 blocknr = extent_buffer_blocknr(right); + clean_tree_block(trans, root, right); + wait_on_tree_block_writeback(root, right); + free_extent_buffer(right); right = NULL; wret = del_ptr(trans, root, path, level + 1, pslot + 1); @@ -602,14 +692,13 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (wret) ret = wret; } else { - btrfs_memcpy(root, parent, - &parent->ptrs[pslot + 1].key, - &right->ptrs[0].key, - sizeof(struct btrfs_disk_key)); - btrfs_mark_buffer_dirty(parent_buf); + struct btrfs_disk_key right_key; + btrfs_node_key(right, &right_key, 0); + btrfs_set_node_key(parent, &right_key, pslot + 1); + btrfs_mark_buffer_dirty(parent); } } - if (btrfs_header_nritems(&mid->header) == 1) { + if (btrfs_header_nritems(mid) == 1) { /* * we're not allowed to leave a node with one item in the * tree during a delete. 
A deletion from lower in the tree @@ -619,21 +708,20 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root * otherwise we would have pulled some pointers from the * right */ - BUG_ON(!left_buf); - wret = balance_node_right(trans, root, mid_buf, left_buf); + BUG_ON(!left); + wret = balance_node_right(trans, root, mid, left); if (wret < 0) { ret = wret; goto enospc; } BUG_ON(wret == 1); } - if (btrfs_header_nritems(&mid->header) == 0) { + if (btrfs_header_nritems(mid) == 0) { /* we've managed to empty the middle node, drop it */ - u64 blocknr = bh_blocknr(mid_buf); - clean_tree_block(trans, root, mid_buf); - wait_on_buffer(mid_buf); - btrfs_block_release(root, mid_buf); - mid_buf = NULL; + u64 blocknr = extent_buffer_blocknr(mid); + clean_tree_block(trans, root, mid); + wait_on_tree_block_writeback(root, mid); + free_extent_buffer(mid); mid = NULL; wret = del_ptr(trans, root, path, level + 1, pslot); if (wret) @@ -643,37 +731,36 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root ret = wret; } else { /* update the parent key to reflect our changes */ - btrfs_memcpy(root, parent, - &parent->ptrs[pslot].key, &mid->ptrs[0].key, - sizeof(struct btrfs_disk_key)); - btrfs_mark_buffer_dirty(parent_buf); + struct btrfs_disk_key mid_key; + btrfs_node_key(mid, &mid_key, 0); + btrfs_set_node_key(parent, &mid_key, pslot); + btrfs_mark_buffer_dirty(parent); } /* update the path */ - if (left_buf) { - if (btrfs_header_nritems(&left->header) > orig_slot) { - get_bh(left_buf); - path->nodes[level] = left_buf; + if (left) { + if (btrfs_header_nritems(left) > orig_slot) { + extent_buffer_get(left); + path->nodes[level] = left; path->slots[level + 1] -= 1; path->slots[level] = orig_slot; - if (mid_buf) - btrfs_block_release(root, mid_buf); + if (mid) + free_extent_buffer(mid); } else { - orig_slot -= btrfs_header_nritems(&left->header); + orig_slot -= btrfs_header_nritems(left); path->slots[level] = orig_slot; } } /* double check we 
haven't messed things up */ check_block(root, path, level); if (orig_ptr != - btrfs_node_blockptr(btrfs_buffer_node(path->nodes[level]), - path->slots[level])) + btrfs_node_blockptr(path->nodes[level], path->slots[level])) BUG(); enospc: - if (right_buf) - btrfs_block_release(root, right_buf); - if (left_buf) - btrfs_block_release(root, left_buf); + if (right) + free_extent_buffer(right); + if (left) + free_extent_buffer(left); return ret; } @@ -682,14 +769,10 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { - struct buffer_head *right_buf; - struct buffer_head *mid_buf; - struct buffer_head *left_buf; - struct buffer_head *parent_buf = NULL; - struct btrfs_node *right = NULL; - struct btrfs_node *mid; - struct btrfs_node *left = NULL; - struct btrfs_node *parent = NULL; + struct extent_buffer *right = NULL; + struct extent_buffer *mid; + struct extent_buffer *left = NULL; + struct extent_buffer *parent = NULL; int ret = 0; int wret; int pslot; @@ -699,107 +782,101 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, if (level == 0) return 1; - mid_buf = path->nodes[level]; - mid = btrfs_buffer_node(mid_buf); + mid = path->nodes[level]; orig_ptr = btrfs_node_blockptr(mid, orig_slot); if (level < BTRFS_MAX_LEVEL - 1) - parent_buf = path->nodes[level + 1]; + parent = path->nodes[level + 1]; pslot = path->slots[level + 1]; - if (!parent_buf) + if (!parent) return 1; - parent = btrfs_buffer_node(parent_buf); - left_buf = read_node_slot(root, parent_buf, pslot - 1); + left = read_node_slot(root, parent, pslot - 1); /* first, try to make some room in the middle buffer */ - if (left_buf) { + if (left) { u32 left_nr; - left = btrfs_buffer_node(left_buf); - left_nr = btrfs_header_nritems(&left->header); + left_nr = btrfs_header_nritems(left); if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { wret = 1; } else { - ret = btrfs_cow_block(trans, root, left_buf, parent_buf, - 
pslot - 1, &left_buf); + ret = btrfs_cow_block(trans, root, left, parent, + pslot - 1, &left); if (ret) wret = 1; else { - left = btrfs_buffer_node(left_buf); wret = push_node_left(trans, root, - left_buf, mid_buf); + left, mid); } } if (wret < 0) ret = wret; if (wret == 0) { + struct btrfs_disk_key disk_key; orig_slot += left_nr; - btrfs_memcpy(root, parent, - &parent->ptrs[pslot].key, - &mid->ptrs[0].key, - sizeof(struct btrfs_disk_key)); - btrfs_mark_buffer_dirty(parent_buf); - if (btrfs_header_nritems(&left->header) > orig_slot) { - path->nodes[level] = left_buf; + btrfs_node_key(mid, &disk_key, 0); + btrfs_set_node_key(parent, &disk_key, pslot); + btrfs_mark_buffer_dirty(parent); + if (btrfs_header_nritems(left) > orig_slot) { + path->nodes[level] = left; path->slots[level + 1] -= 1; path->slots[level] = orig_slot; - btrfs_block_release(root, mid_buf); + free_extent_buffer(mid); } else { orig_slot -= - btrfs_header_nritems(&left->header); + btrfs_header_nritems(left); path->slots[level] = orig_slot; - btrfs_block_release(root, left_buf); + free_extent_buffer(left); } check_node(root, path, level); return 0; } - btrfs_block_release(root, left_buf); + free_extent_buffer(left); } - right_buf = read_node_slot(root, parent_buf, pslot + 1); + right= read_node_slot(root, parent, pslot + 1); /* * then try to empty the right most buffer into the middle */ - if (right_buf) { + if (right) { u32 right_nr; - right = btrfs_buffer_node(right_buf); - right_nr = btrfs_header_nritems(&right->header); + right_nr = btrfs_header_nritems(right); if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { wret = 1; } else { - ret = btrfs_cow_block(trans, root, right_buf, - parent_buf, pslot + 1, - &right_buf); + ret = btrfs_cow_block(trans, root, right, + parent, pslot + 1, + &right); if (ret) wret = 1; else { - right = btrfs_buffer_node(right_buf); wret = balance_node_right(trans, root, - right_buf, mid_buf); + right, mid); } } if (wret < 0) ret = wret; if (wret == 0) { - 
btrfs_memcpy(root, parent, - &parent->ptrs[pslot + 1].key, - &right->ptrs[0].key, - sizeof(struct btrfs_disk_key)); - btrfs_mark_buffer_dirty(parent_buf); - if (btrfs_header_nritems(&mid->header) <= orig_slot) { - path->nodes[level] = right_buf; + struct btrfs_disk_key disk_key; + + btrfs_node_key(right, &disk_key, 0); + btrfs_set_node_key(parent, &disk_key, pslot + 1); + btrfs_mark_buffer_dirty(parent); + + if (btrfs_header_nritems(mid) <= orig_slot) { + path->nodes[level] = right; path->slots[level + 1] += 1; path->slots[level] = orig_slot - - btrfs_header_nritems(&mid->header); - btrfs_block_release(root, mid_buf); + btrfs_header_nritems(mid); + free_extent_buffer(mid); } else { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); } check_node(root, path, level); return 0; } - btrfs_block_release(root, right_buf); + free_extent_buffer(right); } check_node(root, path, level); return 1; @@ -811,10 +888,9 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, int level, int slot) { - struct btrfs_node *node; + struct extent_buffer *node; int i; u32 nritems; - u64 item_objectid; u64 blocknr; u64 search; u64 cluster_start; @@ -823,7 +899,7 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, int direction = path->reada; struct radix_tree_root found; unsigned long gang[8]; - struct buffer_head *bh; + struct extent_buffer *eb; if (level == 0) return; @@ -831,18 +907,17 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, if (!path->nodes[level]) return; - node = btrfs_buffer_node(path->nodes[level]); + node = path->nodes[level]; search = btrfs_node_blockptr(node, slot); - bh = btrfs_find_tree_block(root, search); - if (bh) { - brelse(bh); + eb = btrfs_find_tree_block(root, search); + if (eb) { + free_extent_buffer(eb); return; } init_bit_radix(&found); - nritems = btrfs_header_nritems(&node->header); + 
nritems = btrfs_header_nritems(node); for (i = slot; i < nritems; i++) { - item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); blocknr = btrfs_node_blockptr(node, i); set_radix_bit(&found, blocknr); } @@ -886,8 +961,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow) { - struct buffer_head *b; - struct btrfs_node *c; + struct extent_buffer *b; u64 blocknr; int slot; int ret; @@ -901,10 +975,9 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); again: b = root->node; - get_bh(b); + extent_buffer_get(b); while (b) { - c = btrfs_buffer_node(b); - level = btrfs_header_level(&c->header); + level = btrfs_header_level(b); if (cow) { int wret; wret = btrfs_cow_block(trans, root, b, @@ -912,32 +985,30 @@ again: p->slots[level + 1], &b); if (wret) { - btrfs_block_release(root, b); + free_extent_buffer(b); return wret; } - c = btrfs_buffer_node(b); } BUG_ON(!cow && ins_len); - if (level != btrfs_header_level(&c->header)) + if (level != btrfs_header_level(b)) WARN_ON(1); - level = btrfs_header_level(&c->header); + level = btrfs_header_level(b); p->nodes[level] = b; ret = check_block(root, p, level); if (ret) return -1; - ret = bin_search(c, key, &slot); - if (!btrfs_is_leaf(c)) { + ret = bin_search(b, key, level, &slot); + if (level != 0) { if (ret && slot > 0) slot -= 1; p->slots[level] = slot; - if (ins_len > 0 && btrfs_header_nritems(&c->header) >= + if (ins_len > 0 && btrfs_header_nritems(b) >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { int sret = split_node(trans, root, p, level); BUG_ON(sret > 0); if (sret) return sret; b = p->nodes[level]; - c = btrfs_buffer_node(b); slot = p->slots[level]; } else if (ins_len < 0) { int sret = balance_level(trans, root, p, @@ -947,22 +1018,19 @@ again: b = p->nodes[level]; if (!b) goto again; - c = btrfs_buffer_node(b); slot = p->slots[level]; - 
BUG_ON(btrfs_header_nritems(&c->header) == 1); + BUG_ON(btrfs_header_nritems(b) == 1); } /* this is only true while dropping a snapshot */ if (level == lowest_level) break; - blocknr = btrfs_node_blockptr(c, slot); + blocknr = btrfs_node_blockptr(b, slot); if (should_reada) reada_for_search(root, p, level, slot); - b = read_tree_block(root, btrfs_node_blockptr(c, slot)); - + b = read_tree_block(root, btrfs_node_blockptr(b, slot)); } else { - struct btrfs_leaf *l = (struct btrfs_leaf *)c; p->slots[level] = slot; - if (ins_len > 0 && btrfs_leaf_free_space(root, l) < + if (ins_len > 0 && btrfs_leaf_free_space(root, b) < sizeof(struct btrfs_item) + ins_len) { int sret = split_leaf(trans, root, key, p, ins_len); @@ -986,19 +1054,20 @@ again: * If this fails to write a tree block, it returns -1, but continues * fixing up the blocks in ram so the tree is consistent. */ -static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, struct btrfs_disk_key - *key, int level) +static int fixup_low_keys(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + struct btrfs_disk_key *key, int level) { int i; int ret = 0; + struct extent_buffer *t; + for (i = level; i < BTRFS_MAX_LEVEL; i++) { - struct btrfs_node *t; int tslot = path->slots[i]; if (!path->nodes[i]) break; - t = btrfs_buffer_node(path->nodes[i]); - btrfs_memcpy(root, t, &t->ptrs[tslot].key, key, sizeof(*key)); + t = path->nodes[i]; + btrfs_set_node_key(t, key, tslot); btrfs_mark_buffer_dirty(path->nodes[i]); if (tslot != 0) break; @@ -1014,18 +1083,16 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root * error, and > 0 if there was no room in the left hand block. 
*/ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct buffer_head *dst_buf, struct - buffer_head *src_buf) + *root, struct extent_buffer *dst, + struct extent_buffer *src) { - struct btrfs_node *src = btrfs_buffer_node(src_buf); - struct btrfs_node *dst = btrfs_buffer_node(dst_buf); int push_items = 0; int src_nritems; int dst_nritems; int ret = 0; - src_nritems = btrfs_header_nritems(&src->header); - dst_nritems = btrfs_header_nritems(&dst->header); + src_nritems = btrfs_header_nritems(src); + dst_nritems = btrfs_header_nritems(dst); push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; if (push_items <= 0) { @@ -1035,17 +1102,21 @@ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root if (src_nritems < push_items) push_items = src_nritems; - btrfs_memcpy(root, dst, dst->ptrs + dst_nritems, src->ptrs, - push_items * sizeof(struct btrfs_key_ptr)); + copy_extent_buffer(dst, src, + btrfs_node_key_ptr_offset(dst_nritems), + btrfs_node_key_ptr_offset(0), + push_items * sizeof(struct btrfs_key_ptr)); + if (push_items < src_nritems) { - btrfs_memmove(root, src, src->ptrs, src->ptrs + push_items, - (src_nritems - push_items) * - sizeof(struct btrfs_key_ptr)); - } - btrfs_set_header_nritems(&src->header, src_nritems - push_items); - btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); - btrfs_mark_buffer_dirty(src_buf); - btrfs_mark_buffer_dirty(dst_buf); + memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), + btrfs_node_key_ptr_offset(push_items), + (src_nritems - push_items) * + sizeof(struct btrfs_key_ptr)); + } + btrfs_set_header_nritems(src, src_nritems - push_items); + btrfs_set_header_nritems(dst, dst_nritems + push_items); + btrfs_mark_buffer_dirty(src); + btrfs_mark_buffer_dirty(dst); return ret; } @@ -1058,24 +1129,22 @@ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root * * this will only push up to 1/2 the contents of the left node over */ 
-static int balance_node_right(struct btrfs_trans_handle *trans, struct - btrfs_root *root, struct buffer_head *dst_buf, - struct buffer_head *src_buf) +static int balance_node_right(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *dst, + struct extent_buffer *src) { - struct btrfs_node *src = btrfs_buffer_node(src_buf); - struct btrfs_node *dst = btrfs_buffer_node(dst_buf); int push_items = 0; int max_push; int src_nritems; int dst_nritems; int ret = 0; - src_nritems = btrfs_header_nritems(&src->header); - dst_nritems = btrfs_header_nritems(&dst->header); + src_nritems = btrfs_header_nritems(src); + dst_nritems = btrfs_header_nritems(dst); push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; - if (push_items <= 0) { + if (push_items <= 0) return 1; - } max_push = src_nritems / 2 + 1; /* don't try to empty the node */ @@ -1085,18 +1154,21 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct if (max_push < push_items) push_items = max_push; - btrfs_memmove(root, dst, dst->ptrs + push_items, dst->ptrs, - dst_nritems * sizeof(struct btrfs_key_ptr)); + memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items), + btrfs_node_key_ptr_offset(0), + (dst_nritems) * + sizeof(struct btrfs_key_ptr)); - btrfs_memcpy(root, dst, dst->ptrs, - src->ptrs + src_nritems - push_items, - push_items * sizeof(struct btrfs_key_ptr)); + copy_extent_buffer(dst, src, + btrfs_node_key_ptr_offset(0), + btrfs_node_key_ptr_offset(src_nritems - push_items), + push_items * sizeof(struct btrfs_key_ptr)); - btrfs_set_header_nritems(&src->header, src_nritems - push_items); - btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); + btrfs_set_header_nritems(src, src_nritems - push_items); + btrfs_set_header_nritems(dst, dst_nritems + push_items); - btrfs_mark_buffer_dirty(src_buf); - btrfs_mark_buffer_dirty(dst_buf); + btrfs_mark_buffer_dirty(src); + btrfs_mark_buffer_dirty(dst); return ret; } @@ -1107,45 +1179,46 @@ static 
int balance_node_right(struct btrfs_trans_handle *trans, struct * * returns zero on success or < 0 on failure. */ -static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, int level) +static int insert_new_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int level) { - struct buffer_head *t; - struct btrfs_node *lower; - struct btrfs_node *c; - struct btrfs_disk_key *lower_key; + struct extent_buffer *lower; + struct extent_buffer *c; + struct btrfs_disk_key lower_key; BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); - t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr, 0); - if (IS_ERR(t)) - return PTR_ERR(t); - c = btrfs_buffer_node(t); - memset(c, 0, root->blocksize); - btrfs_set_header_nritems(&c->header, 1); - btrfs_set_header_level(&c->header, level); - btrfs_set_header_blocknr(&c->header, bh_blocknr(t)); - btrfs_set_header_generation(&c->header, trans->transid); - btrfs_set_header_owner(&c->header, root->root_key.objectid); - lower = btrfs_buffer_node(path->nodes[level-1]); - memcpy(c->header.fsid, root->fs_info->disk_super->fsid, - sizeof(c->header.fsid)); - if (btrfs_is_leaf(lower)) - lower_key = &((struct btrfs_leaf *)lower)->items[0].key; + c = btrfs_alloc_free_block(trans, root, + extent_buffer_blocknr(root->node), 0); + if (IS_ERR(c)) + return PTR_ERR(c); + memset_extent_buffer(c, 0, 0, root->nodesize); + btrfs_set_header_nritems(c, 1); + btrfs_set_header_level(c, level); + btrfs_set_header_blocknr(c, extent_buffer_blocknr(c)); + btrfs_set_header_generation(c, trans->transid); + btrfs_set_header_owner(c, root->root_key.objectid); + lower = path->nodes[level-1]; + + write_extent_buffer(c, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(c), + BTRFS_FSID_SIZE); + if (level == 1) + btrfs_item_key(lower, &lower_key, 0); else - lower_key = &lower->ptrs[0].key; - btrfs_memcpy(root, c, &c->ptrs[0].key, lower_key, - 
sizeof(struct btrfs_disk_key)); - btrfs_set_node_blockptr(c, 0, bh_blocknr(path->nodes[level - 1])); + btrfs_node_key(lower, &lower_key, 0); + btrfs_set_node_key(c, &lower_key, 0); + btrfs_set_node_blockptr(c, 0, extent_buffer_blocknr(lower)); - btrfs_mark_buffer_dirty(t); + btrfs_mark_buffer_dirty(c); /* the super has an extra ref to root->node */ - btrfs_block_release(root, root->node); - root->node = t; - get_bh(t); - path->nodes[level] = t; + free_extent_buffer(root->node); + root->node = c; + extent_buffer_get(c); + path->nodes[level] = c; path->slots[level] = 0; return 0; } @@ -1163,26 +1236,26 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_disk_key *key, u64 blocknr, int slot, int level) { - struct btrfs_node *lower; + struct extent_buffer *lower; int nritems; BUG_ON(!path->nodes[level]); - lower = btrfs_buffer_node(path->nodes[level]); - nritems = btrfs_header_nritems(&lower->header); + lower = path->nodes[level]; + nritems = btrfs_header_nritems(lower); if (slot > nritems) BUG(); if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root)) BUG(); if (slot != nritems) { - btrfs_memmove(root, lower, lower->ptrs + slot + 1, - lower->ptrs + slot, + memmove_extent_buffer(lower, + btrfs_node_key_ptr_offset(slot + 1), + btrfs_node_key_ptr_offset(slot), (nritems - slot) * sizeof(struct btrfs_key_ptr)); } - btrfs_memcpy(root, lower, &lower->ptrs[slot].key, - key, sizeof(struct btrfs_disk_key)); + btrfs_set_node_key(lower, key, slot); btrfs_set_node_blockptr(lower, slot, blocknr); - btrfs_set_header_nritems(&lower->header, nritems + 1); - btrfs_mark_buffer_dirty(path->nodes[level]); + btrfs_set_header_nritems(lower, nritems + 1); + btrfs_mark_buffer_dirty(lower); check_node(root, path, level); return 0; } @@ -1199,69 +1272,73 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, 
int level) { - struct buffer_head *t; - struct btrfs_node *c; - struct buffer_head *split_buffer; - struct btrfs_node *split; + struct extent_buffer *c; + struct extent_buffer *split; + struct btrfs_disk_key disk_key; int mid; int ret; int wret; u32 c_nritems; - t = path->nodes[level]; - c = btrfs_buffer_node(t); - if (t == root->node) { + c = path->nodes[level]; + if (c == root->node) { /* trying to split the root, lets make a new one */ ret = insert_new_root(trans, root, path, level + 1); if (ret) return ret; } else { ret = push_nodes_for_insert(trans, root, path, level); - t = path->nodes[level]; - c = btrfs_buffer_node(t); - if (!ret && - btrfs_header_nritems(&c->header) < + c = path->nodes[level]; + if (!ret && btrfs_header_nritems(c) < BTRFS_NODEPTRS_PER_BLOCK(root) - 1) return 0; if (ret < 0) return ret; } - c_nritems = btrfs_header_nritems(&c->header); - split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr, 0); - if (IS_ERR(split_buffer)) - return PTR_ERR(split_buffer); + c_nritems = btrfs_header_nritems(c); + split = btrfs_alloc_free_block(trans, root, + extent_buffer_blocknr(c), 0); + if (IS_ERR(split)) + return PTR_ERR(split); + + btrfs_set_header_flags(split, btrfs_header_flags(c)); + btrfs_set_header_level(split, btrfs_header_level(c)); + btrfs_set_header_blocknr(split, extent_buffer_blocknr(split)); + btrfs_set_header_generation(split, trans->transid); + btrfs_set_header_owner(split, root->root_key.objectid); + write_extent_buffer(split, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(split), + BTRFS_FSID_SIZE); - split = btrfs_buffer_node(split_buffer); - btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header)); - btrfs_set_header_level(&split->header, btrfs_header_level(&c->header)); - btrfs_set_header_blocknr(&split->header, bh_blocknr(split_buffer)); - btrfs_set_header_generation(&split->header, trans->transid); - btrfs_set_header_owner(&split->header, root->root_key.objectid); - memcpy(split->header.fsid, 
root->fs_info->disk_super->fsid, - sizeof(split->header.fsid)); mid = (c_nritems + 1) / 2; - btrfs_memcpy(root, split, split->ptrs, c->ptrs + mid, - (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); - btrfs_set_header_nritems(&split->header, c_nritems - mid); - btrfs_set_header_nritems(&c->header, mid); + + copy_extent_buffer(split, c, + btrfs_node_key_ptr_offset(0), + btrfs_node_key_ptr_offset(mid), + (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); + btrfs_set_header_nritems(split, c_nritems - mid); + btrfs_set_header_nritems(c, mid); ret = 0; - btrfs_mark_buffer_dirty(t); - btrfs_mark_buffer_dirty(split_buffer); - wret = insert_ptr(trans, root, path, &split->ptrs[0].key, - bh_blocknr(split_buffer), path->slots[level + 1] + 1, + btrfs_mark_buffer_dirty(c); + btrfs_mark_buffer_dirty(split); + + btrfs_node_key(split, &disk_key, 0); + wret = insert_ptr(trans, root, path, &disk_key, + extent_buffer_blocknr(split), + path->slots[level + 1] + 1, level + 1); if (wret) ret = wret; if (path->slots[level] >= mid) { path->slots[level] -= mid; - btrfs_block_release(root, t); - path->nodes[level] = split_buffer; + free_extent_buffer(c); + path->nodes[level] = split; path->slots[level + 1] += 1; } else { - btrfs_block_release(root, split_buffer); + free_extent_buffer(split); } return ret; } @@ -1271,16 +1348,16 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root * and nr indicate which items in the leaf to check. 
This totals up the * space used both by the item structs and the item data */ -static int leaf_space_used(struct btrfs_leaf *l, int start, int nr) +static int leaf_space_used(struct extent_buffer *l, int start, int nr) { int data_len; - int nritems = btrfs_header_nritems(&l->header); + int nritems = btrfs_header_nritems(l); int end = min(nritems, start + nr) - 1; if (!nr) return 0; - data_len = btrfs_item_end(l->items + start); - data_len = data_len - btrfs_item_offset(l->items + end); + data_len = btrfs_item_end_nr(l, start); + data_len = data_len - btrfs_item_offset_nr(l, end); data_len += sizeof(struct btrfs_item) * nr; WARN_ON(data_len < 0); return data_len; @@ -1291,10 +1368,17 @@ static int leaf_space_used(struct btrfs_leaf *l, int start, int nr) * the start of the leaf data. IOW, how much room * the leaf has left for both items and data */ -int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf) +int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf) { - int nritems = btrfs_header_nritems(&leaf->header); - return BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems); + int nritems = btrfs_header_nritems(leaf); + int ret; + ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems); + if (ret < 0) { + printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n", + ret, BTRFS_LEAF_DATA_SIZE(root), + leaf_space_used(leaf, 0, nritems), nritems); + } + return ret; } /* @@ -1307,12 +1391,10 @@ int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf) static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size) { - struct buffer_head *left_buf = path->nodes[0]; - struct btrfs_leaf *left = btrfs_buffer_leaf(left_buf); - struct btrfs_leaf *right; - struct buffer_head *right_buf; - struct buffer_head *upper; - struct btrfs_node *upper_node; + struct extent_buffer *left = path->nodes[0]; + struct extent_buffer 
*right; + struct extent_buffer *upper; + struct btrfs_disk_key disk_key; int slot; int i; int free_space; @@ -1321,6 +1403,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_item *item; u32 left_nritems; u32 right_nritems; + u32 data_end; int ret; slot = path->slots[1]; @@ -1328,102 +1411,109 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } upper = path->nodes[1]; - upper_node = btrfs_buffer_node(upper); - if (slot >= btrfs_header_nritems(&upper_node->header) - 1) { + if (slot >= btrfs_header_nritems(upper) - 1) return 1; - } - right_buf = read_tree_block(root, - btrfs_node_blockptr(btrfs_buffer_node(upper), slot + 1)); - right = btrfs_buffer_leaf(right_buf); + + right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1)); free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); return 1; } + /* cow and double check */ - ret = btrfs_cow_block(trans, root, right_buf, upper, - slot + 1, &right_buf); + ret = btrfs_cow_block(trans, root, right, upper, + slot + 1, &right); if (ret) { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); return 1; } - right = btrfs_buffer_leaf(right_buf); free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); return 1; } - left_nritems = btrfs_header_nritems(&left->header); + left_nritems = btrfs_header_nritems(left); if (left_nritems == 0) { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); return 1; } + for (i = left_nritems - 1; i >= 1; i--) { - item = left->items + i; + item = btrfs_item_nr(left, i); if (path->slots[0] == i) push_space += data_size + sizeof(*item); - if (btrfs_item_size(item) + sizeof(*item) + push_space > + if (btrfs_item_size(left, item) + 
sizeof(*item) + push_space > free_space) break; push_items++; - push_space += btrfs_item_size(item) + sizeof(*item); + push_space += btrfs_item_size(left, item) + sizeof(*item); } + if (push_items == 0) { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); return 1; } + if (push_items == left_nritems) WARN_ON(1); - right_nritems = btrfs_header_nritems(&right->header); + /* push left to right */ - push_space = btrfs_item_end(left->items + left_nritems - push_items); + right_nritems = btrfs_header_nritems(right); + push_space = btrfs_item_end_nr(left, left_nritems - push_items); push_space -= leaf_data_end(root, left); + /* make room in the right data area */ - btrfs_memmove(root, right, btrfs_leaf_data(right) + - leaf_data_end(root, right) - push_space, - btrfs_leaf_data(right) + - leaf_data_end(root, right), BTRFS_LEAF_DATA_SIZE(root) - - leaf_data_end(root, right)); + data_end = leaf_data_end(root, right); + memmove_extent_buffer(right, + btrfs_leaf_data(right) + data_end - push_space, + btrfs_leaf_data(right) + data_end, + BTRFS_LEAF_DATA_SIZE(root) - data_end); + /* copy from the left data area */ - btrfs_memcpy(root, right, btrfs_leaf_data(right) + + copy_extent_buffer(right, left, btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - push_space, btrfs_leaf_data(left) + leaf_data_end(root, left), push_space); - btrfs_memmove(root, right, right->items + push_items, right->items, - right_nritems * sizeof(struct btrfs_item)); + + memmove_extent_buffer(right, btrfs_item_nr_offset(push_items), + btrfs_item_nr_offset(0), + right_nritems * sizeof(struct btrfs_item)); + /* copy the items from left to right */ - btrfs_memcpy(root, right, right->items, left->items + - left_nritems - push_items, - push_items * sizeof(struct btrfs_item)); + copy_extent_buffer(right, left, btrfs_item_nr_offset(0), + btrfs_item_nr_offset(left_nritems - push_items), + push_items * sizeof(struct btrfs_item)); /* update the item pointers */ right_nritems += push_items; - 
btrfs_set_header_nritems(&right->header, right_nritems); + btrfs_set_header_nritems(right, right_nritems); push_space = BTRFS_LEAF_DATA_SIZE(root); for (i = 0; i < right_nritems; i++) { - btrfs_set_item_offset(right->items + i, push_space - - btrfs_item_size(right->items + i)); - push_space = btrfs_item_offset(right->items + i); + item = btrfs_item_nr(right, i); + btrfs_set_item_offset(right, item, push_space - + btrfs_item_size(right, item)); + push_space = btrfs_item_offset(right, item); } left_nritems -= push_items; - btrfs_set_header_nritems(&left->header, left_nritems); + btrfs_set_header_nritems(left, left_nritems); - btrfs_mark_buffer_dirty(left_buf); - btrfs_mark_buffer_dirty(right_buf); + btrfs_mark_buffer_dirty(left); + btrfs_mark_buffer_dirty(right); - btrfs_memcpy(root, upper_node, &upper_node->ptrs[slot + 1].key, - &right->items[0].key, sizeof(struct btrfs_disk_key)); + btrfs_item_key(right, &disk_key, 0); + btrfs_set_node_key(upper, &disk_key, slot + 1); btrfs_mark_buffer_dirty(upper); /* then fixup the leaf pointer in the path */ if (path->slots[0] >= left_nritems) { path->slots[0] -= left_nritems; - btrfs_block_release(root, path->nodes[0]); - path->nodes[0] = right_buf; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; path->slots[1] += 1; } else { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); } if (path->nodes[1]) check_node(root, path, 1); @@ -1436,10 +1526,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size) { - struct buffer_head *right_buf = path->nodes[0]; - struct btrfs_leaf *right = btrfs_buffer_leaf(right_buf); - struct buffer_head *t; - struct btrfs_leaf *left; + struct btrfs_disk_key disk_key; + struct extent_buffer *right = path->nodes[0]; + struct extent_buffer *left; int slot; int i; int free_space; @@ -1447,119 +1536,128 @@ static int 
push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root int push_items = 0; struct btrfs_item *item; u32 old_left_nritems; + u32 right_nritems; int ret = 0; int wret; slot = path->slots[1]; - if (slot == 0) { + if (slot == 0) return 1; - } - if (!path->nodes[1]) { + if (!path->nodes[1]) return 1; - } - t = read_tree_block(root, - btrfs_node_blockptr(btrfs_buffer_node(path->nodes[1]), slot - 1)); - left = btrfs_buffer_leaf(t); + + left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1], + slot - 1)); free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { - btrfs_block_release(root, t); + free_extent_buffer(left); return 1; } /* cow and double check */ - ret = btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t); + ret = btrfs_cow_block(trans, root, left, + path->nodes[1], slot - 1, &left); if (ret) { /* we hit -ENOSPC, but it isn't fatal here */ - btrfs_block_release(root, t); + free_extent_buffer(left); return 1; } - left = btrfs_buffer_leaf(t); free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { - btrfs_block_release(root, t); + free_extent_buffer(left); return 1; } - if (btrfs_header_nritems(&right->header) == 0) { - btrfs_block_release(root, t); + right_nritems = btrfs_header_nritems(right); + if (right_nritems == 0) { + free_extent_buffer(left); return 1; } - for (i = 0; i < btrfs_header_nritems(&right->header) - 1; i++) { - item = right->items + i; + for (i = 0; i < right_nritems - 1; i++) { + item = btrfs_item_nr(right, i); if (path->slots[0] == i) push_space += data_size + sizeof(*item); - if (btrfs_item_size(item) + sizeof(*item) + push_space > + if (btrfs_item_size(right, item) + sizeof(*item) + push_space > free_space) break; push_items++; - push_space += btrfs_item_size(item) + sizeof(*item); + push_space += btrfs_item_size(right, item) + sizeof(*item); } if (push_items == 0) { - btrfs_block_release(root, t); + 
free_extent_buffer(left); return 1; } - if (push_items == btrfs_header_nritems(&right->header)) + if (push_items == btrfs_header_nritems(right)) WARN_ON(1); + /* push data from right to left */ - btrfs_memcpy(root, left, left->items + - btrfs_header_nritems(&left->header), - right->items, push_items * sizeof(struct btrfs_item)); + copy_extent_buffer(left, right, + btrfs_item_nr_offset(btrfs_header_nritems(left)), + btrfs_item_nr_offset(0), + push_items * sizeof(struct btrfs_item)); + push_space = BTRFS_LEAF_DATA_SIZE(root) - - btrfs_item_offset(right->items + push_items -1); - btrfs_memcpy(root, left, btrfs_leaf_data(left) + + btrfs_item_offset_nr(right, push_items -1); + + copy_extent_buffer(left, right, btrfs_leaf_data(left) + leaf_data_end(root, left) - push_space, btrfs_leaf_data(right) + - btrfs_item_offset(right->items + push_items - 1), + btrfs_item_offset_nr(right, push_items - 1), push_space); - old_left_nritems = btrfs_header_nritems(&left->header); + old_left_nritems = btrfs_header_nritems(left); BUG_ON(old_left_nritems < 0); for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { - u32 ioff = btrfs_item_offset(left->items + i); - btrfs_set_item_offset(left->items + i, ioff - - (BTRFS_LEAF_DATA_SIZE(root) - - btrfs_item_offset(left->items + - old_left_nritems - 1))); + u32 ioff; + item = btrfs_item_nr(left, i); + ioff = btrfs_item_offset(left, item); + btrfs_set_item_offset(left, item, + ioff - (BTRFS_LEAF_DATA_SIZE(root) - + btrfs_item_offset_nr(left, old_left_nritems - 1))); } - btrfs_set_header_nritems(&left->header, old_left_nritems + push_items); + btrfs_set_header_nritems(left, old_left_nritems + push_items); /* fixup right node */ - push_space = btrfs_item_offset(right->items + push_items - 1) - - leaf_data_end(root, right); - btrfs_memmove(root, right, btrfs_leaf_data(right) + - BTRFS_LEAF_DATA_SIZE(root) - push_space, - btrfs_leaf_data(right) + - leaf_data_end(root, right), push_space); - btrfs_memmove(root, right, right->items, 
right->items + push_items, - (btrfs_header_nritems(&right->header) - push_items) * - sizeof(struct btrfs_item)); - btrfs_set_header_nritems(&right->header, - btrfs_header_nritems(&right->header) - - push_items); + push_space = btrfs_item_offset_nr(right, push_items - 1) - + leaf_data_end(root, right); + memmove_extent_buffer(right, btrfs_leaf_data(right) + + BTRFS_LEAF_DATA_SIZE(root) - push_space, + btrfs_leaf_data(right) + + leaf_data_end(root, right), push_space); + + memmove_extent_buffer(right, btrfs_item_nr_offset(0), + btrfs_item_nr_offset(push_items), + (btrfs_header_nritems(right) - push_items) * + sizeof(struct btrfs_item)); + + right_nritems = btrfs_header_nritems(right) - push_items; + btrfs_set_header_nritems(right, right_nritems); push_space = BTRFS_LEAF_DATA_SIZE(root); - for (i = 0; i < btrfs_header_nritems(&right->header); i++) { - btrfs_set_item_offset(right->items + i, push_space - - btrfs_item_size(right->items + i)); - push_space = btrfs_item_offset(right->items + i); + for (i = 0; i < right_nritems; i++) { + item = btrfs_item_nr(right, i); + btrfs_set_item_offset(right, item, push_space - + btrfs_item_size(right, item)); + push_space = btrfs_item_offset(right, item); } - btrfs_mark_buffer_dirty(t); - btrfs_mark_buffer_dirty(right_buf); + btrfs_mark_buffer_dirty(left); + btrfs_mark_buffer_dirty(right); - wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1); + btrfs_item_key(right, &disk_key, 0); + wret = fixup_low_keys(trans, root, path, &disk_key, 1); if (wret) ret = wret; /* then fixup the leaf pointer in the path */ if (path->slots[0] < push_items) { path->slots[0] += old_left_nritems; - btrfs_block_release(root, path->nodes[0]); - path->nodes[0] = t; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = left; path->slots[1] -= 1; } else { - btrfs_block_release(root, t); + free_extent_buffer(left); path->slots[0] -= push_items; } BUG_ON(path->slots[0] < 0); @@ -1578,13 +1676,11 @@ static int split_leaf(struct 
btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *ins_key, struct btrfs_path *path, int data_size) { - struct buffer_head *l_buf; - struct btrfs_leaf *l; + struct extent_buffer *l; u32 nritems; int mid; int slot; - struct btrfs_leaf *right; - struct buffer_head *right_buffer; + struct extent_buffer *right; int space_needed = data_size + sizeof(struct btrfs_item); int data_copy_size; int rt_data_off; @@ -1603,8 +1699,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root if (wret < 0) return wret; } - l_buf = path->nodes[0]; - l = btrfs_buffer_leaf(l_buf); + l = path->nodes[0]; /* did the pushes work? */ if (btrfs_leaf_free_space(root, l) >= @@ -1617,36 +1712,38 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } slot = path->slots[0]; - nritems = btrfs_header_nritems(&l->header); + nritems = btrfs_header_nritems(l); mid = (nritems + 1)/ 2; - right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0); - if (IS_ERR(right_buffer)) - return PTR_ERR(right_buffer); - - right = btrfs_buffer_leaf(right_buffer); - memset(&right->header, 0, sizeof(right->header)); - btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer)); - btrfs_set_header_generation(&right->header, trans->transid); - btrfs_set_header_owner(&right->header, root->root_key.objectid); - btrfs_set_header_level(&right->header, 0); - memcpy(right->header.fsid, root->fs_info->disk_super->fsid, - sizeof(right->header.fsid)); + right = btrfs_alloc_free_block(trans, root, + extent_buffer_blocknr(l), 0); + if (IS_ERR(right)) + return PTR_ERR(right); + + memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); + btrfs_set_header_blocknr(right, extent_buffer_blocknr(right)); + btrfs_set_header_generation(right, trans->transid); + btrfs_set_header_owner(right, root->root_key.objectid); + btrfs_set_header_level(right, 0); + write_extent_buffer(right, root->fs_info->fsid, + (unsigned 
long)btrfs_header_fsid(right), + BTRFS_FSID_SIZE); + if (mid <= slot) { if (nritems == 1 || leaf_space_used(l, mid, nritems - mid) + space_needed > BTRFS_LEAF_DATA_SIZE(root)) { if (slot >= nritems) { btrfs_cpu_key_to_disk(&disk_key, ins_key); - btrfs_set_header_nritems(&right->header, 0); + btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, &disk_key, - bh_blocknr(right_buffer), + extent_buffer_blocknr(right), path->slots[1] + 1, 1); if (wret) ret = wret; - btrfs_block_release(root, path->nodes[0]); - path->nodes[0] = right_buffer; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; path->slots[0] = 0; path->slots[1] += 1; return ret; @@ -1659,15 +1756,15 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root BTRFS_LEAF_DATA_SIZE(root)) { if (slot == 0) { btrfs_cpu_key_to_disk(&disk_key, ins_key); - btrfs_set_header_nritems(&right->header, 0); + btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, &disk_key, - bh_blocknr(right_buffer), + extent_buffer_blocknr(right), path->slots[1], 1); if (wret) ret = wret; - btrfs_block_release(root, path->nodes[0]); - path->nodes[0] = right_buffer; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; path->slots[0] = 0; if (path->slots[1] == 0) { wret = fixup_low_keys(trans, root, @@ -1681,61 +1778,74 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root double_split = 1; } } - btrfs_set_header_nritems(&right->header, nritems - mid); - data_copy_size = btrfs_item_end(l->items + mid) - - leaf_data_end(root, l); - btrfs_memcpy(root, right, right->items, l->items + mid, - (nritems - mid) * sizeof(struct btrfs_item)); - btrfs_memcpy(root, right, + nritems = nritems - mid; + btrfs_set_header_nritems(right, nritems); + data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l); + + copy_extent_buffer(right, l, btrfs_item_nr_offset(0), + btrfs_item_nr_offset(mid), + nritems * sizeof(struct btrfs_item)); + + 
copy_extent_buffer(right, l, btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - data_copy_size, btrfs_leaf_data(l) + leaf_data_end(root, l), data_copy_size); + rt_data_off = BTRFS_LEAF_DATA_SIZE(root) - - btrfs_item_end(l->items + mid); + btrfs_item_end_nr(l, mid); - for (i = 0; i < btrfs_header_nritems(&right->header); i++) { - u32 ioff = btrfs_item_offset(right->items + i); - btrfs_set_item_offset(right->items + i, ioff + rt_data_off); + for (i = 0; i < nritems; i++) { + struct btrfs_item *item = btrfs_item_nr(right, i); + u32 ioff = btrfs_item_offset(right, item); + btrfs_set_item_offset(right, item, ioff + rt_data_off); } - btrfs_set_header_nritems(&l->header, mid); + btrfs_set_header_nritems(l, mid); ret = 0; - wret = insert_ptr(trans, root, path, &right->items[0].key, - bh_blocknr(right_buffer), path->slots[1] + 1, 1); + btrfs_item_key(right, &disk_key, 0); + wret = insert_ptr(trans, root, path, &disk_key, + extent_buffer_blocknr(right), path->slots[1] + 1, 1); if (wret) ret = wret; - btrfs_mark_buffer_dirty(right_buffer); - btrfs_mark_buffer_dirty(l_buf); + + btrfs_mark_buffer_dirty(right); + btrfs_mark_buffer_dirty(l); BUG_ON(path->slots[0] != slot); + if (mid <= slot) { - btrfs_block_release(root, path->nodes[0]); - path->nodes[0] = right_buffer; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; path->slots[0] -= mid; path->slots[1] += 1; } else - btrfs_block_release(root, right_buffer); + free_extent_buffer(right); + BUG_ON(path->slots[0] < 0); check_node(root, path, 1); + check_leaf(root, path, 0); if (!double_split) return ret; - right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0); - if (IS_ERR(right_buffer)) - return PTR_ERR(right_buffer); - - right = btrfs_buffer_leaf(right_buffer); - memset(&right->header, 0, sizeof(right->header)); - btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer)); - btrfs_set_header_generation(&right->header, trans->transid); - btrfs_set_header_owner(&right->header, 
root->root_key.objectid); - btrfs_set_header_level(&right->header, 0); - memcpy(right->header.fsid, root->fs_info->disk_super->fsid, - sizeof(right->header.fsid)); + + right = btrfs_alloc_free_block(trans, root, + extent_buffer_blocknr(l), 0); + if (IS_ERR(right)) + return PTR_ERR(right); + + memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); + btrfs_set_header_blocknr(right, extent_buffer_blocknr(right)); + btrfs_set_header_generation(right, trans->transid); + btrfs_set_header_owner(right, root->root_key.objectid); + btrfs_set_header_level(right, 0); + write_extent_buffer(right, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(right), + BTRFS_FSID_SIZE); + btrfs_cpu_key_to_disk(&disk_key, ins_key); - btrfs_set_header_nritems(&right->header, 0); + btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, &disk_key, - bh_blocknr(right_buffer), + extent_buffer_blocknr(right), path->slots[1], 1); if (wret) ret = wret; @@ -1744,8 +1854,8 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root if (wret) ret = wret; } - btrfs_block_release(root, path->nodes[0]); - path->nodes[0] = right_buffer; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; path->slots[0] = 0; check_node(root, path, 1); check_leaf(root, path, 0); @@ -1760,8 +1870,8 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, int ret = 0; int slot; int slot_orig; - struct btrfs_leaf *leaf; - struct buffer_head *leaf_buf; + struct extent_buffer *leaf; + struct btrfs_item *item; u32 nritems; unsigned int data_end; unsigned int old_data_start; @@ -1770,15 +1880,14 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, int i; slot_orig = path->slots[0]; - leaf_buf = path->nodes[0]; - leaf = btrfs_buffer_leaf(leaf_buf); + leaf = path->nodes[0]; - nritems = btrfs_header_nritems(&leaf->header); + nritems = btrfs_header_nritems(leaf); data_end = leaf_data_end(root, leaf); slot = path->slots[0]; - old_data_start = 
btrfs_item_offset(leaf->items + slot); - old_size = btrfs_item_size(leaf->items + slot); + old_data_start = btrfs_item_offset_nr(leaf, slot); + old_size = btrfs_item_size_nr(leaf, slot); BUG_ON(old_size <= new_size); size_diff = old_size - new_size; @@ -1790,32 +1899,38 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, */ /* first correct the data pointers */ for (i = slot; i < nritems; i++) { - u32 ioff = btrfs_item_offset(leaf->items + i); - btrfs_set_item_offset(leaf->items + i, - ioff + size_diff); + u32 ioff; + item = btrfs_item_nr(leaf, i); + ioff = btrfs_item_offset(leaf, item); + btrfs_set_item_offset(leaf, item, ioff + size_diff); } /* shift the data */ - btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + + memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end + size_diff, btrfs_leaf_data(leaf) + data_end, old_data_start + new_size - data_end); - btrfs_set_item_size(leaf->items + slot, new_size); - btrfs_mark_buffer_dirty(leaf_buf); + + item = btrfs_item_nr(leaf, slot); + btrfs_set_item_size(leaf, item, new_size); + btrfs_mark_buffer_dirty(leaf); ret = 0; - if (btrfs_leaf_free_space(root, leaf) < 0) + if (btrfs_leaf_free_space(root, leaf) < 0) { + btrfs_print_leaf(root, leaf); BUG(); + } check_leaf(root, path, 0); return ret; } -int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, u32 data_size) +int btrfs_extend_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + u32 data_size) { int ret = 0; int slot; int slot_orig; - struct btrfs_leaf *leaf; - struct buffer_head *leaf_buf; + struct extent_buffer *leaf; + struct btrfs_item *item; u32 nritems; unsigned int data_end; unsigned int old_data; @@ -1823,16 +1938,17 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root int i; slot_orig = path->slots[0]; - leaf_buf = path->nodes[0]; - leaf = btrfs_buffer_leaf(leaf_buf); + leaf = path->nodes[0]; - nritems = 
btrfs_header_nritems(&leaf->header); + nritems = btrfs_header_nritems(leaf); data_end = leaf_data_end(root, leaf); - if (btrfs_leaf_free_space(root, leaf) < data_size) + if (btrfs_leaf_free_space(root, leaf) < data_size) { + btrfs_print_leaf(root, leaf); BUG(); + } slot = path->slots[0]; - old_data = btrfs_item_end(leaf->items + slot); + old_data = btrfs_item_end_nr(leaf, slot); BUG_ON(slot < 0); BUG_ON(slot >= nritems); @@ -1842,22 +1958,28 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root */ /* first correct the data pointers */ for (i = slot; i < nritems; i++) { - u32 ioff = btrfs_item_offset(leaf->items + i); - btrfs_set_item_offset(leaf->items + i, - ioff - data_size); + u32 ioff; + item = btrfs_item_nr(leaf, i); + ioff = btrfs_item_offset(leaf, item); + btrfs_set_item_offset(leaf, item, ioff - data_size); } + /* shift the data */ - btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + + memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end - data_size, btrfs_leaf_data(leaf) + data_end, old_data - data_end); + data_end = old_data; - old_size = btrfs_item_size(leaf->items + slot); - btrfs_set_item_size(leaf->items + slot, old_size + data_size); - btrfs_mark_buffer_dirty(leaf_buf); + old_size = btrfs_item_size_nr(leaf, slot); + item = btrfs_item_nr(leaf, slot); + btrfs_set_item_size(leaf, item, old_size + data_size); + btrfs_mark_buffer_dirty(leaf); ret = 0; - if (btrfs_leaf_free_space(root, leaf) < 0) + if (btrfs_leaf_free_space(root, leaf) < 0) { + btrfs_print_leaf(root, leaf); BUG(); + } check_leaf(root, path, 0); return ret; } @@ -1866,15 +1988,16 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root * Given a key and some data, insert an item into the tree. * This does all the path init required, making room in the tree if needed. 
*/ -int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, struct btrfs_key - *cpu_key, u32 data_size) +int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *cpu_key, u32 data_size) { + struct extent_buffer *leaf; + struct btrfs_item *item; int ret = 0; int slot; int slot_orig; - struct btrfs_leaf *leaf; - struct buffer_head *leaf_buf; u32 nritems; unsigned int data_end; struct btrfs_disk_key disk_key; @@ -1884,6 +2007,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root /* create a root if there isn't one */ if (!root->node) BUG(); + ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1); if (ret == 0) { return -EEXIST; @@ -1892,57 +2016,68 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root goto out; slot_orig = path->slots[0]; - leaf_buf = path->nodes[0]; - leaf = btrfs_buffer_leaf(leaf_buf); + leaf = path->nodes[0]; - nritems = btrfs_header_nritems(&leaf->header); + nritems = btrfs_header_nritems(leaf); data_end = leaf_data_end(root, leaf); if (btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item) + data_size) { BUG(); } + slot = path->slots[0]; BUG_ON(slot < 0); + if (slot != nritems) { int i; - unsigned int old_data = btrfs_item_end(leaf->items + slot); + unsigned int old_data = btrfs_item_end_nr(leaf, slot); + if (old_data < data_end) { + btrfs_print_leaf(root, leaf); + printk("slot %d old_data %d data_end %d\n", + slot, old_data, data_end); + BUG_ON(1); + } /* * item0..itemN ... dataN.offset..dataN.size .. 
data0.size */ /* first correct the data pointers */ for (i = slot; i < nritems; i++) { - u32 ioff = btrfs_item_offset(leaf->items + i); - btrfs_set_item_offset(leaf->items + i, - ioff - data_size); + u32 ioff; + item = btrfs_item_nr(leaf, i); + ioff = btrfs_item_offset(leaf, item); + btrfs_set_item_offset(leaf, item, ioff - data_size); } /* shift the items */ - btrfs_memmove(root, leaf, leaf->items + slot + 1, - leaf->items + slot, + memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), + btrfs_item_nr_offset(slot), (nritems - slot) * sizeof(struct btrfs_item)); /* shift the data */ - btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + + memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end - data_size, btrfs_leaf_data(leaf) + data_end, old_data - data_end); data_end = old_data; } + /* setup the item for the new data */ - btrfs_memcpy(root, leaf, &leaf->items[slot].key, &disk_key, - sizeof(struct btrfs_disk_key)); - btrfs_set_item_offset(leaf->items + slot, data_end - data_size); - btrfs_set_item_size(leaf->items + slot, data_size); - btrfs_set_header_nritems(&leaf->header, nritems + 1); - btrfs_mark_buffer_dirty(leaf_buf); + btrfs_set_item_key(leaf, &disk_key, slot); + item = btrfs_item_nr(leaf, slot); + btrfs_set_item_offset(leaf, item, data_end - data_size); + btrfs_set_item_size(leaf, item, data_size); + btrfs_set_header_nritems(leaf, nritems + 1); + btrfs_mark_buffer_dirty(leaf); ret = 0; if (slot == 0) ret = fixup_low_keys(trans, root, path, &disk_key, 1); - if (btrfs_leaf_free_space(root, leaf) < 0) + if (btrfs_leaf_free_space(root, leaf) < 0) { + btrfs_print_leaf(root, leaf); BUG(); + } check_leaf(root, path, 0); out: return ret; @@ -1958,17 +2093,17 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root { int ret = 0; struct btrfs_path *path; - u8 *ptr; + struct extent_buffer *leaf; + unsigned long ptr; path = btrfs_alloc_path(); BUG_ON(!path); ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); if (!ret) 
{ - ptr = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], u8); - btrfs_memcpy(root, path->nodes[0]->b_data, - ptr, data, data_size); - btrfs_mark_buffer_dirty(path->nodes[0]); + leaf = path->nodes[0]; + ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); + write_extent_buffer(leaf, data, ptr, data_size); + btrfs_mark_buffer_dirty(leaf); } btrfs_free_path(path); return ret; @@ -1984,30 +2119,30 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot) { - struct btrfs_node *node; - struct buffer_head *parent = path->nodes[level]; + struct extent_buffer *parent = path->nodes[level]; u32 nritems; int ret = 0; int wret; - node = btrfs_buffer_node(parent); - nritems = btrfs_header_nritems(&node->header); + nritems = btrfs_header_nritems(parent); if (slot != nritems -1) { - btrfs_memmove(root, node, node->ptrs + slot, - node->ptrs + slot + 1, + memmove_extent_buffer(parent, + btrfs_node_key_ptr_offset(slot), + btrfs_node_key_ptr_offset(slot + 1), sizeof(struct btrfs_key_ptr) * (nritems - slot - 1)); } nritems--; - btrfs_set_header_nritems(&node->header, nritems); + btrfs_set_header_nritems(parent, nritems); if (nritems == 0 && parent == root->node) { - struct btrfs_header *header = btrfs_buffer_header(root->node); - BUG_ON(btrfs_header_level(header) != 1); + BUG_ON(btrfs_header_level(root->node) != 1); /* just turn the root into a leaf and break */ - btrfs_set_header_level(header, 0); + btrfs_set_header_level(root->node, 0); } else if (slot == 0) { - wret = fixup_low_keys(trans, root, path, &node->ptrs[0].key, - level + 1); + struct btrfs_disk_key disk_key; + + btrfs_node_key(parent, &disk_key, 0); + wret = fixup_low_keys(trans, root, path, &disk_key, level + 1); if (wret) ret = wret; } @@ -2023,59 +2158,67 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path) 
{ int slot; - struct btrfs_leaf *leaf; - struct buffer_head *leaf_buf; + struct extent_buffer *leaf; + struct btrfs_item *item; int doff; int dsize; int ret = 0; int wret; u32 nritems; - leaf_buf = path->nodes[0]; - leaf = btrfs_buffer_leaf(leaf_buf); + leaf = path->nodes[0]; slot = path->slots[0]; - doff = btrfs_item_offset(leaf->items + slot); - dsize = btrfs_item_size(leaf->items + slot); - nritems = btrfs_header_nritems(&leaf->header); + doff = btrfs_item_offset_nr(leaf, slot); + dsize = btrfs_item_size_nr(leaf, slot); + nritems = btrfs_header_nritems(leaf); if (slot != nritems - 1) { int i; int data_end = leaf_data_end(root, leaf); - btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + + + memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end + dsize, btrfs_leaf_data(leaf) + data_end, doff - data_end); + for (i = slot + 1; i < nritems; i++) { - u32 ioff = btrfs_item_offset(leaf->items + i); - btrfs_set_item_offset(leaf->items + i, ioff + dsize); + u32 ioff; + item = btrfs_item_nr(leaf, i); + ioff = btrfs_item_offset(leaf, item); + btrfs_set_item_offset(leaf, item, ioff + dsize); } - btrfs_memmove(root, leaf, leaf->items + slot, - leaf->items + slot + 1, + memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot), + btrfs_item_nr_offset(slot + 1), sizeof(struct btrfs_item) * (nritems - slot - 1)); } - btrfs_set_header_nritems(&leaf->header, nritems - 1); + btrfs_set_header_nritems(leaf, nritems - 1); nritems--; + /* delete the leaf if we've emptied it */ if (nritems == 0) { - if (leaf_buf == root->node) { - btrfs_set_header_level(&leaf->header, 0); + if (leaf == root->node) { + btrfs_set_header_level(leaf, 0); } else { - clean_tree_block(trans, root, leaf_buf); - wait_on_buffer(leaf_buf); + clean_tree_block(trans, root, leaf); + wait_on_tree_block_writeback(root, leaf); wret = del_ptr(trans, root, path, 1, path->slots[1]); if (wret) ret = wret; wret = btrfs_free_extent(trans, root, - bh_blocknr(leaf_buf), 1, 1); + extent_buffer_blocknr(leaf), + 1, 1); if 
(wret) ret = wret; } } else { int used = leaf_space_used(leaf, 0, nritems); if (slot == 0) { + struct btrfs_disk_key disk_key; + + btrfs_item_key(leaf, &disk_key, 0); wret = fixup_low_keys(trans, root, path, - &leaf->items[0].key, 1); + &disk_key, 1); if (wret) ret = wret; } @@ -2087,34 +2230,40 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, * for possible call to del_ptr below */ slot = path->slots[1]; - get_bh(leaf_buf); + extent_buffer_get(leaf); + wret = push_leaf_left(trans, root, path, 1); if (wret < 0 && wret != -ENOSPC) ret = wret; - if (path->nodes[0] == leaf_buf && - btrfs_header_nritems(&leaf->header)) { + + if (path->nodes[0] == leaf && + btrfs_header_nritems(leaf)) { wret = push_leaf_right(trans, root, path, 1); if (wret < 0 && wret != -ENOSPC) ret = wret; } - if (btrfs_header_nritems(&leaf->header) == 0) { - u64 blocknr = bh_blocknr(leaf_buf); - clean_tree_block(trans, root, leaf_buf); - wait_on_buffer(leaf_buf); + + if (btrfs_header_nritems(leaf) == 0) { + u64 blocknr = extent_buffer_blocknr(leaf); + + clean_tree_block(trans, root, leaf); + wait_on_tree_block_writeback(root, leaf); + wret = del_ptr(trans, root, path, 1, slot); if (wret) ret = wret; - btrfs_block_release(root, leaf_buf); + + free_extent_buffer(leaf); wret = btrfs_free_extent(trans, root, blocknr, 1, 1); if (wret) ret = wret; } else { - btrfs_mark_buffer_dirty(leaf_buf); - btrfs_block_release(root, leaf_buf); + btrfs_mark_buffer_dirty(leaf); + free_extent_buffer(leaf); } } else { - btrfs_mark_buffer_dirty(leaf_buf); + btrfs_mark_buffer_dirty(leaf); } } return ret; @@ -2130,25 +2279,27 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) int slot; int level = 1; u64 blocknr; - struct buffer_head *c; - struct btrfs_node *c_node; - struct buffer_head *next = NULL; + struct extent_buffer *c; + struct extent_buffer *next = NULL; while(level < BTRFS_MAX_LEVEL) { if (!path->nodes[level]) return 1; + slot = path->slots[level] + 1; c = 
path->nodes[level]; - c_node = btrfs_buffer_node(c); - if (slot >= btrfs_header_nritems(&c_node->header)) { + if (slot >= btrfs_header_nritems(c)) { level++; continue; } - blocknr = btrfs_node_blockptr(c_node, slot); + + blocknr = btrfs_node_blockptr(c, slot); if (next) - btrfs_block_release(root, next); + free_extent_buffer(next); + if (path->reada) reada_for_search(root, path, level, slot); + next = read_tree_block(root, blocknr); break; } @@ -2156,15 +2307,14 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) while(1) { level--; c = path->nodes[level]; - btrfs_block_release(root, c); + free_extent_buffer(c); path->nodes[level] = next; path->slots[level] = 0; if (!level) break; if (path->reada) reada_for_search(root, path, level, 0); - next = read_tree_block(root, - btrfs_node_blockptr(btrfs_buffer_node(next), 0)); + next = read_tree_block(root, btrfs_node_blockptr(next, 0)); } return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 800a3499cc3f..c4b829806855 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -20,10 +20,10 @@ #define __BTRFS__ #include -#include #include #include #include "bit-radix.h" +#include "extent_map.h" struct btrfs_trans_handle; struct btrfs_transaction; @@ -32,7 +32,7 @@ extern struct kmem_cache *btrfs_transaction_cachep; extern struct kmem_cache *btrfs_bit_radix_cachep; extern struct kmem_cache *btrfs_path_cachep; -#define BTRFS_MAGIC "_BtRfS_M" +#define BTRFS_MAGIC "_B2RfS_M" #define BTRFS_ROOT_TREE_OBJECTID 1ULL #define BTRFS_EXTENT_TREE_OBJECTID 2ULL @@ -78,41 +78,41 @@ extern struct kmem_cache *btrfs_path_cachep; */ struct btrfs_disk_key { __le64 objectid; - __le32 flags; + u8 type; __le64 offset; } __attribute__ ((__packed__)); struct btrfs_key { u64 objectid; - u32 flags; + u8 type; u64 offset; } __attribute__ ((__packed__)); +#define BTRFS_FSID_SIZE 16 /* * every tree block (leaf or node) starts with this header. 
*/ struct btrfs_header { u8 csum[BTRFS_CSUM_SIZE]; - u8 fsid[16]; /* FS specific uuid */ + u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ __le64 blocknr; /* which block this node is supposed to live in */ __le64 generation; __le64 owner; - __le16 nritems; + __le32 nritems; __le16 flags; u8 level; } __attribute__ ((__packed__)); #define BTRFS_MAX_LEVEL 8 -#define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->blocksize - \ +#define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->nodesize - \ sizeof(struct btrfs_header)) / \ (sizeof(struct btrfs_disk_key) + sizeof(u64))) #define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header)) -#define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->blocksize)) +#define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->leafsize)) #define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ sizeof(struct btrfs_item) - \ sizeof(struct btrfs_file_extent_item)) -struct buffer_head; /* * the super block basically lists the main trees of the FS * it currently lacks any block count etc etc @@ -128,7 +128,9 @@ struct btrfs_super_block { __le64 total_blocks; __le64 blocks_used; __le64 root_dir_objectid; - __le32 blocksize; + __le32 sectorsize; + __le32 nodesize; + __le32 leafsize; } __attribute__ ((__packed__)); /* @@ -138,7 +140,7 @@ struct btrfs_super_block { struct btrfs_item { struct btrfs_disk_key key; __le32 offset; - __le16 size; + __le32 size; } __attribute__ ((__packed__)); /* @@ -176,7 +178,7 @@ struct btrfs_node { * used while walking the tree. 
*/ struct btrfs_path { - struct buffer_head *nodes[BTRFS_MAX_LEVEL]; + struct extent_buffer *nodes[BTRFS_MAX_LEVEL]; int slots[BTRFS_MAX_LEVEL]; int reada; int lowest_level; @@ -292,6 +294,7 @@ struct btrfs_block_group_cache { }; struct btrfs_fs_info { + u8 fsid[BTRFS_FSID_SIZE]; struct btrfs_root *extent_root; struct btrfs_root *tree_root; struct radix_tree_root fs_roots_radix; @@ -304,9 +307,8 @@ struct btrfs_fs_info { u64 generation; u64 last_trans_committed; struct btrfs_transaction *running_transaction; - struct btrfs_super_block *disk_super; struct btrfs_super_block super_copy; - struct buffer_head *sb_buffer; + struct extent_buffer *sb_buffer; struct super_block *sb; struct inode *btree_inode; struct mutex trans_mutex; @@ -325,8 +327,8 @@ struct btrfs_fs_info { * and for the extent tree extent_root root. */ struct btrfs_root { - struct buffer_head *node; - struct buffer_head *commit_root; + struct extent_buffer *node; + struct extent_buffer *commit_root; struct btrfs_root_item root_item; struct btrfs_key root_key; struct btrfs_fs_info *fs_info; @@ -336,7 +338,16 @@ struct btrfs_root { struct rw_semaphore snap_sem; u64 objectid; u64 last_trans; - u32 blocksize; + + /* data allocations are done in sectorsize units */ + u32 sectorsize; + + /* node allocations are done in nodesize units */ + u32 nodesize; + + /* leaf allocations are done in leafsize units */ + u32 leafsize; + u32 type; u64 highest_inode; u64 last_inode_alloc; @@ -347,12 +358,6 @@ struct btrfs_root { char *name; }; -/* the lower bits in the key flags defines the item type */ -#define BTRFS_KEY_TYPE_MAX 256 -#define BTRFS_KEY_TYPE_SHIFT 24 -#define BTRFS_KEY_TYPE_MASK (((u32)BTRFS_KEY_TYPE_MAX - 1) << \ - BTRFS_KEY_TYPE_SHIFT) - /* * inode items have the data typically returned from stat and store other * info about object characteristics. 
There is one for every file and dir in @@ -402,246 +407,253 @@ struct btrfs_root { */ #define BTRFS_STRING_ITEM_KEY 253 +/* some macros to generate set/get funcs for the struct fields. This + * assumes there is a lefoo_to_cpu for every type, so lets make a simple + * one for u8: + */ +#define le8_to_cpu(v) (v) +#define cpu_to_le8(v) (v) +#define __le8 u8 + +#define read_eb_member(eb, ptr, type, member, result) ( \ + read_extent_buffer(eb, (char *)(result), \ + ((unsigned long)(ptr)) + \ + offsetof(type, member), \ + sizeof(((type *)0)->member))) + +#define write_eb_member(eb, ptr, type, member, result) ( \ + write_extent_buffer(eb, (char *)(result), \ + ((unsigned long)(ptr)) + \ + offsetof(type, member), \ + sizeof(((type *)0)->member))) + +#define BTRFS_SETGET_FUNCS(name, type, member, bits) \ +static inline u##bits btrfs_##name(struct extent_buffer *eb, \ + type *s) \ +{ \ + __le##bits res; \ + read_eb_member(eb, s, type, member, &res); \ + return le##bits##_to_cpu(res); \ +} \ +static inline void btrfs_set_##name(struct extent_buffer *eb, \ + type *s, u##bits val) \ +{ \ + val = cpu_to_le##bits(val); \ + write_eb_member(eb, s, type, member, &val); \ +} + +#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ +static inline u##bits btrfs_##name(struct extent_buffer *eb) \ +{ \ + __le##bits res; \ + read_eb_member(eb, NULL, type, member, &res); \ + return le##bits##_to_cpu(res); \ +} \ +static inline void btrfs_set_##name(struct extent_buffer *eb, \ + u##bits val) \ +{ \ + val = cpu_to_le##bits(val); \ + write_eb_member(eb, NULL, type, member, &val); \ +} -static inline u64 btrfs_block_group_used(struct btrfs_block_group_item *bi) -{ - return le64_to_cpu(bi->used); -} - -static inline void btrfs_set_block_group_used(struct - btrfs_block_group_item *bi, - u64 val) -{ - bi->used = cpu_to_le64(val); -} - -static inline u64 btrfs_inode_generation(struct btrfs_inode_item *i) -{ - return le64_to_cpu(i->generation); -} - -static inline void 
btrfs_set_inode_generation(struct btrfs_inode_item *i, - u64 val) -{ - i->generation = cpu_to_le64(val); -} - -static inline u64 btrfs_inode_size(struct btrfs_inode_item *i) -{ - return le64_to_cpu(i->size); -} - -static inline void btrfs_set_inode_size(struct btrfs_inode_item *i, u64 val) -{ - i->size = cpu_to_le64(val); -} - -static inline u64 btrfs_inode_nblocks(struct btrfs_inode_item *i) -{ - return le64_to_cpu(i->nblocks); -} - -static inline void btrfs_set_inode_nblocks(struct btrfs_inode_item *i, u64 val) -{ - i->nblocks = cpu_to_le64(val); -} - -static inline u64 btrfs_inode_block_group(struct btrfs_inode_item *i) -{ - return le64_to_cpu(i->block_group); -} - -static inline void btrfs_set_inode_block_group(struct btrfs_inode_item *i, - u64 val) -{ - i->block_group = cpu_to_le64(val); -} - -static inline u32 btrfs_inode_nlink(struct btrfs_inode_item *i) -{ - return le32_to_cpu(i->nlink); -} - -static inline void btrfs_set_inode_nlink(struct btrfs_inode_item *i, u32 val) -{ - i->nlink = cpu_to_le32(val); -} - -static inline u32 btrfs_inode_uid(struct btrfs_inode_item *i) -{ - return le32_to_cpu(i->uid); -} - -static inline void btrfs_set_inode_uid(struct btrfs_inode_item *i, u32 val) -{ - i->uid = cpu_to_le32(val); -} - -static inline u32 btrfs_inode_gid(struct btrfs_inode_item *i) -{ - return le32_to_cpu(i->gid); -} - -static inline void btrfs_set_inode_gid(struct btrfs_inode_item *i, u32 val) -{ - i->gid = cpu_to_le32(val); -} - -static inline u32 btrfs_inode_mode(struct btrfs_inode_item *i) -{ - return le32_to_cpu(i->mode); -} - -static inline void btrfs_set_inode_mode(struct btrfs_inode_item *i, u32 val) -{ - i->mode = cpu_to_le32(val); +#define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ +static inline u##bits btrfs_##name(type *s) \ +{ \ + return le##bits##_to_cpu(s->member); \ +} \ +static inline void btrfs_set_##name(type *s, u##bits val) \ +{ \ + s->member = cpu_to_le##bits(val); \ } -static inline u32 btrfs_inode_rdev(struct 
btrfs_inode_item *i) -{ - return le32_to_cpu(i->rdev); -} +/* struct btrfs_block_group_item */ +BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item, + used, 64); +BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item, + used, 64); -static inline void btrfs_set_inode_rdev(struct btrfs_inode_item *i, u32 val) -{ - i->rdev = cpu_to_le32(val); -} +/* struct btrfs_inode_item */ +BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64); +BTRFS_SETGET_FUNCS(inode_size, struct btrfs_inode_item, size, 64); +BTRFS_SETGET_FUNCS(inode_nblocks, struct btrfs_inode_item, nblocks, 64); +BTRFS_SETGET_FUNCS(inode_block_group, struct btrfs_inode_item, block_group, 64); +BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32); +BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32); +BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32); +BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32); +BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 32); +BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 16); +BTRFS_SETGET_FUNCS(inode_compat_flags, struct btrfs_inode_item, + compat_flags, 16); -static inline u16 btrfs_inode_flags(struct btrfs_inode_item *i) +static inline struct btrfs_inode_timespec * +btrfs_inode_atime(struct btrfs_inode_item *inode_item) { - return le16_to_cpu(i->flags); + unsigned long ptr = (unsigned long)inode_item; + ptr += offsetof(struct btrfs_inode_item, atime); + return (struct btrfs_inode_timespec *)ptr; } -static inline void btrfs_set_inode_flags(struct btrfs_inode_item *i, u16 val) +static inline struct btrfs_inode_timespec * +btrfs_inode_mtime(struct btrfs_inode_item *inode_item) { - i->flags = cpu_to_le16(val); + unsigned long ptr = (unsigned long)inode_item; + ptr += offsetof(struct btrfs_inode_item, mtime); + return (struct btrfs_inode_timespec *)ptr; } -static inline u16 btrfs_inode_compat_flags(struct btrfs_inode_item *i) +static 
inline struct btrfs_inode_timespec * +btrfs_inode_ctime(struct btrfs_inode_item *inode_item) { - return le16_to_cpu(i->compat_flags); + unsigned long ptr = (unsigned long)inode_item; + ptr += offsetof(struct btrfs_inode_item, ctime); + return (struct btrfs_inode_timespec *)ptr; } -static inline void btrfs_set_inode_compat_flags(struct btrfs_inode_item *i, - u16 val) +static inline struct btrfs_inode_timespec * +btrfs_inode_otime(struct btrfs_inode_item *inode_item) { - i->compat_flags = cpu_to_le16(val); + unsigned long ptr = (unsigned long)inode_item; + ptr += offsetof(struct btrfs_inode_item, otime); + return (struct btrfs_inode_timespec *)ptr; } -static inline u64 btrfs_timespec_sec(struct btrfs_inode_timespec *ts) -{ - return le64_to_cpu(ts->sec); -} +BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_inode_timespec, sec, 64); +BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_inode_timespec, nsec, 32); -static inline void btrfs_set_timespec_sec(struct btrfs_inode_timespec *ts, - u64 val) -{ - ts->sec = cpu_to_le64(val); -} +/* struct btrfs_extent_item */ +BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32); +BTRFS_SETGET_FUNCS(extent_owner, struct btrfs_extent_item, owner, 32); -static inline u32 btrfs_timespec_nsec(struct btrfs_inode_timespec *ts) -{ - return le32_to_cpu(ts->nsec); -} +BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item, + refs, 32); +BTRFS_SETGET_STACK_FUNCS(stack_extent_owner, struct btrfs_extent_item, + owner, 32); -static inline void btrfs_set_timespec_nsec(struct btrfs_inode_timespec *ts, - u32 val) -{ - ts->nsec = cpu_to_le32(val); -} +/* struct btrfs_node */ +BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64); -static inline u32 btrfs_extent_refs(struct btrfs_extent_item *ei) +static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr) { - return le32_to_cpu(ei->refs); + unsigned long ptr; + ptr = offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; + return 
btrfs_key_blockptr(eb, (struct btrfs_key_ptr *)ptr); } -static inline void btrfs_set_extent_refs(struct btrfs_extent_item *ei, u32 val) +static inline void btrfs_set_node_blockptr(struct extent_buffer *eb, + int nr, u64 val) { - ei->refs = cpu_to_le32(val); + unsigned long ptr; + ptr = offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; + btrfs_set_key_blockptr(eb, (struct btrfs_key_ptr *)ptr, val); } -static inline u64 btrfs_extent_owner(struct btrfs_extent_item *ei) +static unsigned long btrfs_node_key_ptr_offset(int nr) { - return le64_to_cpu(ei->owner); + return offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; } -static inline void btrfs_set_extent_owner(struct btrfs_extent_item *ei, u64 val) +static void btrfs_node_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) { - ei->owner = cpu_to_le64(val); + unsigned long ptr; + ptr = btrfs_node_key_ptr_offset(nr); + read_eb_member(eb, (struct btrfs_key_ptr *)ptr, + struct btrfs_key_ptr, key, disk_key); } - -static inline u64 btrfs_node_blockptr(struct btrfs_node *n, int nr) +static inline void btrfs_set_node_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) { - return le64_to_cpu(n->ptrs[nr].blockptr); + unsigned long ptr; + ptr = btrfs_node_key_ptr_offset(nr); + write_eb_member(eb, (struct btrfs_key_ptr *)ptr, + struct btrfs_key_ptr, key, disk_key); } +/* struct btrfs_item */ +BTRFS_SETGET_FUNCS(item_offset, struct btrfs_item, offset, 32); +BTRFS_SETGET_FUNCS(item_size, struct btrfs_item, size, 32); -static inline void btrfs_set_node_blockptr(struct btrfs_node *n, int nr, - u64 val) +static inline unsigned long btrfs_item_nr_offset(int nr) { - n->ptrs[nr].blockptr = cpu_to_le64(val); + return offsetof(struct btrfs_leaf, items) + + sizeof(struct btrfs_item) * nr; } -static inline u32 btrfs_item_offset(struct btrfs_item *item) +static inline struct btrfs_item *btrfs_item_nr(struct extent_buffer *eb, + int nr) { - return 
le32_to_cpu(item->offset); + return (struct btrfs_item *)btrfs_item_nr_offset(nr); } -static inline void btrfs_set_item_offset(struct btrfs_item *item, u32 val) +static inline u32 btrfs_item_end(struct extent_buffer *eb, + struct btrfs_item *item) { - item->offset = cpu_to_le32(val); + return btrfs_item_offset(eb, item) + btrfs_item_size(eb, item); } -static inline u32 btrfs_item_end(struct btrfs_item *item) +static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr) { - return le32_to_cpu(item->offset) + le16_to_cpu(item->size); + return btrfs_item_end(eb, btrfs_item_nr(eb, nr)); } -static inline u16 btrfs_item_size(struct btrfs_item *item) +static inline u32 btrfs_item_offset_nr(struct extent_buffer *eb, int nr) { - return le16_to_cpu(item->size); + return btrfs_item_offset(eb, btrfs_item_nr(eb, nr)); } -static inline void btrfs_set_item_size(struct btrfs_item *item, u16 val) +static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr) { - item->size = cpu_to_le16(val); + return btrfs_item_size(eb, btrfs_item_nr(eb, nr)); } -static inline u16 btrfs_dir_flags(struct btrfs_dir_item *d) +static inline void btrfs_item_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) { - return le16_to_cpu(d->flags); + struct btrfs_item *item = btrfs_item_nr(eb, nr); + read_eb_member(eb, item, struct btrfs_item, key, disk_key); } -static inline void btrfs_set_dir_flags(struct btrfs_dir_item *d, u16 val) +static inline void btrfs_set_item_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) { - d->flags = cpu_to_le16(val); + struct btrfs_item *item = btrfs_item_nr(eb, nr); + write_eb_member(eb, item, struct btrfs_item, key, disk_key); } -static inline u8 btrfs_dir_type(struct btrfs_dir_item *d) -{ - return d->type; -} +/* struct btrfs_dir_item */ +BTRFS_SETGET_FUNCS(dir_flags, struct btrfs_dir_item, flags, 16); +BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8); +BTRFS_SETGET_FUNCS(dir_name_len, struct 
btrfs_dir_item, name_len, 16); -static inline void btrfs_set_dir_type(struct btrfs_dir_item *d, u8 val) +static inline void btrfs_dir_item_key(struct extent_buffer *eb, + struct btrfs_dir_item *item, + struct btrfs_disk_key *key) { - d->type = val; + read_eb_member(eb, item, struct btrfs_dir_item, location, key); } -static inline u16 btrfs_dir_name_len(struct btrfs_dir_item *d) +static inline void btrfs_set_dir_item_key(struct extent_buffer *eb, + struct btrfs_dir_item *item, + struct btrfs_disk_key *key) { - return le16_to_cpu(d->name_len); + write_eb_member(eb, item, struct btrfs_dir_item, location, key); } -static inline void btrfs_set_dir_name_len(struct btrfs_dir_item *d, u16 val) -{ - d->name_len = cpu_to_le16(val); -} +/* struct btrfs_disk_key */ +BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key, + objectid, 64); +BTRFS_SETGET_STACK_FUNCS(disk_key_offset, struct btrfs_disk_key, offset, 64); +BTRFS_SETGET_STACK_FUNCS(disk_key_type, struct btrfs_disk_key, type, 8); static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu, struct btrfs_disk_key *disk) { cpu->offset = le64_to_cpu(disk->offset); - cpu->flags = le32_to_cpu(disk->flags); + cpu->type = disk->type; cpu->objectid = le64_to_cpu(disk->objectid); } @@ -649,400 +661,167 @@ static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk, struct btrfs_key *cpu) { disk->offset = cpu_to_le64(cpu->offset); - disk->flags = cpu_to_le32(cpu->flags); + disk->type = cpu->type; disk->objectid = cpu_to_le64(cpu->objectid); } -static inline u64 btrfs_disk_key_objectid(struct btrfs_disk_key *disk) -{ - return le64_to_cpu(disk->objectid); -} - -static inline void btrfs_set_disk_key_objectid(struct btrfs_disk_key *disk, - u64 val) -{ - disk->objectid = cpu_to_le64(val); -} - -static inline u64 btrfs_disk_key_offset(struct btrfs_disk_key *disk) -{ - return le64_to_cpu(disk->offset); -} - -static inline void btrfs_set_disk_key_offset(struct btrfs_disk_key *disk, - u64 val) -{ - disk->offset = 
cpu_to_le64(val); -} - -static inline u32 btrfs_disk_key_flags(struct btrfs_disk_key *disk) -{ - return le32_to_cpu(disk->flags); -} - -static inline void btrfs_set_disk_key_flags(struct btrfs_disk_key *disk, - u32 val) -{ - disk->flags = cpu_to_le32(val); -} - -static inline u32 btrfs_disk_key_type(struct btrfs_disk_key *key) -{ - return le32_to_cpu(key->flags) >> BTRFS_KEY_TYPE_SHIFT; -} - -static inline void btrfs_set_disk_key_type(struct btrfs_disk_key *key, - u32 val) -{ - u32 flags = btrfs_disk_key_flags(key); - BUG_ON(val >= BTRFS_KEY_TYPE_MAX); - val = val << BTRFS_KEY_TYPE_SHIFT; - flags = (flags & ~BTRFS_KEY_TYPE_MASK) | val; - btrfs_set_disk_key_flags(key, flags); -} - -static inline u32 btrfs_key_type(struct btrfs_key *key) -{ - return key->flags >> BTRFS_KEY_TYPE_SHIFT; -} - -static inline void btrfs_set_key_type(struct btrfs_key *key, u32 val) -{ - BUG_ON(val >= BTRFS_KEY_TYPE_MAX); - val = val << BTRFS_KEY_TYPE_SHIFT; - key->flags = (key->flags & ~(BTRFS_KEY_TYPE_MASK)) | val; -} - -static inline u64 btrfs_header_blocknr(struct btrfs_header *h) -{ - return le64_to_cpu(h->blocknr); -} - -static inline void btrfs_set_header_blocknr(struct btrfs_header *h, u64 blocknr) -{ - h->blocknr = cpu_to_le64(blocknr); -} - -static inline u64 btrfs_header_generation(struct btrfs_header *h) +static inline void btrfs_node_key_to_cpu(struct extent_buffer *eb, + struct btrfs_key *key, int nr) { - return le64_to_cpu(h->generation); + struct btrfs_disk_key disk_key; + btrfs_node_key(eb, &disk_key, nr); + btrfs_disk_key_to_cpu(key, &disk_key); } -static inline void btrfs_set_header_generation(struct btrfs_header *h, - u64 val) +static inline void btrfs_item_key_to_cpu(struct extent_buffer *eb, + struct btrfs_key *key, int nr) { - h->generation = cpu_to_le64(val); + struct btrfs_disk_key disk_key; + btrfs_item_key(eb, &disk_key, nr); + btrfs_disk_key_to_cpu(key, &disk_key); } -static inline u64 btrfs_header_owner(struct btrfs_header *h) +static inline void 
btrfs_dir_item_key_to_cpu(struct extent_buffer *eb, + struct btrfs_dir_item *item, + struct btrfs_key *key) { - return le64_to_cpu(h->owner); + struct btrfs_disk_key disk_key; + btrfs_dir_item_key(eb, item, &disk_key); + btrfs_disk_key_to_cpu(key, &disk_key); } -static inline void btrfs_set_header_owner(struct btrfs_header *h, - u64 val) -{ - h->owner = cpu_to_le64(val); -} - -static inline u16 btrfs_header_nritems(struct btrfs_header *h) -{ - return le16_to_cpu(h->nritems); -} - -static inline void btrfs_set_header_nritems(struct btrfs_header *h, u16 val) -{ - h->nritems = cpu_to_le16(val); -} - -static inline u16 btrfs_header_flags(struct btrfs_header *h) -{ - return le16_to_cpu(h->flags); -} - -static inline void btrfs_set_header_flags(struct btrfs_header *h, u16 val) -{ - h->flags = cpu_to_le16(val); -} - -static inline int btrfs_header_level(struct btrfs_header *h) -{ - return h->level; -} - -static inline void btrfs_set_header_level(struct btrfs_header *h, int level) -{ - BUG_ON(level > BTRFS_MAX_LEVEL); - h->level = level; -} - -static inline int btrfs_is_leaf(struct btrfs_node *n) -{ - return (btrfs_header_level(&n->header) == 0); -} - -static inline u64 btrfs_root_blocknr(struct btrfs_root_item *item) -{ - return le64_to_cpu(item->blocknr); -} - -static inline void btrfs_set_root_blocknr(struct btrfs_root_item *item, u64 val) -{ - item->blocknr = cpu_to_le64(val); -} - -static inline u64 btrfs_root_dirid(struct btrfs_root_item *item) -{ - return le64_to_cpu(item->root_dirid); -} - -static inline void btrfs_set_root_dirid(struct btrfs_root_item *item, u64 val) -{ - item->root_dirid = cpu_to_le64(val); -} - -static inline u32 btrfs_root_refs(struct btrfs_root_item *item) -{ - return le32_to_cpu(item->refs); -} - -static inline void btrfs_set_root_refs(struct btrfs_root_item *item, u32 val) -{ - item->refs = cpu_to_le32(val); -} - -static inline u32 btrfs_root_flags(struct btrfs_root_item *item) -{ - return le32_to_cpu(item->flags); -} - -static inline void 
btrfs_set_root_flags(struct btrfs_root_item *item, u32 val) -{ - item->flags = cpu_to_le32(val); -} - -static inline void btrfs_set_root_blocks_used(struct btrfs_root_item *item, - u64 val) -{ - item->blocks_used = cpu_to_le64(val); -} - -static inline u64 btrfs_root_blocks_used(struct btrfs_root_item *item) -{ - return le64_to_cpu(item->blocks_used); -} - -static inline void btrfs_set_root_block_limit(struct btrfs_root_item *item, - u64 val) -{ - item->block_limit = cpu_to_le64(val); -} - -static inline u64 btrfs_root_block_limit(struct btrfs_root_item *item) -{ - return le64_to_cpu(item->block_limit); -} -static inline u64 btrfs_super_blocknr(struct btrfs_super_block *s) +static inline u8 btrfs_key_type(struct btrfs_key *key) { - return le64_to_cpu(s->blocknr); + return key->type; } -static inline void btrfs_set_super_blocknr(struct btrfs_super_block *s, u64 val) +static inline void btrfs_set_key_type(struct btrfs_key *key, u8 val) { - s->blocknr = cpu_to_le64(val); + key->type = val; } -static inline u64 btrfs_super_generation(struct btrfs_super_block *s) -{ - return le64_to_cpu(s->generation); -} +/* struct btrfs_header */ +BTRFS_SETGET_HEADER_FUNCS(header_blocknr, struct btrfs_header, blocknr, 64); +BTRFS_SETGET_HEADER_FUNCS(header_generation, struct btrfs_header, + generation, 64); +BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64); +BTRFS_SETGET_HEADER_FUNCS(header_nritems, struct btrfs_header, nritems, 32); +BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 16); +BTRFS_SETGET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8); -static inline void btrfs_set_super_generation(struct btrfs_super_block *s, - u64 val) +static inline u8 *btrfs_header_fsid(struct extent_buffer *eb) { - s->generation = cpu_to_le64(val); + unsigned long ptr = offsetof(struct btrfs_header, fsid); + return (u8 *)ptr; } -static inline u64 btrfs_super_root(struct btrfs_super_block *s) +static inline u8 *btrfs_super_fsid(struct extent_buffer 
*eb) { - return le64_to_cpu(s->root); + unsigned long ptr = offsetof(struct btrfs_super_block, fsid); + return (u8 *)ptr; } -static inline void btrfs_set_super_root(struct btrfs_super_block *s, u64 val) +static inline u8 *btrfs_header_csum(struct extent_buffer *eb) { - s->root = cpu_to_le64(val); + unsigned long ptr = offsetof(struct btrfs_header, csum); + return (u8 *)ptr; } -static inline u64 btrfs_super_total_blocks(struct btrfs_super_block *s) +static inline struct btrfs_node *btrfs_buffer_node(struct extent_buffer *eb) { - return le64_to_cpu(s->total_blocks); + return NULL; } -static inline void btrfs_set_super_total_blocks(struct btrfs_super_block *s, - u64 val) +static inline struct btrfs_leaf *btrfs_buffer_leaf(struct extent_buffer *eb) { - s->total_blocks = cpu_to_le64(val); + return NULL; } -static inline u64 btrfs_super_blocks_used(struct btrfs_super_block *s) +static inline struct btrfs_header *btrfs_buffer_header(struct extent_buffer *eb) { - return le64_to_cpu(s->blocks_used); + return NULL; } -static inline void btrfs_set_super_blocks_used(struct btrfs_super_block *s, - u64 val) +static inline int btrfs_is_leaf(struct extent_buffer *eb) { - s->blocks_used = cpu_to_le64(val); + return (btrfs_header_level(eb) == 0); } -static inline u32 btrfs_super_blocksize(struct btrfs_super_block *s) -{ - return le32_to_cpu(s->blocksize); -} +/* struct btrfs_root_item */ +BTRFS_SETGET_FUNCS(disk_root_refs, struct btrfs_root_item, refs, 32); +BTRFS_SETGET_FUNCS(disk_root_blocknr, struct btrfs_root_item, blocknr, 64); -static inline void btrfs_set_super_blocksize(struct btrfs_super_block *s, - u32 val) -{ - s->blocksize = cpu_to_le32(val); -} +BTRFS_SETGET_STACK_FUNCS(root_blocknr, struct btrfs_root_item, blocknr, 64); +BTRFS_SETGET_STACK_FUNCS(root_dirid, struct btrfs_root_item, root_dirid, 64); +BTRFS_SETGET_STACK_FUNCS(root_refs, struct btrfs_root_item, refs, 32); +BTRFS_SETGET_STACK_FUNCS(root_flags, struct btrfs_root_item, flags, 32); 
+BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, blocks_used, 64); +BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, block_limit, 64); -static inline u64 btrfs_super_root_dir(struct btrfs_super_block *s) -{ - return le64_to_cpu(s->root_dir_objectid); -} +/* struct btrfs_super_block */ +BTRFS_SETGET_STACK_FUNCS(super_blocknr, struct btrfs_super_block, blocknr, 64); +BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block, + generation, 64); +BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64); +BTRFS_SETGET_STACK_FUNCS(super_total_blocks, struct btrfs_super_block, + total_blocks, 64); +BTRFS_SETGET_STACK_FUNCS(super_blocks_used, struct btrfs_super_block, + blocks_used, 64); +BTRFS_SETGET_STACK_FUNCS(super_sectorsize, struct btrfs_super_block, + sectorsize, 32); +BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block, + nodesize, 32); +BTRFS_SETGET_STACK_FUNCS(super_leafsize, struct btrfs_super_block, + leafsize, 32); +BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block, + root_dir_objectid, 64); -static inline void btrfs_set_super_root_dir(struct btrfs_super_block *s, u64 - val) +static inline unsigned long btrfs_leaf_data(struct extent_buffer *l) { - s->root_dir_objectid = cpu_to_le64(val); + return offsetof(struct btrfs_leaf, items); } -static inline u8 *btrfs_leaf_data(struct btrfs_leaf *l) -{ - return (u8 *)l->items; -} +/* struct btrfs_file_extent_item */ +BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8); -static inline int btrfs_file_extent_type(struct btrfs_file_extent_item *e) -{ - return e->type; -} -static inline void btrfs_set_file_extent_type(struct btrfs_file_extent_item *e, - u8 val) -{ - e->type = val; -} - -static inline char *btrfs_file_extent_inline_start(struct +static inline unsigned long btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e) { - return (char *)(&e->disk_blocknr); + unsigned long offset = (unsigned long)e; + 
offset += offsetof(struct btrfs_file_extent_item, disk_blocknr); + return offset; } static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize) { - return (unsigned long)(&((struct - btrfs_file_extent_item *)NULL)->disk_blocknr) + datasize; -} - -static inline u32 btrfs_file_extent_inline_len(struct btrfs_item *e) -{ - struct btrfs_file_extent_item *fe = NULL; - return btrfs_item_size(e) - (unsigned long)(&fe->disk_blocknr); -} - -static inline u64 btrfs_file_extent_disk_blocknr(struct btrfs_file_extent_item - *e) -{ - return le64_to_cpu(e->disk_blocknr); + return offsetof(struct btrfs_file_extent_item, disk_blocknr) + datasize; } -static inline void btrfs_set_file_extent_disk_blocknr(struct - btrfs_file_extent_item - *e, u64 val) +static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb, + struct btrfs_item *e) { - e->disk_blocknr = cpu_to_le64(val); + unsigned long offset; + offset = offsetof(struct btrfs_file_extent_item, disk_blocknr); + return btrfs_item_size(eb, e) - offset; } -static inline u64 btrfs_file_extent_generation(struct btrfs_file_extent_item *e) -{ - return le64_to_cpu(e->generation); -} - -static inline void btrfs_set_file_extent_generation(struct - btrfs_file_extent_item *e, - u64 val) -{ - e->generation = cpu_to_le64(val); -} - -static inline u64 btrfs_file_extent_disk_num_blocks(struct - btrfs_file_extent_item *e) -{ - return le64_to_cpu(e->disk_num_blocks); -} - -static inline void btrfs_set_file_extent_disk_num_blocks(struct - btrfs_file_extent_item - *e, u64 val) -{ - e->disk_num_blocks = cpu_to_le64(val); -} - -static inline u64 btrfs_file_extent_offset(struct btrfs_file_extent_item *e) -{ - return le64_to_cpu(e->offset); -} - -static inline void btrfs_set_file_extent_offset(struct btrfs_file_extent_item - *e, u64 val) -{ - e->offset = cpu_to_le64(val); -} - -static inline u64 btrfs_file_extent_num_blocks(struct btrfs_file_extent_item - *e) -{ - return le64_to_cpu(e->num_blocks); -} - -static inline void 
btrfs_set_file_extent_num_blocks(struct - btrfs_file_extent_item *e, - u64 val) -{ - e->num_blocks = cpu_to_le64(val); -} +BTRFS_SETGET_FUNCS(file_extent_disk_blocknr, struct btrfs_file_extent_item, + disk_blocknr, 64); +BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item, + generation, 64); +BTRFS_SETGET_FUNCS(file_extent_disk_num_blocks, struct btrfs_file_extent_item, + disk_num_blocks, 64); +BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item, + offset, 64); +BTRFS_SETGET_FUNCS(file_extent_num_blocks, struct btrfs_file_extent_item, + num_blocks, 64); static inline struct btrfs_root *btrfs_sb(struct super_block *sb) { return sb->s_fs_info; } -static inline void btrfs_check_bounds(void *vptr, size_t len, - void *vcontainer, size_t container_len) -{ - char *ptr = vptr; - char *container = vcontainer; - WARN_ON(ptr < container); - WARN_ON(ptr + len > container + container_len); -} - -static inline void btrfs_memcpy(struct btrfs_root *root, - void *dst_block, - void *dst, const void *src, size_t nr) -{ - btrfs_check_bounds(dst, nr, dst_block, root->fs_info->sb->s_blocksize); - memcpy(dst, src, nr); -} - -static inline void btrfs_memmove(struct btrfs_root *root, - void *dst_block, - void *dst, void *src, size_t nr) -{ - btrfs_check_bounds(dst, nr, dst_block, root->fs_info->sb->s_blocksize); - memmove(dst, src, nr); -} - static inline int btrfs_set_root_name(struct btrfs_root *root, const char *name, int len) { @@ -1063,7 +842,11 @@ static inline int btrfs_set_root_name(struct btrfs_root *root, /* helper function to cast into the data area of the leaf. 
*/ #define btrfs_item_ptr(leaf, slot, type) \ ((type *)(btrfs_leaf_data(leaf) + \ - btrfs_item_offset((leaf)->items + (slot)))) + btrfs_item_offset_nr(leaf, slot))) + +#define btrfs_item_ptr_offset(leaf, slot) \ + ((unsigned long)(btrfs_leaf_data(leaf) + \ + btrfs_item_offset_nr(leaf, slot))) /* mount option defines and helpers */ #define BTRFS_MOUNT_SUBVOL 0x000001 @@ -1084,7 +867,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int data, int owner); int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root); -struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, +struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 hint, u64 empty_size); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, @@ -1092,7 +875,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, u64 num_blocks, u64 empty_size, u64 search_start, u64 search_end, struct btrfs_key *ins, int data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf); + struct extent_buffer *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, @@ -1106,10 +889,10 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, int btrfs_free_block_groups(struct btrfs_fs_info *info); int btrfs_read_block_groups(struct btrfs_root *root); /* ctree.c */ -int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct buffer_head *buf, struct buffer_head - *parent, int parent_slot, struct buffer_head - **cow_ret); +int btrfs_cow_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *buf, + struct extent_buffer *parent, int parent_slot, + struct extent_buffer **cow_ret); int btrfs_extend_item(struct btrfs_trans_handle *trans, struct 
btrfs_root *root, struct btrfs_path *path, u32 data_size); int btrfs_truncate_item(struct btrfs_trans_handle *trans, @@ -1120,7 +903,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); int btrfs_realloc_node(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct buffer_head *parent, + struct btrfs_root *root, struct extent_buffer *parent, int cache_only, u64 *last_ret); void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); struct btrfs_path *btrfs_alloc_path(void); @@ -1134,7 +917,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *cpu_key, u32 data_size); int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); -int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf); +int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root *root); /* root-item.c */ @@ -1179,9 +962,9 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, int btrfs_find_highest_inode(struct btrfs_root *fs_root, u64 *objectid); /* inode-item.c */ -int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 objectid, struct btrfs_inode_item - *inode_item); +int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 objectid); int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *location, int mod); @@ -1224,8 +1007,6 @@ struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, struct btrfs_root *root); int btrfs_commit_write(struct file *file, struct page *page, unsigned from, unsigned to); -int btrfs_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *result, 
int create); struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, size_t page_offset, u64 start, u64 end, int create); diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 49db5fa7ced3..6f19de41b878 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -33,7 +33,7 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle int ret; char *ptr; struct btrfs_item *item; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); if (ret == -EEXIST) { @@ -49,11 +49,11 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle if (ret < 0) return ERR_PTR(ret); WARN_ON(ret > 0); - leaf = btrfs_buffer_leaf(path->nodes[0]); - item = leaf->items + path->slots[0]; + leaf = path->nodes[0]; + item = btrfs_item_nr(leaf, path->slots[0]); ptr = btrfs_item_ptr(leaf, path->slots[0], char); - BUG_ON(data_size > btrfs_item_size(item)); - ptr += btrfs_item_size(item) - data_size; + BUG_ON(data_size > btrfs_item_size(leaf, item)); + ptr += btrfs_item_size(leaf, item) - data_size; return (struct btrfs_dir_item *)ptr; } @@ -65,12 +65,13 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root int ret2 = 0; struct btrfs_path *path; struct btrfs_dir_item *dir_item; - char *name_ptr; + struct extent_buffer *leaf; + unsigned long name_ptr; struct btrfs_key key; + struct btrfs_disk_key disk_key; u32 data_size; key.objectid = dir; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); @@ -85,14 +86,16 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root goto out; } - btrfs_cpu_key_to_disk(&dir_item->location, location); - btrfs_set_dir_type(dir_item, type); - btrfs_set_dir_flags(dir_item, 0); - btrfs_set_dir_name_len(dir_item, name_len); - name_ptr = (char *)(dir_item + 1); + leaf = path->nodes[0]; + 
btrfs_cpu_key_to_disk(&disk_key, location); + btrfs_set_dir_item_key(leaf, dir_item, &disk_key); + btrfs_set_dir_type(leaf, dir_item, type); + btrfs_set_dir_flags(leaf, dir_item, 0); + btrfs_set_dir_name_len(leaf, dir_item, name_len); + name_ptr = (unsigned long)(dir_item + 1); - btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); - btrfs_mark_buffer_dirty(path->nodes[0]); + write_extent_buffer(leaf, name, name_ptr, name_len); + btrfs_mark_buffer_dirty(leaf); second_insert: /* FIXME, use some real flag for selecting the extra index */ @@ -110,13 +113,15 @@ second_insert: ret2 = PTR_ERR(dir_item); goto out; } - btrfs_cpu_key_to_disk(&dir_item->location, location); - btrfs_set_dir_type(dir_item, type); - btrfs_set_dir_flags(dir_item, 0); - btrfs_set_dir_name_len(dir_item, name_len); - name_ptr = (char *)(dir_item + 1); - btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); - btrfs_mark_buffer_dirty(path->nodes[0]); + leaf = path->nodes[0]; + btrfs_cpu_key_to_disk(&disk_key, location); + btrfs_set_dir_item_key(leaf, dir_item, &disk_key); + btrfs_set_dir_type(leaf, dir_item, type); + btrfs_set_dir_flags(leaf, dir_item, 0); + btrfs_set_dir_name_len(leaf, dir_item, name_len); + name_ptr = (unsigned long)(dir_item + 1); + write_extent_buffer(leaf, name, name_ptr, name_len); + btrfs_mark_buffer_dirty(leaf); out: btrfs_free_path(path); if (ret) @@ -136,14 +141,15 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_key key; int ins_len = mod < 0 ? 
-1 : 0; int cow = mod != 0; - struct btrfs_disk_key *found_key; - struct btrfs_leaf *leaf; + struct btrfs_key found_key; + struct extent_buffer *leaf; key.objectid = dir; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); + ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); + ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); if (ret < 0) return ERR_PTR(ret); @@ -152,12 +158,13 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, return NULL; path->slots[0]--; } - leaf = btrfs_buffer_leaf(path->nodes[0]); - found_key = &leaf->items[path->slots[0]].key; - if (btrfs_disk_key_objectid(found_key) != dir || - btrfs_disk_key_type(found_key) != BTRFS_DIR_ITEM_KEY || - btrfs_disk_key_offset(found_key) != key.offset) + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + + if (found_key.objectid != dir || + btrfs_key_type(&found_key) != BTRFS_DIR_ITEM_KEY || + found_key.offset != key.offset) return NULL; return btrfs_match_dir_item_name(root, path, name, name_len); @@ -176,7 +183,6 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, int cow = mod != 0; key.objectid = dir; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); key.offset = objectid; @@ -193,21 +199,22 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, const char *name, int name_len) { struct btrfs_dir_item *dir_item; - char *name_ptr; + unsigned long name_ptr; u32 total_len; u32 cur = 0; u32 this_len; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; - leaf = btrfs_buffer_leaf(path->nodes[0]); + leaf = path->nodes[0]; dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item); - total_len = btrfs_item_size(leaf->items + path->slots[0]); + total_len = btrfs_item_size_nr(leaf, path->slots[0]); while(cur < total_len) { - this_len = sizeof(*dir_item) + btrfs_dir_name_len(dir_item); - name_ptr = (char *)(dir_item + 1); + this_len = sizeof(*dir_item) 
+ + btrfs_dir_name_len(leaf, dir_item); + name_ptr = (unsigned long)(dir_item + 1); - if (btrfs_dir_name_len(dir_item) == name_len && - memcmp(name_ptr, name, name_len) == 0) + if (btrfs_dir_name_len(leaf, dir_item) == name_len && + memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0) return dir_item; cur += this_len; @@ -223,20 +230,23 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, struct btrfs_dir_item *di) { - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; u32 sub_item_len; u32 item_len; int ret = 0; - leaf = btrfs_buffer_leaf(path->nodes[0]); - sub_item_len = sizeof(*di) + btrfs_dir_name_len(di); - item_len = btrfs_item_size(leaf->items + path->slots[0]); - if (sub_item_len == btrfs_item_size(leaf->items + path->slots[0])) { + leaf = path->nodes[0]; + sub_item_len = sizeof(*di) + btrfs_dir_name_len(leaf, di); + item_len = btrfs_item_size_nr(leaf, path->slots[0]); + if (sub_item_len == item_len) { ret = btrfs_del_item(trans, root, path); } else { - char *ptr = (char *)di; - char *start = btrfs_item_ptr(leaf, path->slots[0], char); - btrfs_memmove(root, leaf, ptr, ptr + sub_item_len, + /* MARKER */ + unsigned long ptr = (unsigned long)di; + unsigned long start; + + start = btrfs_item_ptr_offset(leaf, path->slots[0]); + memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, item_len - (ptr + sub_item_len - start)); ret = btrfs_truncate_item(trans, root, path, item_len - sub_item_len); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ef6d76f2ec20..0c1f90cbedb0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -23,138 +23,132 @@ #include #include #include +#include // for block_sync_page #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" -u64 bh_blocknr(struct buffer_head *bh) +#if 0 +static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) { - return bh->b_blocknr; -} - -static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) -{ - 
struct btrfs_node *node = btrfs_buffer_node(buf); - if (bh_blocknr(buf) != btrfs_header_blocknr(&node->header)) { - printk(KERN_CRIT "bh_blocknr(buf) is %llu, header is %llu\n", - (unsigned long long)bh_blocknr(buf), - (unsigned long long)btrfs_header_blocknr(&node->header)); + if (extent_buffer_blocknr(buf) != btrfs_header_blocknr(buf)) { + printk(KERN_CRIT "buf blocknr(buf) is %llu, header is %llu\n", + (unsigned long long)extent_buffer_blocknr(buf), + (unsigned long long)btrfs_header_blocknr(buf)); return 1; } return 0; } +#endif -struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) +struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, + u64 blocknr) { - struct address_space *mapping = root->fs_info->btree_inode->i_mapping; - int blockbits = root->fs_info->sb->s_blocksize_bits; - unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); - struct page *page; - struct buffer_head *bh; - struct buffer_head *head; - struct buffer_head *ret = NULL; - - - page = find_lock_page(mapping, index); - if (!page) - return NULL; - - if (!page_has_buffers(page)) - goto out_unlock; + struct inode *btree_inode = root->fs_info->btree_inode; + return find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + blocknr * root->sectorsize, + root->sectorsize, GFP_NOFS); +} - head = page_buffers(page); - bh = head; - do { - if (buffer_mapped(bh) && bh_blocknr(bh) == blocknr) { - ret = bh; - get_bh(bh); - goto out_unlock; - } - bh = bh->b_this_page; - } while (bh != head); -out_unlock: - unlock_page(page); - page_cache_release(page); - return ret; +struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, + u64 blocknr) +{ + struct inode *btree_inode = root->fs_info->btree_inode; + return alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + blocknr * root->sectorsize, + root->sectorsize, GFP_NOFS); } -int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, - u64 logical) +struct extent_map 
*btree_get_extent(struct inode *inode, struct page *page, + size_t page_offset, u64 start, u64 end, + int create) { - if (logical == 0) { - bh->b_bdev = NULL; - bh->b_blocknr = 0; - set_buffer_mapped(bh); - } else { - map_bh(bh, root->fs_info->sb, logical); + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_map *em; + int ret; + +again: + em = lookup_extent_mapping(em_tree, start, end); + if (em) { + goto out; } - return 0; + em = alloc_extent_map(GFP_NOFS); + if (!em) { + em = ERR_PTR(-ENOMEM); + goto out; + } + em->start = 0; + em->end = (i_size_read(inode) & ~((u64)PAGE_CACHE_SIZE -1)) - 1; + em->block_start = 0; + em->block_end = em->end; + em->bdev = inode->i_sb->s_bdev; + ret = add_extent_mapping(em_tree, em); + if (ret == -EEXIST) { + free_extent_map(em); + em = NULL; + goto again; + } else if (ret) { + em = ERR_PTR(ret); + } +out: + return em; } -struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, - u64 blocknr) +static int btree_writepage(struct page *page, struct writeback_control *wbc) { - struct address_space *mapping = root->fs_info->btree_inode->i_mapping; - int blockbits = root->fs_info->sb->s_blocksize_bits; - unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); - struct page *page; - struct buffer_head *bh; - struct buffer_head *head; - struct buffer_head *ret = NULL; - int err; - u64 first_block = index << (PAGE_CACHE_SHIFT - blockbits); + struct extent_map_tree *tree; + tree = &BTRFS_I(page->mapping->host)->extent_tree; + return extent_write_full_page(tree, page, btree_get_extent, wbc); +} +int btree_readpage(struct file *file, struct page *page) +{ + struct extent_map_tree *tree; + tree = &BTRFS_I(page->mapping->host)->extent_tree; + return extent_read_full_page(tree, page, btree_get_extent); +} - page = find_or_create_page(mapping, index, GFP_NOFS); - if (!page) - return NULL; +static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags) +{ + struct extent_map_tree 
*tree; + int ret; - if (!page_has_buffers(page)) - create_empty_buffers(page, root->fs_info->sb->s_blocksize, 0); - head = page_buffers(page); - bh = head; - do { - if (!buffer_mapped(bh)) { - err = btrfs_map_bh_to_logical(root, bh, first_block); - BUG_ON(err); - } - if (bh_blocknr(bh) == blocknr) { - ret = bh; - get_bh(bh); - goto out_unlock; - } - bh = bh->b_this_page; - first_block++; - } while (bh != head); -out_unlock: - unlock_page(page); - if (ret) - touch_buffer(ret); - page_cache_release(page); + BUG_ON(page->private != 1); + tree = &BTRFS_I(page->mapping->host)->extent_tree; + ret = try_release_extent_mapping(tree, page); + if (ret == 1) { + ClearPagePrivate(page); + set_page_private(page, 0); + page_cache_release(page); + } return ret; } -static int btree_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh, int create) +static void btree_invalidatepage(struct page *page, unsigned long offset) { - int err; - struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; - err = btrfs_map_bh_to_logical(root, bh, iblock); - return err; + struct extent_map_tree *tree; + tree = &BTRFS_I(page->mapping->host)->extent_tree; + extent_invalidatepage(tree, page, offset); + btree_releasepage(page, GFP_NOFS); } int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, char *result) { + return 0; +#if 0 u32 crc; crc = crc32c(0, data, len); memcpy(result, &crc, BTRFS_CRC32_SIZE); return 0; +#endif } -static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, +#if 0 +static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, int verify) { + return 0; char result[BTRFS_CRC32_SIZE]; int ret; struct btrfs_node *node; @@ -176,7 +170,9 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, } return 0; } +#endif +#if 0 static int btree_writepage(struct page *page, struct writeback_control *wbc) { struct buffer_head *bh; @@ -195,87 +191,65 @@ static int btree_writepage(struct 
page *page, struct writeback_control *wbc) } while (bh != head); return block_write_full_page(page, btree_get_block, wbc); } - -static int btree_readpage(struct file * file, struct page * page) -{ - return block_read_full_page(page, btree_get_block); -} +#endif static struct address_space_operations btree_aops = { .readpage = btree_readpage, .writepage = btree_writepage, + .releasepage = btree_releasepage, + .invalidatepage = btree_invalidatepage, .sync_page = block_sync_page, }; int readahead_tree_block(struct btrfs_root *root, u64 blocknr) { - struct buffer_head *bh = NULL; + struct extent_buffer *buf = NULL; + struct inode *btree_inode = root->fs_info->btree_inode; int ret = 0; - bh = btrfs_find_create_tree_block(root, blocknr); - if (!bh) + buf = btrfs_find_create_tree_block(root, blocknr); + if (!buf) return 0; - if (buffer_uptodate(bh)) { - ret = 1; - goto done; - } - if (test_set_buffer_locked(bh)) { - ret = 1; - goto done; - } - if (!buffer_uptodate(bh)) { - get_bh(bh); - bh->b_end_io = end_buffer_read_sync; - submit_bh(READ, bh); - } else { - unlock_buffer(bh); - ret = 1; - } -done: - brelse(bh); + read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, + buf, 0); + free_extent_buffer(buf); return ret; } -struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) +struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) { - struct buffer_head *bh = NULL; - - bh = btrfs_find_create_tree_block(root, blocknr); - if (!bh) - return bh; - if (buffer_uptodate(bh)) - goto uptodate; - lock_buffer(bh); - if (!buffer_uptodate(bh)) { - get_bh(bh); - bh->b_end_io = end_buffer_read_sync; - submit_bh(READ, bh); - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) - goto fail; - } else { - unlock_buffer(bh); - } -uptodate: - if (!buffer_checked(bh)) { - csum_tree_block(root, bh, 1); - set_buffer_checked(bh); - } - if (check_tree_block(root, bh)) - goto fail; - return bh; -fail: - brelse(bh); - return NULL; + struct extent_buffer *buf = 
NULL; + struct inode *btree_inode = root->fs_info->btree_inode; + + buf = btrfs_find_create_tree_block(root, blocknr); + if (!buf) + return NULL; + read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, + buf, 1); + return buf; } int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf) + struct extent_buffer *buf) { - WARN_ON(atomic_read(&buf->b_count) == 0); - lock_buffer(buf); - clear_buffer_dirty(buf); - unlock_buffer(buf); + struct inode *btree_inode = root->fs_info->btree_inode; + clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); + return 0; +} + +int wait_on_tree_block_writeback(struct btrfs_root *root, + struct extent_buffer *buf) +{ + struct inode *btree_inode = root->fs_info->btree_inode; + wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->extent_tree, + buf); + return 0; +} + +int set_tree_block_dirty(struct btrfs_root *root, struct extent_buffer *buf) +{ + struct inode *btree_inode = root->fs_info->btree_inode; + set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); return 0; } @@ -287,7 +261,9 @@ static int __setup_root(int blocksize, root->node = NULL; root->inode = NULL; root->commit_root = NULL; - root->blocksize = blocksize; + root->sectorsize = blocksize; + root->nodesize = blocksize; + root->leafsize = blocksize; root->ref_cows = 0; root->fs_info = fs_info; root->objectid = objectid; @@ -332,7 +308,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, struct btrfs_root *root; struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_path *path; - struct btrfs_leaf *l; + struct extent_buffer *l; u64 highest_inode; int ret = 0; @@ -361,11 +337,10 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, ret = -ENOENT; goto out; } - l = btrfs_buffer_leaf(path->nodes[0]); - memcpy(&root->root_item, - btrfs_item_ptr(l, path->slots[0], struct btrfs_root_item), + l = path->nodes[0]; + read_extent_buffer(l, 
&root->root_item, + btrfs_item_ptr_offset(l, path->slots[0]), sizeof(root->root_item)); - memcpy(&root->root_key, location, sizeof(*location)); ret = 0; out: btrfs_release_path(root, path); @@ -406,21 +381,21 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, (unsigned long)root->root_key.objectid, root); if (ret) { - brelse(root->node); + free_extent_buffer(root->node); kfree(root); return ERR_PTR(ret); } ret = btrfs_set_root_name(root, name, namelen); if (ret) { - brelse(root->node); + free_extent_buffer(root->node); kfree(root); return ERR_PTR(ret); } ret = btrfs_sysfs_add_root(root); if (ret) { - brelse(root->node); + free_extent_buffer(root->node); kfree(root->name); kfree(root); return ERR_PTR(ret); @@ -471,6 +446,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_nlink = 1; fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; + extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree, + fs_info->btree_inode->i_mapping, + GFP_NOFS); fs_info->do_barriers = 1; fs_info->closing = 0; @@ -493,10 +471,14 @@ struct btrfs_root *open_ctree(struct super_block *sb) if (!fs_info->sb_buffer) goto fail_iput; - disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; - fs_info->disk_super = disk_super; - memcpy(&fs_info->super_copy, disk_super, sizeof(fs_info->super_copy)); + read_extent_buffer(fs_info->sb_buffer, &fs_info->super_copy, 0, + sizeof(fs_info->super_copy)); + + read_extent_buffer(fs_info->sb_buffer, fs_info->fsid, + (unsigned long)btrfs_super_fsid(fs_info->sb_buffer), + BTRFS_FSID_SIZE); + disk_super = &fs_info->super_copy; if (!btrfs_super_root(disk_super)) goto fail_sb_buffer; @@ -530,9 +512,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) return tree_root; fail_tree_root: - btrfs_block_release(tree_root, tree_root->node); + free_extent_buffer(tree_root->node); fail_sb_buffer: - btrfs_block_release(tree_root, 
fs_info->sb_buffer); + free_extent_buffer(fs_info->sb_buffer); fail_iput: iput(fs_info->btree_inode); fail: @@ -546,31 +528,13 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root) { int ret; - struct buffer_head *bh = root->fs_info->sb_buffer; - - lock_buffer(bh); - WARN_ON(atomic_read(&bh->b_count) < 1); - clear_buffer_dirty(bh); - csum_tree_block(root, bh, 0); - bh->b_end_io = end_buffer_write_sync; - get_bh(bh); - if (root->fs_info->do_barriers) - ret = submit_bh(WRITE_BARRIER, bh); - else - ret = submit_bh(WRITE, bh); - if (ret == -EOPNOTSUPP) { - get_bh(bh); - lock_buffer(bh); - set_buffer_uptodate(bh); - root->fs_info->do_barriers = 0; - ret = submit_bh(WRITE, bh); - } - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) { - WARN_ON(1); - return -EIO; - } - return 0; + struct extent_buffer *super = root->fs_info->sb_buffer; + struct inode *btree_inode = root->fs_info->btree_inode; + + set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, super); + ret = sync_page_range_nolock(btree_inode, btree_inode->i_mapping, + super->start, super->len); + return ret; } int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) @@ -581,9 +545,9 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) if (root->inode) iput(root->inode); if (root->node) - brelse(root->node); + free_extent_buffer(root->node); if (root->commit_root) - brelse(root->commit_root); + free_extent_buffer(root->commit_root); if (root->name) kfree(root->name); kfree(root); @@ -629,12 +593,10 @@ int close_ctree(struct btrfs_root *root) mutex_unlock(&fs_info->fs_mutex); if (fs_info->extent_root->node) - btrfs_block_release(fs_info->extent_root, - fs_info->extent_root->node); + free_extent_buffer(fs_info->extent_root->node); if (fs_info->tree_root->node) - btrfs_block_release(fs_info->tree_root, - fs_info->tree_root->node); - btrfs_block_release(root, fs_info->sb_buffer); + free_extent_buffer(fs_info->tree_root->node); + 
free_extent_buffer(fs_info->sb_buffer); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); iput(fs_info->btree_inode); @@ -645,25 +607,32 @@ int close_ctree(struct btrfs_root *root) return 0; } -void btrfs_mark_buffer_dirty(struct buffer_head *bh) +int btrfs_buffer_uptodate(struct extent_buffer *buf) +{ + struct inode *btree_inode = buf->pages[0]->mapping->host; + return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); +} + +int btrfs_set_buffer_uptodate(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; - u64 transid = btrfs_header_generation(btrfs_buffer_header(bh)); + struct inode *btree_inode = buf->pages[0]->mapping->host; + return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, + buf); +} - WARN_ON(!atomic_read(&bh->b_count)); +void btrfs_mark_buffer_dirty(struct extent_buffer *buf) +{ + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + u64 transid = btrfs_header_generation(buf); + struct inode *btree_inode = root->fs_info->btree_inode; if (transid != root->fs_info->generation) { printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", - (unsigned long long)bh->b_blocknr, + (unsigned long long)extent_buffer_blocknr(buf), transid, root->fs_info->generation); WARN_ON(1); } - mark_buffer_dirty(bh); -} - -void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) -{ - brelse(buf); + set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); } void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index fc6b520c6e0b..70d9413c599f 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -19,68 +19,35 @@ #ifndef __DISKIO__ #define __DISKIO__ -#include - #define BTRFS_SUPER_INFO_OFFSET (16 * 1024) -enum btrfs_bh_state_bits { - BH_Checked = BH_PrivateStart, - BH_Defrag, - BH_DefragDone, -}; -BUFFER_FNS(Checked, checked); -BUFFER_FNS(Defrag, 
defrag); -BUFFER_FNS(DefragDone, defrag_done); - -static inline struct btrfs_node *btrfs_buffer_node(struct buffer_head *bh) -{ - return (struct btrfs_node *)bh->b_data; -} - -static inline struct btrfs_leaf *btrfs_buffer_leaf(struct buffer_head *bh) -{ - return (struct btrfs_leaf *)bh->b_data; -} - -static inline struct btrfs_header *btrfs_buffer_header(struct buffer_head *bh) -{ - return &((struct btrfs_node *)bh->b_data)->header; -} - -struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr); +struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr); int readahead_tree_block(struct btrfs_root *root, u64 blocknr); -struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, - u64 blocknr); -int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf); -int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf); +struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, + u64 blocknr); int clean_tree_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct buffer_head *buf); -int btrfs_commit_transaction(struct btrfs_trans_handle *trans, - struct btrfs_root *root); + struct btrfs_root *root, struct extent_buffer *buf); struct btrfs_root *open_ctree(struct super_block *sb); int close_ctree(struct btrfs_root *root); -void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf); int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root); -struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr); -int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, - char *result); +struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, + u64 blocknr); struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_key *location, const char *name, int namelen); struct btrfs_root 
*btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, struct btrfs_key *location); -u64 bh_blocknr(struct buffer_head *bh); int btrfs_insert_dev_radix(struct btrfs_root *root, struct block_device *bdev, u64 device_id, u64 block_start, u64 num_blocks); -int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, - u64 logical); void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); -void btrfs_mark_buffer_dirty(struct buffer_head *bh); +void btrfs_mark_buffer_dirty(struct extent_buffer *buf); +int btrfs_buffer_uptodate(struct extent_buffer *buf); +int btrfs_set_buffer_uptodate(struct extent_buffer *buf); +int wait_on_tree_block_writeback(struct btrfs_root *root, + struct extent_buffer *buf); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f261a8326cdf..089c41cbca74 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -33,7 +33,7 @@ static int cache_block_group(struct btrfs_root *root, struct btrfs_path *path; int ret; struct btrfs_key key; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; struct radix_tree_root *extent_radix; int slot; u64 i; @@ -56,7 +56,6 @@ static int cache_block_group(struct btrfs_root *root, path->reada = 2; first_free = block_group->key.objectid; key.objectid = block_group->key.objectid; - key.flags = 0; key.offset = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); @@ -69,9 +68,9 @@ static int cache_block_group(struct btrfs_root *root, path->slots[0]--; while(1) { - leaf = btrfs_buffer_leaf(path->nodes[0]); + leaf = path->nodes[0]; slot = path->slots[0]; - if (slot >= btrfs_header_nritems(&leaf->header)) { + if (slot >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(root, path); if (ret < 0) goto err; @@ -82,7 +81,7 @@ static int cache_block_group(struct btrfs_root *root, } } - btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + btrfs_item_key_to_cpu(leaf, &key, 
slot); if (key.objectid < block_group->key.objectid) { if (key.objectid + key.offset > first_free) first_free = key.objectid + key.offset; @@ -116,8 +115,7 @@ next: hole_size = block_group->key.objectid + block_group->key.offset - last; for (i = 0; i < hole_size; i++) { - set_radix_bit(extent_radix, - last + i); + set_radix_bit(extent_radix, last + i); } } block_group->cached = 1; @@ -366,7 +364,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_path *path; int ret; struct btrfs_key key; - struct btrfs_leaf *l; + struct extent_buffer *l; struct btrfs_extent_item *item; u32 refs; @@ -375,7 +373,6 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, return -ENOMEM; key.objectid = blocknr; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_blocks; ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, @@ -386,10 +383,10 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, BUG(); } BUG_ON(ret != 0); - l = btrfs_buffer_leaf(path->nodes[0]); + l = path->nodes[0]; item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); - refs = btrfs_extent_refs(item); - btrfs_set_extent_refs(item, refs + 1); + refs = btrfs_extent_refs(l, item); + btrfs_set_extent_refs(l, item, refs + 1); btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_release_path(root->fs_info->extent_root, path); @@ -414,23 +411,25 @@ static int lookup_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_path *path; int ret; struct btrfs_key key; - struct btrfs_leaf *l; + struct extent_buffer *l; struct btrfs_extent_item *item; path = btrfs_alloc_path(); key.objectid = blocknr; key.offset = num_blocks; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 0); if (ret < 0) goto out; - if (ret != 0) + if (ret != 0) { + btrfs_print_leaf(root, path->nodes[0]); + printk("failed to find block number %Lu\n", blocknr); BUG(); - l = 
btrfs_buffer_leaf(path->nodes[0]); + } + l = path->nodes[0]; item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); - *refs = btrfs_extent_refs(item); + *refs = btrfs_extent_refs(l, item); out: btrfs_free_path(path); return 0; @@ -439,16 +438,16 @@ out: int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - return btrfs_inc_extent_ref(trans, root, bh_blocknr(root->node), 1); + return btrfs_inc_extent_ref(trans, root, + extent_buffer_blocknr(root->node), 1); } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf) + struct extent_buffer *buf) { u64 blocknr; - struct btrfs_node *buf_node; - struct btrfs_leaf *buf_leaf; - struct btrfs_disk_key *key; + u32 nritems; + struct btrfs_key key; struct btrfs_file_extent_item *fi; int i; int leaf; @@ -458,31 +457,31 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (!root->ref_cows) return 0; - buf_node = btrfs_buffer_node(buf); - leaf = btrfs_is_leaf(buf_node); - buf_leaf = btrfs_buffer_leaf(buf); - for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) { + + leaf = btrfs_is_leaf(buf); + nritems = btrfs_header_nritems(buf); + for (i = 0; i < nritems; i++) { if (leaf) { u64 disk_blocknr; - key = &buf_leaf->items[i].key; - if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) + btrfs_item_key_to_cpu(buf, &key, i); + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) continue; - fi = btrfs_item_ptr(buf_leaf, i, + fi = btrfs_item_ptr(buf, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(fi) == + if (btrfs_file_extent_type(buf, fi) == BTRFS_FILE_EXTENT_INLINE) continue; - disk_blocknr = btrfs_file_extent_disk_blocknr(fi); + disk_blocknr = btrfs_file_extent_disk_blocknr(buf, fi); if (disk_blocknr == 0) continue; ret = btrfs_inc_extent_ref(trans, root, disk_blocknr, - btrfs_file_extent_disk_num_blocks(fi)); + btrfs_file_extent_disk_num_blocks(buf, fi)); if (ret) { faili = i; goto fail; 
} } else { - blocknr = btrfs_node_blockptr(buf_node, i); + blocknr = btrfs_node_blockptr(buf, i); ret = btrfs_inc_extent_ref(trans, root, blocknr, 1); if (ret) { faili = i; @@ -496,22 +495,23 @@ fail: for (i =0; i < faili; i++) { if (leaf) { u64 disk_blocknr; - key = &buf_leaf->items[i].key; - if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) + btrfs_item_key_to_cpu(buf, &key, i); + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) continue; - fi = btrfs_item_ptr(buf_leaf, i, + fi = btrfs_item_ptr(buf, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(fi) == + if (btrfs_file_extent_type(buf, fi) == BTRFS_FILE_EXTENT_INLINE) continue; - disk_blocknr = btrfs_file_extent_disk_blocknr(fi); + disk_blocknr = btrfs_file_extent_disk_blocknr(buf, fi); if (disk_blocknr == 0) continue; err = btrfs_free_extent(trans, root, disk_blocknr, - btrfs_file_extent_disk_num_blocks(fi), 0); + btrfs_file_extent_disk_num_blocks(buf, + fi), 0); BUG_ON(err); } else { - blocknr = btrfs_node_blockptr(buf_node, i); + blocknr = btrfs_node_blockptr(buf, i); err = btrfs_free_extent(trans, root, blocknr, 1, 0); BUG_ON(err); } @@ -527,16 +527,18 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, int ret; int pending_ret; struct btrfs_root *extent_root = root->fs_info->extent_root; - struct btrfs_block_group_item *bi; + unsigned long bi; + struct extent_buffer *leaf; ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); if (ret < 0) goto fail; BUG_ON(ret); - bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], - struct btrfs_block_group_item); - memcpy(bi, &cache->item, sizeof(*bi)); - btrfs_mark_buffer_dirty(path->nodes[0]); + + leaf = path->nodes[0]; + bi = btrfs_item_ptr_offset(leaf, path->slots[0]); + write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); + btrfs_mark_buffer_dirty(leaf); btrfs_release_path(extent_root, path); fail: finish_current_insert(trans, extent_root); @@ -768,11 +770,11 @@ static int 
finish_current_insert(struct btrfs_trans_handle *trans, struct unsigned long gang[8]; struct btrfs_fs_info *info = extent_root->fs_info; - btrfs_set_extent_refs(&extent_item, 1); + btrfs_set_stack_extent_refs(&extent_item, 1); ins.offset = 1; - ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); - btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid); + btrfs_set_stack_extent_owner(&extent_item, + extent_root->root_key.objectid); while(1) { ret = find_first_radix_bit(&info->extent_ins_radix, gang, 0, @@ -795,23 +797,20 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) { int err; - struct btrfs_header *header; - struct buffer_head *bh; + struct extent_buffer *buf; if (!pending) { - bh = btrfs_find_tree_block(root, blocknr); - if (bh) { - if (buffer_uptodate(bh)) { + buf = btrfs_find_tree_block(root, blocknr); + if (buf) { + if (btrfs_buffer_uptodate(buf)) { u64 transid = root->fs_info->running_transaction->transid; - header = btrfs_buffer_header(bh); - if (btrfs_header_generation(header) == - transid) { - btrfs_block_release(root, bh); + if (btrfs_header_generation(buf) == transid) { + free_extent_buffer(buf); return 0; } } - btrfs_block_release(root, bh); + free_extent_buffer(buf); } err = set_radix_bit(&root->fs_info->pinned_radix, blocknr); if (!err) { @@ -839,12 +838,12 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_key key; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; + struct extent_buffer *leaf; int ret; struct btrfs_extent_item *ei; u32 refs; key.objectid = blocknr; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_blocks; @@ -856,12 +855,16 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (ret < 0) return ret; BUG_ON(ret); - ei = 
btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], + + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); - BUG_ON(ei->refs == 0); - refs = btrfs_extent_refs(ei) - 1; - btrfs_set_extent_refs(ei, refs); - btrfs_mark_buffer_dirty(path->nodes[0]); + refs = btrfs_extent_refs(leaf, ei); + BUG_ON(refs == 0); + refs -= 1; + btrfs_set_extent_refs(leaf, ei, refs); + btrfs_mark_buffer_dirty(leaf); + if (refs == 0) { u64 super_blocks_used, root_blocks_used; @@ -876,8 +879,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root super_blocks_used - num_blocks); /* block accounting for root item */ - root_blocks_used = btrfs_root_blocks_used(&root->root_item); - btrfs_set_root_blocks_used(&root->root_item, + root_blocks_used = btrfs_root_used(&root->root_item); + btrfs_set_root_used(&root->root_item, root_blocks_used - num_blocks); ret = btrfs_del_item(trans, extent_root, path); @@ -984,7 +987,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root u64 test_block; u64 orig_search_start = search_start; int start_found; - struct btrfs_leaf *l; + struct extent_buffer *l; struct btrfs_root * root = orig_root->fs_info->extent_root; struct btrfs_fs_info *info = root->fs_info; int total_needed = num_blocks; @@ -994,10 +997,10 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int wrapped = 0; WARN_ON(num_blocks < 1); - ins->flags = 0; btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); - level = btrfs_header_level(btrfs_buffer_header(root->node)); + level = btrfs_header_level(root->node); + if (search_end == (u64)-1) search_end = btrfs_super_total_blocks(&info->super_copy); if (hint_block) { @@ -1034,8 +1037,9 @@ check_failed: path->slots[0]--; } - l = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&key, &l->items[path->slots[0]].key); + l = path->nodes[0]; + btrfs_item_key_to_cpu(l, &key, path->slots[0]); + /* * a rare case, go back 
one key if we hit a block group item * instead of an extent item @@ -1055,9 +1059,9 @@ check_failed: } while (1) { - l = btrfs_buffer_leaf(path->nodes[0]); + l = path->nodes[0]; slot = path->slots[0]; - if (slot >= btrfs_header_nritems(&l->header)) { + if (slot >= btrfs_header_nritems(l)) { ret = btrfs_next_leaf(root, path); if (ret == 0) continue; @@ -1075,7 +1079,7 @@ check_failed: goto check_pending; } - btrfs_disk_key_to_cpu(&key, &l->items[slot].key); + btrfs_item_key_to_cpu(l, &key, slot); if (key.objectid >= search_start && key.objectid > last_block && start_found) { if (last_block < search_start) @@ -1183,8 +1187,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root = info->extent_root; struct btrfs_extent_item extent_item; - btrfs_set_extent_refs(&extent_item, 1); - btrfs_set_extent_owner(&extent_item, owner); + btrfs_set_stack_extent_refs(&extent_item, 1); + btrfs_set_stack_extent_owner(&extent_item, owner); WARN_ON(num_blocks < 1); ret = find_free_extent(trans, root, num_blocks, empty_size, @@ -1201,8 +1205,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, num_blocks); /* block accounting for root item */ - root_blocks_used = btrfs_root_blocks_used(&root->root_item); - btrfs_set_root_blocks_used(&root->root_item, root_blocks_used + + root_blocks_used = btrfs_root_used(&root->root_item); + btrfs_set_root_used(&root->root_item, root_blocks_used + num_blocks); if (root == extent_root) { @@ -1241,13 +1245,13 @@ update_block: * helper function to allocate a block for a given tree * returns the tree buffer or NULL. 
*/ -struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 hint, - u64 empty_size) +struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 hint, + u64 empty_size) { struct btrfs_key ins; int ret; - struct buffer_head *buf; + struct extent_buffer *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, 1, empty_size, hint, (u64)-1, &ins, 0); @@ -1260,53 +1264,57 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, btrfs_free_extent(trans, root, ins.objectid, 1, 0); return ERR_PTR(-ENOMEM); } - WARN_ON(buffer_dirty(buf)); - set_buffer_uptodate(buf); + btrfs_set_buffer_uptodate(buf); + set_extent_dirty(&trans->transaction->dirty_pages, buf->start, + buf->start + buf->len - 1, GFP_NOFS); + /* set_buffer_checked(buf); set_buffer_defrag(buf); - set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index); + */ + /* FIXME!!!!!!!!!!!!!!!! + set_radix_bit(&trans->transaction->dirty_pages, buf->pages[0]->index); + */ trans->blocks_used++; return buf; } static int drop_leaf_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct buffer_head *cur) + struct btrfs_root *root, struct extent_buffer *leaf) { - struct btrfs_disk_key *key; - struct btrfs_leaf *leaf; + struct btrfs_key key; struct btrfs_file_extent_item *fi; int i; int nritems; int ret; - BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur))); - leaf = btrfs_buffer_leaf(cur); - nritems = btrfs_header_nritems(&leaf->header); + BUG_ON(!btrfs_is_leaf(leaf)); + nritems = btrfs_header_nritems(leaf); for (i = 0; i < nritems; i++) { u64 disk_blocknr; - key = &leaf->items[i].key; - if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) + + btrfs_item_key_to_cpu(leaf, &key, i); + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) continue; fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(fi) == BTRFS_FILE_EXTENT_INLINE) + if 
(btrfs_file_extent_type(leaf, fi) == + BTRFS_FILE_EXTENT_INLINE) continue; /* * FIXME make sure to insert a trans record that * repeats the snapshot del on crash */ - disk_blocknr = btrfs_file_extent_disk_blocknr(fi); + disk_blocknr = btrfs_file_extent_disk_blocknr(leaf, fi); if (disk_blocknr == 0) continue; ret = btrfs_free_extent(trans, root, disk_blocknr, - btrfs_file_extent_disk_num_blocks(fi), - 0); + btrfs_file_extent_disk_num_blocks(leaf, fi), 0); BUG_ON(ret); } return 0; } static void reada_walk_down(struct btrfs_root *root, - struct btrfs_node *node) + struct extent_buffer *node) { int i; u32 nritems; @@ -1314,7 +1322,7 @@ static void reada_walk_down(struct btrfs_root *root, int ret; u32 refs; - nritems = btrfs_header_nritems(&node->header); + nritems = btrfs_header_nritems(node); for (i = 0; i < nritems; i++) { blocknr = btrfs_node_blockptr(node, i); ret = lookup_extent_ref(NULL, root, blocknr, 1, &refs); @@ -1337,16 +1345,17 @@ static void reada_walk_down(struct btrfs_root *root, static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int *level) { - struct buffer_head *next; - struct buffer_head *cur; + struct extent_buffer *next; + struct extent_buffer *cur; u64 blocknr; int ret; u32 refs; WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); - ret = lookup_extent_ref(trans, root, bh_blocknr(path->nodes[*level]), - 1, &refs); + ret = lookup_extent_ref(trans, root, + extent_buffer_blocknr(path->nodes[*level]), + 1, &refs); BUG_ON(ret); if (refs > 1) goto out; @@ -1360,21 +1369,20 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root cur = path->nodes[*level]; if (*level > 0 && path->slots[*level] == 0) - reada_walk_down(root, btrfs_buffer_node(cur)); + reada_walk_down(root, cur); - if (btrfs_header_level(btrfs_buffer_header(cur)) != *level) + if (btrfs_header_level(cur) != *level) WARN_ON(1); if (path->slots[*level] >= - btrfs_header_nritems(btrfs_buffer_header(cur))) + 
btrfs_header_nritems(cur)) break; if (*level == 0) { ret = drop_leaf_ref(trans, root, cur); BUG_ON(ret); break; } - blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur), - path->slots[*level]); + blocknr = btrfs_node_blockptr(cur, path->slots[*level]); ret = lookup_extent_ref(trans, root, blocknr, 1, &refs); BUG_ON(ret); if (refs != 1) { @@ -1384,8 +1392,8 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root continue; } next = btrfs_find_tree_block(root, blocknr); - if (!next || !buffer_uptodate(next)) { - brelse(next); + if (!next || !btrfs_buffer_uptodate(next)) { + free_extent_buffer(next); mutex_unlock(&root->fs_info->fs_mutex); next = read_tree_block(root, blocknr); mutex_lock(&root->fs_info->fs_mutex); @@ -1395,7 +1403,7 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(ret); if (refs != 1) { path->slots[*level]++; - brelse(next); + free_extent_buffer(next); ret = btrfs_free_extent(trans, root, blocknr, 1, 1); BUG_ON(ret); @@ -1404,17 +1412,17 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root } WARN_ON(*level <= 0); if (path->nodes[*level-1]) - btrfs_block_release(root, path->nodes[*level-1]); + free_extent_buffer(path->nodes[*level-1]); path->nodes[*level-1] = next; - *level = btrfs_header_level(btrfs_buffer_header(next)); + *level = btrfs_header_level(next); path->slots[*level] = 0; } out: WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); ret = btrfs_free_extent(trans, root, - bh_blocknr(path->nodes[*level]), 1, 1); - btrfs_block_release(root, path->nodes[*level]); + extent_buffer_blocknr(path->nodes[*level]), 1, 1); + free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; BUG_ON(ret); @@ -1436,24 +1444,24 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { slot = path->slots[i]; - if (slot < btrfs_header_nritems( - 
btrfs_buffer_header(path->nodes[i])) - 1) { - struct btrfs_node *node; - node = btrfs_buffer_node(path->nodes[i]); + if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { + struct extent_buffer *node; + struct btrfs_disk_key disk_key; + node = path->nodes[i]; path->slots[i]++; *level = i; WARN_ON(*level == 0); + btrfs_node_key(node, &disk_key, path->slots[i]); memcpy(&root_item->drop_progress, - &node->ptrs[path->slots[i]].key, - sizeof(root_item->drop_progress)); + &disk_key, sizeof(disk_key)); root_item->drop_level = i; return 0; } else { ret = btrfs_free_extent(trans, root, - bh_blocknr(path->nodes[*level]), - 1, 1); + extent_buffer_blocknr(path->nodes[*level]), + 1, 1); BUG_ON(ret); - btrfs_block_release(root, path->nodes[*level]); + free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level = i + 1; } @@ -1480,15 +1488,15 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root path = btrfs_alloc_path(); BUG_ON(!path); - level = btrfs_header_level(btrfs_buffer_header(root->node)); + level = btrfs_header_level(root->node); orig_level = level; if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { path->nodes[level] = root->node; path->slots[level] = 0; } else { struct btrfs_key key; - struct btrfs_disk_key *found_key; - struct btrfs_node *node; + struct btrfs_disk_key found_key; + struct extent_buffer *node; btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); level = root_item->drop_level; @@ -1498,10 +1506,10 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root ret = wret; goto out; } - node = btrfs_buffer_node(path->nodes[level]); - found_key = &node->ptrs[path->slots[level]].key; - WARN_ON(memcmp(found_key, &root_item->drop_progress, - sizeof(*found_key))); + node = path->nodes[level]; + btrfs_node_key(node, &found_key, path->slots[level]); + WARN_ON(memcmp(&found_key, &root_item->drop_progress, + sizeof(found_key))); } while(1) { wret = walk_down_tree(trans, root, path, &level); 
@@ -1516,12 +1524,12 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root if (wret < 0) ret = wret; ret = -EAGAIN; - get_bh(root->node); + extent_buffer_get(root->node); break; } for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { - btrfs_block_release(root, path->nodes[i]); + free_extent_buffer(path->nodes[i]); path->nodes[i] = 0; } } @@ -1581,13 +1589,12 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct btrfs_path *path; int ret; int err = 0; - struct btrfs_block_group_item *bi; struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; struct radix_tree_root *radix; struct btrfs_key key; struct btrfs_key found_key; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; u64 group_size_blocks; u64 used; @@ -1596,7 +1603,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) root = info->extent_root; key.objectid = 0; key.offset = group_size_blocks; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); path = btrfs_alloc_path(); @@ -1610,18 +1616,18 @@ int btrfs_read_block_groups(struct btrfs_root *root) err = ret; break; } - leaf = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&found_key, - &leaf->items[path->slots[0]].key); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); cache = kmalloc(sizeof(*cache), GFP_NOFS); if (!cache) { err = -1; break; } - bi = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_block_group_item); - if (bi->flags & BTRFS_BLOCK_GROUP_DATA) { + read_extent_buffer(leaf, &cache->item, + btrfs_item_ptr_offset(leaf, path->slots[0]), + sizeof(cache->item)); + if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) { radix = &info->block_group_data_radix; cache->data = 1; } else { @@ -1629,7 +1635,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) cache->data = 0; } - memcpy(&cache->item, bi, sizeof(*bi)); memcpy(&cache->key, &found_key, sizeof(found_key)); cache->last_alloc = cache->key.objectid; 
cache->first_free = cache->key.objectid; @@ -1640,11 +1645,12 @@ int btrfs_read_block_groups(struct btrfs_root *root) key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); + ret = radix_tree_insert(radix, found_key.objectid + found_key.offset - 1, (void *)cache); BUG_ON(ret); - used = btrfs_block_group_used(bi); + used = btrfs_block_group_used(&cache->item); if (used < div_factor(key.offset, 8)) { radix_tree_tag_set(radix, found_key.objectid + found_key.offset - 1, diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 0ab368e091f9..55fefdfab84c 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -26,16 +26,6 @@ struct tree_entry { struct rb_node rb_node; }; -/* bits for the extent state */ -#define EXTENT_DIRTY 1 -#define EXTENT_WRITEBACK (1 << 1) -#define EXTENT_UPTODATE (1 << 2) -#define EXTENT_LOCKED (1 << 3) -#define EXTENT_NEW (1 << 4) -#define EXTENT_DELALLOC (1 << 5) - -#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) - void __init extent_map_init(void) { extent_map_cache = btrfs_cache_create("extent_map", @@ -223,7 +213,8 @@ int add_extent_mapping(struct extent_map_tree *tree, if (rb) prev = rb_entry(rb, struct extent_map, rb_node); if (prev && prev->end + 1 == em->start && - ((em->block_start == 0 && prev->block_start == 0) || + ((em->block_start == EXTENT_MAP_HOLE && + prev->block_start == EXTENT_MAP_HOLE) || (em->block_start == prev->block_end + 1))) { em->start = prev->start; em->block_start = prev->block_start; @@ -926,6 +917,40 @@ int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end) } EXPORT_SYMBOL(set_range_writeback); +int find_first_extent_bit(struct extent_map_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, int bits) +{ + struct rb_node *node; + struct extent_state *state; + int ret = 1; + + write_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. 
+	 */
+	node = tree_search(&tree->state, start);
+	if (!node || IS_ERR(node)) {
+		goto out;
+	}
+
+	/*
+	 * walk forward from the search result and report the first state
+	 * that has any of the requested bits set.  ret stays 1 when no
+	 * state matches.
+	 */
+	while(1) {
+		state = rb_entry(node, struct extent_state, rb_node);
+		if (state->state & bits) {
+			*start_ret = state->start;
+			*end_ret = state->end;
+			ret = 0;
+			/* stop here, otherwise the last match is returned */
+			break;
+		}
+		node = rb_next(node);
+		if (!node)
+			break;
+	}
+out:
+	write_unlock_irq(&tree->lock);
+	return ret;
+}
+EXPORT_SYMBOL(find_first_extent_bit);
+
 u64 find_lock_delalloc_range(struct extent_map_tree *tree,
 			     u64 start, u64 lock_start, u64 *end, u64 max_bytes)
 {
@@ -1450,7 +1475,7 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page,
 		em = NULL;
 
 		/* we've found a hole, just zero and go on */
-		if (block_start == 0) {
+		if (block_start == EXTENT_MAP_HOLE) {
 			zero_user_page(page, page_offset, iosize, KM_USER0);
 			set_extent_uptodate(tree, cur, cur + iosize - 1,
 					    GFP_NOFS);
@@ -1593,7 +1618,8 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
 		free_extent_map(em);
 		em = NULL;
 
-		if (block_start == 0 || block_start == EXTENT_MAP_INLINE) {
+		if (block_start == EXTENT_MAP_HOLE ||
+		    block_start == EXTENT_MAP_INLINE) {
 			clear_extent_dirty(tree, cur,
 					   cur + iosize - 1, GFP_NOFS);
 			cur = cur + iosize;
@@ -1630,7 +1656,6 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
 		nr++;
 	}
 done:
-	WARN_ON(test_range_bit(tree, start, page_end, EXTENT_DIRTY, 0));
 	unlock_extent(tree, start, page_end, GFP_NOFS);
 	unlock_page(page);
 	return 0;
@@ -1827,8 +1852,623 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
 
 	// XXX(hch): block 0 is valid in some cases, e.g.
XFS RT device
 	if (em->block_start == EXTENT_MAP_INLINE ||
-	    em->block_start == 0)
-		return 0;
+	    em->block_start == EXTENT_MAP_HOLE)
+		return 0;
 	return (em->block_start + start - em->start) >> inode->i_blkbits;
 }
+
+/*
+ * Build an extent_buffer for the byte range [start, start + len - 1],
+ * finding or creating every backing page in tree->mapping.  Each page is
+ * unlocked again before returning.  EXTENT_UPTODATE is set in eb->flags
+ * only when every backing page was already uptodate.
+ *
+ * Returns the new buffer holding one reference, or NULL if the buffer or
+ * one of its pages could not be allocated.
+ */
+struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
+					  u64 start, unsigned long len,
+					  gfp_t mask)
+{
+	unsigned long num_pages = ((start + len - 1) >> PAGE_CACHE_SHIFT) -
+				  (start >> PAGE_CACHE_SHIFT) + 1;
+	unsigned long i;
+	unsigned long index = start >> PAGE_CACHE_SHIFT;
+	struct extent_buffer *eb;
+	struct page *p;
+	struct address_space *mapping = tree->mapping;
+	/* assume uptodate until one page proves otherwise */
+	int uptodate = 1;
+
+	eb = kzalloc(EXTENT_BUFFER_SIZE(num_pages), mask);
+	if (!eb || IS_ERR(eb))
+		return NULL;
+
+	eb->start = start;
+	eb->len = len;
+	atomic_set(&eb->refs, 1);
+
+	for (i = 0; i < num_pages; i++, index++) {
+		p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
+		if (!p)
+			goto fail;
+		eb->pages[i] = p;
+		if (!PageUptodate(p))
+			uptodate = 0;
+		unlock_page(p);
+	}
+	if (uptodate)
+		eb->flags |= EXTENT_UPTODATE;
+	return eb;
+fail:
+	/* drops the pages collected so far along with the buffer */
+	free_extent_buffer(eb);
+	return NULL;
+}
+EXPORT_SYMBOL(alloc_extent_buffer);
+
+/*
+ * Build an extent_buffer for [start, start + len - 1] using only pages
+ * that find_get_page() already finds in tree->mapping.
+ *
+ * Returns the new buffer holding one reference, or NULL if the buffer
+ * could not be allocated or any page of the range is missing.
+ */
+struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree,
+					 u64 start, unsigned long len,
+					 gfp_t mask)
+{
+	unsigned long num_pages = ((start + len - 1) >> PAGE_CACHE_SHIFT) -
+				  (start >> PAGE_CACHE_SHIFT) + 1;
+	unsigned long i;
+	unsigned long index = start >> PAGE_CACHE_SHIFT;
+	struct extent_buffer *eb;
+	struct page *p;
+	struct address_space *mapping = tree->mapping;
+
+	eb = kzalloc(EXTENT_BUFFER_SIZE(num_pages), mask);
+	if (!eb || IS_ERR(eb))
+		return NULL;
+
+	eb->start = start;
+	eb->len = len;
+	atomic_set(&eb->refs, 1);
+
+	for (i = 0; i < num_pages; i++, index++) {
+		p = find_get_page(mapping, index);
+		if (!p)
+			goto fail;
+		eb->pages[i] = p;
+	}
+	return eb;
+fail:
+	free_extent_buffer(eb);
+	return NULL;
+}
+EXPORT_SYMBOL(find_extent_buffer);
+
+void free_extent_buffer(struct extent_buffer *eb)
+{
+
unsigned long i;
+	unsigned long num_pages;
+
+	if (!eb)
+		return;
+
+	/* only the holder of the last reference tears the buffer down */
+	if (!atomic_dec_and_test(&eb->refs))
+		return;
+
+	num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) -
+		    (eb->start >> PAGE_CACHE_SHIFT) + 1;
+
+	/* slots may be NULL when buffer setup failed part way through */
+	for (i = 0; i < num_pages; i++) {
+		if (eb->pages[i])
+			page_cache_release(eb->pages[i]);
+	}
+	kfree(eb);
+}
+EXPORT_SYMBOL(free_extent_buffer);
+
+/*
+ * Clear EXTENT_DIRTY for the buffer's byte range in the tree, then clean
+ * each backing page.  A first or last page that the buffer only partly
+ * covers is left dirty when test_range_bit() still reports EXTENT_DIRTY
+ * somewhere in that page.  Always returns 0.
+ */
+int clear_extent_buffer_dirty(struct extent_map_tree *tree,
+			      struct extent_buffer *eb)
+{
+	unsigned long i;
+	unsigned long num_pages;
+	struct page *page;
+
+	u64 start = eb->start;
+	u64 end = start + eb->len - 1;
+
+	clear_extent_dirty(tree, start, end, GFP_NOFS);
+	num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) -
+		    (eb->start >> PAGE_CACHE_SHIFT) + 1;
+
+	for (i = 0; i < num_pages; i++) {
+		page = eb->pages[i];
+		lock_page(page);
+		/*
+		 * if we're on the last page or the first page and the
+		 * block isn't aligned on a page boundary, do extra checks
+		 * to make sure we don't clean page that is partially dirty
+		 */
+		if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
+		    ((i == num_pages - 1) &&
+		     ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
+			/* widen before shifting so 32-bit does not overflow */
+			start = (u64)page->index << PAGE_CACHE_SHIFT;
+			end = start + PAGE_CACHE_SIZE - 1;
+			if (test_range_bit(tree, start, end,
+					   EXTENT_DIRTY, 0)) {
+				unlock_page(page);
+				continue;
+			}
+		}
+		clear_page_dirty_for_io(page);
+		unlock_page(page);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(clear_extent_buffer_dirty);
+
+/* Wait for writeback on the buffer's byte range [start, start + len - 1]. */
+int wait_on_extent_buffer_writeback(struct extent_map_tree *tree,
+				    struct extent_buffer *eb)
+{
+	return wait_on_extent_writeback(tree, eb->start,
+					eb->start + eb->len - 1);
+}
+EXPORT_SYMBOL(wait_on_extent_buffer_writeback);
+
+/* Mark the buffer's byte range dirty via set_range_dirty(). */
+int set_extent_buffer_dirty(struct extent_map_tree *tree,
+			    struct extent_buffer *eb)
+{
+	return set_range_dirty(tree, eb->start, eb->start + eb->len - 1);
+}
+EXPORT_SYMBOL(set_extent_buffer_dirty);
+
+int set_extent_buffer_uptodate(struct extent_map_tree *tree,
+				struct extent_buffer *eb)
+{
+	unsigned long i;
+	struct page *page;
+	unsigned long num_pages;
+
+	num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) -
+		    (eb->start >> PAGE_CACHE_SHIFT) + 1;
+
+	set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
+			    GFP_NOFS);
+	for (i = 0; i < num_pages; i++) {
+		page = eb->pages[i];
+		/*
+		 * a first or last page the buffer only partly covers goes
+		 * through check_page_uptodate() instead of being flagged
+		 * directly
+		 */
+		if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
+		    ((i == num_pages - 1) &&
+		     ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
+			check_page_uptodate(tree, page);
+			continue;
+		}
+		SetPageUptodate(page);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(set_extent_buffer_uptodate);
+
+/*
+ * Returns 1 when the buffer carries the cached EXTENT_UPTODATE flag,
+ * otherwise the result of test_range_bit() for EXTENT_UPTODATE over the
+ * buffer's whole byte range.
+ */
+int extent_buffer_uptodate(struct extent_map_tree *tree,
+			   struct extent_buffer *eb)
+{
+	if (eb->flags & EXTENT_UPTODATE)
+		return 1;
+	return test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+			      EXTENT_UPTODATE, 1);
+}
+EXPORT_SYMBOL(extent_buffer_uptodate);
+
+/*
+ * Call the mapping's readpage on every backing page that is not yet
+ * uptodate.
+ *
+ * With wait == 0, pages that cannot be locked immediately are skipped
+ * and the function returns once readpage has been called on the rest.
+ * With wait != 0, it then waits for every page to be unlocked and
+ * returns -EIO if any page is still not uptodate.
+ *
+ * Returns 0 on success or a negative error.  EXTENT_UPTODATE is cached
+ * in eb->flags only when the waited read succeeded.
+ */
+int read_extent_buffer_pages(struct extent_map_tree *tree,
+			     struct extent_buffer *eb, int wait)
+{
+	unsigned long i;
+	struct page *page;
+	int err;
+	int ret = 0;
+	unsigned long num_pages;
+
+	if (eb->flags & EXTENT_UPTODATE)
+		return 0;
+
+	if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+			   EXTENT_UPTODATE, 1)) {
+		return 0;
+	}
+
+	num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) -
+		    (eb->start >> PAGE_CACHE_SHIFT) + 1;
+	for (i = 0; i < num_pages; i++) {
+		page = eb->pages[i];
+		if (PageUptodate(page)) {
+			continue;
+		}
+		if (!wait) {
+			if (TestSetPageLocked(page)) {
+				continue;
+			}
+		} else {
+			lock_page(page);
+		}
+		/* recheck under the page lock */
+		if (!PageUptodate(page)) {
+			err = page->mapping->a_ops->readpage(NULL, page);
+			if (err) {
+				ret = err;
+			}
+		} else {
+			unlock_page(page);
+		}
+	}
+
+	if (ret || !wait) {
+		return ret;
+	}
+
+	for (i = 0; i < num_pages; i++) {
+		page = eb->pages[i];
+		wait_on_page_locked(page);
+		if (!PageUptodate(page)) {
+			ret = -EIO;
+		}
+	}
+	/* never cache the uptodate flag for a buffer that failed to read */
+	if (!ret)
+		eb->flags |= EXTENT_UPTODATE;
+	return ret;
+}
+EXPORT_SYMBOL(read_extent_buffer_pages);
+
+void read_extent_buffer(struct extent_buffer *eb,
void *dstv,
+			unsigned long start,
+			unsigned long len)
+{
+	/*
+	 * Copy len bytes, beginning start bytes into the buffer, out of the
+	 * backing pages into dstv.  start is relative to the buffer, not to
+	 * the page cache.
+	 */
+	size_t cur;
+	size_t offset;
+	struct page *page;
+	char *kaddr;
+	char *dst = (char *)dstv;
+	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+
+	WARN_ON(start > eb->len);
+	WARN_ON(start + len > eb->len);
+
+	/*
+	 * the buffer may begin part way into its first page, so the
+	 * position inside page i must include start_offset for every page
+	 */
+	offset = (start_offset + start) &
+		 ((unsigned long)PAGE_CACHE_SIZE - 1);
+
+	while(len > 0) {
+		/* looked up here so pages[] is never read past the end */
+		page = eb->pages[i];
+		WARN_ON(!PageUptodate(page));
+
+		cur = min(len, (PAGE_CACHE_SIZE - offset));
+		// kaddr = kmap_atomic(page, KM_USER0);
+		kaddr = page_address(page);
+		memcpy(dst, kaddr + offset, cur);
+		// kunmap_atomic(kaddr, KM_USER0);
+
+		dst += cur;
+		len -= cur;
+		offset = 0;
+		i++;
+	}
+}
+EXPORT_SYMBOL(read_extent_buffer);
+
+/*
+ * Expose the single backing page that holds buffer offset 'start'.
+ *
+ * *token:     address of the page
+ * *map:       address of the first buffer byte inside that page
+ * *map_start: buffer-relative offset of the byte at *map
+ * *map_len:   number of bytes usable from *map to the end of the page
+ *
+ * Always returns 0.  km is unused while the kmap_atomic call is
+ * commented out.
+ */
+int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
+		      char **token, char **map,
+		      unsigned long *map_start,
+		      unsigned long *map_len, int km)
+{
+	size_t offset;
+	char *kaddr;
+	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+
+	WARN_ON(start > eb->len);
+
+	if (i == 0) {
+		offset = start_offset;
+		*map_start = 0;
+	} else {
+		offset = 0;
+		/* page i begins start_offset bytes short of i whole pages */
+		*map_start = (i << PAGE_CACHE_SHIFT) - start_offset;
+	}
+
+	// kaddr = kmap_atomic(eb->pages[i], km);
+	kaddr = page_address(eb->pages[i]);
+	*token = kaddr;
+	*map = kaddr + offset;
+	*map_len = PAGE_CACHE_SIZE - offset;
+	return 0;
+}
+EXPORT_SYMBOL(map_extent_buffer);
+
+/* Counterpart of map_extent_buffer(); a no-op while kmap_atomic is unused. */
+void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
+{
+	// kunmap_atomic(token, km);
+}
+EXPORT_SYMBOL(unmap_extent_buffer);
+
+int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
+			 unsigned long start,
+			 unsigned long len)
+{
+	size_t cur;
+	size_t offset;
+	struct page *page;
+	char *kaddr;
+	const char *ptr = ptrv;
+	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+	unsigned long i =
(start_offset + start) >> PAGE_CACHE_SHIFT;
+	int ret = 0;
+
+	WARN_ON(start > eb->len);
+	WARN_ON(start + len > eb->len);
+
+	/* position inside page i, including the buffer's own page offset */
+	offset = (start_offset + start) &
+		 ((unsigned long)PAGE_CACHE_SIZE - 1);
+
+	while(len > 0) {
+		/* looked up here so pages[] is never read past the end */
+		page = eb->pages[i];
+		WARN_ON(!PageUptodate(page));
+
+		cur = min(len, (PAGE_CACHE_SIZE - offset));
+
+		// kaddr = kmap_atomic(page, KM_USER0);
+		kaddr = page_address(page);
+		ret = memcmp(ptr, kaddr + offset, cur);
+		// kunmap_atomic(kaddr, KM_USER0);
+		if (ret)
+			break;
+
+		ptr += cur;
+		len -= cur;
+		offset = 0;
+		i++;
+	}
+	return ret;
+}
+EXPORT_SYMBOL(memcmp_extent_buffer);
+
+/*
+ * Copy len bytes from srcv into the buffer, beginning start bytes into
+ * the buffer.  The copy is done page by page through the backing pages.
+ */
+void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
+			 unsigned long start, unsigned long len)
+{
+	size_t cur;
+	size_t offset;
+	struct page *page;
+	char *kaddr;
+	const char *src = srcv;
+	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+
+	WARN_ON(start > eb->len);
+	WARN_ON(start + len > eb->len);
+
+	/* position inside page i, including the buffer's own page offset */
+	offset = (start_offset + start) &
+		 ((unsigned long)PAGE_CACHE_SIZE - 1);
+
+	while(len > 0) {
+		/* looked up here so pages[] is never read past the end */
+		page = eb->pages[i];
+		WARN_ON(!PageUptodate(page));
+
+		cur = min(len, PAGE_CACHE_SIZE - offset);
+		// kaddr = kmap_atomic(page, KM_USER0);
+		kaddr = page_address(page);
+		memcpy(kaddr + offset, src, cur);
+		// kunmap_atomic(kaddr, KM_USER0);
+
+		src += cur;
+		len -= cur;
+		offset = 0;
+		i++;
+	}
+}
+EXPORT_SYMBOL(write_extent_buffer);
+
+void memset_extent_buffer(struct extent_buffer *eb, char c,
+			  unsigned long start, unsigned long len)
+{
+	size_t cur;
+	size_t offset;
+	struct page *page;
+	char *kaddr;
+	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+
+	WARN_ON(start > eb->len);
+	WARN_ON(start + len > eb->len);
+
+	page = eb->pages[i];
+	offset = start & ((unsigned
long)PAGE_CACHE_SIZE - 1);
+	/*
+	 * add the buffer's offset inside its first page and wrap at the
+	 * page size, so the result is right for every page index
+	 */
+	offset = (offset + start_offset) &
+		 ((unsigned long)PAGE_CACHE_SIZE - 1);
+
+	while(len > 0) {
+		/* looked up here so pages[] is never read past the end */
+		page = eb->pages[i];
+		WARN_ON(!PageUptodate(page));
+
+		cur = min(len, PAGE_CACHE_SIZE - offset);
+		// kaddr = kmap_atomic(page, KM_USER0);
+		kaddr = page_address(page);
+		memset(kaddr + offset, c, cur);
+		// kunmap_atomic(kaddr, KM_USER0);
+
+		len -= cur;
+		offset = 0;
+		i++;
+	}
+}
+EXPORT_SYMBOL(memset_extent_buffer);
+
+/*
+ * Copy len bytes from offset src_offset of src to offset dst_offset of
+ * dst.  Each destination page is filled with read_extent_buffer() on
+ * src.  Warns when the two buffers differ in length.
+ */
+void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
+			unsigned long dst_offset, unsigned long src_offset,
+			unsigned long len)
+{
+	u64 dst_len = dst->len;
+	size_t cur;
+	size_t offset;
+	struct page *page;
+	char *kaddr;
+	size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
+	unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
+
+	WARN_ON(src->len != dst_len);
+
+	/* position inside page i, including dst's own page offset */
+	offset = (start_offset + dst_offset) &
+		 ((unsigned long)PAGE_CACHE_SIZE - 1);
+
+	while(len > 0) {
+		page = dst->pages[i];
+		WARN_ON(!PageUptodate(page));
+
+		cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
+
+		// kaddr = kmap_atomic(page, KM_USER1);
+		kaddr = page_address(page);
+		read_extent_buffer(src, kaddr + offset, src_offset, cur);
+		// kunmap_atomic(kaddr, KM_USER1);
+
+		src_offset += cur;
+		len -= cur;
+		offset = 0;
+		i++;
+	}
+}
+EXPORT_SYMBOL(copy_extent_buffer);
+
+/*
+ * Move len bytes from src_page + src_off to dst_page + dst_off.  Within
+ * one page memmove() handles any overlap; between two pages the bytes
+ * are copied one at a time from the end towards the start.
+ */
+static void move_pages(struct page *dst_page, struct page *src_page,
+		       unsigned long dst_off, unsigned long src_off,
+		       unsigned long len)
+{
+	// char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
+	char *dst_kaddr = page_address(dst_page);
+	if (dst_page == src_page) {
+		memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
+	} else {
+		// char *src_kaddr = kmap_atomic(src_page, KM_USER1);
+		char *src_kaddr = page_address(src_page);
+		char *p = dst_kaddr + dst_off + len;
+		char *s = src_kaddr + src_off + len;
+
+		while (len--)
+			*--p = *--s;
+
+		// kunmap_atomic(src_kaddr, KM_USER1);
+	}
+	// kunmap_atomic(dst_kaddr, KM_USER0);
+}
+
+/*
+ * Copy len bytes from src_page + src_off to dst_page + dst_off.  When
+ * both are the same page the ranges may overlap, so memmove() is used
+ * there; memcpy() on overlapping ranges is undefined.
+ */
+static void copy_pages(struct page *dst_page, struct page *src_page,
+		       unsigned long dst_off, unsigned long src_off,
+		       unsigned long len)
+{
+	//kmap_atomic(dst_page, KM_USER0);
+	char *dst_kaddr = page_address(dst_page);
+	char *src_kaddr;
+
+	if (dst_page != src_page) {
+		src_kaddr = page_address(src_page); // kmap_atomic(src_page, KM_USER1);
+		memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
+	} else {
+		src_kaddr = dst_kaddr;
+		memmove(dst_kaddr + dst_off, src_kaddr + src_off, len);
+	}
+	/*
+	kunmap_atomic(dst_kaddr, KM_USER0);
+	if (dst_page != src_page)
+		kunmap_atomic(src_kaddr, KM_USER1);
+	*/
+}
+
+/*
+ * Copy len bytes inside one buffer from src_offset to dst_offset, working
+ * forwards from the lowest offset.  Each step copies as much as fits in
+ * both the current source page and the current destination page.
+ * BUGs when either range runs past dst->len.
+ */
+void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
+			   unsigned long src_offset, unsigned long len)
+{
+	size_t cur;
+	size_t dst_off_in_page;
+	size_t src_off_in_page;
+	size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
+	unsigned long dst_i;
+	unsigned long src_i;
+
+	if (src_offset + len > dst->len) {
+		printk("memmove bogus src_offset %lu move len %lu len %lu\n",
+		       src_offset, len, dst->len);
+		BUG_ON(1);
+	}
+	if (dst_offset + len > dst->len) {
+		printk("memmove bogus dst_offset %lu move len %lu len %lu\n",
+		       dst_offset, len, dst->len);
+		BUG_ON(1);
+	}
+
+	while(len > 0) {
+		/*
+		 * the buffer may begin part way into its first page, so
+		 * both the page index and the in-page position include
+		 * start_offset
+		 */
+		dst_off_in_page = (start_offset + dst_offset) &
+			((unsigned long)PAGE_CACHE_SIZE - 1);
+		src_off_in_page = (start_offset + src_offset) &
+			((unsigned long)PAGE_CACHE_SIZE - 1);
+
+		dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
+		src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
+
+		cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
+					       src_off_in_page));
+		cur = min(cur, (unsigned long)(PAGE_CACHE_SIZE -
+					       dst_off_in_page));
+
+		copy_pages(dst->pages[dst_i], dst->pages[src_i],
+			   dst_off_in_page, src_off_in_page, cur);
+
+		src_offset += cur;
+		dst_offset += cur;
+		len -= cur;
+	}
+}
+EXPORT_SYMBOL(memcpy_extent_buffer);
+
+void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
+			   unsigned long
src_offset, unsigned long len) +{ + size_t cur; + size_t dst_off_in_page; + size_t src_off_in_page; + unsigned long dst_end = dst_offset + len - 1; + unsigned long src_end = src_offset + len - 1; + size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long dst_i; + unsigned long src_i; + + if (src_offset + len > dst->len) { + printk("memmove bogus src_offset %lu move len %lu len %lu\n", + src_offset, len, dst->len); + BUG_ON(1); + } + if (dst_offset + len > dst->len) { + printk("memmove bogus dst_offset %lu move len %lu len %lu\n", + dst_offset, len, dst->len); + BUG_ON(1); + } + if (dst_offset < src_offset) { + memcpy_extent_buffer(dst, dst_offset, src_offset, len); + return; + } + while(len > 0) { + dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT; + src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; + + dst_off_in_page = dst_end & + ((unsigned long)PAGE_CACHE_SIZE - 1); + src_off_in_page = src_end & + ((unsigned long)PAGE_CACHE_SIZE - 1); + + if (src_i == 0) + src_off_in_page += start_offset; + if (dst_i == 0) + dst_off_in_page += start_offset; + + cur = min(len, src_off_in_page + 1); + cur = min(cur, dst_off_in_page + 1); +// printk("move pages orig dst %lu src %lu len %lu, this %lu %lu %lu\n", dst_offset, src_offset, len, dst_off_in_page - cur + 1, src_off_in_page - cur + 1, cur); + move_pages(dst->pages[dst_i], dst->pages[src_i], + dst_off_in_page - cur + 1, + src_off_in_page - cur + 1, cur); + + dst_end -= cur - 1; + src_end -= cur - 1; + len -= cur; + } +} +EXPORT_SYMBOL(memmove_extent_buffer); diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index d913ce01248d..430b997a70f6 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -3,9 +3,20 @@ #include +#define EXTENT_MAP_HOLE (u64)-3 #define EXTENT_MAP_INLINE (u64)-2 #define EXTENT_MAP_DELALLOC (u64)-1 +/* bits for the extent state */ +#define EXTENT_DIRTY 1 +#define EXTENT_WRITEBACK (1 << 1) +#define EXTENT_UPTODATE (1 << 2) +#define EXTENT_LOCKED (1 << 3) 
+#define EXTENT_NEW (1 << 4) +#define EXTENT_DELALLOC (1 << 5) +#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) + + struct extent_map_ops { int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); int (*writepage_io_hook)(struct page *page, u64 start, u64 end); @@ -51,10 +62,13 @@ struct extent_state { struct list_head list; }; +#define EXTENT_BUFFER_SIZE(nr) (sizeof(struct extent_buffer) + \ + (nr - 1) * sizeof(struct page *)) struct extent_buffer { u64 start; - u64 end; /* inclusive */ - char *addr; + unsigned long len; + atomic_t refs; + int flags; struct page *pages[]; }; @@ -87,8 +101,12 @@ int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); +int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask); int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); +int find_first_extent_bit(struct extent_map_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, int bits); int extent_invalidatepage(struct extent_map_tree *tree, struct page *page, unsigned long offset); int extent_write_full_page(struct extent_map_tree *tree, struct page *page, @@ -106,4 +124,57 @@ int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end); int set_state_private(struct extent_map_tree *tree, u64 start, u64 private); int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private); void set_page_extent_mapped(struct page *page); + +struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, + u64 start, unsigned long len, + gfp_t mask); +struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, + u64 start, unsigned long len, + gfp_t mask); +void free_extent_buffer(struct extent_buffer *eb); +int read_extent_buffer_pages(struct extent_map_tree *tree, + struct extent_buffer *eb, int wait); + +static inline void extent_buffer_get(struct extent_buffer *eb) 
+{ + atomic_inc(&eb->refs); +} + +static inline u64 extent_buffer_blocknr(struct extent_buffer *eb) +{ + return eb->start / 4096; +} + +int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, + unsigned long start, + unsigned long len); +void read_extent_buffer(struct extent_buffer *eb, void *dst, + unsigned long start, + unsigned long len); +void write_extent_buffer(struct extent_buffer *eb, const void *src, + unsigned long start, unsigned long len); +void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, + unsigned long dst_offset, unsigned long src_offset, + unsigned long len); +void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len); +void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len); +void memset_extent_buffer(struct extent_buffer *eb, char c, + unsigned long start, unsigned long len); +int wait_on_extent_buffer_writeback(struct extent_map_tree *tree, + struct extent_buffer *eb); +int clear_extent_buffer_dirty(struct extent_map_tree *tree, + struct extent_buffer *eb); +int set_extent_buffer_dirty(struct extent_map_tree *tree, + struct extent_buffer *eb); +int set_extent_buffer_uptodate(struct extent_map_tree *tree, + struct extent_buffer *eb); +int extent_buffer_uptodate(struct extent_map_tree *tree, + struct extent_buffer *eb); +int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, + char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km); +void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); #endif diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index ea4dd4c5fce3..226f6d028c3f 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -34,12 +34,12 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_file_extent_item *item; struct btrfs_key file_key; struct btrfs_path 
*path; + struct extent_buffer *leaf; path = btrfs_alloc_path(); BUG_ON(!path); file_key.objectid = objectid; file_key.offset = pos; - file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); ret = btrfs_insert_empty_item(trans, root, path, &file_key, @@ -47,15 +47,16 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, if (ret < 0) goto out; BUG_ON(ret); - item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_disk_blocknr(item, offset); - btrfs_set_file_extent_disk_num_blocks(item, disk_num_blocks); - btrfs_set_file_extent_offset(item, 0); - btrfs_set_file_extent_num_blocks(item, num_blocks); - btrfs_set_file_extent_generation(item, trans->transid); - btrfs_set_file_extent_type(item, BTRFS_FILE_EXTENT_REG); - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_set_file_extent_disk_blocknr(leaf, item, offset); + btrfs_set_file_extent_disk_num_blocks(leaf, item, disk_num_blocks); + btrfs_set_file_extent_offset(leaf, item, 0); + btrfs_set_file_extent_num_blocks(leaf, item, num_blocks); + btrfs_set_file_extent_generation(leaf, item, trans->transid); + btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG); + btrfs_mark_buffer_dirty(leaf); out: btrfs_free_path(path); return ret; @@ -71,32 +72,30 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, struct btrfs_key file_key; struct btrfs_key found_key; struct btrfs_csum_item *item; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; u64 csum_offset = 0; int csums_in_item; file_key.objectid = objectid; file_key.offset = offset; - file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); if (ret < 0) goto fail; - leaf = btrfs_buffer_leaf(path->nodes[0]); + leaf = path->nodes[0]; if (ret > 0) { ret = 1; if (path->slots[0] == 0) goto fail; 
path->slots[0]--; - btrfs_disk_key_to_cpu(&found_key, - &leaf->items[path->slots[0]].key); + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || found_key.objectid != objectid) { goto fail; } csum_offset = (offset - found_key.offset) >> root->fs_info->sb->s_blocksize_bits; - csums_in_item = btrfs_item_size(leaf->items + path->slots[0]); + csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]); csums_in_item /= BTRFS_CRC32_SIZE; if (csum_offset >= csums_in_item) { @@ -127,7 +126,6 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, file_key.objectid = objectid; file_key.offset = offset; - file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); return ret; @@ -138,12 +136,14 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, u64 objectid, u64 offset, char *data, size_t len) { + return 0; +#if 0 int ret; struct btrfs_key file_key; struct btrfs_key found_key; struct btrfs_path *path; struct btrfs_csum_item *item; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; u64 csum_offset; path = btrfs_alloc_path(); @@ -161,8 +161,8 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, if (ret == -EFBIG) { u32 item_size; /* we found one, but it isn't big enough yet */ - leaf = btrfs_buffer_leaf(path->nodes[0]); - item_size = btrfs_item_size(leaf->items + path->slots[0]); + leaf = path->nodes[0]; + item_size = btrfs_item_size_nr(leaf, path->slots[0]); if ((item_size / BTRFS_CRC32_SIZE) >= MAX_CSUM_ITEMS(root)) { /* already at max size, make a new one */ goto insert; @@ -188,8 +188,8 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, goto insert; } path->slots[0]--; - leaf = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&found_key, &leaf->items[path->slots[0]].key); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); csum_offset = 
(offset - found_key.offset) >> root->fs_info->sb->s_blocksize_bits; if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || @@ -197,10 +197,10 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, csum_offset >= MAX_CSUM_ITEMS(root)) { goto insert; } - if (csum_offset >= btrfs_item_size(leaf->items + path->slots[0]) / + if (csum_offset >= btrfs_item_size_nr(leaf, path->slots[0]) / BTRFS_CRC32_SIZE) { u32 diff = (csum_offset + 1) * BTRFS_CRC32_SIZE; - diff = diff - btrfs_item_size(leaf->items + path->slots[0]); + diff = diff - btrfs_item_size_nr(leaf, path->slots[0]); if (diff != BTRFS_CRC32_SIZE) goto insert; ret = btrfs_extend_item(trans, root, path, diff); @@ -220,21 +220,20 @@ insert: goto fail; } csum: - item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], - struct btrfs_csum_item); + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); ret = 0; item = (struct btrfs_csum_item *)((unsigned char *)item + csum_offset * BTRFS_CRC32_SIZE); found: - btrfs_check_bounds(&item->csum, BTRFS_CRC32_SIZE, - path->nodes[0]->b_data, - root->fs_info->sb->s_blocksize); + /* FIXME!!!!!!!!!!!! 
*/ ret = btrfs_csum_data(root, data, len, &item->csum); btrfs_mark_buffer_dirty(path->nodes[0]); fail: btrfs_release_path(root, path); btrfs_free_path(path); return ret; +#endif } int btrfs_csum_truncate(struct btrfs_trans_handle *trans, @@ -242,21 +241,21 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, u64 isize) { struct btrfs_key key; - struct btrfs_leaf *leaf = btrfs_buffer_leaf(path->nodes[0]); + struct extent_buffer *leaf = path->nodes[0]; int slot = path->slots[0]; int ret; u32 new_item_size; u64 new_item_span; u64 blocks; - btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + btrfs_item_key_to_cpu(leaf, &key, slot); if (isize <= key.offset) return 0; new_item_span = isize - key.offset; - blocks = (new_item_span + root->blocksize - 1) >> + blocks = (new_item_span + root->sectorsize - 1) >> root->fs_info->sb->s_blocksize_bits; new_item_size = blocks * BTRFS_CRC32_SIZE; - if (new_item_size >= btrfs_item_size(leaf->items + slot)) + if (new_item_size >= btrfs_item_size_nr(leaf, slot)) return 0; ret = btrfs_truncate_item(trans, root, path, new_item_size); BUG_ON(ret); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 4cc459c943ec..1734ca695555 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. 
*/ -#include #include #include #include @@ -88,7 +87,9 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, { struct btrfs_key key; struct btrfs_path *path; - char *ptr, *kaddr; + struct extent_buffer *leaf; + char *kaddr; + unsigned long ptr; struct btrfs_file_extent_item *ei; u32 datasize; int err = 0; @@ -102,7 +103,6 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, key.objectid = inode->i_ino; key.offset = offset; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); BUG_ON(size >= PAGE_CACHE_SIZE); datasize = btrfs_file_extent_calc_inline_size(size); @@ -113,18 +113,17 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, err = ret; goto fail; } - ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_generation(ei, trans->transid); - btrfs_set_file_extent_type(ei, - BTRFS_FILE_EXTENT_INLINE); + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(leaf, ei, trans->transid); + btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); ptr = btrfs_file_extent_inline_start(ei); kaddr = kmap_atomic(page, KM_USER0); - btrfs_memcpy(root, path->nodes[0]->b_data, - ptr, kaddr + page_offset, size); + write_extent_buffer(leaf, kaddr + page_offset, ptr, size); kunmap_atomic(kaddr, KM_USER0); - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(leaf); fail: btrfs_free_path(path); return err; @@ -156,8 +155,8 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, em->bdev = inode->i_sb->s_bdev; - start_pos = pos & ~((u64)root->blocksize - 1); - num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> + start_pos = pos & ~((u64)root->sectorsize - 1); + num_blocks = (write_bytes + pos - start_pos + root->sectorsize - 1) >> inode->i_blkbits; down_read(&BTRFS_I(inode)->root->snap_sem); @@ -184,7 +183,7 @@ 
static int dirty_and_release_pages(struct btrfs_trans_handle *trans, if (inode->i_size < start_pos) { u64 last_pos_in_file; u64 hole_size; - u64 mask = root->blocksize - 1; + u64 mask = root->sectorsize - 1; last_pos_in_file = (isize + mask) & ~mask; hole_size = (start_pos - last_pos_in_file + mask) & ~mask; @@ -227,8 +226,8 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, /* step one, delete the existing extents in this range */ /* FIXME blocksize != pagesize */ err = btrfs_drop_extents(trans, root, inode, start_pos, - (pos + write_bytes + root->blocksize -1) & - ~((u64)root->blocksize - 1), &hint_block); + (pos + write_bytes + root->sectorsize -1) & + ~((u64)root->sectorsize - 1), &hint_block); if (err) goto failed; @@ -288,7 +287,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, { int ret; struct btrfs_key key; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; int slot; struct btrfs_file_extent_item *extent; u64 extent_end = 0; @@ -327,10 +326,10 @@ next_slot: found_extent = 0; found_inline = 0; extent = NULL; - leaf = btrfs_buffer_leaf(path->nodes[0]); + leaf = path->nodes[0]; slot = path->slots[0]; ret = 0; - btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + btrfs_item_key_to_cpu(leaf, &key, slot); if (key.offset >= end || key.objectid != inode->i_ino) { goto out; } @@ -344,17 +343,18 @@ next_slot: if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); - found_type = btrfs_file_extent_type(extent); + found_type = btrfs_file_extent_type(leaf, extent); if (found_type == BTRFS_FILE_EXTENT_REG) { extent_end = key.offset + - (btrfs_file_extent_num_blocks(extent) << + (btrfs_file_extent_num_blocks(leaf, extent) << inode->i_blkbits); found_extent = 1; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + struct btrfs_item *item; + item = btrfs_item_nr(leaf, slot); found_inline = 1; extent_end = key.offset + - btrfs_file_extent_inline_len(leaf->items + - 
slot); + btrfs_file_extent_inline_len(leaf, item); } } else { extent_end = search_start; @@ -365,8 +365,7 @@ next_slot: search_start >= extent_end) { int nextret; u32 nritems; - nritems = btrfs_header_nritems( - btrfs_buffer_header(path->nodes[0])); + nritems = btrfs_header_nritems(leaf); if (slot >= nritems - 1) { nextret = btrfs_next_leaf(root, path); if (nextret) @@ -380,7 +379,7 @@ next_slot: /* FIXME, there's only one inline extent allowed right now */ if (found_inline) { - u64 mask = root->blocksize - 1; + u64 mask = root->sectorsize - 1; search_start = (extent_end + mask) & ~mask; } else search_start = extent_end; @@ -388,10 +387,13 @@ next_slot: if (end < extent_end && end >= key.offset) { if (found_extent) { u64 disk_blocknr = - btrfs_file_extent_disk_blocknr(extent); + btrfs_file_extent_disk_blocknr(leaf,extent); u64 disk_num_blocks = - btrfs_file_extent_disk_num_blocks(extent); - memcpy(&old, extent, sizeof(old)); + btrfs_file_extent_disk_num_blocks(leaf, + extent); + read_extent_buffer(leaf, &old, + (unsigned long)extent, + sizeof(old)); if (disk_blocknr != 0) { ret = btrfs_inc_extent_ref(trans, root, disk_blocknr, disk_num_blocks); @@ -406,20 +408,24 @@ next_slot: u64 new_num; u64 old_num; keep = 1; - WARN_ON(start & (root->blocksize - 1)); + WARN_ON(start & (root->sectorsize - 1)); if (found_extent) { new_num = (start - key.offset) >> inode->i_blkbits; - old_num = btrfs_file_extent_num_blocks(extent); + old_num = btrfs_file_extent_num_blocks(leaf, + extent); *hint_block = - btrfs_file_extent_disk_blocknr(extent); - if (btrfs_file_extent_disk_blocknr(extent)) { + btrfs_file_extent_disk_blocknr(leaf, + extent); + if (btrfs_file_extent_disk_blocknr(leaf, + extent)) { inode->i_blocks -= (old_num - new_num) << 3; } - btrfs_set_file_extent_num_blocks(extent, + btrfs_set_file_extent_num_blocks(leaf, + extent, new_num); - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(leaf); } else { WARN_ON(1); } @@ -431,13 +437,17 @@ next_slot: u64 
extent_num_blocks = 0; if (found_extent) { disk_blocknr = - btrfs_file_extent_disk_blocknr(extent); + btrfs_file_extent_disk_blocknr(leaf, + extent); disk_num_blocks = - btrfs_file_extent_disk_num_blocks(extent); + btrfs_file_extent_disk_num_blocks(leaf, + extent); extent_num_blocks = - btrfs_file_extent_num_blocks(extent); + btrfs_file_extent_num_blocks(leaf, + extent); *hint_block = - btrfs_file_extent_disk_blocknr(extent); + btrfs_file_extent_disk_blocknr(leaf, + extent); } ret = btrfs_del_item(trans, root, path); /* TODO update progress marker and return */ @@ -464,42 +474,37 @@ next_slot: struct btrfs_key ins; ins.objectid = inode->i_ino; ins.offset = end; - ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); btrfs_release_path(root, path); ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*extent)); + leaf = path->nodes[0]; if (ret) { - btrfs_print_leaf(root, btrfs_buffer_leaf(path->nodes[0])); - printk("got %d on inserting %Lu %u %Lu start %Lu end %Lu found %Lu %Lu keep was %d\n", ret , ins.objectid, ins.flags, ins.offset, start, end, key.offset, extent_end, keep); + btrfs_print_leaf(root, leaf); + printk("got %d on inserting %Lu %u %Lu start %Lu end %Lu found %Lu %Lu keep was %d\n", ret , ins.objectid, ins.type, ins.offset, start, end, key.offset, extent_end, keep); } BUG_ON(ret); - extent = btrfs_item_ptr( - btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], - struct btrfs_file_extent_item); - btrfs_set_file_extent_disk_blocknr(extent, - btrfs_file_extent_disk_blocknr(&old)); - btrfs_set_file_extent_disk_num_blocks(extent, - btrfs_file_extent_disk_num_blocks(&old)); - - btrfs_set_file_extent_offset(extent, - btrfs_file_extent_offset(&old) + + extent = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + write_extent_buffer(leaf, &old, + (unsigned long)extent, sizeof(old)); + + btrfs_set_file_extent_offset(leaf, extent, + le64_to_cpu(old.offset) + ((end - key.offset) >> inode->i_blkbits)); - 
WARN_ON(btrfs_file_extent_num_blocks(&old) < + WARN_ON(le64_to_cpu(old.num_blocks) < (extent_end - end) >> inode->i_blkbits); - btrfs_set_file_extent_num_blocks(extent, + btrfs_set_file_extent_num_blocks(leaf, extent, (extent_end - end) >> inode->i_blkbits); - btrfs_set_file_extent_type(extent, + btrfs_set_file_extent_type(leaf, extent, BTRFS_FILE_EXTENT_REG); - btrfs_set_file_extent_generation(extent, - btrfs_file_extent_generation(&old)); btrfs_mark_buffer_dirty(path->nodes[0]); - if (btrfs_file_extent_disk_blocknr(&old) != 0) { + if (le64_to_cpu(old.disk_blocknr) != 0) { inode->i_blocks += - btrfs_file_extent_num_blocks(extent) << 3; + btrfs_file_extent_num_blocks(leaf, + extent) << 3; } ret = 0; goto out; @@ -529,8 +534,8 @@ static int prepare_pages(struct btrfs_root *root, u64 num_blocks; u64 start_pos; - start_pos = pos & ~((u64)root->blocksize - 1); - num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> + start_pos = pos & ~((u64)root->sectorsize - 1); + num_blocks = (write_bytes + pos - start_pos + root->sectorsize - 1) >> inode->i_blkbits; memset(pages, 0, num_pages * sizeof(struct page *)); diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index b78346177539..35d2608f8918 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -20,24 +20,18 @@ #include "disk-io.h" #include "transaction.h" -int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 objectid, struct btrfs_inode_item - *inode_item) +int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 objectid) { - struct btrfs_path *path; struct btrfs_key key; int ret; key.objectid = objectid; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); key.offset = 0; - path = btrfs_alloc_path(); - BUG_ON(!path); - ret = btrfs_insert_item(trans, root, &key, inode_item, - sizeof(*inode_item)); - btrfs_release_path(root, path); - btrfs_free_path(path); + ret = 
btrfs_insert_empty_item(trans, root, path, &key, + sizeof(struct btrfs_inode_item)); if (ret == 0 && objectid > root->highest_inode) root->highest_inode = objectid; return ret; @@ -51,15 +45,15 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root int cow = mod != 0; int ret; int slot; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; struct btrfs_key found_key; ret = btrfs_search_slot(trans, root, location, path, ins_len, cow); if (ret > 0 && btrfs_key_type(location) == BTRFS_ROOT_ITEM_KEY && location->offset == (u64)-1 && path->slots[0] != 0) { slot = path->slots[0] - 1; - leaf = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&found_key, &leaf->items[slot].key); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, slot); if (found_key.objectid == location->objectid && btrfs_key_type(&found_key) == btrfs_key_type(location)) { path->slots[0]--; diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 405470866254..ab74977adf5c 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -24,8 +24,9 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) { struct btrfs_path *path; int ret; - struct btrfs_leaf *l; + struct extent_buffer *l; struct btrfs_key search_key; + struct btrfs_key found_key; int slot; path = btrfs_alloc_path(); @@ -39,8 +40,9 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) BUG_ON(ret == 0); if (path->slots[0] > 0) { slot = path->slots[0] - 1; - l = btrfs_buffer_leaf(path->nodes[0]); - *objectid = btrfs_disk_key_objectid(&l->items[slot].key); + l = path->nodes[0]; + btrfs_item_key_to_cpu(l, &found_key, slot); + *objectid = found_key.objectid; } else { *objectid = BTRFS_FIRST_FREE_OBJECTID; } @@ -64,13 +66,12 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, int slot = 0; u64 last_ino = 0; int start_found; - struct btrfs_leaf *l; + struct extent_buffer *l; struct btrfs_key search_key; u64 search_start = dirid; path = 
btrfs_alloc_path(); BUG_ON(!path); - search_key.flags = 0; search_start = root->last_inode_alloc; search_start = max(search_start, BTRFS_FIRST_FREE_OBJECTID); search_key.objectid = search_start; @@ -86,9 +87,9 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, path->slots[0]--; while (1) { - l = btrfs_buffer_leaf(path->nodes[0]); + l = path->nodes[0]; slot = path->slots[0]; - if (slot >= btrfs_header_nritems(&l->header)) { + if (slot >= btrfs_header_nritems(l)) { ret = btrfs_next_leaf(root, path); if (ret == 0) continue; @@ -103,7 +104,7 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, last_ino : search_start; goto found; } - btrfs_disk_key_to_cpu(&key, &l->items[slot].key); + btrfs_item_key_to_cpu(l, &key, slot); if (key.objectid >= search_start) { if (start_found) { if (last_ino < search_start) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b03d40a907ca..fbe2836364e0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -159,10 +159,8 @@ out: int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) { - char csum[BTRFS_CRC32_SIZE]; size_t offset = start - (page->index << PAGE_CACHE_SHIFT); struct inode *inode = page->mapping->host; - struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; char *kaddr; u64 private; @@ -173,11 +171,15 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) if (ret) { goto zeroit; } + /* + struct btrfs_root *root = BTRFS_I(inode)->root; + char csum[BTRFS_CRC32_SIZE]; ret = btrfs_csum_data(root, kaddr + offset, end - start + 1, csum); BUG_ON(ret); if (memcmp(csum, &private, BTRFS_CRC32_SIZE)) { goto zeroit; } + */ kunmap_atomic(kaddr, KM_IRQ0); return 0; @@ -192,7 +194,9 @@ zeroit: void btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path *path; + struct extent_buffer *leaf; struct btrfs_inode_item *inode_item; + struct btrfs_inode_timespec *tspec; struct btrfs_root *root = BTRFS_I(inode)->root; 
struct btrfs_key location; u64 alloc_group_block; @@ -205,29 +209,37 @@ void btrfs_read_locked_inode(struct inode *inode) memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); ret = btrfs_lookup_inode(NULL, root, path, &location, 0); - if (ret) { + if (ret) goto make_bad; - } - inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], - struct btrfs_inode_item); - inode->i_mode = btrfs_inode_mode(inode_item); - inode->i_nlink = btrfs_inode_nlink(inode_item); - inode->i_uid = btrfs_inode_uid(inode_item); - inode->i_gid = btrfs_inode_gid(inode_item); - inode->i_size = btrfs_inode_size(inode_item); - inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime); - inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime); - inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime); - inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime); - inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime); - inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime); - inode->i_blocks = btrfs_inode_nblocks(inode_item); - inode->i_generation = btrfs_inode_generation(inode_item); + leaf = path->nodes[0]; + inode_item = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_inode_item); + + inode->i_mode = btrfs_inode_mode(leaf, inode_item); + inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); + inode->i_uid = btrfs_inode_uid(leaf, inode_item); + inode->i_gid = btrfs_inode_gid(leaf, inode_item); + inode->i_size = btrfs_inode_size(leaf, inode_item); + + tspec = btrfs_inode_atime(inode_item); + inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec); + inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec); + + tspec = btrfs_inode_mtime(inode_item); + inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec); + inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec); + + tspec = btrfs_inode_ctime(inode_item); + inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec); + inode->i_ctime.tv_nsec = 
btrfs_timespec_nsec(leaf, tspec); + + inode->i_blocks = btrfs_inode_nblocks(leaf, inode_item); + inode->i_generation = btrfs_inode_generation(leaf, inode_item); inode->i_rdev = 0; - rdev = btrfs_inode_rdev(inode_item); - alloc_group_block = btrfs_inode_block_group(inode_item); + rdev = btrfs_inode_rdev(leaf, inode_item); + + alloc_group_block = btrfs_inode_block_group(leaf, inode_item); BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info, alloc_group_block); @@ -267,24 +279,35 @@ make_bad: make_bad_inode(inode); } -static void fill_inode_item(struct btrfs_inode_item *item, +static void fill_inode_item(struct extent_buffer *leaf, + struct btrfs_inode_item *item, struct inode *inode) { - btrfs_set_inode_uid(item, inode->i_uid); - btrfs_set_inode_gid(item, inode->i_gid); - btrfs_set_inode_size(item, inode->i_size); - btrfs_set_inode_mode(item, inode->i_mode); - btrfs_set_inode_nlink(item, inode->i_nlink); - btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec); - btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec); - btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec); - btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec); - btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec); - btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec); - btrfs_set_inode_nblocks(item, inode->i_blocks); - btrfs_set_inode_generation(item, inode->i_generation); - btrfs_set_inode_rdev(item, inode->i_rdev); - btrfs_set_inode_block_group(item, + btrfs_set_inode_uid(leaf, item, inode->i_uid); + btrfs_set_inode_gid(leaf, item, inode->i_gid); + btrfs_set_inode_size(leaf, item, inode->i_size); + btrfs_set_inode_mode(leaf, item, inode->i_mode); + btrfs_set_inode_nlink(leaf, item, inode->i_nlink); + + btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item), + inode->i_atime.tv_sec); + btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item), + inode->i_atime.tv_nsec); + + btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item), + 
inode->i_mtime.tv_sec); + btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item), + inode->i_mtime.tv_nsec); + + btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item), + inode->i_ctime.tv_sec); + btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item), + inode->i_ctime.tv_nsec); + + btrfs_set_inode_nblocks(leaf, item, inode->i_blocks); + btrfs_set_inode_generation(leaf, item, inode->i_generation); + btrfs_set_inode_rdev(leaf, item, inode->i_rdev); + btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group->key.objectid); } @@ -294,6 +317,7 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, { struct btrfs_inode_item *inode_item; struct btrfs_path *path; + struct extent_buffer *leaf; int ret; path = btrfs_alloc_path(); @@ -306,12 +330,12 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, goto failed; } - inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], + leaf = path->nodes[0]; + inode_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); - fill_inode_item(inode_item, inode); - btrfs_mark_buffer_dirty(path->nodes[0]); + fill_inode_item(leaf, inode_item, inode); + btrfs_mark_buffer_dirty(leaf); btrfs_set_inode_last_trans(trans, inode); ret = 0; failed: @@ -330,8 +354,9 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, const char *name = dentry->d_name.name; int name_len = dentry->d_name.len; int ret = 0; - u64 objectid; + struct extent_buffer *leaf; struct btrfs_dir_item *di; + struct btrfs_key key; path = btrfs_alloc_path(); if (!path) { @@ -349,14 +374,15 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, ret = -ENOENT; goto err; } - objectid = btrfs_disk_key_objectid(&di->location); + leaf = path->nodes[0]; + btrfs_dir_item_key_to_cpu(leaf, di, &key); ret = btrfs_delete_one_dir_name(trans, root, path, di); if (ret) goto err; btrfs_release_path(root, path); di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, - objectid, name, name_len, -1); + 
key.objectid, name, name_len, -1); if (IS_ERR(di)) { ret = PTR_ERR(di); goto err; @@ -391,12 +417,15 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) root = BTRFS_I(dir)->root; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); ret = btrfs_unlink_trans(trans, root, dir, dentry); nr = trans->blocks_used; + btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); + return ret; } @@ -411,7 +440,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) struct btrfs_trans_handle *trans; struct btrfs_key found_key; int found_type; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; char *goodnames = ".."; unsigned long nr; @@ -419,10 +448,11 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) BUG_ON(!path); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); key.objectid = inode->i_ino; key.offset = (u64)-1; - key.flags = (u32)-1; + key.type = (u8)-1; while(1) { ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) { @@ -435,9 +465,8 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) goto out; } path->slots[0]--; - leaf = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&found_key, - &leaf->items[path->slots[0]].key); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); found_type = btrfs_key_type(&found_key); if (found_key.objectid != inode->i_ino) { err = -ENOENT; @@ -513,9 +542,9 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, int ret; struct btrfs_path *path; struct btrfs_key key; - struct btrfs_disk_key *found_key; + struct btrfs_key found_key; u32 found_type; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; struct btrfs_file_extent_item *fi; u64 extent_start = 0; u64 extent_num_blocks = 0; @@ -527,10 +556,12 @@ 
static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); path->reada = -1; BUG_ON(!path); + /* FIXME, add redo link to tree so we don't leak on crash */ key.objectid = inode->i_ino; key.offset = (u64)-1; - key.flags = (u32)-1; + key.type = (u8)-1; + while(1) { btrfs_init_path(path); fi = NULL; @@ -542,27 +573,28 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, BUG_ON(path->slots[0] == 0); path->slots[0]--; } - leaf = btrfs_buffer_leaf(path->nodes[0]); - found_key = &leaf->items[path->slots[0]].key; - found_type = btrfs_disk_key_type(found_key); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + found_type = btrfs_key_type(&found_key); - if (btrfs_disk_key_objectid(found_key) != inode->i_ino) + if (found_key.objectid != inode->i_ino) break; + if (found_type != BTRFS_CSUM_ITEM_KEY && found_type != BTRFS_DIR_ITEM_KEY && found_type != BTRFS_DIR_INDEX_KEY && found_type != BTRFS_EXTENT_DATA_KEY) break; - item_end = btrfs_disk_key_offset(found_key); + item_end = found_key.offset; if (found_type == BTRFS_EXTENT_DATA_KEY) { - fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], + fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - if (btrfs_file_extent_type(fi) != + if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_INLINE) { - item_end += btrfs_file_extent_num_blocks(fi) << - inode->i_blkbits; + item_end += + btrfs_file_extent_num_blocks(leaf, fi) << + inode->i_blkbits; } } if (found_type == BTRFS_CSUM_ITEM_KEY) { @@ -583,7 +615,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, btrfs_set_key_type(&key, found_type); continue; } - if (btrfs_disk_key_offset(found_key) >= inode->i_size) + if (found_key.offset >= inode->i_size) del_item = 1; else del_item = 0; @@ -591,30 +623,31 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, /* FIXME, shrink the extent if the ref count is only 1 */ if 
(found_type == BTRFS_EXTENT_DATA_KEY && - btrfs_file_extent_type(fi) != + btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_INLINE) { u64 num_dec; - extent_start = btrfs_file_extent_disk_blocknr(fi); + extent_start = btrfs_file_extent_disk_blocknr(leaf, fi); if (!del_item) { u64 orig_num_blocks = - btrfs_file_extent_num_blocks(fi); + btrfs_file_extent_num_blocks(leaf, fi); extent_num_blocks = inode->i_size - - btrfs_disk_key_offset(found_key) + - root->blocksize - 1; + found_key.offset + root->sectorsize - 1; extent_num_blocks >>= inode->i_blkbits; - btrfs_set_file_extent_num_blocks(fi, + btrfs_set_file_extent_num_blocks(leaf, fi, extent_num_blocks); num_dec = (orig_num_blocks - extent_num_blocks) << 3; if (extent_start != 0) { inode->i_blocks -= num_dec; } - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(leaf); } else { extent_num_blocks = - btrfs_file_extent_disk_num_blocks(fi); + btrfs_file_extent_disk_num_blocks(leaf, + fi); /* FIXME blocksize != 4096 */ - num_dec = btrfs_file_extent_num_blocks(fi) << 3; + num_dec = btrfs_file_extent_num_blocks(leaf, + fi) << 3; if (extent_start != 0) { found_extent = 1; inode->i_blocks -= num_dec; @@ -725,7 +758,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - u64 mask = root->blocksize - 1; + u64 mask = root->sectorsize - 1; u64 pos = (inode->i_size + mask) & ~mask; u64 block_end = attr->ia_size | mask; u64 hole_size; @@ -771,9 +804,11 @@ void btrfs_delete_inode(struct inode *inode) if (is_bad_inode(inode)) { goto no_delete; } + inode->i_size = 0; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); ret = btrfs_truncate_in_trans(trans, root, inode); if (ret) @@ -782,6 +817,7 @@ void btrfs_delete_inode(struct inode *inode) if (ret) goto no_delete_lock; nr = trans->blocks_used; + 
btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); @@ -819,7 +855,7 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, ret = 0; goto out; } - btrfs_disk_key_to_cpu(location, &di->location); + btrfs_dir_item_key_to_cpu(path->nodes[0], di, location); out: btrfs_release_path(root, path); btrfs_free_path(path); @@ -856,7 +892,6 @@ static int fixup_tree_root_location(struct btrfs_root *root, ri = &(*sub_root)->root_item; location->objectid = btrfs_root_dirid(ri); - location->flags = 0; btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); location->offset = 0; @@ -908,11 +943,14 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); + mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_inode_by_name(dir, dentry, &location); mutex_unlock(&root->fs_info->fs_mutex); + if (ret < 0) return ERR_PTR(ret); + inode = NULL; if (location.objectid) { ret = fixup_tree_root_location(root, &location, &sub_root, @@ -952,10 +990,11 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) struct btrfs_item *item; struct btrfs_dir_item *di; struct btrfs_key key; + struct btrfs_key found_key; struct btrfs_path *path; int ret; u32 nritems; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; int slot; int advance; unsigned char d_type; @@ -964,15 +1003,19 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) u32 di_total; u32 di_len; int key_type = BTRFS_DIR_INDEX_KEY; + char tmp_name[32]; + char *name_ptr; + int name_len; /* FIXME, use a real flag for deciding about the key type */ if (root->fs_info->tree_root == root) key_type = BTRFS_DIR_ITEM_KEY; + mutex_lock(&root->fs_info->fs_mutex); key.objectid = inode->i_ino; - key.flags = 0; btrfs_set_key_type(&key, key_type); key.offset = filp->f_pos; + path = btrfs_alloc_path(); path->reada = 2; ret = 
btrfs_search_slot(NULL, root, &key, path, 0, 0); @@ -980,16 +1023,16 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) goto err; advance = 0; while(1) { - leaf = btrfs_buffer_leaf(path->nodes[0]); - nritems = btrfs_header_nritems(&leaf->header); + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); slot = path->slots[0]; if (advance || slot >= nritems) { if (slot >= nritems -1) { ret = btrfs_next_leaf(root, path); if (ret) break; - leaf = btrfs_buffer_leaf(path->nodes[0]); - nritems = btrfs_header_nritems(&leaf->header); + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); slot = path->slots[0]; } else { slot++; @@ -997,28 +1040,48 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) } } advance = 1; - item = leaf->items + slot; - if (btrfs_disk_key_objectid(&item->key) != key.objectid) + item = btrfs_item_nr(leaf, slot); + btrfs_item_key_to_cpu(leaf, &found_key, slot); + + if (found_key.objectid != key.objectid) break; - if (btrfs_disk_key_type(&item->key) != key_type) + if (btrfs_key_type(&found_key) != key_type) break; - if (btrfs_disk_key_offset(&item->key) < filp->f_pos) + if (found_key.offset < filp->f_pos) continue; - filp->f_pos = btrfs_disk_key_offset(&item->key); + + filp->f_pos = found_key.offset; advance = 1; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); di_cur = 0; - di_total = btrfs_item_size(leaf->items + slot); + di_total = btrfs_item_size(leaf, item); while(di_cur < di_total) { - d_type = btrfs_filetype_table[btrfs_dir_type(di)]; - over = filldir(dirent, (const char *)(di + 1), - btrfs_dir_name_len(di), - btrfs_disk_key_offset(&item->key), - btrfs_disk_key_objectid(&di->location), + struct btrfs_key location; + + name_len = btrfs_dir_name_len(leaf, di); + if (name_len < 32) { + name_ptr = tmp_name; + } else { + name_ptr = kmalloc(name_len, GFP_NOFS); + BUG_ON(!name_ptr); + } + read_extent_buffer(leaf, name_ptr, + (unsigned long)(di + 1), name_len); + + d_type 
= btrfs_filetype_table[btrfs_dir_type(leaf, di)]; + btrfs_dir_item_key_to_cpu(leaf, di, &location); + + over = filldir(dirent, name_ptr, name_len, + found_key.offset, + location.objectid, d_type); + + if (name_ptr != tmp_name) + kfree(name_ptr); + if (over) goto nopos; - di_len = btrfs_dir_name_len(di) + sizeof(*di); + di_len = btrfs_dir_name_len(leaf, di) + sizeof(*di); di_cur += di_len; di = (struct btrfs_dir_item *)((char *)di + di_len); } @@ -1075,11 +1138,15 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, int mode) { struct inode *inode; - struct btrfs_inode_item inode_item; + struct btrfs_inode_item *inode_item; struct btrfs_key *location; + struct btrfs_path *path; int ret; int owner; + path = btrfs_alloc_path(); + BUG_ON(!path); + inode = new_inode(root->fs_info->sb); if (!inode) return ERR_PTR(-ENOMEM); @@ -1095,24 +1162,32 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, group = btrfs_find_block_group(root, group, 0, 0, owner); BTRFS_I(inode)->block_group = group; + ret = btrfs_insert_empty_inode(trans, root, path, objectid); + if (ret) + goto fail; + inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; inode->i_mode = mode; inode->i_ino = objectid; inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - fill_inode_item(&inode_item, inode); + inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + fill_inode_item(path->nodes[0], inode_item, inode); + btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_free_path(path); + location = &BTRFS_I(inode)->location; location->objectid = objectid; - location->flags = 0; location->offset = 0; btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); - ret = btrfs_insert_inode(trans, root, objectid, &inode_item); - if (ret) - return ERR_PTR(ret); insert_inode_hash(inode); return inode; +fail: + btrfs_free_path(path); + return ERR_PTR(ret); } static inline u8 btrfs_inode_type(struct inode *inode) @@ 
-1127,8 +1202,8 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, struct btrfs_key key; struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root; struct inode *parent_inode; + key.objectid = inode->i_ino; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); key.offset = 0; @@ -1285,14 +1360,18 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, inc_nlink(inode); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); atomic_inc(&inode->i_count); err = btrfs_add_nondir(trans, dentry, inode); + if (err) drop_inode = 1; + dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, dir); err = btrfs_update_inode(trans, root, inode); + if (err) drop_inode = 1; @@ -1321,13 +1400,13 @@ static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans, key.objectid = objectid; key.offset = 0; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid, &key, BTRFS_FT_DIR); if (ret) goto error; + key.objectid = dirid; ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid, &key, BTRFS_FT_DIR); @@ -1350,6 +1429,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); + if (IS_ERR(trans)) { err = PTR_ERR(trans); goto out_unlock; @@ -1367,6 +1447,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) err = PTR_ERR(inode); goto out_fail; } + drop_on_err = 1; inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; @@ -1380,9 +1461,11 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) err = btrfs_update_inode(trans, root, inode); if (err) goto out_fail; + err = btrfs_add_link(trans, dentry, inode); if (err) goto out_fail; + d_instantiate(dentry, inode); drop_on_err = 0; 
dir->i_sb->s_dirt = 1; @@ -1392,6 +1475,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) out_fail: nr = trans->blocks_used; btrfs_end_transaction(trans, root); + out_unlock: mutex_unlock(&root->fs_info->fs_mutex); if (drop_on_err) @@ -1415,8 +1499,8 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, struct btrfs_path *path; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_file_extent_item *item; - struct btrfs_leaf *leaf; - struct btrfs_disk_key *found_key; + struct extent_buffer *leaf; + struct btrfs_key found_key; struct extent_map *em = NULL; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct btrfs_trans_handle *trans = NULL; @@ -1436,8 +1520,8 @@ again: err = -ENOMEM; goto out; } - em->start = 0; - em->end = 0; + em->start = EXTENT_MAP_HOLE; + em->end = EXTENT_MAP_HOLE; } em->bdev = inode->i_sb->s_bdev; ret = btrfs_lookup_file_extent(NULL, root, path, @@ -1453,25 +1537,27 @@ again: path->slots[0]--; } - item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - leaf = btrfs_buffer_leaf(path->nodes[0]); - blocknr = btrfs_file_extent_disk_blocknr(item); - blocknr += btrfs_file_extent_offset(item); + + blocknr = btrfs_file_extent_disk_blocknr(leaf, item); + blocknr += btrfs_file_extent_offset(leaf, item); /* are we inside the extent that was found? 
*/ - found_key = &leaf->items[path->slots[0]].key; - found_type = btrfs_disk_key_type(found_key); - if (btrfs_disk_key_objectid(found_key) != objectid || + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + found_type = btrfs_key_type(&found_key); + if (found_key.objectid != objectid || found_type != BTRFS_EXTENT_DATA_KEY) { goto not_found; } - found_type = btrfs_file_extent_type(item); - extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key); + found_type = btrfs_file_extent_type(leaf, item); + extent_start = found_key.offset; if (found_type == BTRFS_FILE_EXTENT_REG) { extent_end = extent_start + - (btrfs_file_extent_num_blocks(item) << inode->i_blkbits); + (btrfs_file_extent_num_blocks(leaf, item) << + inode->i_blkbits); err = 0; if (start < extent_start || start >= extent_end) { em->start = start; @@ -1484,28 +1570,29 @@ again: } goto not_found_em; } - if (btrfs_file_extent_disk_blocknr(item) == 0) { + if (btrfs_file_extent_disk_blocknr(leaf, item) == 0) { em->start = extent_start; em->end = extent_end - 1; - em->block_start = 0; - em->block_end = 0; + em->block_start = EXTENT_MAP_HOLE; + em->block_end = EXTENT_MAP_HOLE; goto insert; } em->block_start = blocknr << inode->i_blkbits; em->block_end = em->block_start + - (btrfs_file_extent_num_blocks(item) << + (btrfs_file_extent_num_blocks(leaf, item) << inode->i_blkbits) - 1; em->start = extent_start; em->end = extent_end - 1; goto insert; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { - char *ptr; + unsigned long ptr; char *map; u32 size; - size = btrfs_file_extent_inline_len(leaf->items + - path->slots[0]); - extent_end = extent_start | ((u64)root->blocksize - 1); + size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf, + path->slots[0])); + + extent_end = extent_start | ((u64)root->sectorsize - 1); if (start < extent_start || start >= extent_end) { em->start = start; if (start < extent_start) { @@ -1517,18 +1604,21 @@ again: } goto not_found_em; } + em->block_start = 
EXTENT_MAP_INLINE; em->block_end = EXTENT_MAP_INLINE; em->start = extent_start; em->end = extent_end; + if (!page) { goto insert; } + ptr = btrfs_file_extent_inline_start(item); map = kmap(page); - memcpy(map + page_offset, ptr, size); + read_extent_buffer(leaf, map + page_offset, ptr, size); memset(map + page_offset + size, 0, - root->blocksize - (page_offset + size)); + root->sectorsize - (page_offset + size)); flush_dcache_page(page); kunmap(page); set_extent_uptodate(em_tree, extent_start, @@ -1542,8 +1632,8 @@ not_found: em->start = start; em->end = end; not_found_em: - em->block_start = 0; - em->block_end = 0; + em->block_start = EXTENT_MAP_HOLE; + em->block_end = EXTENT_MAP_HOLE; insert: btrfs_release_path(root, path); if (em->start > start || em->end < start) { @@ -1712,6 +1802,7 @@ static void btrfs_truncate(struct inode *inode) ret = btrfs_truncate_in_trans(trans, root, inode); btrfs_update_inode(trans, root, inode); nr = trans->blocks_used; + ret = btrfs_end_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); @@ -1731,8 +1822,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) struct btrfs_key key; struct btrfs_root_item root_item; struct btrfs_inode_item *inode_item; - struct buffer_head *subvol; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; struct btrfs_root *new_root; struct inode *inode; struct inode *dir; @@ -1746,34 +1836,37 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); - subvol = btrfs_alloc_free_block(trans, root, 0, 0); - if (IS_ERR(subvol)) - return PTR_ERR(subvol); - leaf = btrfs_buffer_leaf(subvol); - btrfs_set_header_nritems(&leaf->header, 0); - btrfs_set_header_level(&leaf->header, 0); - btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol)); - btrfs_set_header_generation(&leaf->header, trans->transid); - btrfs_set_header_owner(&leaf->header, root->root_key.objectid); - 
memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid, - sizeof(leaf->header.fsid)); - btrfs_mark_buffer_dirty(subvol); + leaf = btrfs_alloc_free_block(trans, root, 0, 0); + if (IS_ERR(leaf)) + return PTR_ERR(leaf); + + btrfs_set_header_nritems(leaf, 0); + btrfs_set_header_level(leaf, 0); + btrfs_set_header_blocknr(leaf, extent_buffer_blocknr(leaf)); + btrfs_set_header_generation(leaf, trans->transid); + btrfs_set_header_owner(leaf, root->root_key.objectid); + write_extent_buffer(leaf, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(leaf), + BTRFS_FSID_SIZE); + btrfs_mark_buffer_dirty(leaf); inode_item = &root_item.inode; memset(inode_item, 0, sizeof(*inode_item)); - btrfs_set_inode_generation(inode_item, 1); - btrfs_set_inode_size(inode_item, 3); - btrfs_set_inode_nlink(inode_item, 1); - btrfs_set_inode_nblocks(inode_item, 1); - btrfs_set_inode_mode(inode_item, S_IFDIR | 0755); + inode_item->generation = cpu_to_le64(1); + inode_item->size = cpu_to_le64(3); + inode_item->nlink = cpu_to_le32(1); + inode_item->nblocks = cpu_to_le64(1); + inode_item->mode = cpu_to_le32(S_IFDIR | 0755); - btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol)); + btrfs_set_root_blocknr(&root_item, extent_buffer_blocknr(leaf)); btrfs_set_root_refs(&root_item, 1); - btrfs_set_root_blocks_used(&root_item, 0); + btrfs_set_root_used(&root_item, 0); + memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); root_item.drop_level = 0; - brelse(subvol); - subvol = NULL; + + free_extent_buffer(leaf); + leaf = NULL; ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, 0, &objectid); @@ -1784,7 +1877,6 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) key.objectid = objectid; key.offset = 1; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &root_item); @@ -1845,7 +1937,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) struct 
btrfs_trans_handle *trans; struct btrfs_key key; struct btrfs_root_item new_root_item; - struct buffer_head *tmp; + struct extent_buffer *tmp; int ret; int err; u64 objectid; @@ -1876,10 +1968,11 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) key.objectid = objectid; key.offset = 1; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); - btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node)); + btrfs_set_root_blocknr(&new_root_item, + extent_buffer_blocknr(root->node)); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &new_root_item); @@ -1904,8 +1997,10 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) fail: nr = trans->blocks_used; err = btrfs_commit_transaction(trans, root); + if (err && !ret) ret = err; + mutex_unlock(&root->fs_info->fs_mutex); up_write(&root->snap_sem); btrfs_btree_balance_dirty(root, nr); @@ -1986,7 +2081,7 @@ static int btrfs_ioctl_snap_create(struct btrfs_root *root, void __user *arg) if (copy_from_user(&vol_args, arg, sizeof(vol_args))) return -EFAULT; - + namelen = strlen(vol_args.name); if (namelen > BTRFS_VOL_NAME_MAX) return -EINVAL; @@ -2164,8 +2259,10 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) { return -ENOTEMPTY; } + mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, new_dir); path = btrfs_alloc_path(); if (!path) { @@ -2177,9 +2274,10 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, old_dir->i_ctime = old_dir->i_mtime = ctime; new_dir->i_ctime = new_dir->i_mtime = ctime; old_inode->i_ctime = ctime; + if (S_ISDIR(old_inode->i_mode) && old_dir != new_dir) { struct btrfs_key *location = &BTRFS_I(new_dir)->location; - u64 old_parent_oid; + struct btrfs_key old_parent_key; di = btrfs_lookup_dir_item(trans, root, 
path, old_inode->i_ino, "..", 2, -1); if (IS_ERR(di)) { @@ -2190,7 +2288,7 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, ret = -ENOENT; goto out_fail; } - old_parent_oid = btrfs_disk_key_objectid(&di->location); + btrfs_dir_item_key_to_cpu(path->nodes[0], di, &old_parent_key); ret = btrfs_del_item(trans, root, path); if (ret) { goto out_fail; @@ -2199,7 +2297,7 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, di = btrfs_lookup_dir_index_item(trans, root, path, old_inode->i_ino, - old_parent_oid, + old_parent_key.objectid, "..", 2, -1); if (IS_ERR(di)) { ret = PTR_ERR(di); @@ -2257,8 +2355,9 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, u64 objectid; int name_len; int datasize; - char *ptr; + unsigned long ptr; struct btrfs_file_extent_item *ei; + struct extent_buffer *leaf; unsigned long nr; name_len = strlen(symname) + 1; @@ -2302,7 +2401,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, BUG_ON(!path); key.objectid = inode->i_ino; key.offset = 0; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); datasize = btrfs_file_extent_calc_inline_size(name_len); err = btrfs_insert_empty_item(trans, root, path, &key, @@ -2311,16 +2409,17 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, drop_inode = 1; goto out_unlock; } - ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_generation(ei, trans->transid); - btrfs_set_file_extent_type(ei, + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(leaf, ei, trans->transid); + btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); ptr = btrfs_file_extent_inline_start(ei); - btrfs_memcpy(root, path->nodes[0]->b_data, - ptr, symname, name_len); - btrfs_mark_buffer_dirty(path->nodes[0]); + write_extent_buffer(leaf, symname, ptr, 
name_len); + btrfs_mark_buffer_dirty(leaf); btrfs_free_path(path); + inode->i_op = &btrfs_symlink_inode_operations; inode->i_mapping->a_ops = &btrfs_symlink_aops; inode->i_size = name_len - 1; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index bafa78603bd2..a825ce078a54 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -20,10 +20,10 @@ #include "disk-io.h" #include "print-tree.h" -void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) +void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) { int i; - u32 nr = btrfs_header_nritems(&l->header); + u32 nr = btrfs_header_nritems(l); struct btrfs_item *item; struct btrfs_extent_item *ei; struct btrfs_root_item *ri; @@ -31,119 +31,113 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) struct btrfs_inode_item *ii; struct btrfs_block_group_item *bi; struct btrfs_file_extent_item *fi; + struct btrfs_key key; + struct btrfs_key found_key; u32 type; printk("leaf %llu total ptrs %d free space %d\n", - (unsigned long long)btrfs_header_blocknr(&l->header), nr, + (unsigned long long)btrfs_header_blocknr(l), nr, btrfs_leaf_free_space(root, l)); for (i = 0 ; i < nr ; i++) { - item = l->items + i; - type = btrfs_disk_key_type(&item->key); + item = btrfs_item_nr(l, i); + btrfs_item_key_to_cpu(l, &key, i); + type = btrfs_key_type(&key); printk("\titem %d key (%llu %x %llu) itemoff %d itemsize %d\n", i, - (unsigned long long)btrfs_disk_key_objectid(&item->key), - btrfs_disk_key_flags(&item->key), - (unsigned long long)btrfs_disk_key_offset(&item->key), - btrfs_item_offset(item), - btrfs_item_size(item)); + (unsigned long long)key.objectid, type, + (unsigned long long)key.offset, + btrfs_item_offset(l, item), btrfs_item_size(l, item)); switch (type) { case BTRFS_INODE_ITEM_KEY: ii = btrfs_item_ptr(l, i, struct btrfs_inode_item); printk("\t\tinode generation %llu size %llu mode %o\n", - (unsigned long long)btrfs_inode_generation(ii), - (unsigned long 
long)btrfs_inode_size(ii), - btrfs_inode_mode(ii)); + (unsigned long long)btrfs_inode_generation(l, ii), + (unsigned long long)btrfs_inode_size(l, ii), + btrfs_inode_mode(l, ii)); break; case BTRFS_DIR_ITEM_KEY: di = btrfs_item_ptr(l, i, struct btrfs_dir_item); + btrfs_dir_item_key_to_cpu(l, di, &found_key); printk("\t\tdir oid %llu flags %u type %u\n", - (unsigned long long)btrfs_disk_key_objectid( - &di->location), - btrfs_dir_flags(di), - btrfs_dir_type(di)); - printk("\t\tname %.*s\n", - btrfs_dir_name_len(di),(char *)(di + 1)); + (unsigned long long)found_key.objectid, + btrfs_dir_flags(l, di), + btrfs_dir_type(l, di)); break; case BTRFS_ROOT_ITEM_KEY: ri = btrfs_item_ptr(l, i, struct btrfs_root_item); printk("\t\troot data blocknr %llu refs %u\n", - (unsigned long long)btrfs_root_blocknr(ri), - btrfs_root_refs(ri)); + (unsigned long long)btrfs_disk_root_blocknr(l, ri), + btrfs_disk_root_refs(l, ri)); break; case BTRFS_EXTENT_ITEM_KEY: ei = btrfs_item_ptr(l, i, struct btrfs_extent_item); printk("\t\textent data refs %u\n", - btrfs_extent_refs(ei)); + btrfs_extent_refs(l, ei)); break; case BTRFS_EXTENT_DATA_KEY: fi = btrfs_item_ptr(l, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(fi) == + if (btrfs_file_extent_type(l, fi) == BTRFS_FILE_EXTENT_INLINE) { printk("\t\tinline extent data size %u\n", - btrfs_file_extent_inline_len(l->items + i)); + btrfs_file_extent_inline_len(l, item)); break; } printk("\t\textent data disk block %llu nr %llu\n", - (unsigned long long)btrfs_file_extent_disk_blocknr(fi), - (unsigned long long)btrfs_file_extent_disk_num_blocks(fi)); + (unsigned long long)btrfs_file_extent_disk_blocknr(l, fi), + (unsigned long long)btrfs_file_extent_disk_num_blocks(l, fi)); printk("\t\textent data offset %llu nr %llu\n", - (unsigned long long)btrfs_file_extent_offset(fi), - (unsigned long long)btrfs_file_extent_num_blocks(fi)); + (unsigned long long)btrfs_file_extent_offset(l, fi), + (unsigned long long)btrfs_file_extent_num_blocks(l, 
fi)); break; case BTRFS_BLOCK_GROUP_ITEM_KEY: bi = btrfs_item_ptr(l, i, struct btrfs_block_group_item); printk("\t\tblock group used %llu\n", - (unsigned long long)btrfs_block_group_used(bi)); - break; - case BTRFS_STRING_ITEM_KEY: - printk("\t\titem data %.*s\n", btrfs_item_size(item), - btrfs_leaf_data(l) + btrfs_item_offset(item)); + (unsigned long long)btrfs_disk_block_group_used(l, bi)); break; }; } } -void btrfs_print_tree(struct btrfs_root *root, struct buffer_head *t) +void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) { int i; u32 nr; - struct btrfs_node *c; + struct btrfs_key key; - if (!t) + if (!c) return; - c = btrfs_buffer_node(t); - nr = btrfs_header_nritems(&c->header); + nr = btrfs_header_nritems(c); if (btrfs_is_leaf(c)) { - btrfs_print_leaf(root, (struct btrfs_leaf *)c); + btrfs_print_leaf(root, c); return; } printk("node %llu level %d total ptrs %d free spc %u\n", - (unsigned long long)btrfs_header_blocknr(&c->header), - btrfs_header_level(&c->header), nr, + (unsigned long long)btrfs_header_blocknr(c), + btrfs_header_level(c), nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); for (i = 0; i < nr; i++) { + btrfs_node_key_to_cpu(c, &key, i); printk("\tkey %d (%llu %u %llu) block %llu\n", i, - (unsigned long long)c->ptrs[i].key.objectid, - c->ptrs[i].key.flags, - (unsigned long long)c->ptrs[i].key.offset, + (unsigned long long)key.objectid, + key.type, + (unsigned long long)key.offset, (unsigned long long)btrfs_node_blockptr(c, i)); } for (i = 0; i < nr; i++) { - struct buffer_head *next_buf = read_tree_block(root, + struct extent_buffer *next = read_tree_block(root, btrfs_node_blockptr(c, i)); - struct btrfs_node *next = btrfs_buffer_node(next_buf); if (btrfs_is_leaf(next) && - btrfs_header_level(&c->header) != 1) + btrfs_header_level(c) != 1) BUG(); - if (btrfs_header_level(&next->header) != - btrfs_header_level(&c->header) - 1) + if (btrfs_header_level(next) != + btrfs_header_level(c) - 1) BUG(); - btrfs_print_tree(root, 
next_buf); - btrfs_block_release(root, next_buf); + btrfs_print_tree(root, next); + free_extent_buffer(next); } } diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h index 205cd03601ab..da75efe534d5 100644 --- a/fs/btrfs/print-tree.h +++ b/fs/btrfs/print-tree.h @@ -18,6 +18,6 @@ #ifndef __PRINT_TREE_ #define __PRINT_TREE_ -void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l); -void btrfs_print_tree(struct btrfs_root *root, struct buffer_head *t); +void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l); +void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *t); #endif diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 3b5926dfbeba..88bcdd33f56e 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -26,12 +26,13 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, { struct btrfs_path *path; struct btrfs_key search_key; - struct btrfs_leaf *l; + struct btrfs_key found_key; + struct extent_buffer *l; int ret; int slot; search_key.objectid = objectid; - search_key.flags = (u32)-1; + search_key.type = (u8)-1; search_key.offset = (u64)-1; path = btrfs_alloc_path(); @@ -39,17 +40,19 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); if (ret < 0) goto out; + BUG_ON(ret == 0); - l = btrfs_buffer_leaf(path->nodes[0]); + l = path->nodes[0]; BUG_ON(path->slots[0] == 0); slot = path->slots[0] - 1; - if (btrfs_disk_key_objectid(&l->items[slot].key) != objectid) { + btrfs_item_key_to_cpu(l, &found_key, slot); + if (found_key.objectid != objectid) { ret = 1; goto out; } - memcpy(item, btrfs_item_ptr(l, slot, struct btrfs_root_item), - sizeof(*item)); - btrfs_disk_key_to_cpu(key, &l->items[slot].key); + read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot), + sizeof(*item)); + memcpy(key, &found_key, sizeof(found_key)); ret = 0; out: btrfs_release_path(root, path); @@ -62,10 +65,10 @@ int 
btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root *item) { struct btrfs_path *path; - struct btrfs_leaf *l; + struct extent_buffer *l; int ret; int slot; - struct btrfs_root_item *update_item; + unsigned long ptr; path = btrfs_alloc_path(); BUG_ON(!path); @@ -73,10 +76,10 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root if (ret < 0) goto out; BUG_ON(ret != 0); - l = btrfs_buffer_leaf(path->nodes[0]); + l = path->nodes[0]; slot = path->slots[0]; - update_item = btrfs_item_ptr(l, slot, struct btrfs_root_item); - btrfs_memcpy(root, l, update_item, item, sizeof(*item)); + ptr = btrfs_item_ptr_offset(l, slot); + write_extent_buffer(l, item, ptr, sizeof(*item)); btrfs_mark_buffer_dirty(path->nodes[0]); out: btrfs_release_path(root, path); @@ -103,11 +106,10 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, struct btrfs_path *path; int ret; u32 nritems; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; int slot; key.objectid = objectid; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); key.offset = 0; path = btrfs_alloc_path(); @@ -117,19 +119,19 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, if (ret < 0) goto err; while(1) { - leaf = btrfs_buffer_leaf(path->nodes[0]); - nritems = btrfs_header_nritems(&leaf->header); + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); slot = path->slots[0]; if (slot >= nritems) { ret = btrfs_next_leaf(root, path); if (ret) break; - leaf = btrfs_buffer_leaf(path->nodes[0]); - nritems = btrfs_header_nritems(&leaf->header); + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); slot = path->slots[0]; } - item = leaf->items + slot; - btrfs_disk_key_to_cpu(&key, &item->key); + item = btrfs_item_nr(leaf, slot); + btrfs_item_key_to_cpu(leaf, &key, slot); if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY) goto next; @@ -140,7 +142,7 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, break; ri = 
btrfs_item_ptr(leaf, slot, struct btrfs_root_item); - if (btrfs_root_refs(ri) != 0) + if (btrfs_disk_root_refs(leaf, ri) != 0) goto next; dead_root = btrfs_read_fs_root_no_radix(root->fs_info, &key); @@ -170,6 +172,7 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, int ret; u32 refs; struct btrfs_root_item *ri; + struct extent_buffer *leaf; path = btrfs_alloc_path(); BUG_ON(!path); @@ -177,10 +180,10 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (ret < 0) goto out; BUG_ON(ret != 0); - ri = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], struct btrfs_root_item); + leaf = path->nodes[0]; + ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item); - refs = btrfs_root_refs(ri); + refs = btrfs_disk_root_refs(leaf, ri); BUG_ON(refs != 0); ret = btrfs_del_item(trans, root, path); out: diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 66a01cbbbea1..39a1435c68f1 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -41,7 +41,7 @@ #include "ioctl.h" #include "print-tree.h" -#define BTRFS_SUPER_MAGIC 0x9123682E +#define BTRFS_SUPER_MAGIC 0x9123683E static struct super_operations btrfs_super_ops; @@ -115,13 +115,12 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) return -EIO; } sb->s_fs_info = tree_root; - disk_super = tree_root->fs_info->disk_super; + disk_super = &tree_root->fs_info->super_copy; inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super), tree_root); bi = BTRFS_I(inode); bi->location.objectid = inode->i_ino; bi->location.offset = 0; - bi->location.flags = 0; bi->root = tree_root; btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY); @@ -281,6 +280,7 @@ error_s: error_bdev: close_bdev_excl(bdev); error: +printk("get_sb failed\n"); return error; } /* end copy & paste */ @@ -295,6 +295,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, ret = btrfs_get_sb_bdev(fs_type, flags, dev_name, data, btrfs_fill_super, 
mnt, subvol_name ? subvol_name : "default"); +printk("btrfs_get_sb returns %d\n", ret); return ret; } diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 2058783373eb..9654e90eec89 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -31,31 +31,31 @@ static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_root_blocks_used(&root->root_item)); + (unsigned long long)btrfs_root_used(&root->root_item)); } static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_root_block_limit(&root->root_item)); + (unsigned long long)btrfs_root_limit(&root->root_item)); } static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_super_blocks_used(fs->disk_super)); + (unsigned long long)btrfs_super_blocks_used(&fs->super_copy)); } static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_super_total_blocks(fs->disk_super)); + (unsigned long long)btrfs_super_total_blocks(&fs->super_copy)); } static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_super_blocksize(fs->disk_super)); + (unsigned long long)btrfs_super_sectorsize(&fs->super_copy)); } /* this is for root attrs (subvols/snapshots) */ diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 087074db0bd5..750f35a37aae 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -66,7 +67,9 @@ static int join_transaction(struct btrfs_root *root) cur_trans->commit_done = 0; cur_trans->start_time = get_seconds(); list_add_tail(&cur_trans->list, 
&root->fs_info->trans_list); - init_bit_radix(&cur_trans->dirty_pages); + extent_map_tree_init(&cur_trans->dirty_pages, + root->fs_info->btree_inode->i_mapping, + GFP_NOFS); } else { cur_trans->num_writers++; cur_trans->num_joined++; @@ -88,7 +91,7 @@ static int record_root_in_trans(struct btrfs_root *root) (unsigned long)root->root_key.objectid, BTRFS_ROOT_DEFRAG_TAG); root->commit_root = root->node; - get_bh(root->node); + extent_buffer_get(root->node); } else { WARN_ON(1); } @@ -144,29 +147,30 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - unsigned long gang[16]; int ret; - int i; int err; int werr = 0; + struct extent_map_tree *dirty_pages; struct page *page; - struct radix_tree_root *dirty_pages; struct inode *btree_inode = root->fs_info->btree_inode; + u64 start; + u64 end; + unsigned long index; if (!trans || !trans->transaction) { return filemap_write_and_wait(btree_inode->i_mapping); } dirty_pages = &trans->transaction->dirty_pages; while(1) { - ret = find_first_radix_bit(dirty_pages, gang, - 0, ARRAY_SIZE(gang)); - if (!ret) + ret = find_first_extent_bit(dirty_pages, 0, &start, &end, + EXTENT_DIRTY); + if (ret) break; - for (i = 0; i < ret; i++) { - /* FIXME EIO */ - clear_radix_bit(dirty_pages, gang[i]); - page = find_lock_page(btree_inode->i_mapping, - gang[i]); + clear_extent_dirty(dirty_pages, start, end, GFP_NOFS); + while(start <= end) { + index = start >> PAGE_CACHE_SHIFT; + start = (index + 1) << PAGE_CACHE_SHIFT; + page = find_lock_page(btree_inode->i_mapping, index); if (!page) continue; if (PageWriteback(page)) { @@ -202,10 +206,11 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, btrfs_write_dirty_block_groups(trans, extent_root); while(1) { old_extent_block = btrfs_root_blocknr(&extent_root->root_item); - if (old_extent_block == bh_blocknr(extent_root->node)) + if (old_extent_block == + 
extent_buffer_blocknr(extent_root->node)) break; btrfs_set_root_blocknr(&extent_root->root_item, - bh_blocknr(extent_root->node)); + extent_buffer_blocknr(extent_root->node)); ret = btrfs_update_root(trans, tree_root, &extent_root->root_key, &extent_root->root_item); @@ -279,9 +284,9 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); if (root->commit_root == root->node) { - WARN_ON(bh_blocknr(root->node) != + WARN_ON(extent_buffer_blocknr(root->node) != btrfs_root_blocknr(&root->root_item)); - brelse(root->commit_root); + free_extent_buffer(root->commit_root); root->commit_root = NULL; /* make sure to update the root on disk @@ -310,7 +315,7 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, root->root_key.offset = root->fs_info->generation; btrfs_set_root_blocknr(&root->root_item, - bh_blocknr(root->node)); + extent_buffer_blocknr(root->node)); err = btrfs_insert_root(trans, root->fs_info->tree_root, &root->root_key, &root->root_item); @@ -389,10 +394,10 @@ int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) for (i = 0; i < ret; i++) { root = gang[i]; last = root->root_key.objectid + 1; - btrfs_defrag_root(root, 1); + // btrfs_defrag_root(root, 1); } } - btrfs_defrag_root(info->extent_root, 1); + // btrfs_defrag_root(info->extent_root, 1); return err; } @@ -414,7 +419,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, dirty = list_entry(list->next, struct dirty_root, list); list_del_init(&dirty->list); - num_blocks = btrfs_root_blocks_used(&dirty->root->root_item); + num_blocks = btrfs_root_used(&dirty->root->root_item); root = dirty->latest_root; while(1) { @@ -441,11 +446,11 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, } BUG_ON(ret); - num_blocks -= btrfs_root_blocks_used(&dirty->root->root_item); - blocks_used = btrfs_root_blocks_used(&root->root_item); + num_blocks -= btrfs_root_used(&dirty->root->root_item); + blocks_used = 
btrfs_root_used(&root->root_item); if (num_blocks) { record_root_in_trans(root); - btrfs_set_root_blocks_used(&root->root_item, + btrfs_set_root_used(&root->root_item, blocks_used - num_blocks); } ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key); @@ -553,9 +558,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_set_super_generation(&root->fs_info->super_copy, cur_trans->transid); btrfs_set_super_root(&root->fs_info->super_copy, - bh_blocknr(root->fs_info->tree_root->node)); - memcpy(root->fs_info->disk_super, &root->fs_info->super_copy, - sizeof(root->fs_info->super_copy)); + extent_buffer_blocknr(root->fs_info->tree_root->node)); + + write_extent_buffer(root->fs_info->sb_buffer, + &root->fs_info->super_copy, 0, + sizeof(root->fs_info->super_copy)); btrfs_copy_pinned(root, &pinned_copy); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 4bc328cbb24c..ae39fcfc169a 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -28,7 +28,7 @@ struct btrfs_transaction { int use_count; int commit_done; struct list_head list; - struct radix_tree_root dirty_pages; + struct extent_map_tree dirty_pages; unsigned long start_time; wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; @@ -83,5 +83,6 @@ int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest, int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info); int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); int btrfs_clean_old_snapshots(struct btrfs_root *root); - +int btrfs_commit_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root); #endif diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 420597127ed1..daf019afa0a1 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -23,14 +23,14 @@ #include "transaction.h" static void reada_defrag(struct btrfs_root *root, - struct btrfs_node *node) + struct extent_buffer *node) { int i; u32 nritems; u64 blocknr; int ret; - nritems = 
btrfs_header_nritems(&node->header); + nritems = btrfs_header_nritems(node); for (i = 0; i < nritems; i++) { blocknr = btrfs_node_blockptr(node, i); ret = readahead_tree_block(root, blocknr); @@ -44,8 +44,8 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, struct btrfs_path *path, int *level, int cache_only, u64 *last_ret) { - struct buffer_head *next; - struct buffer_head *cur; + struct extent_buffer *next; + struct extent_buffer *cur; u64 blocknr; int ret = 0; int is_extent = 0; @@ -62,13 +62,13 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, cur = path->nodes[*level]; if (!cache_only && *level > 1 && path->slots[*level] == 0) - reada_defrag(root, btrfs_buffer_node(cur)); + reada_defrag(root, cur); - if (btrfs_header_level(btrfs_buffer_header(cur)) != *level) + if (btrfs_header_level(cur) != *level) WARN_ON(1); if (path->slots[*level] >= - btrfs_header_nritems(btrfs_buffer_header(cur))) + btrfs_header_nritems(cur)) break; if (*level == 1) { @@ -80,14 +80,13 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, break; } - blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur), - path->slots[*level]); + blocknr = btrfs_node_blockptr(cur, path->slots[*level]); if (cache_only) { next = btrfs_find_tree_block(root, blocknr); - if (!next || !buffer_uptodate(next) || - buffer_locked(next) || !buffer_defrag(next)) { - brelse(next); + /* FIXME, test for defrag */ + if (!next || !btrfs_buffer_uptodate(next)) { + free_extent_buffer(next); path->slots[*level]++; continue; } @@ -106,16 +105,18 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, WARN_ON(*level <= 0); if (path->nodes[*level-1]) - btrfs_block_release(root, path->nodes[*level-1]); + free_extent_buffer(path->nodes[*level-1]); path->nodes[*level-1] = next; - *level = btrfs_header_level(btrfs_buffer_header(next)); + *level = btrfs_header_level(next); path->slots[*level] = 0; } WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); +#if 0 
clear_buffer_defrag(path->nodes[*level]); clear_buffer_defrag_done(path->nodes[*level]); - btrfs_block_release(root, path->nodes[*level]); +#endif + free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; WARN_ON(ret); @@ -129,24 +130,25 @@ static int defrag_walk_up(struct btrfs_trans_handle *trans, { int i; int slot; - struct btrfs_node *node; + struct extent_buffer *node; for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { slot = path->slots[i]; - if (slot < btrfs_header_nritems( - btrfs_buffer_header(path->nodes[i])) - 1) { + if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { path->slots[i]++; *level = i; - node = btrfs_buffer_node(path->nodes[i]); + node = path->nodes[i]; WARN_ON(i == 0); - btrfs_disk_key_to_cpu(&root->defrag_progress, - &node->ptrs[path->slots[i]].key); + btrfs_node_key_to_cpu(node, &root->defrag_progress, + path->slots[i]); root->defrag_level = i; return 0; } else { + /* clear_buffer_defrag(path->nodes[*level]); clear_buffer_defrag_done(path->nodes[*level]); - btrfs_block_release(root, path->nodes[*level]); + */ + free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level = i + 1; } @@ -158,7 +160,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, struct btrfs_root *root, int cache_only) { struct btrfs_path *path = NULL; - struct buffer_head *tmp; + struct extent_buffer *tmp; int ret = 0; int wret; int level; @@ -172,17 +174,18 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, if (root->ref_cows == 0 && !is_extent) goto out; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; - level = btrfs_header_level(btrfs_buffer_header(root->node)); + level = btrfs_header_level(root->node); orig_level = level; if (level == 0) { goto out; } if (root->defrag_progress.objectid == 0) { - get_bh(root->node); + extent_buffer_get(root->node); ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); BUG_ON(ret); ret = btrfs_realloc_node(trans, root, root->node, 
cache_only, @@ -200,12 +203,15 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, if (is_extent) btrfs_extent_post_op(trans, root); + if (wret < 0) { ret = wret; goto out; } + while(level > 0 && !path->nodes[level]) level--; + if (!path->nodes[level]) { ret = 0; goto out; @@ -230,7 +236,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, } for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { - btrfs_block_release(root, path->nodes[i]); + free_extent_buffer(path->nodes[i]); path->nodes[i] = 0; } } -- cgit v1.2.3-70-g09d2 From 479965d66e320f1a095bb76027171daa675a9c72 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:14:27 -0400 Subject: Btrfs: Optimizations for the extent_buffer code Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 51 ++++++++++++++++++++++++--------------------------- fs/btrfs/ctree.h | 48 +++++++++++++++++++++++++++++++++++++++++++----- fs/btrfs/extent_map.c | 11 ++++++++--- fs/btrfs/extent_map.h | 2 +- 4 files changed, 76 insertions(+), 36 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index f60920e8a0e0..9427b79c5d79 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -464,22 +464,12 @@ static int check_block(struct btrfs_root *root, struct btrfs_path *path, int level) { struct extent_buffer *buf = path->nodes[level]; - char fsid[BTRFS_FSID_SIZE]; - read_extent_buffer(buf, fsid, (unsigned long)btrfs_header_fsid(buf), - BTRFS_FSID_SIZE); - - if (memcmp(fsid, root->fs_info->fsid, BTRFS_FSID_SIZE)) { - int i = 0; + if (memcmp_extent_buffer(buf, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(buf), + BTRFS_FSID_SIZE)) { printk("warning bad block %Lu\n", buf->start); - if (!btrfs_buffer_uptodate(buf)) { - WARN_ON(1); - } - for (i = 0; i < BTRFS_FSID_SIZE; i++) { - printk("%x:%x ", root->fs_info->fsid[i], fsid[i]); - } - printk("\n"); - // BUG(); + BUG(); } if (level == 0) return check_leaf(root, path, level); @@ -504,13 +494,14 @@ static int 
generic_bin_search(struct extent_buffer *eb, unsigned long p, int high = max; int mid; int ret; - struct btrfs_disk_key *tmp; + struct btrfs_disk_key *tmp = NULL; struct btrfs_disk_key unaligned; unsigned long offset; char *map_token = NULL; char *kaddr = NULL; unsigned long map_start = 0; unsigned long map_len = 0; + int err; while(low < high) { mid = (low + high) / 2; @@ -519,19 +510,24 @@ static int generic_bin_search(struct extent_buffer *eb, unsigned long p, if (!map_token || offset < map_start || (offset + sizeof(struct btrfs_disk_key)) > map_start + map_len) { - if (map_token) + if (map_token) { unmap_extent_buffer(eb, map_token, KM_USER0); - map_extent_buffer(eb, offset, &map_token, &kaddr, - &map_start, &map_len, KM_USER0); + map_token = NULL; + } + err = map_extent_buffer(eb, offset, + sizeof(struct btrfs_disk_key), + &map_token, &kaddr, + &map_start, &map_len, KM_USER0); + + if (!err) { + tmp = (struct btrfs_disk_key *)(kaddr + offset - + map_start); + } else { + read_extent_buffer(eb, &unaligned, + offset, sizeof(unaligned)); + tmp = &unaligned; + } - } - if (offset + sizeof(struct btrfs_disk_key) > - map_start + map_len) { - unmap_extent_buffer(eb, map_token, KM_USER0); - read_extent_buffer(eb, &unaligned, - offset, sizeof(unaligned)); - map_token = NULL; - tmp = &unaligned; } else { tmp = (struct btrfs_disk_key *)(kaddr + offset - map_start); @@ -544,7 +540,8 @@ static int generic_bin_search(struct extent_buffer *eb, unsigned long p, high = mid; else { *slot = mid; - unmap_extent_buffer(eb, map_token, KM_USER0); + if (map_token) + unmap_extent_buffer(eb, map_token, KM_USER0); return 0; } } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c4b829806855..30fbbd7221a9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -22,6 +22,7 @@ #include #include #include +#include #include "bit-radix.h" #include "extent_map.h" @@ -431,15 +432,52 @@ struct btrfs_root { static inline u##bits btrfs_##name(struct extent_buffer *eb, \ type *s) \ { \ - 
__le##bits res; \ - read_eb_member(eb, s, type, member, &res); \ - return le##bits##_to_cpu(res); \ + int err; \ + char *map_token; \ + char *kaddr; \ + unsigned long map_start; \ + unsigned long map_len; \ + unsigned long offset = (unsigned long)s + \ + offsetof(type, member); \ + err = map_extent_buffer(eb, offset, \ + sizeof(((type *)0)->member), \ + &map_token, &kaddr, \ + &map_start, &map_len, KM_USER0); \ + if (!err) { \ + __le##bits *tmp = (__le##bits *)(kaddr + offset - \ + map_start); \ + u##bits res = le##bits##_to_cpu(*tmp); \ + unmap_extent_buffer(eb, map_token, KM_USER0); \ + return res; \ + } else { \ + __le##bits res; \ + read_eb_member(eb, s, type, member, &res); \ + return le##bits##_to_cpu(res); \ + } \ } \ static inline void btrfs_set_##name(struct extent_buffer *eb, \ type *s, u##bits val) \ { \ - val = cpu_to_le##bits(val); \ - write_eb_member(eb, s, type, member, &val); \ + int err; \ + char *map_token; \ + char *kaddr; \ + unsigned long map_start; \ + unsigned long map_len; \ + unsigned long offset = (unsigned long)s + \ + offsetof(type, member); \ + err = map_extent_buffer(eb, offset, \ + sizeof(((type *)0)->member), \ + &map_token, &kaddr, \ + &map_start, &map_len, KM_USER0); \ + if (!err) { \ + __le##bits *tmp = (__le##bits *)(kaddr + offset - \ + map_start); \ + *tmp = cpu_to_le##bits(val); \ + unmap_extent_buffer(eb, map_token, KM_USER0); \ + } else { \ + val = cpu_to_le##bits(val); \ + write_eb_member(eb, s, type, member, &val); \ + } \ } #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 55fefdfab84c..8bef309e1b37 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -2140,14 +2140,20 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, EXPORT_SYMBOL(read_extent_buffer); int map_extent_buffer(struct extent_buffer *eb, unsigned long start, + unsigned long min_len, char **token, char **map, unsigned long *map_start, unsigned long *map_len, 
int km) { - size_t offset; + size_t offset = start & (PAGE_CACHE_SIZE - 1); char *kaddr; size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + unsigned long end_i = (start_offset + start + min_len) >> + PAGE_CACHE_SHIFT; + + if (i != end_i) + return -EINVAL; WARN_ON(start > eb->len); @@ -2155,8 +2161,7 @@ int map_extent_buffer(struct extent_buffer *eb, unsigned long start, offset = start_offset; *map_start = 0; } else { - offset = 0; - *map_start = (i << PAGE_CACHE_SHIFT) - offset; + *map_start = (i << PAGE_CACHE_SHIFT) - start_offset; } // kaddr = kmap_atomic(eb->pages[i], km); diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 430b997a70f6..895789039ddd 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -173,7 +173,7 @@ int set_extent_buffer_uptodate(struct extent_map_tree *tree, int extent_buffer_uptodate(struct extent_map_tree *tree, struct extent_buffer *eb); int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, - char **token, char **map, + unsigned long min_len, char **token, char **map, unsigned long *map_start, unsigned long *map_len, int km); void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); -- cgit v1.2.3-70-g09d2 From 6d36dcd48f1e4e7446d603a3df9638bd314a182d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:14:37 -0400 Subject: Btrfs: Avoid memcpy where possible in extent_buffers Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 45 +++++++++++++++-- fs/btrfs/disk-io.c | 6 +-- fs/btrfs/extent_map.c | 132 ++++++++++++++++++++++++++++++++++++++------------ fs/btrfs/extent_map.h | 5 +- 4 files changed, 147 insertions(+), 41 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 30fbbd7221a9..aed08618aca6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -483,15 +483,50 @@ static inline void btrfs_set_##name(struct extent_buffer *eb, \ #define 
BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ static inline u##bits btrfs_##name(struct extent_buffer *eb) \ { \ - __le##bits res; \ - read_eb_member(eb, NULL, type, member, &res); \ - return le##bits##_to_cpu(res); \ + int err; \ + char *map_token; \ + char *kaddr; \ + unsigned long map_start; \ + unsigned long map_len; \ + unsigned long offset = offsetof(type, member); \ + err = map_extent_buffer(eb, offset, \ + sizeof(((type *)0)->member), \ + &map_token, &kaddr, \ + &map_start, &map_len, KM_USER0); \ + if (!err) { \ + __le##bits *tmp = (__le##bits *)(kaddr + offset - \ + map_start); \ + u##bits res = le##bits##_to_cpu(*tmp); \ + unmap_extent_buffer(eb, map_token, KM_USER0); \ + return res; \ + } else { \ + __le##bits res; \ + read_eb_member(eb, NULL, type, member, &res); \ + return le##bits##_to_cpu(res); \ + } \ } \ static inline void btrfs_set_##name(struct extent_buffer *eb, \ u##bits val) \ { \ - val = cpu_to_le##bits(val); \ - write_eb_member(eb, NULL, type, member, &val); \ + int err; \ + char *map_token; \ + char *kaddr; \ + unsigned long map_start; \ + unsigned long map_len; \ + unsigned long offset = offsetof(type, member); \ + err = map_extent_buffer(eb, offset, \ + sizeof(((type *)0)->member), \ + &map_token, &kaddr, \ + &map_start, &map_len, KM_USER0); \ + if (!err) { \ + __le##bits *tmp = (__le##bits *)(kaddr + offset - \ + map_start); \ + *tmp = cpu_to_le##bits(val); \ + unmap_extent_buffer(eb, map_token, KM_USER0); \ + } else { \ + val = cpu_to_le##bits(val); \ + write_eb_member(eb, NULL, type, member, &val); \ + } \ } #define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0c1f90cbedb0..8242933a1d8b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -609,20 +609,20 @@ int close_ctree(struct btrfs_root *root) int btrfs_buffer_uptodate(struct extent_buffer *buf) { - struct inode *btree_inode = buf->pages[0]->mapping->host; + struct inode *btree_inode = 
buf->first_page->mapping->host; return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); } int btrfs_set_buffer_uptodate(struct extent_buffer *buf) { - struct inode *btree_inode = buf->pages[0]->mapping->host; + struct inode *btree_inode = buf->first_page->mapping->host; return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); } void btrfs_mark_buffer_dirty(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; u64 transid = btrfs_header_generation(buf); struct inode *btree_inode = root->fs_info->btree_inode; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 8bef309e1b37..d2c733c68b4c 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -18,6 +18,11 @@ struct kmem_cache *btrfs_cache_create(const char *name, size_t size, static struct kmem_cache *extent_map_cache; static struct kmem_cache *extent_state_cache; +static struct kmem_cache *extent_buffer_cache; +static LIST_HEAD(extent_buffers); +static spinlock_t extent_buffers_lock; +static int nr_extent_buffers; +#define MAX_EXTENT_BUFFER_CACHE 128 struct tree_entry { u64 start; @@ -29,21 +34,33 @@ struct tree_entry { void __init extent_map_init(void) { extent_map_cache = btrfs_cache_create("extent_map", - sizeof(struct extent_map), - SLAB_DESTROY_BY_RCU, + sizeof(struct extent_map), 0, NULL); extent_state_cache = btrfs_cache_create("extent_state", - sizeof(struct extent_state), - SLAB_DESTROY_BY_RCU, + sizeof(struct extent_state), 0, NULL); + extent_buffer_cache = btrfs_cache_create("extent_buffers", + sizeof(struct extent_buffer), 0, + NULL); + spin_lock_init(&extent_buffers_lock); } void __exit extent_map_exit(void) { + struct extent_buffer *eb; + + while (!list_empty(&extent_buffers)) { + eb = list_entry(extent_buffers.next, + struct extent_buffer, list); + list_del(&eb->list); + kmem_cache_free(extent_buffer_cache, eb); + } if 
(extent_map_cache) kmem_cache_destroy(extent_map_cache); if (extent_state_cache) kmem_cache_destroy(extent_state_cache); + if (extent_buffer_cache) + kmem_cache_destroy(extent_buffer_cache); } void extent_map_tree_init(struct extent_map_tree *tree, @@ -1858,6 +1875,48 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock, return (em->block_start + start - em->start) >> inode->i_blkbits; } +static struct extent_buffer *__alloc_extent_buffer(gfp_t mask) +{ + struct extent_buffer *eb = NULL; + spin_lock(&extent_buffers_lock); + if (!list_empty(&extent_buffers)) { + eb = list_entry(extent_buffers.next, struct extent_buffer, + list); + list_del(&eb->list); + WARN_ON(nr_extent_buffers == 0); + nr_extent_buffers--; + } + spin_unlock(&extent_buffers_lock); + if (eb) { + memset(eb, 0, sizeof(*eb)); + return eb; + } + return kmem_cache_zalloc(extent_buffer_cache, mask); +} + +static void __free_extent_buffer(struct extent_buffer *eb) +{ + if (nr_extent_buffers >= MAX_EXTENT_BUFFER_CACHE) { + kmem_cache_free(extent_buffer_cache, eb); + } else { + spin_lock(&extent_buffers_lock); + list_add(&eb->list, &extent_buffers); + nr_extent_buffers++; + spin_unlock(&extent_buffers_lock); + } +} + +static inline struct page *extent_buffer_page(struct extent_buffer *eb, int i) +{ + struct page *p; + if (i == 0) + return eb->first_page; + i += eb->start >> PAGE_CACHE_SHIFT; + p = find_get_page(eb->first_page->mapping, i); + page_cache_release(p); + return p; +} + struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, u64 start, unsigned long len, gfp_t mask) @@ -1871,7 +1930,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, struct address_space *mapping = tree->mapping; int uptodate = 0; - eb = kzalloc(EXTENT_BUFFER_SIZE(num_pages), mask); + eb = __alloc_extent_buffer(mask); if (!eb || IS_ERR(eb)) return NULL; @@ -1881,9 +1940,16 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, for (i = 0; i < num_pages; 
i++, index++) { p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); - if (!p) + if (!p) { + /* make sure the free only frees the pages we've + * grabbed a reference on + */ + eb->len = i << PAGE_CACHE_SHIFT; + eb->start &= ~((u64)PAGE_CACHE_SIZE - 1); goto fail; - eb->pages[i] = p; + } + if (i == 0) + eb->first_page = p; if (!PageUptodate(p)) uptodate = 0; unlock_page(p); @@ -1909,7 +1975,7 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, struct page *p; struct address_space *mapping = tree->mapping; - eb = kzalloc(EXTENT_BUFFER_SIZE(num_pages), mask); + eb = __alloc_extent_buffer(mask); if (!eb || IS_ERR(eb)) return NULL; @@ -1919,9 +1985,16 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, for (i = 0; i < num_pages; i++, index++) { p = find_get_page(mapping, index); - if (!p) + if (!p) { + /* make sure the free only frees the pages we've + * grabbed a reference on + */ + eb->len = i << PAGE_CACHE_SHIFT; + eb->start &= ~((u64)PAGE_CACHE_SIZE - 1); goto fail; - eb->pages[i] = p; + } + if (i == 0) + eb->first_page = p; } return eb; fail: @@ -1944,11 +2017,12 @@ void free_extent_buffer(struct extent_buffer *eb) num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - (eb->start >> PAGE_CACHE_SHIFT) + 1; - for (i = 0; i < num_pages; i++) { - if (eb->pages[i]) - page_cache_release(eb->pages[i]); + if (eb->first_page) + page_cache_release(eb->first_page); + for (i = 1; i < num_pages; i++) { + page_cache_release(extent_buffer_page(eb, i)); } - kfree(eb); + __free_extent_buffer(eb); } EXPORT_SYMBOL(free_extent_buffer); @@ -1968,7 +2042,7 @@ int clear_extent_buffer_dirty(struct extent_map_tree *tree, (eb->start >> PAGE_CACHE_SHIFT) + 1; for (i = 0; i < num_pages; i++) { - page = eb->pages[i]; + page = extent_buffer_page(eb, i); lock_page(page); /* * if we're on the last page or the first page and the @@ -2021,7 +2095,7 @@ int set_extent_buffer_uptodate(struct extent_map_tree *tree, set_extent_uptodate(tree, 
eb->start, eb->start + eb->len - 1, GFP_NOFS); for (i = 0; i < num_pages; i++) { - page = eb->pages[i]; + page = extent_buffer_page(eb, i); if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || ((i == num_pages - 1) && ((eb->start + eb->len - 1) & (PAGE_CACHE_SIZE - 1)))) { @@ -2064,7 +2138,7 @@ int read_extent_buffer_pages(struct extent_map_tree *tree, num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - (eb->start >> PAGE_CACHE_SHIFT) + 1; for (i = 0; i < num_pages; i++) { - page = eb->pages[i]; + page = extent_buffer_page(eb, i); if (PageUptodate(page)) { continue; } @@ -2090,7 +2164,7 @@ int read_extent_buffer_pages(struct extent_map_tree *tree, } for (i = 0; i < num_pages; i++) { - page = eb->pages[i]; + page = extent_buffer_page(eb, i); wait_on_page_locked(page); if (!PageUptodate(page)) { ret = -EIO; @@ -2116,12 +2190,12 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - page = eb->pages[i]; offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); if (i == 0) offset += start_offset; while(len > 0) { + page = extent_buffer_page(eb, i); WARN_ON(!PageUptodate(page)); cur = min(len, (PAGE_CACHE_SIZE - offset)); @@ -2134,7 +2208,6 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, len -= cur; offset = 0; i++; - page = eb->pages[i]; } } EXPORT_SYMBOL(read_extent_buffer); @@ -2165,7 +2238,7 @@ int map_extent_buffer(struct extent_buffer *eb, unsigned long start, } // kaddr = kmap_atomic(eb->pages[i], km); - kaddr = page_address(eb->pages[i]); + kaddr = page_address(extent_buffer_page(eb, i)); *token = kaddr; *map = kaddr + offset; *map_len = PAGE_CACHE_SIZE - offset; @@ -2195,12 +2268,12 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - page = eb->pages[i]; offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); if (i == 0) offset += start_offset; while(len > 0) { + 
page = extent_buffer_page(eb, i); WARN_ON(!PageUptodate(page)); cur = min(len, (PAGE_CACHE_SIZE - offset)); @@ -2216,7 +2289,6 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, len -= cur; offset = 0; i++; - page = eb->pages[i]; } return ret; } @@ -2236,12 +2308,12 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - page = eb->pages[i]; offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); if (i == 0) offset += start_offset; while(len > 0) { + page = extent_buffer_page(eb, i); WARN_ON(!PageUptodate(page)); cur = min(len, PAGE_CACHE_SIZE - offset); @@ -2254,7 +2326,6 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv, len -= cur; offset = 0; i++; - page = eb->pages[i]; } } EXPORT_SYMBOL(write_extent_buffer); @@ -2272,12 +2343,12 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - page = eb->pages[i]; offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); if (i == 0) offset += start_offset; while(len > 0) { + page = extent_buffer_page(eb, i); WARN_ON(!PageUptodate(page)); cur = min(len, PAGE_CACHE_SIZE - offset); @@ -2289,7 +2360,6 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, len -= cur; offset = 0; i++; - page = eb->pages[i]; } } EXPORT_SYMBOL(memset_extent_buffer); @@ -2313,7 +2383,7 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, offset += start_offset; while(len > 0) { - page = dst->pages[i]; + page = extent_buffer_page(dst, i); WARN_ON(!PageUptodate(page)); cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); @@ -2414,7 +2484,8 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, cur = min(cur, (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page)); - copy_pages(dst->pages[dst_i], dst->pages[src_i], + copy_pages(extent_buffer_page(dst, dst_i), + 
extent_buffer_page(dst, src_i), dst_off_in_page, src_off_in_page, cur); src_offset += cur; @@ -2467,7 +2538,8 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, cur = min(len, src_off_in_page + 1); cur = min(cur, dst_off_in_page + 1); // printk("move pages orig dst %lu src %lu len %lu, this %lu %lu %lu\n", dst_offset, src_offset, len, dst_off_in_page - cur + 1, src_off_in_page - cur + 1, cur); - move_pages(dst->pages[dst_i], dst->pages[src_i], + move_pages(extent_buffer_page(dst, dst_i), + extent_buffer_page(dst, src_i), dst_off_in_page - cur + 1, src_off_in_page - cur + 1, cur); diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 895789039ddd..3b3abf34c403 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -62,14 +62,13 @@ struct extent_state { struct list_head list; }; -#define EXTENT_BUFFER_SIZE(nr) (sizeof(struct extent_buffer) + \ - (nr - 1) * sizeof(struct page *)) struct extent_buffer { u64 start; unsigned long len; atomic_t refs; int flags; - struct page *pages[]; + struct list_head list; + struct page *first_page; }; typedef struct extent_map *(get_extent_t)(struct inode *inode, -- cgit v1.2.3-70-g09d2 From ae5252bd51a252b7b8b02289337c36774835101c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:14:41 -0400 Subject: Btrfs: Go back to kmaps instead of page_address in extent_buffers Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 50 +++++++++++++++++++------------------------------- 1 file changed, 19 insertions(+), 31 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index d2c733c68b4c..f150188f621c 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1867,7 +1867,6 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock, if (!em || IS_ERR(em)) return 0; - // XXX(hch): block 0 is valid in some cases, e.g. 
XFS RT device if (em->block_start == EXTENT_MAP_INLINE || em->block_start == EXTENT_MAP_HOLE) return 0; @@ -2199,10 +2198,9 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, WARN_ON(!PageUptodate(page)); cur = min(len, (PAGE_CACHE_SIZE - offset)); - // kaddr = kmap_atomic(page, KM_USER0); - kaddr = page_address(page); + kaddr = kmap_atomic(page, KM_USER0); memcpy(dst, kaddr + offset, cur); - // kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr, KM_USER0); dst += cur; len -= cur; @@ -2237,8 +2235,7 @@ int map_extent_buffer(struct extent_buffer *eb, unsigned long start, *map_start = (i << PAGE_CACHE_SHIFT) - start_offset; } - // kaddr = kmap_atomic(eb->pages[i], km); - kaddr = page_address(extent_buffer_page(eb, i)); + kaddr = kmap_atomic(extent_buffer_page(eb, i), km); *token = kaddr; *map = kaddr + offset; *map_len = PAGE_CACHE_SIZE - offset; @@ -2248,7 +2245,7 @@ EXPORT_SYMBOL(map_extent_buffer); void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) { - // kunmap_atomic(token, km); + kunmap_atomic(token, km); } EXPORT_SYMBOL(unmap_extent_buffer); @@ -2278,10 +2275,9 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, cur = min(len, (PAGE_CACHE_SIZE - offset)); - // kaddr = kmap_atomic(page, KM_USER0); - kaddr = page_address(page); + kaddr = kmap_atomic(page, KM_USER0); ret = memcmp(ptr, kaddr + offset, cur); - // kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr, KM_USER0); if (ret) break; @@ -2317,10 +2313,9 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv, WARN_ON(!PageUptodate(page)); cur = min(len, PAGE_CACHE_SIZE - offset); - // kaddr = kmap_atomic(page, KM_USER0); - kaddr = page_address(page); + kaddr = kmap_atomic(page, KM_USER0); memcpy(kaddr + offset, src, cur); - // kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr, KM_USER0); src += cur; len -= cur; @@ -2352,10 +2347,9 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, WARN_ON(!PageUptodate(page)); cur = 
min(len, PAGE_CACHE_SIZE - offset); - // kaddr = kmap_atomic(page, KM_USER0); - kaddr = page_address(page); + kaddr = kmap_atomic(page, KM_USER0); memset(kaddr + offset, c, cur); - // kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr, KM_USER0); len -= cur; offset = 0; @@ -2388,10 +2382,9 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); - // kaddr = kmap_atomic(page, KM_USER1); - kaddr = page_address(page); + kaddr = kmap_atomic(page, KM_USER1); read_extent_buffer(src, kaddr + offset, src_offset, cur); - // kunmap_atomic(kaddr, KM_USER1); + kunmap_atomic(kaddr, KM_USER1); src_offset += cur; len -= cur; @@ -2405,43 +2398,38 @@ static void move_pages(struct page *dst_page, struct page *src_page, unsigned long dst_off, unsigned long src_off, unsigned long len) { - // char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); - char *dst_kaddr = page_address(dst_page); + char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); if (dst_page == src_page) { memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); } else { - // char *src_kaddr = kmap_atomic(src_page, KM_USER1); - char *src_kaddr = page_address(src_page); + char *src_kaddr = kmap_atomic(src_page, KM_USER1); char *p = dst_kaddr + dst_off + len; char *s = src_kaddr + src_off + len; while (len--) *--p = *--s; - // kunmap_atomic(src_kaddr, KM_USER1); + kunmap_atomic(src_kaddr, KM_USER1); } - // kunmap_atomic(dst_kaddr, KM_USER0); + kunmap_atomic(dst_kaddr, KM_USER0); } static void copy_pages(struct page *dst_page, struct page *src_page, unsigned long dst_off, unsigned long src_off, unsigned long len) { - //kmap_atomic(dst_page, KM_USER0); - char *dst_kaddr = page_address(dst_page); + char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); char *src_kaddr; if (dst_page != src_page) - src_kaddr = page_address(src_page); // kmap_atomic(src_page, KM_USER1); + src_kaddr = kmap_atomic(src_page, KM_USER1); else src_kaddr = dst_kaddr; 
memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); - /* kunmap_atomic(dst_kaddr, KM_USER0); if (dst_page != src_page) kunmap_atomic(src_kaddr, KM_USER1); - */ } void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, @@ -2537,7 +2525,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, cur = min(len, src_off_in_page + 1); cur = min(cur, dst_off_in_page + 1); -// printk("move pages orig dst %lu src %lu len %lu, this %lu %lu %lu\n", dst_offset, src_offset, len, dst_off_in_page - cur + 1, src_off_in_page - cur + 1, cur); + move_pages(extent_buffer_page(dst, dst_i), extent_buffer_page(dst, src_i), dst_off_in_page - cur + 1, -- cgit v1.2.3-70-g09d2 From f510cfecfc98759d75283823cfccf0cc0d59a4c6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:14:48 -0400 Subject: Btrfs: Fix extent_buffer and extent_state leaks Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 10 +++- fs/btrfs/ctree.h | 2 +- fs/btrfs/disk-io.c | 17 +++++-- fs/btrfs/extent-tree.c | 126 +++++++++++++++++++++++-------------------------- fs/btrfs/extent_map.c | 64 ++++++++++++++++++++----- fs/btrfs/extent_map.h | 2 + fs/btrfs/transaction.c | 5 +- 7 files changed, 139 insertions(+), 87 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 9427b79c5d79..50ef351ef6b3 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -87,6 +87,7 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, if (IS_ERR(cow)) return PTR_ERR(cow); + cow->alloc_addr = (unsigned long)__builtin_return_address(0); if (buf->len != root->sectorsize || cow->len != root->sectorsize) WARN_ON(1); @@ -132,6 +133,7 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, struct extent_buffer **cow_ret) { u64 search_start; + int ret; if (trans->transaction != root->fs_info->running_transaction) { printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, root->fs_info->running_transaction->transid); @@ 
-148,8 +150,10 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, } search_start = extent_buffer_blocknr(buf) & ~((u64)65535); - return __btrfs_cow_block(trans, root, buf, parent, + ret = __btrfs_cow_block(trans, root, buf, parent, parent_slot, cow_ret, search_start, 0); + (*cow_ret)->alloc_addr = (unsigned long)__builtin_return_address(0); + return ret; } static int close_blocks(u64 blocknr, u64 other) @@ -1013,8 +1017,10 @@ again: if (sret) return sret; b = p->nodes[level]; - if (!b) + if (!b) { + btrfs_release_path(NULL, p); goto again; + } slot = p->slots[level]; BUG_ON(btrfs_header_nritems(b) == 1); } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index aed08618aca6..5262b28f468c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -303,8 +303,8 @@ struct btrfs_fs_info { struct radix_tree_root pinned_radix; struct radix_tree_root block_group_radix; struct radix_tree_root block_group_data_radix; - struct radix_tree_root extent_map_radix; struct radix_tree_root extent_ins_radix; + struct extent_map_tree free_space_cache; u64 generation; u64 last_trans_committed; struct btrfs_transaction *running_transaction; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8242933a1d8b..09f4e694624d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -46,18 +46,25 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) { struct inode *btree_inode = root->fs_info->btree_inode; - return find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + struct extent_buffer *eb; + eb = find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, blocknr * root->sectorsize, root->sectorsize, GFP_NOFS); + if (eb) + eb->alloc_addr = (unsigned long)__builtin_return_address(0); + return eb; } struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, u64 blocknr) { struct inode *btree_inode = root->fs_info->btree_inode; - return alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + struct extent_buffer *eb; + eb = 
alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, blocknr * root->sectorsize, root->sectorsize, GFP_NOFS); + eb->alloc_addr = (unsigned long)__builtin_return_address(0); + return eb; } struct extent_map *btree_get_extent(struct inode *inode, struct page *page, @@ -226,6 +233,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) return NULL; read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, buf, 1); + buf->alloc_addr = (unsigned long)__builtin_return_address(0); return buf; } @@ -426,7 +434,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) } init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); - init_bit_radix(&fs_info->extent_map_radix); init_bit_radix(&fs_info->extent_ins_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); @@ -449,6 +456,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree, fs_info->btree_inode->i_mapping, GFP_NOFS); + extent_map_tree_init(&fs_info->free_space_cache, + fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; fs_info->closing = 0; @@ -594,8 +603,10 @@ int close_ctree(struct btrfs_root *root) if (fs_info->extent_root->node) free_extent_buffer(fs_info->extent_root->node); + if (fs_info->tree_root->node) free_extent_buffer(fs_info->tree_root->node); + free_extent_buffer(fs_info->sb_buffer); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); iput(fs_info->btree_inode); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 089c41cbca74..74cfbee2ff33 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -34,21 +34,19 @@ static int cache_block_group(struct btrfs_root *root, int ret; struct btrfs_key key; struct extent_buffer *leaf; - struct radix_tree_root *extent_radix; + struct extent_map_tree *free_space_cache; int slot; - u64 i; u64 last = 0; u64 hole_size; u64 
first_free; int found = 0; root = root->fs_info->extent_root; - extent_radix = &root->fs_info->extent_map_radix; + free_space_cache = &root->fs_info->free_space_cache; if (block_group->cached) return 0; - if (block_group->data) - return 0; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -98,9 +96,11 @@ static int cache_block_group(struct btrfs_root *root, last = first_free; found = 1; } - hole_size = key.objectid - last; - for (i = 0; i < hole_size; i++) { - set_radix_bit(extent_radix, last + i); + if (key.objectid > last) { + hole_size = key.objectid - last; + set_extent_dirty(free_space_cache, last, + last + hole_size - 1, + GFP_NOFS); } last = key.objectid + key.offset; } @@ -114,9 +114,8 @@ next: block_group->key.offset > last) { hole_size = block_group->key.objectid + block_group->key.offset - last; - for (i = 0; i < hole_size; i++) { - set_radix_bit(extent_radix, last + i); - } + set_extent_dirty(free_space_cache, last, + last + hole_size - 1, GFP_NOFS); } block_group->cached = 1; err: @@ -150,47 +149,33 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct return NULL; } -static u64 leaf_range(struct btrfs_root *root) -{ - u64 size = BTRFS_LEAF_DATA_SIZE(root); - do_div(size, sizeof(struct btrfs_extent_item) + - sizeof(struct btrfs_item)); - return size; -} - static u64 find_search_start(struct btrfs_root *root, struct btrfs_block_group_cache **cache_ret, - u64 search_start, int num) + u64 search_start, int num, int data) { - unsigned long gang[8]; int ret; struct btrfs_block_group_cache *cache = *cache_ret; u64 last = max(search_start, cache->key.objectid); + u64 start = 0; + u64 end = 0; - if (cache->data) - goto out; again: ret = cache_block_group(root, cache); if (ret) goto out; while(1) { - ret = find_first_radix_bit(&root->fs_info->extent_map_radix, - gang, last, ARRAY_SIZE(gang)); - if (!ret) + ret = find_first_extent_bit(&root->fs_info->free_space_cache, + last, &start, &end, EXTENT_DIRTY); + if (ret) goto out; - last = 
gang[ret-1] + 1; - if (num > 1) { - if (ret != ARRAY_SIZE(gang)) { - goto new_group; - } - if (gang[ret-1] - gang[0] > leaf_range(root)) { - continue; - } - } - if (gang[0] >= cache->key.objectid + cache->key.offset) { + + start = max(last, start); + last = end + 1; + if (end + 1 - start < num) + continue; + if (start + num > cache->key.objectid + cache->key.offset) goto new_group; - } - return gang[0]; + return start; } out: return max(cache->last_alloc, search_start); @@ -202,7 +187,7 @@ new_group: return max((*cache_ret)->last_alloc, search_start); } cache = btrfs_find_block_group(root, cache, - last + cache->key.offset - 1, 0, 0); + last + cache->key.offset - 1, data, 0); *cache_ret = cache; goto again; } @@ -625,7 +610,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, u64 total = num; u64 old_val; u64 block_in_group; - u64 i; int ret; while(total) { @@ -644,12 +628,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, if (alloc) { if (blocknr > cache->last_alloc) cache->last_alloc = blocknr; - if (!cache->data) { - for (i = 0; i < num; i++) { - clear_radix_bit(&info->extent_map_radix, - blocknr + i); - } - } if (cache->data != data && old_val < (cache->key.offset >> 1)) { cache->data = data; @@ -677,11 +655,10 @@ static int update_block_group(struct btrfs_trans_handle *trans, old_val -= num; if (blocknr < cache->first_free) cache->first_free = blocknr; - if (!cache->data && mark_free) { - for (i = 0; i < num; i++) { - set_radix_bit(&info->extent_map_radix, - blocknr + i); - } + if (mark_free) { + set_extent_dirty(&info->free_space_cache, + blocknr, blocknr + num - 1, + GFP_NOFS); } if (old_val < (cache->key.offset >> 1) && old_val + num >= (cache->key.offset >> 1)) { @@ -732,7 +709,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, int ret; int i; struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix; - struct radix_tree_root *extent_radix = &root->fs_info->extent_map_radix; + struct 
extent_map_tree *free_space_cache; + + free_space_cache = &root->fs_info->free_space_cache; while(1) { ret = find_first_radix_bit(unpin_radix, gang, 0, @@ -751,8 +730,11 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, block_group->pinned--; if (gang[i] < block_group->last_alloc) block_group->last_alloc = gang[i]; - if (!block_group->data) - set_radix_bit(extent_radix, gang[i]); + if (!block_group->data) { + set_extent_dirty(free_space_cache, + gang[i], gang[i], + GFP_NOFS); + } } } } @@ -995,6 +977,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_block_group_cache *block_group; int full_scan = 0; int wrapped = 0; + u64 cached_search_start = 0; WARN_ON(num_blocks < 1); btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); @@ -1017,11 +1000,9 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root path = btrfs_alloc_path(); check_failed: - if (!block_group->data) - search_start = find_search_start(root, &block_group, - search_start, total_needed); - else if (!full_scan) - search_start = max(block_group->last_alloc, search_start); + search_start = find_search_start(root, &block_group, + search_start, total_needed, data); + cached_search_start = search_start; btrfs_init_path(path); ins->objectid = search_start; @@ -1097,6 +1078,7 @@ check_failed: start_found = 1; last_block = key.objectid + key.offset; + if (!full_scan && last_block >= block_group->key.objectid + block_group->key.offset) { btrfs_release_path(root, path); @@ -1138,6 +1120,9 @@ check_pending: } ins->offset = num_blocks; btrfs_free_path(path); + if (0 && ins->objectid != cached_search_start) { +printk("\tcached was %Lu found %Lu\n", cached_search_start, ins->objectid); + } return 0; new_group: @@ -1209,6 +1194,10 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, btrfs_set_root_used(&root->root_item, root_blocks_used + num_blocks); + clear_extent_dirty(&root->fs_info->free_space_cache, + ins->objectid, 
ins->objectid + ins->offset - 1, + GFP_NOFS); + if (root == extent_root) { BUG_ON(num_blocks != 1); set_radix_bit(&root->fs_info->extent_ins_radix, ins->objectid); @@ -1227,6 +1216,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, BUG_ON(ret); finish_current_insert(trans, extent_root); pending_ret = del_pending_extents(trans, extent_root); + if (ret) { return ret; } @@ -1265,6 +1255,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return ERR_PTR(-ENOMEM); } btrfs_set_buffer_uptodate(buf); + buf->alloc_addr = (unsigned long)__builtin_return_address(0); set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); /* @@ -1492,6 +1483,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root orig_level = level; if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { path->nodes[level] = root->node; + extent_buffer_get(root->node); path->slots[level] = 0; } else { struct btrfs_key key; @@ -1524,7 +1516,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root if (wret < 0) ret = wret; ret = -EAGAIN; - extent_buffer_get(root->node); break; } for (i = 0; i <= orig_level; i++) { @@ -1562,8 +1553,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) { int ret; int ret2; - unsigned long gang[16]; - int i; + u64 start; + u64 end; ret = free_block_group_radix(&info->block_group_radix); ret2 = free_block_group_radix(&info->block_group_data_radix); @@ -1573,13 +1564,12 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) return ret2; while(1) { - ret = find_first_radix_bit(&info->extent_map_radix, - gang, 0, ARRAY_SIZE(gang)); - if (!ret) + ret = find_first_extent_bit(&info->free_space_cache, 0, + &start, &end, EXTENT_DIRTY); + if (ret) break; - for (i = 0; i < ret; i++) { - clear_radix_bit(&info->extent_map_radix, gang[i]); - } + clear_extent_dirty(&info->free_space_cache, start, + end, GFP_NOFS); } return 0; } diff --git 
a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index f150188f621c..5b7dbcaacd11 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -19,8 +19,13 @@ struct kmem_cache *btrfs_cache_create(const char *name, size_t size, static struct kmem_cache *extent_map_cache; static struct kmem_cache *extent_state_cache; static struct kmem_cache *extent_buffer_cache; + static LIST_HEAD(extent_buffers); +static LIST_HEAD(buffers); +static LIST_HEAD(states); + static spinlock_t extent_buffers_lock; +static spinlock_t state_lock = SPIN_LOCK_UNLOCKED; static int nr_extent_buffers; #define MAX_EXTENT_BUFFER_CACHE 128 @@ -48,6 +53,7 @@ void __init extent_map_init(void) void __exit extent_map_exit(void) { struct extent_buffer *eb; + struct extent_state *state; while (!list_empty(&extent_buffers)) { eb = list_entry(extent_buffers.next, @@ -55,6 +61,22 @@ void __exit extent_map_exit(void) list_del(&eb->list); kmem_cache_free(extent_buffer_cache, eb); } + while (!list_empty(&states)) { + state = list_entry(states.next, struct extent_state, list); + printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs)); + list_del(&state->list); + kmem_cache_free(extent_state_cache, state); + + } + while (!list_empty(&buffers)) { + eb = list_entry(buffers.next, + struct extent_buffer, leak_list); + printk("buffer leak start %Lu len %lu return %lX\n", eb->start, eb->len, eb->alloc_addr); + list_del(&eb->leak_list); + kmem_cache_free(extent_buffer_cache, eb); + } + + if (extent_map_cache) kmem_cache_destroy(extent_map_cache); if (extent_state_cache) @@ -101,12 +123,19 @@ EXPORT_SYMBOL(free_extent_map); struct extent_state *alloc_extent_state(gfp_t mask) { struct extent_state *state; + unsigned long flags; + state = kmem_cache_alloc(extent_state_cache, mask); if (!state || IS_ERR(state)) return state; state->state = 0; state->in_tree = 0; state->private = 0; + + spin_lock_irqsave(&state_lock, 
flags); + list_add(&state->list, &states); + spin_unlock_irqrestore(&state_lock, flags); + atomic_set(&state->refs, 1); init_waitqueue_head(&state->wq); return state; @@ -115,10 +144,14 @@ EXPORT_SYMBOL(alloc_extent_state); void free_extent_state(struct extent_state *state) { + unsigned long flags; if (!state) return; if (atomic_dec_and_test(&state->refs)) { WARN_ON(state->in_tree); + spin_lock_irqsave(&state_lock, flags); + list_del(&state->list); + spin_unlock_irqrestore(&state_lock, flags); kmem_cache_free(extent_state_cache, state); } } @@ -361,10 +394,6 @@ static int insert_state(struct extent_map_tree *tree, state->state |= bits; state->start = start; state->end = end; - if ((end & 4095) == 0) { - printk("insert state %Lu %Lu strange end\n", start, end); - WARN_ON(1); - } node = tree_insert(&tree->state, end, &state->rb_node); if (node) { struct extent_state *found; @@ -399,11 +428,7 @@ static int split_state(struct extent_map_tree *tree, struct extent_state *orig, prealloc->end = split - 1; prealloc->state = orig->state; orig->start = split; - if ((prealloc->end & 4095) == 0) { - printk("insert state %Lu %Lu strange end\n", prealloc->start, - prealloc->end); - WARN_ON(1); - } + node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node); if (node) { struct extent_state *found; @@ -957,6 +982,7 @@ int find_first_extent_bit(struct extent_map_tree *tree, u64 start, *start_ret = state->start; *end_ret = state->end; ret = 0; + break; } node = rb_next(node); if (!node) @@ -1877,6 +1903,7 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock, static struct extent_buffer *__alloc_extent_buffer(gfp_t mask) { struct extent_buffer *eb = NULL; + spin_lock(&extent_buffers_lock); if (!list_empty(&extent_buffers)) { eb = list_entry(extent_buffers.next, struct extent_buffer, @@ -1886,15 +1913,26 @@ static struct extent_buffer *__alloc_extent_buffer(gfp_t mask) nr_extent_buffers--; } spin_unlock(&extent_buffers_lock); + if (eb) { memset(eb, 0, 
sizeof(*eb)); - return eb; + } else { + eb = kmem_cache_zalloc(extent_buffer_cache, mask); } - return kmem_cache_zalloc(extent_buffer_cache, mask); + spin_lock(&extent_buffers_lock); + list_add(&eb->leak_list, &buffers); + spin_unlock(&extent_buffers_lock); + + return eb; } static void __free_extent_buffer(struct extent_buffer *eb) { + + spin_lock(&extent_buffers_lock); + list_del_init(&eb->leak_list); + spin_unlock(&extent_buffers_lock); + if (nr_extent_buffers >= MAX_EXTENT_BUFFER_CACHE) { kmem_cache_free(extent_buffer_cache, eb); } else { @@ -1933,6 +1971,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, if (!eb || IS_ERR(eb)) return NULL; + eb->alloc_addr = __builtin_return_address(0); eb->start = start; eb->len = len; atomic_set(&eb->refs, 1); @@ -1947,6 +1986,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, eb->start &= ~((u64)PAGE_CACHE_SIZE - 1); goto fail; } + set_page_extent_mapped(p); if (i == 0) eb->first_page = p; if (!PageUptodate(p)) @@ -1978,6 +2018,7 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, if (!eb || IS_ERR(eb)) return NULL; + eb->alloc_addr = __builtin_return_address(0); eb->start = start; eb->len = len; atomic_set(&eb->refs, 1); @@ -1992,6 +2033,7 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, eb->start &= ~((u64)PAGE_CACHE_SIZE - 1); goto fail; } + set_page_extent_mapped(p); if (i == 0) eb->first_page = p; } diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 3b3abf34c403..d100f7c1476c 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -68,7 +68,9 @@ struct extent_buffer { atomic_t refs; int flags; struct list_head list; + struct list_head leak_list; struct page *first_page; + unsigned long alloc_addr; }; typedef struct extent_map *(get_extent_t)(struct inode *inode, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 750f35a37aae..372b61f5733f 100644 --- a/fs/btrfs/transaction.c +++ 
b/fs/btrfs/transaction.c @@ -352,7 +352,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) return 0; trans = btrfs_start_transaction(root, 1); - while (1) { + while (0) { root->defrag_running = 1; ret = btrfs_defrag_leaves(trans, root, cacheonly); nr = trans->blocks_used; @@ -394,7 +394,7 @@ int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) for (i = 0; i < ret; i++) { root = gang[i]; last = root->root_key.objectid + 1; - // btrfs_defrag_root(root, 1); + btrfs_defrag_root(root, 1); } } // btrfs_defrag_root(info->extent_root, 1); @@ -462,6 +462,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); + free_extent_buffer(dirty->root->node); kfree(dirty->root); kfree(dirty); mutex_unlock(&tree_root->fs_info->fs_mutex); -- cgit v1.2.3-70-g09d2 From 96b5179d0d9b6368c203856f2ad6e8e12a8b2a2c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:15:19 -0400 Subject: Btrfs: Stop using radix trees for the block group cache Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 6 +- fs/btrfs/disk-io.c | 4 +- fs/btrfs/extent-tree.c | 362 ++++++++++++++++++++----------------------------- fs/btrfs/extent_map.c | 18 ++- fs/btrfs/extent_map.h | 4 + 5 files changed, 171 insertions(+), 223 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5262b28f468c..c6174b27fd13 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -271,8 +271,6 @@ struct btrfs_csum_item { } __attribute__ ((__packed__)); /* tag for the radix tree of block groups in ram */ -#define BTRFS_BLOCK_GROUP_DIRTY 0 -#define BTRFS_BLOCK_GROUP_AVAIL 1 #define BTRFS_BLOCK_GROUP_SIZE (256 * 1024 * 1024) @@ -285,7 +283,6 @@ struct btrfs_block_group_item { struct btrfs_block_group_cache { struct btrfs_key key; struct btrfs_block_group_item item; - struct radix_tree_root *radix; u64 first_free; u64 last_alloc; u64 pinned; @@ -301,10 +298,9 @@ struct btrfs_fs_info { struct 
radix_tree_root fs_roots_radix; struct radix_tree_root pending_del_radix; struct radix_tree_root pinned_radix; - struct radix_tree_root block_group_radix; - struct radix_tree_root block_group_data_radix; struct radix_tree_root extent_ins_radix; struct extent_map_tree free_space_cache; + struct extent_map_tree block_group_cache; u64 generation; u64 last_trans_committed; struct btrfs_transaction *running_transaction; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 09f4e694624d..aac7c82b0dce 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -436,8 +436,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) init_bit_radix(&fs_info->pending_del_radix); init_bit_radix(&fs_info->extent_ins_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); - INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); - INIT_RADIX_TREE(&fs_info->block_group_data_radix, GFP_KERNEL); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj)); @@ -458,6 +456,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) GFP_NOFS); extent_map_tree_init(&fs_info->free_space_cache, fs_info->btree_inode->i_mapping, GFP_NOFS); + extent_map_tree_init(&fs_info->block_group_cache, + fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; fs_info->closing = 0; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 74cfbee2ff33..4bc639565d1c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -22,6 +22,10 @@ #include "print-tree.h" #include "transaction.h" +#define BLOCK_GROUP_DATA EXTENT_WRITEBACK +#define BLOCK_GROUP_METADATA EXTENT_UPTODATE +#define BLOCK_GROUP_DIRTY EXTENT_DIRTY + static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct @@ -127,25 +131,31 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info 
*info, u64 blocknr) { - struct btrfs_block_group_cache *block_group; + struct extent_map_tree *block_group_cache; + struct btrfs_block_group_cache *block_group = NULL; + u64 ptr; + u64 start; + u64 end; int ret; - ret = radix_tree_gang_lookup(&info->block_group_radix, - (void **)&block_group, - blocknr, 1); + block_group_cache = &info->block_group_cache; + ret = find_first_extent_bit(block_group_cache, + blocknr, &start, &end, + BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA); if (ret) { - if (block_group->key.objectid <= blocknr && blocknr <= - block_group->key.objectid + block_group->key.offset) - return block_group; - } - ret = radix_tree_gang_lookup(&info->block_group_data_radix, - (void **)&block_group, - blocknr, 1); - if (ret) { - if (block_group->key.objectid <= blocknr && blocknr <= - block_group->key.objectid + block_group->key.offset) - return block_group; + return NULL; } + ret = get_state_private(block_group_cache, start, &ptr); + if (ret) + return NULL; + + block_group = (struct btrfs_block_group_cache *)ptr; + + + if (block_group->key.objectid <= blocknr && blocknr <= + block_group->key.objectid + block_group->key.offset) + return block_group; + return NULL; } @@ -173,7 +183,7 @@ again: last = end + 1; if (end + 1 - start < num) continue; - if (start + num > cache->key.objectid + cache->key.offset) + if (start + num >= cache->key.objectid + cache->key.offset) goto new_group; return start; } @@ -189,6 +199,7 @@ new_group: cache = btrfs_find_block_group(root, cache, last + cache->key.offset - 1, data, 0); *cache_ret = cache; + last = min(cache->key.objectid, last); goto again; } @@ -204,30 +215,32 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, *hint, u64 search_start, int data, int owner) { - struct btrfs_block_group_cache *cache[8]; + struct btrfs_block_group_cache *cache; + struct extent_map_tree *block_group_cache; struct btrfs_block_group_cache *found_group = NULL; struct btrfs_fs_info *info = root->fs_info; - struct 
radix_tree_root *radix; - struct radix_tree_root *swap_radix; u64 used; u64 last = 0; u64 hint_last; - int i; + u64 start; + u64 end; + u64 free_check; + u64 ptr; + int bit; int ret; int full_search = 0; int factor = 8; int data_swap = 0; + block_group_cache = &info->block_group_cache; + if (!owner) factor = 5; - if (data) { - radix = &info->block_group_data_radix; - swap_radix = &info->block_group_radix; - } else { - radix = &info->block_group_radix; - swap_radix = &info->block_group_data_radix; - } + if (data) + bit = BLOCK_GROUP_DATA; + else + bit = BLOCK_GROUP_METADATA; if (search_start) { struct btrfs_block_group_cache *shint; @@ -246,12 +259,6 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, div_factor(hint->key.offset, factor)) { return hint; } - if (used >= div_factor(hint->key.offset, 8)) { - radix_tree_tag_clear(radix, - hint->key.objectid + - hint->key.offset - 1, - BTRFS_BLOCK_GROUP_AVAIL); - } last = hint->key.offset * 3; if (hint->key.objectid >= last) last = max(search_start + hint->key.offset - 1, @@ -267,51 +274,29 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, last = hint_last; } - while(1) { - ret = radix_tree_gang_lookup_tag(radix, (void **)cache, - last, ARRAY_SIZE(cache), - BTRFS_BLOCK_GROUP_AVAIL); - if (!ret) - break; - for (i = 0; i < ret; i++) { - last = cache[i]->key.objectid + - cache[i]->key.offset; - used = btrfs_block_group_used(&cache[i]->item); - if (used + cache[i]->pinned < - div_factor(cache[i]->key.offset, factor)) { - found_group = cache[i]; - goto found; - } - if (used >= div_factor(cache[i]->key.offset, 8)) { - radix_tree_tag_clear(radix, - cache[i]->key.objectid + - cache[i]->key.offset - 1, - BTRFS_BLOCK_GROUP_AVAIL); - } - } - cond_resched(); - } - last = hint_last; again: while(1) { - ret = radix_tree_gang_lookup(radix, (void **)cache, - last, ARRAY_SIZE(cache)); - if (!ret) + ret = find_first_extent_bit(block_group_cache, last, + &start, &end, bit); + 
if (ret) break; - for (i = 0; i < ret; i++) { - last = cache[i]->key.objectid + - cache[i]->key.offset; - used = btrfs_block_group_used(&cache[i]->item); - if (used + cache[i]->pinned < cache[i]->key.offset) { - found_group = cache[i]; - goto found; - } - if (used >= cache[i]->key.offset) { - radix_tree_tag_clear(radix, - cache[i]->key.objectid + - cache[i]->key.offset - 1, - BTRFS_BLOCK_GROUP_AVAIL); - } + + ret = get_state_private(block_group_cache, start, &ptr); + if (ret) + break; + + cache = (struct btrfs_block_group_cache *)ptr; + last = cache->key.objectid + cache->key.offset; + used = btrfs_block_group_used(&cache->item); + + if (full_search) + free_check = cache->key.offset; + else + free_check = div_factor(cache->key.offset, factor); + + if (used + cache->pinned < free_check) { + found_group = cache; + goto found; } cond_resched(); } @@ -321,23 +306,11 @@ again: goto again; } if (!data_swap) { - struct radix_tree_root *tmp = radix; data_swap = 1; - radix = swap_radix; - swap_radix = tmp; + bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA; last = search_start; goto again; } - if (!found_group) { - ret = radix_tree_gang_lookup(radix, - (void **)&found_group, 0, 1); - if (ret == 0) { - ret = radix_tree_gang_lookup(swap_radix, - (void **)&found_group, - 0, 1); - } - BUG_ON(ret != 1); - } found: return found_group; } @@ -538,68 +511,55 @@ fail: } -static int write_dirty_block_radix(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct radix_tree_root *radix) +int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, + struct btrfs_root *root) { - struct btrfs_block_group_cache *cache[8]; + struct extent_map_tree *block_group_cache; + struct btrfs_block_group_cache *cache; int ret; int err = 0; int werr = 0; - int i; struct btrfs_path *path; - unsigned long off = 0; + u64 last = 0; + u64 start; + u64 end; + u64 ptr; + block_group_cache = &root->fs_info->block_group_cache; path = btrfs_alloc_path(); if (!path) return -ENOMEM; while(1) { - 
ret = radix_tree_gang_lookup_tag(radix, (void **)cache, - off, ARRAY_SIZE(cache), - BTRFS_BLOCK_GROUP_DIRTY); - if (!ret) + ret = find_first_extent_bit(block_group_cache, last, + &start, &end, BLOCK_GROUP_DIRTY); + if (ret) break; - for (i = 0; i < ret; i++) { - err = write_one_cache_group(trans, root, - path, cache[i]); - /* - * if we fail to write the cache group, we want - * to keep it marked dirty in hopes that a later - * write will work - */ - if (err) { - werr = err; - off = cache[i]->key.objectid + - cache[i]->key.offset; - continue; - } - radix_tree_tag_clear(radix, cache[i]->key.objectid + - cache[i]->key.offset - 1, - BTRFS_BLOCK_GROUP_DIRTY); + last = end + 1; + ret = get_state_private(block_group_cache, start, &ptr); + if (ret) + break; + + cache = (struct btrfs_block_group_cache *)ptr; + err = write_one_cache_group(trans, root, + path, cache); + /* + * if we fail to write the cache group, we want + * to keep it marked dirty in hopes that a later + * write will work + */ + if (err) { + werr = err; + continue; } + clear_extent_bits(block_group_cache, start, end, + BLOCK_GROUP_DIRTY, GFP_NOFS); } btrfs_free_path(path); return werr; } -int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, - struct btrfs_root *root) -{ - int ret; - int ret2; - ret = write_dirty_block_radix(trans, root, - &root->fs_info->block_group_radix); - ret2 = write_dirty_block_radix(trans, root, - &root->fs_info->block_group_data_radix); - if (ret) - return ret; - if (ret2) - return ret2; - return 0; -} - static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num, int alloc, int mark_free, @@ -610,7 +570,8 @@ static int update_block_group(struct btrfs_trans_handle *trans, u64 total = num; u64 old_val; u64 block_in_group; - int ret; + u64 start; + u64 end; while(total) { cache = btrfs_lookup_block_group(info, blocknr); @@ -619,9 +580,10 @@ static int update_block_group(struct btrfs_trans_handle *trans, } block_in_group 
= blocknr - cache->key.objectid; WARN_ON(block_in_group > cache->key.offset); - radix_tree_tag_set(cache->radix, cache->key.objectid + - cache->key.offset - 1, - BTRFS_BLOCK_GROUP_DIRTY); + start = cache->key.objectid; + end = start + cache->key.offset - 1; + set_extent_bits(&info->block_group_cache, start, end, + BLOCK_GROUP_DIRTY, GFP_NOFS); old_val = btrfs_block_group_used(&cache->item); num = min(total, cache->key.offset - block_in_group); @@ -630,25 +592,27 @@ static int update_block_group(struct btrfs_trans_handle *trans, cache->last_alloc = blocknr; if (cache->data != data && old_val < (cache->key.offset >> 1)) { - cache->data = data; - radix_tree_delete(cache->radix, - cache->key.objectid + - cache->key.offset - 1); + int bit_to_clear; + int bit_to_set; + cache->data = data; if (data) { - cache->radix = - &info->block_group_data_radix; + bit_to_clear = BLOCK_GROUP_DATA; + bit_to_set = BLOCK_GROUP_METADATA; cache->item.flags |= BTRFS_BLOCK_GROUP_DATA; } else { - cache->radix = &info->block_group_radix; + bit_to_clear = BLOCK_GROUP_METADATA; + bit_to_set = BLOCK_GROUP_DATA; cache->item.flags &= ~BTRFS_BLOCK_GROUP_DATA; } - ret = radix_tree_insert(cache->radix, - cache->key.objectid + - cache->key.offset - 1, - (void *)cache); + clear_extent_bits(&info->block_group_cache, + start, end, bit_to_clear, + GFP_NOFS); + set_extent_bits(&info->block_group_cache, + start, end, bit_to_set, + GFP_NOFS); } old_val += num; } else { @@ -660,13 +624,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, blocknr, blocknr + num - 1, GFP_NOFS); } - if (old_val < (cache->key.offset >> 1) && - old_val + num >= (cache->key.offset >> 1)) { - radix_tree_tag_set(cache->radix, - cache->key.objectid + - cache->key.offset - 1, - BTRFS_BLOCK_GROUP_AVAIL); - } } btrfs_set_block_group_used(&cache->item, old_val); total -= num; @@ -730,11 +687,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, block_group->pinned--; if (gang[i] < block_group->last_alloc) 
block_group->last_alloc = gang[i]; - if (!block_group->data) { - set_extent_dirty(free_space_cache, - gang[i], gang[i], - GFP_NOFS); - } + set_extent_dirty(free_space_cache, + gang[i], gang[i], GFP_NOFS); } } } @@ -1059,8 +1013,8 @@ check_failed: ins->offset = search_end - ins->objectid; goto check_pending; } - btrfs_item_key_to_cpu(l, &key, slot); + if (key.objectid >= search_start && key.objectid > last_block && start_found) { if (last_block < search_start) @@ -1072,9 +1026,14 @@ check_failed: goto check_pending; } } - - if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) + if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) { + if (!start_found) { + last_block = key.objectid; + start_found = 1; + } goto next; + } + start_found = 1; last_block = key.objectid + key.offset; @@ -1120,9 +1079,6 @@ check_pending: } ins->offset = num_blocks; btrfs_free_path(path); - if (0 && ins->objectid != cached_search_start) { -printk("\tcached was %Lu found %Lu\n", cached_search_start, ins->objectid); - } return 0; new_group: @@ -1529,40 +1485,20 @@ out: return ret; } -static int free_block_group_radix(struct radix_tree_root *radix) +int btrfs_free_block_groups(struct btrfs_fs_info *info) { + u64 start; + u64 end; int ret; - struct btrfs_block_group_cache *cache[8]; - int i; while(1) { - ret = radix_tree_gang_lookup(radix, (void **)cache, 0, - ARRAY_SIZE(cache)); - if (!ret) + ret = find_first_extent_bit(&info->block_group_cache, 0, + &start, &end, (unsigned int)-1); + if (ret) break; - for (i = 0; i < ret; i++) { - radix_tree_delete(radix, cache[i]->key.objectid + - cache[i]->key.offset - 1); - kfree(cache[i]); - } + clear_extent_bits(&info->block_group_cache, start, + end, (unsigned int)-1, GFP_NOFS); } - return 0; -} - -int btrfs_free_block_groups(struct btrfs_fs_info *info) -{ - int ret; - int ret2; - u64 start; - u64 end; - - ret = free_block_group_radix(&info->block_group_radix); - ret2 = free_block_group_radix(&info->block_group_data_radix); - if (ret) - return ret; - if 
(ret2) - return ret2; - while(1) { ret = find_first_extent_bit(&info->free_space_cache, 0, &start, &end, EXTENT_DIRTY); @@ -1579,17 +1515,20 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct btrfs_path *path; int ret; int err = 0; + int bit; struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; - struct radix_tree_root *radix; + struct extent_map_tree *block_group_cache; struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; u64 group_size_blocks; - u64 used; + + block_group_cache = &info->block_group_cache; group_size_blocks = BTRFS_BLOCK_GROUP_SIZE >> - root->fs_info->sb->s_blocksize_bits; + info->sb->s_blocksize_bits; + root = info->extent_root; key.objectid = 0; key.offset = group_size_blocks; @@ -1617,35 +1556,30 @@ int btrfs_read_block_groups(struct btrfs_root *root) read_extent_buffer(leaf, &cache->item, btrfs_item_ptr_offset(leaf, path->slots[0]), sizeof(cache->item)); - if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) { - radix = &info->block_group_data_radix; - cache->data = 1; - } else { - radix = &info->block_group_radix; - cache->data = 0; - } - memcpy(&cache->key, &found_key, sizeof(found_key)); cache->last_alloc = cache->key.objectid; cache->first_free = cache->key.objectid; cache->pinned = 0; cache->cached = 0; - cache->radix = radix; - key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); - ret = radix_tree_insert(radix, found_key.objectid + - found_key.offset - 1, - (void *)cache); - BUG_ON(ret); - used = btrfs_block_group_used(&cache->item); - if (used < div_factor(key.offset, 8)) { - radix_tree_tag_set(radix, found_key.objectid + - found_key.offset - 1, - BTRFS_BLOCK_GROUP_AVAIL); + if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) { + bit = BLOCK_GROUP_DATA; + cache->data = 1; + } else { + bit = BLOCK_GROUP_METADATA; + cache->data = 0; } + + /* use EXTENT_LOCKED to prevent merging */ + set_extent_bits(block_group_cache, found_key.objectid, + 
found_key.objectid + found_key.offset - 1, + bit | EXTENT_LOCKED, GFP_NOFS); + set_state_private(block_group_cache, found_key.objectid, + (u64)cache); + if (key.objectid >= btrfs_super_total_blocks(&info->super_copy)) break; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 5b7dbcaacd11..1b2f9e059de6 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -574,7 +574,7 @@ out: return set; search_again: - if (start >= end) + if (start > end) goto out; write_unlock_irqrestore(&tree->lock, flags); if (mask & __GFP_WAIT) @@ -819,6 +819,21 @@ int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, } EXPORT_SYMBOL(set_extent_dirty); +int set_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, + int bits, gfp_t mask) +{ + return set_extent_bit(tree, start, end, bits, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_bits); + +int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, + int bits, gfp_t mask) +{ + return clear_extent_bit(tree, start, end, bits, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_bits); + int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask) { @@ -1138,7 +1153,6 @@ int set_state_private(struct extent_map_tree *tree, u64 start, u64 private) out: write_unlock_irq(&tree->lock); return ret; - } int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private) diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index d100f7c1476c..5a63b417034a 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -96,6 +96,10 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page, void __init extent_map_init(void); void __exit extent_map_exit(void); int extent_clean_all_trees(struct extent_map_tree *tree); +int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, + int bits, gfp_t mask); +int set_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, + int bits, gfp_t mask); int set_extent_uptodate(struct 
extent_map_tree *tree, u64 start, u64 end, gfp_t mask); int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end, -- cgit v1.2.3-70-g09d2 From 1a5bc167f6707542b79a55452075525620ed43f5 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:15:26 -0400 Subject: Btrfs: Change the remaining radix trees used by extent-tree.c to extent_map trees Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 16 ++-- fs/btrfs/disk-io.c | 9 ++- fs/btrfs/extent-tree.c | 193 ++++++++++++++++++------------------------------- fs/btrfs/extent_map.c | 5 +- fs/btrfs/extent_map.h | 4 +- fs/btrfs/transaction.c | 5 +- 6 files changed, 94 insertions(+), 138 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c6174b27fd13..256689551eb0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -283,10 +283,6 @@ struct btrfs_block_group_item { struct btrfs_block_group_cache { struct btrfs_key key; struct btrfs_block_group_item item; - u64 first_free; - u64 last_alloc; - u64 pinned; - u64 last_prealloc; int data; int cached; }; @@ -296,11 +292,13 @@ struct btrfs_fs_info { struct btrfs_root *extent_root; struct btrfs_root *tree_root; struct radix_tree_root fs_roots_radix; - struct radix_tree_root pending_del_radix; - struct radix_tree_root pinned_radix; - struct radix_tree_root extent_ins_radix; + struct extent_map_tree free_space_cache; struct extent_map_tree block_group_cache; + struct extent_map_tree pinned_extents; + struct extent_map_tree pending_del; + struct extent_map_tree extent_ins; + u64 generation; u64 last_trans_committed; struct btrfs_transaction *running_transaction; @@ -926,7 +924,7 @@ static inline int btrfs_set_root_name(struct btrfs_root *root, /* extent-tree.c */ int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int btrfs_copy_pinned(struct btrfs_root *root, struct radix_tree_root *copy); +int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy); struct 
btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, u64 blocknr); @@ -949,7 +947,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct radix_tree_root *unpin_radix); + struct extent_map_tree *unpin); int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index aac7c82b0dce..2b86a1d779b7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -432,9 +432,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) err = -ENOMEM; goto fail; } - init_bit_radix(&fs_info->pinned_radix); - init_bit_radix(&fs_info->pending_del_radix); - init_bit_radix(&fs_info->extent_ins_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); @@ -458,6 +455,12 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_mapping, GFP_NOFS); extent_map_tree_init(&fs_info->block_group_cache, fs_info->btree_inode->i_mapping, GFP_NOFS); + extent_map_tree_init(&fs_info->pinned_extents, + fs_info->btree_inode->i_mapping, GFP_NOFS); + extent_map_tree_init(&fs_info->pending_del, + fs_info->btree_inode->i_mapping, GFP_NOFS); + extent_map_tree_init(&fs_info->extent_ins, + fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; fs_info->closing = 0; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4bc639565d1c..477466d167a4 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -188,13 +188,13 @@ again: return start; } out: - return max(cache->last_alloc, search_start); + return search_start; new_group: cache = btrfs_lookup_block_group(root->fs_info, last + cache->key.offset - 1); if (!cache) { - return max((*cache_ret)->last_alloc, search_start); + 
return search_start; } cache = btrfs_find_block_group(root, cache, last + cache->key.offset - 1, data, 0); @@ -247,16 +247,14 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, shint = btrfs_lookup_block_group(info, search_start); if (shint && shint->data == data) { used = btrfs_block_group_used(&shint->item); - if (used + shint->pinned < - div_factor(shint->key.offset, factor)) { + if (used < div_factor(shint->key.offset, factor)) { return shint; } } } if (hint && hint->data == data) { used = btrfs_block_group_used(&hint->item); - if (used + hint->pinned < - div_factor(hint->key.offset, factor)) { + if (used < div_factor(hint->key.offset, factor)) { return hint; } last = hint->key.offset * 3; @@ -294,7 +292,7 @@ again: else free_check = div_factor(cache->key.offset, factor); - if (used + cache->pinned < free_check) { + if (used < free_check) { found_group = cache; goto found; } @@ -505,8 +503,6 @@ fail: return ret; if (pending_ret) return pending_ret; - if (cache->data) - cache->last_alloc = cache->first_free; return 0; } @@ -588,8 +584,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, old_val = btrfs_block_group_used(&cache->item); num = min(total, cache->key.offset - block_in_group); if (alloc) { - if (blocknr > cache->last_alloc) - cache->last_alloc = blocknr; if (cache->data != data && old_val < (cache->key.offset >> 1)) { int bit_to_clear; @@ -617,8 +611,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, old_val += num; } else { old_val -= num; - if (blocknr < cache->first_free) - cache->first_free = blocknr; if (mark_free) { set_extent_dirty(&info->free_space_cache, blocknr, blocknr + num - 1, @@ -632,65 +624,47 @@ static int update_block_group(struct btrfs_trans_handle *trans, return 0; } -int btrfs_copy_pinned(struct btrfs_root *root, struct radix_tree_root *copy) +int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy) { - unsigned long gang[8]; u64 last = 0; - 
struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix; + u64 start; + u64 end; + struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents; int ret; - int i; while(1) { - ret = find_first_radix_bit(pinned_radix, gang, last, - ARRAY_SIZE(gang)); - if (!ret) + ret = find_first_extent_bit(pinned_extents, last, + &start, &end, EXTENT_DIRTY); + if (ret) break; - for (i = 0 ; i < ret; i++) { - set_radix_bit(copy, gang[i]); - last = gang[i] + 1; - } + set_extent_dirty(copy, start, end, GFP_NOFS); + last = end + 1; } - ret = find_first_radix_bit(&root->fs_info->extent_ins_radix, gang, 0, - ARRAY_SIZE(gang)); - WARN_ON(ret); return 0; } int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct radix_tree_root *unpin_radix) + struct extent_map_tree *unpin) { - unsigned long gang[8]; - struct btrfs_block_group_cache *block_group; - u64 first = 0; + u64 start; + u64 end; int ret; - int i; - struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix; + struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents; struct extent_map_tree *free_space_cache; free_space_cache = &root->fs_info->free_space_cache; while(1) { - ret = find_first_radix_bit(unpin_radix, gang, 0, - ARRAY_SIZE(gang)); - if (!ret) + ret = find_first_extent_bit(unpin, 0, &start, &end, + EXTENT_DIRTY); + if (ret) break; - if (!first) - first = gang[0]; - for (i = 0; i < ret; i++) { - clear_radix_bit(pinned_radix, gang[i]); - clear_radix_bit(unpin_radix, gang[i]); - block_group = btrfs_lookup_block_group(root->fs_info, - gang[i]); - if (block_group) { - WARN_ON(block_group->pinned == 0); - block_group->pinned--; - if (gang[i] < block_group->last_alloc) - block_group->last_alloc = gang[i]; - set_extent_dirty(free_space_cache, - gang[i], gang[i], GFP_NOFS); - } - } + + clear_extent_dirty(pinned_extents, start, end, + GFP_NOFS); + clear_extent_dirty(unpin, start, end, GFP_NOFS); + set_extent_dirty(free_space_cache, start, end, 
GFP_NOFS); } return 0; } @@ -700,39 +674,36 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct { struct btrfs_key ins; struct btrfs_extent_item extent_item; - int i; int ret; - int err; - unsigned long gang[8]; + int err = 0; + u64 start; + u64 end; struct btrfs_fs_info *info = extent_root->fs_info; btrfs_set_stack_extent_refs(&extent_item, 1); - ins.offset = 1; btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); btrfs_set_stack_extent_owner(&extent_item, extent_root->root_key.objectid); while(1) { - ret = find_first_radix_bit(&info->extent_ins_radix, gang, 0, - ARRAY_SIZE(gang)); - if (!ret) + ret = find_first_extent_bit(&info->extent_ins, 0, &start, + &end, EXTENT_LOCKED); + if (ret) break; - for (i = 0; i < ret; i++) { - ins.objectid = gang[i]; - err = btrfs_insert_item(trans, extent_root, &ins, - &extent_item, - sizeof(extent_item)); - clear_radix_bit(&info->extent_ins_radix, gang[i]); - WARN_ON(err); - } + ins.objectid = start; + ins.offset = end + 1 - start; + err = btrfs_insert_item(trans, extent_root, &ins, + &extent_item, sizeof(extent_item)); + clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED, + GFP_NOFS); } return 0; } static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) { - int err; + int err = 0; struct extent_buffer *buf; if (!pending) { @@ -748,16 +719,11 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) } free_extent_buffer(buf); } - err = set_radix_bit(&root->fs_info->pinned_radix, blocknr); - if (!err) { - struct btrfs_block_group_cache *cache; - cache = btrfs_lookup_block_group(root->fs_info, - blocknr); - if (cache) - cache->pinned++; - } + set_extent_dirty(&root->fs_info->pinned_extents, + blocknr, blocknr, GFP_NOFS); } else { - err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr); + set_extent_bits(&root->fs_info->pending_del, + blocknr, blocknr, EXTENT_LOCKED, GFP_NOFS); } BUG_ON(err < 0); return 0; @@ -840,43 +806,28 @@ static int 
del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root) { int ret; - int wret; int err = 0; - unsigned long gang[4]; - int i; - struct radix_tree_root *pending_radix; - struct radix_tree_root *pinned_radix; - struct btrfs_block_group_cache *cache; + u64 start; + u64 end; + struct extent_map_tree *pending_del; + struct extent_map_tree *pinned_extents; - pending_radix = &extent_root->fs_info->pending_del_radix; - pinned_radix = &extent_root->fs_info->pinned_radix; + pending_del = &extent_root->fs_info->pending_del; + pinned_extents = &extent_root->fs_info->pinned_extents; while(1) { - ret = find_first_radix_bit(pending_radix, gang, 0, - ARRAY_SIZE(gang)); - if (!ret) + ret = find_first_extent_bit(pending_del, 0, &start, &end, + EXTENT_LOCKED); + if (ret) break; - for (i = 0; i < ret; i++) { - wret = set_radix_bit(pinned_radix, gang[i]); - if (wret == 0) { - cache = - btrfs_lookup_block_group(extent_root->fs_info, - gang[i]); - if (cache) - cache->pinned++; - } - if (wret < 0) { - printk(KERN_CRIT "set_radix_bit, err %d\n", - wret); - BUG_ON(wret < 0); - } - wret = clear_radix_bit(pending_radix, gang[i]); - BUG_ON(wret); - wret = __free_extent(trans, extent_root, - gang[i], 1, 0, 0); - if (wret) - err = wret; - } + + set_extent_dirty(pinned_extents, start, end, GFP_NOFS); + clear_extent_bits(pending_del, start, end, EXTENT_LOCKED, + GFP_NOFS); + ret = __free_extent(trans, extent_root, + start, end + 1 - start, 0, 0); + if (ret) + err = ret; } return err; } @@ -920,7 +871,6 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root u64 hole_size = 0; int slot = 0; u64 last_block = 0; - u64 test_block; u64 orig_search_start = search_start; int start_found; struct extent_buffer *l; @@ -1059,13 +1009,15 @@ check_pending: if (ins->objectid + num_blocks >= search_end) goto enospc; - for (test_block = ins->objectid; - test_block < ins->objectid + num_blocks; test_block++) { - if (test_radix_bit(&info->pinned_radix, 
test_block) || - test_radix_bit(&info->extent_ins_radix, test_block)) { - search_start = test_block + 1; - goto new_group; - } + if (test_range_bit(&info->extent_ins, ins->objectid, + ins->objectid + num_blocks -1, EXTENT_LOCKED, 0)) { + search_start = ins->objectid + num_blocks; + goto new_group; + } + if (test_range_bit(&info->pinned_extents, ins->objectid, + ins->objectid + num_blocks -1, EXTENT_DIRTY, 0)) { + search_start = ins->objectid + num_blocks; + goto new_group; } if (exclude_nr > 0 && (ins->objectid + num_blocks > exclude_start && ins->objectid < exclude_start + exclude_nr)) { @@ -1156,7 +1108,9 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, if (root == extent_root) { BUG_ON(num_blocks != 1); - set_radix_bit(&root->fs_info->extent_ins_radix, ins->objectid); + set_extent_bits(&root->fs_info->extent_ins, ins->objectid, + ins->objectid + ins->offset - 1, + EXTENT_LOCKED, GFP_NOFS); goto update_block; } @@ -1557,9 +1511,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) btrfs_item_ptr_offset(leaf, path->slots[0]), sizeof(cache->item)); memcpy(&cache->key, &found_key, sizeof(found_key)); - cache->last_alloc = cache->key.objectid; - cache->first_free = cache->key.objectid; - cache->pinned = 0; cache->cached = 0; key.objectid = found_key.objectid + found_key.offset; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 1b2f9e059de6..e081558d52ff 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1188,8 +1188,8 @@ out: * has the bits set. Otherwise, 1 is returned if any bit in the * range is found set. 
*/ -static int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, - int bits, int filled) +int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, + int bits, int filled) { struct extent_state *state = NULL; struct rb_node *node; @@ -1222,6 +1222,7 @@ static int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, read_unlock_irq(&tree->lock); return bitset; } +EXPORT_SYMBOL(test_range_bit); /* * helper function to set a given page up to date if all the diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 5a63b417034a..75dc600dc0f9 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -95,7 +95,9 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page, get_extent_t *get_extent); void __init extent_map_init(void); void __exit extent_map_exit(void); -int extent_clean_all_trees(struct extent_map_tree *tree); + +int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, + int bits, int filled); int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, int bits, gfp_t mask); int set_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 372b61f5733f..55289b71056e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -481,11 +481,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_transaction *cur_trans; struct btrfs_transaction *prev_trans = NULL; struct list_head dirty_fs_roots; - struct radix_tree_root pinned_copy; + struct extent_map_tree pinned_copy; DEFINE_WAIT(wait); int ret; - init_bit_radix(&pinned_copy); + extent_map_tree_init(&pinned_copy, + root->fs_info->btree_inode->i_mapping, GFP_NOFS); INIT_LIST_HEAD(&dirty_fs_roots); mutex_lock(&root->fs_info->trans_mutex); -- cgit v1.2.3-70-g09d2 From db94535db75e67fab12ccbb7f5ee548e33fed891 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:15:53 -0400 Subject: Btrfs: Allow tree 
blocks larger than the page size Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 319 +++++++++++++++++++++++++++++++++++-------------- fs/btrfs/ctree.h | 112 ++++++++++------- fs/btrfs/disk-io.c | 92 ++++++++------ fs/btrfs/disk-io.h | 9 +- fs/btrfs/extent-tree.c | 285 ++++++++++++++++++++++--------------------- fs/btrfs/extent_map.c | 91 ++++++++++---- fs/btrfs/extent_map.h | 10 +- fs/btrfs/file-item.c | 10 +- fs/btrfs/file.c | 126 ++++++++++--------- fs/btrfs/inode.c | 90 +++++++------- fs/btrfs/print-tree.c | 23 ++-- fs/btrfs/super.c | 6 +- fs/btrfs/sysfs.c | 5 +- fs/btrfs/transaction.c | 39 +++--- fs/btrfs/tree-defrag.c | 18 +-- 15 files changed, 738 insertions(+), 497 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 50ef351ef6b3..34de83630ae9 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -83,16 +83,15 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, WARN_ON(root->ref_cows && trans->transid != root->last_trans); - cow = btrfs_alloc_free_block(trans, root, search_start, empty_size); + cow = btrfs_alloc_free_block(trans, root, buf->len, + search_start, empty_size); if (IS_ERR(cow)) return PTR_ERR(cow); cow->alloc_addr = (unsigned long)__builtin_return_address(0); - if (buf->len != root->sectorsize || cow->len != root->sectorsize) - WARN_ON(1); copy_extent_buffer(cow, buf, 0, 0, cow->len); - btrfs_set_header_blocknr(cow, extent_buffer_blocknr(cow)); + btrfs_set_header_bytenr(cow, cow->start); btrfs_set_header_generation(cow, trans->transid); btrfs_set_header_owner(cow, root->root_key.objectid); @@ -110,16 +109,16 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, root->node = cow; extent_buffer_get(cow); if (buf != root->commit_root) { - btrfs_free_extent(trans, root, - extent_buffer_blocknr(buf), 1, 1); + btrfs_free_extent(trans, root, buf->start, + buf->len, 1); } free_extent_buffer(buf); } else { btrfs_set_node_blockptr(parent, parent_slot, - 
extent_buffer_blocknr(cow)); + cow->start); btrfs_mark_buffer_dirty(parent); WARN_ON(btrfs_header_generation(parent) != trans->transid); - btrfs_free_extent(trans, root, extent_buffer_blocknr(buf),1,1); + btrfs_free_extent(trans, root, buf->start, buf->len, 1); } free_extent_buffer(buf); btrfs_mark_buffer_dirty(cow); @@ -149,13 +148,14 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, return 0; } - search_start = extent_buffer_blocknr(buf) & ~((u64)65535); + search_start = buf->start & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1); ret = __btrfs_cow_block(trans, root, buf, parent, parent_slot, cow_ret, search_start, 0); (*cow_ret)->alloc_addr = (unsigned long)__builtin_return_address(0); return ret; } +#if 0 static int close_blocks(u64 blocknr, u64 other) { if (blocknr < other && other - blocknr < 8) @@ -165,7 +165,6 @@ static int close_blocks(u64 blocknr, u64 other) return 0; } -#if 0 static int should_defrag_leaf(struct extent_buffer *eb) { return 0; @@ -355,7 +354,7 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path, BUG_ON(memcmp(&parent_key, &node_key, sizeof(struct btrfs_disk_key))); BUG_ON(btrfs_node_blockptr(parent, parent_slot) != - btrfs_header_blocknr(node)); + btrfs_header_bytenr(node)); } BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root)); if (slot != 0) { @@ -398,7 +397,7 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, BUG_ON(memcmp(&parent_key, &leaf_key, sizeof(struct btrfs_disk_key))); BUG_ON(btrfs_node_blockptr(parent, parent_slot) != - btrfs_header_blocknr(leaf)); + btrfs_header_bytenr(leaf)); } #if 0 for (i = 0; nritems > 1 && i < nritems - 2; i++) { @@ -467,14 +466,16 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, static int check_block(struct btrfs_root *root, struct btrfs_path *path, int level) { +#if 0 struct extent_buffer *buf = path->nodes[level]; if (memcmp_extent_buffer(buf, root->fs_info->fsid, (unsigned long)btrfs_header_fsid(buf), BTRFS_FSID_SIZE)) { 
printk("warning bad block %Lu\n", buf->start); - BUG(); + return 1; } +#endif if (level == 0) return check_leaf(root, path, level); return check_node(root, path, level); @@ -585,7 +586,8 @@ static struct extent_buffer *read_node_slot(struct btrfs_root *root, return NULL; if (slot >= btrfs_header_nritems(parent)) return NULL; - return read_tree_block(root, btrfs_node_blockptr(parent, slot)); + return read_tree_block(root, btrfs_node_blockptr(parent, slot), + btrfs_level_size(root, btrfs_header_level(parent) - 1)); } static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root @@ -618,7 +620,6 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root */ if (!parent) { struct extent_buffer *child; - u64 blocknr = extent_buffer_blocknr(mid); if (btrfs_header_nritems(mid) != 1) return 0; @@ -632,9 +633,10 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root wait_on_tree_block_writeback(root, mid); /* once for the path */ free_extent_buffer(mid); + ret = btrfs_free_extent(trans, root, mid->start, mid->len, 1); /* once for the root ptr */ free_extent_buffer(mid); - return btrfs_free_extent(trans, root, blocknr, 1, 1); + return ret; } if (btrfs_header_nritems(mid) > BTRFS_NODEPTRS_PER_BLOCK(root) / 4) @@ -680,7 +682,9 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (wret < 0 && wret != -ENOSPC) ret = wret; if (btrfs_header_nritems(right) == 0) { - u64 blocknr = extent_buffer_blocknr(right); + u64 bytenr = right->start; + u32 blocksize = right->len; + clean_tree_block(trans, root, right); wait_on_tree_block_writeback(root, right); free_extent_buffer(right); @@ -689,7 +693,8 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root 1); if (wret) ret = wret; - wret = btrfs_free_extent(trans, root, blocknr, 1, 1); + wret = btrfs_free_extent(trans, root, bytenr, + blocksize, 1); if (wret) ret = wret; } else { @@ -719,7 +724,8 @@ static int 
balance_level(struct btrfs_trans_handle *trans, struct btrfs_root } if (btrfs_header_nritems(mid) == 0) { /* we've managed to empty the middle node, drop it */ - u64 blocknr = extent_buffer_blocknr(mid); + u64 bytenr = mid->start; + u32 blocksize = mid->len; clean_tree_block(trans, root, mid); wait_on_tree_block_writeback(root, mid); free_extent_buffer(mid); @@ -727,7 +733,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root wret = del_ptr(trans, root, path, level + 1, pslot); if (wret) ret = wret; - wret = btrfs_free_extent(trans, root, blocknr, 1, 1); + wret = btrfs_free_extent(trans, root, bytenr, blocksize, 1); if (wret) ret = wret; } else { @@ -830,7 +836,6 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, path->slots[level] = orig_slot; free_extent_buffer(left); } - check_node(root, path, level); return 0; } free_extent_buffer(left); @@ -874,12 +879,10 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, } else { free_extent_buffer(right); } - check_node(root, path, level); return 0; } free_extent_buffer(right); } - check_node(root, path, level); return 1; } @@ -889,19 +892,23 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, int level, int slot) { + return; +#if 0 struct extent_buffer *node; int i; u32 nritems; - u64 blocknr; + u64 bytenr; u64 search; u64 cluster_start; int ret; int nread = 0; int direction = path->reada; + int level; struct radix_tree_root found; unsigned long gang[8]; struct extent_buffer *eb; + if (level == 0) return; @@ -918,8 +925,9 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, init_bit_radix(&found); nritems = btrfs_header_nritems(node); + level = btrfs_header_level(node) - 1; for (i = slot; i < nritems; i++) { - blocknr = btrfs_node_blockptr(node, i); + bytenr = btrfs_node_blockptr(node, i); set_radix_bit(&found, blocknr); } if 
(direction > 0) { @@ -944,6 +952,7 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, } } } +#endif } /* * look for key in the tree. path is filled in with nodes along the way @@ -963,7 +972,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root ins_len, int cow) { struct extent_buffer *b; - u64 blocknr; + u64 bytenr; int slot; int ret; int level; @@ -1027,10 +1036,11 @@ again: /* this is only true while dropping a snapshot */ if (level == lowest_level) break; - blocknr = btrfs_node_blockptr(b, slot); + bytenr = btrfs_node_blockptr(b, slot); if (should_reada) reada_for_search(root, p, level, slot); - b = read_tree_block(root, btrfs_node_blockptr(b, slot)); + b = read_tree_block(root, bytenr, + btrfs_level_size(root, level - 1)); } else { p->slots[level] = slot; if (ins_len > 0 && btrfs_leaf_free_space(root, b) < @@ -1193,14 +1203,14 @@ static int insert_new_root(struct btrfs_trans_handle *trans, BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); - c = btrfs_alloc_free_block(trans, root, - extent_buffer_blocknr(root->node), 0); + c = btrfs_alloc_free_block(trans, root, root->nodesize, + root->node->start, 0); if (IS_ERR(c)) return PTR_ERR(c); memset_extent_buffer(c, 0, 0, root->nodesize); btrfs_set_header_nritems(c, 1); btrfs_set_header_level(c, level); - btrfs_set_header_blocknr(c, extent_buffer_blocknr(c)); + btrfs_set_header_bytenr(c, c->start); btrfs_set_header_generation(c, trans->transid); btrfs_set_header_owner(c, root->root_key.objectid); lower = path->nodes[level-1]; @@ -1213,7 +1223,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, else btrfs_node_key(lower, &lower_key, 0); btrfs_set_node_key(c, &lower_key, 0); - btrfs_set_node_blockptr(c, 0, extent_buffer_blocknr(lower)); + btrfs_set_node_blockptr(c, 0, lower->start); btrfs_mark_buffer_dirty(c); @@ -1237,7 +1247,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, */ static int insert_ptr(struct 
btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_disk_key - *key, u64 blocknr, int slot, int level) + *key, u64 bytenr, int slot, int level) { struct extent_buffer *lower; int nritems; @@ -1256,10 +1266,9 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root (nritems - slot) * sizeof(struct btrfs_key_ptr)); } btrfs_set_node_key(lower, key, slot); - btrfs_set_node_blockptr(lower, slot, blocknr); + btrfs_set_node_blockptr(lower, slot, bytenr); btrfs_set_header_nritems(lower, nritems + 1); btrfs_mark_buffer_dirty(lower); - check_node(root, path, level); return 0; } @@ -1300,14 +1309,14 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root } c_nritems = btrfs_header_nritems(c); - split = btrfs_alloc_free_block(trans, root, - extent_buffer_blocknr(c), 0); + split = btrfs_alloc_free_block(trans, root, root->nodesize, + c->start, 0); if (IS_ERR(split)) return PTR_ERR(split); btrfs_set_header_flags(split, btrfs_header_flags(c)); btrfs_set_header_level(split, btrfs_header_level(c)); - btrfs_set_header_blocknr(split, extent_buffer_blocknr(split)); + btrfs_set_header_bytenr(split, split->start); btrfs_set_header_generation(split, trans->transid); btrfs_set_header_owner(split, root->root_key.objectid); write_extent_buffer(split, root->fs_info->fsid, @@ -1328,8 +1337,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_mark_buffer_dirty(split); btrfs_node_key(split, &disk_key, 0); - wret = insert_ptr(trans, root, path, &disk_key, - extent_buffer_blocknr(split), + wret = insert_ptr(trans, root, path, &disk_key, split->start, path->slots[level + 1] + 1, level + 1); if (wret) @@ -1407,6 +1415,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root u32 left_nritems; u32 right_nritems; u32 data_end; + u32 this_item_size; int ret; slot = path->slots[1]; @@ -1417,7 +1426,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, 
struct btrfs_root if (slot >= btrfs_header_nritems(upper) - 1) return 1; - right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1)); + right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1), + root->leafsize); free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { free_extent_buffer(right); @@ -1445,13 +1455,27 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root for (i = left_nritems - 1; i >= 1; i--) { item = btrfs_item_nr(left, i); + if (path->slots[0] == i) push_space += data_size + sizeof(*item); - if (btrfs_item_size(left, item) + sizeof(*item) + push_space > - free_space) + + if (!left->map_token) { + map_extent_buffer(left, (unsigned long)item, + sizeof(struct btrfs_item), + &left->map_token, &left->kaddr, + &left->map_start, &left->map_len, + KM_USER1); + } + + this_item_size = btrfs_item_size(left, item); + if (this_item_size + sizeof(*item) + push_space > free_space) break; push_items++; - push_space += btrfs_item_size(left, item) + sizeof(*item); + push_space += this_item_size + sizeof(*item); + } + if (left->map_token) { + unmap_extent_buffer(left, left->map_token, KM_USER1); + left->map_token = NULL; } if (push_items == 0) { @@ -1493,11 +1517,23 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root right_nritems += push_items; btrfs_set_header_nritems(right, right_nritems); push_space = BTRFS_LEAF_DATA_SIZE(root); + for (i = 0; i < right_nritems; i++) { item = btrfs_item_nr(right, i); - btrfs_set_item_offset(right, item, push_space - - btrfs_item_size(right, item)); - push_space = btrfs_item_offset(right, item); + if (!right->map_token) { + map_extent_buffer(right, (unsigned long)item, + sizeof(struct btrfs_item), + &right->map_token, &right->kaddr, + &right->map_start, &right->map_len, + KM_USER1); + } + push_space -= btrfs_item_size(right, item); + btrfs_set_item_offset(right, item, push_space); + } + + if 
(right->map_token) { + unmap_extent_buffer(right, right->map_token, KM_USER1); + right->map_token = NULL; } left_nritems -= push_items; btrfs_set_header_nritems(left, left_nritems); @@ -1518,8 +1554,6 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root } else { free_extent_buffer(right); } - if (path->nodes[1]) - check_node(root, path, 1); return 0; } /* @@ -1542,6 +1576,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root u32 right_nritems; int ret = 0; int wret; + u32 this_item_size; + u32 old_left_item_size; slot = path->slots[1]; if (slot == 0) @@ -1550,7 +1586,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root return 1; left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1], - slot - 1)); + slot - 1), root->leafsize); free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { free_extent_buffer(left); @@ -1579,14 +1615,30 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root for (i = 0; i < right_nritems - 1; i++) { item = btrfs_item_nr(right, i); + if (!right->map_token) { + map_extent_buffer(right, (unsigned long)item, + sizeof(struct btrfs_item), + &right->map_token, &right->kaddr, + &right->map_start, &right->map_len, + KM_USER1); + } + if (path->slots[0] == i) push_space += data_size + sizeof(*item); - if (btrfs_item_size(right, item) + sizeof(*item) + push_space > - free_space) + + this_item_size = btrfs_item_size(right, item); + if (this_item_size + sizeof(*item) + push_space > free_space) break; + push_items++; - push_space += btrfs_item_size(right, item) + sizeof(*item); + push_space += this_item_size + sizeof(*item); + } + + if (right->map_token) { + unmap_extent_buffer(right, right->map_token, KM_USER1); + right->map_token = NULL; } + if (push_items == 0) { free_extent_buffer(left); return 1; @@ -1611,15 +1663,28 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, 
struct btrfs_root old_left_nritems = btrfs_header_nritems(left); BUG_ON(old_left_nritems < 0); + old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1); for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { u32 ioff; + item = btrfs_item_nr(left, i); + if (!left->map_token) { + map_extent_buffer(left, (unsigned long)item, + sizeof(struct btrfs_item), + &left->map_token, &left->kaddr, + &left->map_start, &left->map_len, + KM_USER1); + } + ioff = btrfs_item_offset(left, item); btrfs_set_item_offset(left, item, - ioff - (BTRFS_LEAF_DATA_SIZE(root) - - btrfs_item_offset_nr(left, old_left_nritems - 1))); + ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size)); } btrfs_set_header_nritems(left, old_left_nritems + push_items); + if (left->map_token) { + unmap_extent_buffer(left, left->map_token, KM_USER1); + left->map_token = NULL; + } /* fixup right node */ push_space = btrfs_item_offset_nr(right, push_items - 1) - @@ -1640,9 +1705,21 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root for (i = 0; i < right_nritems; i++) { item = btrfs_item_nr(right, i); - btrfs_set_item_offset(right, item, push_space - - btrfs_item_size(right, item)); - push_space = btrfs_item_offset(right, item); + + if (!right->map_token) { + map_extent_buffer(right, (unsigned long)item, + sizeof(struct btrfs_item), + &right->map_token, &right->kaddr, + &right->map_start, &right->map_len, + KM_USER1); + } + + push_space = push_space - btrfs_item_size(right, item); + btrfs_set_item_offset(right, item, push_space); + } + if (right->map_token) { + unmap_extent_buffer(right, right->map_token, KM_USER1); + right->map_token = NULL; } btrfs_mark_buffer_dirty(left); @@ -1664,8 +1741,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root path->slots[0] -= push_items; } BUG_ON(path->slots[0] < 0); - if (path->nodes[1]) - check_node(root, path, 1); return ret; } @@ -1718,13 +1793,13 @@ static int split_leaf(struct 
btrfs_trans_handle *trans, struct btrfs_root nritems = btrfs_header_nritems(l); mid = (nritems + 1)/ 2; - right = btrfs_alloc_free_block(trans, root, - extent_buffer_blocknr(l), 0); + right = btrfs_alloc_free_block(trans, root, root->leafsize, + l->start, 0); if (IS_ERR(right)) return PTR_ERR(right); memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); - btrfs_set_header_blocknr(right, extent_buffer_blocknr(right)); + btrfs_set_header_bytenr(right, right->start); btrfs_set_header_generation(right, trans->transid); btrfs_set_header_owner(right, root->root_key.objectid); btrfs_set_header_level(right, 0); @@ -1740,8 +1815,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_cpu_key_to_disk(&disk_key, ins_key); btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, - &disk_key, - extent_buffer_blocknr(right), + &disk_key, right->start, path->slots[1] + 1, 1); if (wret) ret = wret; @@ -1762,7 +1836,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, &disk_key, - extent_buffer_blocknr(right), + right->start, path->slots[1], 1); if (wret) ret = wret; @@ -1799,15 +1873,30 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root for (i = 0; i < nritems; i++) { struct btrfs_item *item = btrfs_item_nr(right, i); - u32 ioff = btrfs_item_offset(right, item); + u32 ioff; + + if (!right->map_token) { + map_extent_buffer(right, (unsigned long)item, + sizeof(struct btrfs_item), + &right->map_token, &right->kaddr, + &right->map_start, &right->map_len, + KM_USER1); + } + + ioff = btrfs_item_offset(right, item); btrfs_set_item_offset(right, item, ioff + rt_data_off); } + if (right->map_token) { + unmap_extent_buffer(right, right->map_token, KM_USER1); + right->map_token = NULL; + } + btrfs_set_header_nritems(l, mid); ret = 0; btrfs_item_key(right, &disk_key, 0); - wret = insert_ptr(trans, root, path, 
&disk_key, - extent_buffer_blocknr(right), path->slots[1] + 1, 1); + wret = insert_ptr(trans, root, path, &disk_key, right->start, + path->slots[1] + 1, 1); if (wret) ret = wret; @@ -1824,19 +1913,17 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root free_extent_buffer(right); BUG_ON(path->slots[0] < 0); - check_node(root, path, 1); - check_leaf(root, path, 0); if (!double_split) return ret; - right = btrfs_alloc_free_block(trans, root, - extent_buffer_blocknr(l), 0); + right = btrfs_alloc_free_block(trans, root, root->leafsize, + l->start, 0); if (IS_ERR(right)) return PTR_ERR(right); memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); - btrfs_set_header_blocknr(right, extent_buffer_blocknr(right)); + btrfs_set_header_bytenr(right, right->start); btrfs_set_header_generation(right, trans->transid); btrfs_set_header_owner(right, root->root_key.objectid); btrfs_set_header_level(right, 0); @@ -1847,8 +1934,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_cpu_key_to_disk(&disk_key, ins_key); btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, - &disk_key, - extent_buffer_blocknr(right), + &disk_key, right->start, path->slots[1], 1); if (wret) ret = wret; @@ -1860,8 +1946,6 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root free_extent_buffer(path->nodes[0]); path->nodes[0] = right; path->slots[0] = 0; - check_node(root, path, 1); - check_leaf(root, path, 0); return ret; } @@ -1904,9 +1988,24 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, for (i = slot; i < nritems; i++) { u32 ioff; item = btrfs_item_nr(leaf, i); + + if (!leaf->map_token) { + map_extent_buffer(leaf, (unsigned long)item, + sizeof(struct btrfs_item), + &leaf->map_token, &leaf->kaddr, + &leaf->map_start, &leaf->map_len, + KM_USER1); + } + ioff = btrfs_item_offset(leaf, item); btrfs_set_item_offset(leaf, item, ioff + size_diff); } + + if (leaf->map_token) { + 
unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); + leaf->map_token = NULL; + } + /* shift the data */ memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end + size_diff, btrfs_leaf_data(leaf) + @@ -1921,7 +2020,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, btrfs_print_leaf(root, leaf); BUG(); } - check_leaf(root, path, 0); return ret; } @@ -1963,10 +2061,23 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, for (i = slot; i < nritems; i++) { u32 ioff; item = btrfs_item_nr(leaf, i); + + if (!leaf->map_token) { + map_extent_buffer(leaf, (unsigned long)item, + sizeof(struct btrfs_item), + &leaf->map_token, &leaf->kaddr, + &leaf->map_start, &leaf->map_len, + KM_USER1); + } ioff = btrfs_item_offset(leaf, item); btrfs_set_item_offset(leaf, item, ioff - data_size); } + if (leaf->map_token) { + unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); + leaf->map_token = NULL; + } + /* shift the data */ memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end - data_size, btrfs_leaf_data(leaf) + @@ -1983,7 +2094,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, btrfs_print_leaf(root, leaf); BUG(); } - check_leaf(root, path, 0); return ret; } @@ -2046,12 +2156,26 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, * item0..itemN ... dataN.offset..dataN.size .. 
data0.size */ /* first correct the data pointers */ + WARN_ON(leaf->map_token); for (i = slot; i < nritems; i++) { u32 ioff; + item = btrfs_item_nr(leaf, i); + if (!leaf->map_token) { + map_extent_buffer(leaf, (unsigned long)item, + sizeof(struct btrfs_item), + &leaf->map_token, &leaf->kaddr, + &leaf->map_start, &leaf->map_len, + KM_USER1); + } + ioff = btrfs_item_offset(leaf, item); btrfs_set_item_offset(leaf, item, ioff - data_size); } + if (leaf->map_token) { + unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); + leaf->map_token = NULL; + } /* shift the items */ memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), @@ -2081,7 +2205,6 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, btrfs_print_leaf(root, leaf); BUG(); } - check_leaf(root, path, 0); out: return ret; } @@ -2186,10 +2309,24 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, for (i = slot + 1; i < nritems; i++) { u32 ioff; + item = btrfs_item_nr(leaf, i); + if (!leaf->map_token) { + map_extent_buffer(leaf, (unsigned long)item, + sizeof(struct btrfs_item), + &leaf->map_token, &leaf->kaddr, + &leaf->map_start, &leaf->map_len, + KM_USER1); + } ioff = btrfs_item_offset(leaf, item); btrfs_set_item_offset(leaf, item, ioff + dsize); } + + if (leaf->map_token) { + unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); + leaf->map_token = NULL; + } + memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot), btrfs_item_nr_offset(slot + 1), sizeof(struct btrfs_item) * @@ -2209,8 +2346,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (wret) ret = wret; wret = btrfs_free_extent(trans, root, - extent_buffer_blocknr(leaf), - 1, 1); + leaf->start, leaf->len, 1); if (wret) ret = wret; } @@ -2247,7 +2383,8 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, } if (btrfs_header_nritems(leaf) == 0) { - u64 blocknr = extent_buffer_blocknr(leaf); + u64 bytenr = leaf->start; + u32 blocksize = leaf->len; 
clean_tree_block(trans, root, leaf); wait_on_tree_block_writeback(root, leaf); @@ -2257,8 +2394,8 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, ret = wret; free_extent_buffer(leaf); - wret = btrfs_free_extent(trans, root, blocknr, - 1, 1); + wret = btrfs_free_extent(trans, root, bytenr, + blocksize, 1); if (wret) ret = wret; } else { @@ -2281,7 +2418,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) { int slot; int level = 1; - u64 blocknr; + u64 bytenr; struct extent_buffer *c; struct extent_buffer *next = NULL; @@ -2296,14 +2433,15 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) continue; } - blocknr = btrfs_node_blockptr(c, slot); + bytenr = btrfs_node_blockptr(c, slot); if (next) free_extent_buffer(next); if (path->reada) reada_for_search(root, path, level, slot); - next = read_tree_block(root, blocknr); + next = read_tree_block(root, bytenr, + btrfs_level_size(root, level -1)); break; } path->slots[level] = slot; @@ -2317,7 +2455,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) break; if (path->reada) reada_for_search(root, path, level, 0); - next = read_tree_block(root, btrfs_node_blockptr(next, 0)); + next = read_tree_block(root, btrfs_node_blockptr(next, 0), + btrfs_level_size(root, level - 1)); } return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 256689551eb0..4d05456ec32f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -96,7 +96,7 @@ struct btrfs_key { struct btrfs_header { u8 csum[BTRFS_CSUM_SIZE]; u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ - __le64 blocknr; /* which block this node is supposed to live in */ + __le64 bytenr; /* which block this node is supposed to live in */ __le64 generation; __le64 owner; __le32 nritems; @@ -122,16 +122,17 @@ struct btrfs_super_block { u8 csum[BTRFS_CSUM_SIZE]; /* the first 3 fields must match struct btrfs_header */ u8 fsid[16]; /* FS specific uuid */ - __le64 blocknr; /* this 
block number */ + __le64 bytenr; /* this block number */ __le64 magic; __le64 generation; __le64 root; - __le64 total_blocks; - __le64 blocks_used; + __le64 total_bytes; + __le64 bytes_used; __le64 root_dir_objectid; __le32 sectorsize; __le32 nodesize; __le32 leafsize; + u8 root_level; } __attribute__ ((__packed__)); /* @@ -231,13 +232,14 @@ struct btrfs_dir_item { struct btrfs_root_item { struct btrfs_inode_item inode; __le64 root_dirid; - __le64 blocknr; - __le64 block_limit; - __le64 blocks_used; + __le64 bytenr; + __le64 byte_limit; + __le64 bytes_used; __le32 flags; __le32 refs; struct btrfs_disk_key drop_progress; u8 drop_level; + u8 level; } __attribute__ ((__packed__)); #define BTRFS_FILE_EXTENT_REG 0 @@ -250,8 +252,8 @@ struct btrfs_file_extent_item { * disk space consumed by the extent, checksum blocks are included * in these numbers */ - __le64 disk_blocknr; - __le64 disk_num_blocks; + __le64 disk_bytenr; + __le64 disk_num_bytes; /* * the logical offset in file blocks (no csums) * this extent record is for. 
This allows a file extent to point @@ -263,7 +265,7 @@ struct btrfs_file_extent_item { /* * the logical number of file blocks (no csums included) */ - __le64 num_blocks; + __le64 num_bytes; } __attribute__ ((__packed__)); struct btrfs_csum_item { @@ -429,6 +431,7 @@ static inline u##bits btrfs_##name(struct extent_buffer *eb, \ int err; \ char *map_token; \ char *kaddr; \ + int unmap_on_exit = (eb->map_token == NULL); \ unsigned long map_start; \ unsigned long map_len; \ unsigned long offset = (unsigned long)s + \ @@ -436,12 +439,13 @@ static inline u##bits btrfs_##name(struct extent_buffer *eb, \ err = map_extent_buffer(eb, offset, \ sizeof(((type *)0)->member), \ &map_token, &kaddr, \ - &map_start, &map_len, KM_USER0); \ + &map_start, &map_len, KM_USER1); \ if (!err) { \ __le##bits *tmp = (__le##bits *)(kaddr + offset - \ map_start); \ u##bits res = le##bits##_to_cpu(*tmp); \ - unmap_extent_buffer(eb, map_token, KM_USER0); \ + if (unmap_on_exit) \ + unmap_extent_buffer(eb, map_token, KM_USER1); \ return res; \ } else { \ __le##bits res; \ @@ -457,17 +461,19 @@ static inline void btrfs_set_##name(struct extent_buffer *eb, \ char *kaddr; \ unsigned long map_start; \ unsigned long map_len; \ + int unmap_on_exit = (eb->map_token == NULL); \ unsigned long offset = (unsigned long)s + \ offsetof(type, member); \ err = map_extent_buffer(eb, offset, \ sizeof(((type *)0)->member), \ &map_token, &kaddr, \ - &map_start, &map_len, KM_USER0); \ + &map_start, &map_len, KM_USER1); \ if (!err) { \ __le##bits *tmp = (__le##bits *)(kaddr + offset - \ map_start); \ *tmp = cpu_to_le##bits(val); \ - unmap_extent_buffer(eb, map_token, KM_USER0); \ + if (unmap_on_exit) \ + unmap_extent_buffer(eb, map_token, KM_USER1); \ } else { \ val = cpu_to_le##bits(val); \ write_eb_member(eb, s, type, member, &val); \ @@ -483,15 +489,17 @@ static inline u##bits btrfs_##name(struct extent_buffer *eb) \ unsigned long map_start; \ unsigned long map_len; \ unsigned long offset = offsetof(type, member); 
\ + int unmap_on_exit = (eb->map_token == NULL); \ err = map_extent_buffer(eb, offset, \ sizeof(((type *)0)->member), \ &map_token, &kaddr, \ - &map_start, &map_len, KM_USER0); \ + &map_start, &map_len, KM_USER1); \ if (!err) { \ __le##bits *tmp = (__le##bits *)(kaddr + offset - \ map_start); \ u##bits res = le##bits##_to_cpu(*tmp); \ - unmap_extent_buffer(eb, map_token, KM_USER0); \ + if (unmap_on_exit) \ + unmap_extent_buffer(eb, map_token, KM_USER1); \ return res; \ } else { \ __le##bits res; \ @@ -508,15 +516,17 @@ static inline void btrfs_set_##name(struct extent_buffer *eb, \ unsigned long map_start; \ unsigned long map_len; \ unsigned long offset = offsetof(type, member); \ + int unmap_on_exit = (eb->map_token == NULL); \ err = map_extent_buffer(eb, offset, \ sizeof(((type *)0)->member), \ &map_token, &kaddr, \ - &map_start, &map_len, KM_USER0); \ + &map_start, &map_len, KM_USER1); \ if (!err) { \ __le##bits *tmp = (__le##bits *)(kaddr + offset - \ map_start); \ *tmp = cpu_to_le##bits(val); \ - unmap_extent_buffer(eb, map_token, KM_USER0); \ + if (unmap_on_exit) \ + unmap_extent_buffer(eb, map_token, KM_USER1); \ } else { \ val = cpu_to_le##bits(val); \ write_eb_member(eb, NULL, type, member, &val); \ @@ -769,7 +779,7 @@ static inline void btrfs_set_key_type(struct btrfs_key *key, u8 val) } /* struct btrfs_header */ -BTRFS_SETGET_HEADER_FUNCS(header_blocknr, struct btrfs_header, blocknr, 64); +BTRFS_SETGET_HEADER_FUNCS(header_bytenr, struct btrfs_header, bytenr, 64); BTRFS_SETGET_HEADER_FUNCS(header_generation, struct btrfs_header, generation, 64); BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64); @@ -817,24 +827,28 @@ static inline int btrfs_is_leaf(struct extent_buffer *eb) /* struct btrfs_root_item */ BTRFS_SETGET_FUNCS(disk_root_refs, struct btrfs_root_item, refs, 32); -BTRFS_SETGET_FUNCS(disk_root_blocknr, struct btrfs_root_item, blocknr, 64); +BTRFS_SETGET_FUNCS(disk_root_bytenr, struct btrfs_root_item, bytenr, 64); 
+BTRFS_SETGET_FUNCS(disk_root_level, struct btrfs_root_item, level, 8); -BTRFS_SETGET_STACK_FUNCS(root_blocknr, struct btrfs_root_item, blocknr, 64); +BTRFS_SETGET_STACK_FUNCS(root_bytenr, struct btrfs_root_item, bytenr, 64); +BTRFS_SETGET_STACK_FUNCS(root_level, struct btrfs_root_item, level, 8); BTRFS_SETGET_STACK_FUNCS(root_dirid, struct btrfs_root_item, root_dirid, 64); BTRFS_SETGET_STACK_FUNCS(root_refs, struct btrfs_root_item, refs, 32); BTRFS_SETGET_STACK_FUNCS(root_flags, struct btrfs_root_item, flags, 32); -BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, blocks_used, 64); -BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, block_limit, 64); +BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, bytes_used, 64); +BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64); /* struct btrfs_super_block */ -BTRFS_SETGET_STACK_FUNCS(super_blocknr, struct btrfs_super_block, blocknr, 64); +BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block, generation, 64); BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64); -BTRFS_SETGET_STACK_FUNCS(super_total_blocks, struct btrfs_super_block, - total_blocks, 64); -BTRFS_SETGET_STACK_FUNCS(super_blocks_used, struct btrfs_super_block, - blocks_used, 64); +BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block, + root_level, 8); +BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block, + total_bytes, 64); +BTRFS_SETGET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block, + bytes_used, 64); BTRFS_SETGET_STACK_FUNCS(super_sectorsize, struct btrfs_super_block, sectorsize, 32); BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block, @@ -856,33 +870,33 @@ static inline unsigned long btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e) { unsigned long offset = (unsigned long)e; - offset += offsetof(struct 
btrfs_file_extent_item, disk_blocknr); + offset += offsetof(struct btrfs_file_extent_item, disk_bytenr); return offset; } static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize) { - return offsetof(struct btrfs_file_extent_item, disk_blocknr) + datasize; + return offsetof(struct btrfs_file_extent_item, disk_bytenr) + datasize; } static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb, struct btrfs_item *e) { unsigned long offset; - offset = offsetof(struct btrfs_file_extent_item, disk_blocknr); + offset = offsetof(struct btrfs_file_extent_item, disk_bytenr); return btrfs_item_size(eb, e) - offset; } -BTRFS_SETGET_FUNCS(file_extent_disk_blocknr, struct btrfs_file_extent_item, - disk_blocknr, 64); +BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item, + disk_bytenr, 64); BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item, generation, 64); -BTRFS_SETGET_FUNCS(file_extent_disk_num_blocks, struct btrfs_file_extent_item, - disk_num_blocks, 64); +BTRFS_SETGET_FUNCS(file_extent_disk_num_bytes, struct btrfs_file_extent_item, + disk_num_bytes, 64); BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item, offset, 64); -BTRFS_SETGET_FUNCS(file_extent_num_blocks, struct btrfs_file_extent_item, - num_blocks, 64); +BTRFS_SETGET_FUNCS(file_extent_num_bytes, struct btrfs_file_extent_item, + num_bytes, 64); static inline struct btrfs_root *btrfs_sb(struct super_block *sb) { @@ -906,6 +920,12 @@ static inline int btrfs_set_root_name(struct btrfs_root *root, return 0; } +static inline u32 btrfs_level_size(struct btrfs_root *root, int level) { + if (level == 0) + return root->leafsize; + return root->nodesize; +} + /* helper function to cast into the data area of the leaf. 
*/ #define btrfs_item_ptr(leaf, slot, type) \ ((type *)(btrfs_leaf_data(leaf) + \ @@ -927,7 +947,7 @@ int btrfs_extent_post_op(struct btrfs_trans_handle *trans, int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy); struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, - u64 blocknr); + u64 bytenr); struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *hint, u64 search_start, @@ -935,22 +955,22 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 hint, - u64 empty_size); + struct btrfs_root *root, u32 size, + u64 hint, u64 empty_size); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, - u64 num_blocks, u64 empty_size, u64 search_start, + u64 num_bytes, u64 empty_size, u64 search_start, u64 search_end, struct btrfs_key *ins, int data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u64 num_blocks, int pin); + *root, u64 bytenr, u64 num_bytes, int pin); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_map_tree *unpin); int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 blocknr, u64 num_blocks); + u64 bytenr, u64 num_bytes); int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_free_block_groups(struct btrfs_fs_info *info); @@ -1040,12 +1060,12 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root 
*root, u64 objectid, u64 pos, u64 offset, - u64 disk_num_blocks, - u64 num_blocks); + u64 disk_num_bytes, + u64 num_bytes); int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, - u64 blocknr, int mod); + u64 bytenr, int mod); int btrfs_csum_file_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 offset, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2b86a1d779b7..fad9298c6962 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -28,6 +28,7 @@ #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" +#include "print-tree.h" #if 0 static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) @@ -43,26 +44,25 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) #endif struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, - u64 blocknr) + u64 bytenr, u32 blocksize) { struct inode *btree_inode = root->fs_info->btree_inode; struct extent_buffer *eb; eb = find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, - blocknr * root->sectorsize, - root->sectorsize, GFP_NOFS); + bytenr, blocksize, GFP_NOFS); if (eb) eb->alloc_addr = (unsigned long)__builtin_return_address(0); return eb; } struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, - u64 blocknr) + u64 bytenr, u32 blocksize) { struct inode *btree_inode = root->fs_info->btree_inode; struct extent_buffer *eb; + eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, - blocknr * root->sectorsize, - root->sectorsize, GFP_NOFS); + bytenr, blocksize, GFP_NOFS); eb->alloc_addr = (unsigned long)__builtin_return_address(0); return eb; } @@ -208,13 +208,13 @@ static struct address_space_operations btree_aops = { .sync_page = block_sync_page, }; -int readahead_tree_block(struct btrfs_root *root, u64 blocknr) +int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { struct 
extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; int ret = 0; - buf = btrfs_find_create_tree_block(root, blocknr); + buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return 0; read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, @@ -223,12 +223,13 @@ int readahead_tree_block(struct btrfs_root *root, u64 blocknr) return ret; } -struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) +struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, + u32 blocksize) { struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; - buf = btrfs_find_create_tree_block(root, blocknr); + buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return NULL; read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, @@ -261,7 +262,7 @@ int set_tree_block_dirty(struct btrfs_root *root, struct extent_buffer *buf) return 0; } -static int __setup_root(int blocksize, +static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, struct btrfs_root *root, struct btrfs_fs_info *fs_info, u64 objectid) @@ -269,9 +270,9 @@ static int __setup_root(int blocksize, root->node = NULL; root->inode = NULL; root->commit_root = NULL; - root->sectorsize = blocksize; - root->nodesize = blocksize; - root->leafsize = blocksize; + root->sectorsize = sectorsize; + root->nodesize = nodesize; + root->leafsize = leafsize; root->ref_cows = 0; root->fs_info = fs_info; root->objectid = objectid; @@ -291,21 +292,23 @@ static int __setup_root(int blocksize, return 0; } -static int find_and_setup_root(int blocksize, - struct btrfs_root *tree_root, +static int find_and_setup_root(struct btrfs_root *tree_root, struct btrfs_fs_info *fs_info, u64 objectid, struct btrfs_root *root) { int ret; + u32 blocksize; - __setup_root(blocksize, root, fs_info, objectid); + __setup_root(tree_root->nodesize, tree_root->leafsize, + tree_root->sectorsize, root, fs_info, objectid); ret = 
btrfs_find_last_root(tree_root, objectid, &root->root_item, &root->root_key); BUG_ON(ret); - root->node = read_tree_block(root, - btrfs_root_blocknr(&root->root_item)); + blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); + root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), + blocksize); BUG_ON(!root->node); return 0; } @@ -318,14 +321,14 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, struct btrfs_path *path; struct extent_buffer *l; u64 highest_inode; + u32 blocksize; int ret = 0; root = kzalloc(sizeof(*root), GFP_NOFS); if (!root) return ERR_PTR(-ENOMEM); if (location->offset == (u64)-1) { - ret = find_and_setup_root(fs_info->sb->s_blocksize, - fs_info->tree_root, fs_info, + ret = find_and_setup_root(tree_root, fs_info, location->objectid, root); if (ret) { kfree(root); @@ -334,7 +337,8 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, goto insert; } - __setup_root(fs_info->sb->s_blocksize, root, fs_info, + __setup_root(tree_root->nodesize, tree_root->leafsize, + tree_root->sectorsize, root, fs_info, location->objectid); path = btrfs_alloc_path(); @@ -357,8 +361,9 @@ out: kfree(root); return ERR_PTR(ret); } - root->node = read_tree_block(root, - btrfs_root_blocknr(&root->root_item)); + blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); + root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), + blocksize); BUG_ON(!root->node); insert: root->ref_cows = 1; @@ -418,6 +423,10 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *open_ctree(struct super_block *sb) { + u32 sectorsize; + u32 nodesize; + u32 leafsize; + u32 blocksize; struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), @@ -474,12 +483,12 @@ struct btrfs_root *open_ctree(struct super_block *sb) mutex_init(&fs_info->trans_mutex); 
mutex_init(&fs_info->fs_mutex); - __setup_root(sb->s_blocksize, tree_root, + __setup_root(512, 512, 512, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); fs_info->sb_buffer = read_tree_block(tree_root, - BTRFS_SUPER_INFO_OFFSET / - sb->s_blocksize); + BTRFS_SUPER_INFO_OFFSET, + 512); if (!fs_info->sb_buffer) goto fail_iput; @@ -494,9 +503,15 @@ struct btrfs_root *open_ctree(struct super_block *sb) if (!btrfs_super_root(disk_super)) goto fail_sb_buffer; + nodesize = btrfs_super_nodesize(disk_super); + leafsize = btrfs_super_leafsize(disk_super); + sectorsize = btrfs_super_sectorsize(disk_super); + tree_root->nodesize = nodesize; + tree_root->leafsize = leafsize; + tree_root->sectorsize = sectorsize; + i_size_write(fs_info->btree_inode, - btrfs_super_total_blocks(disk_super) << - fs_info->btree_inode->i_blkbits); + btrfs_super_total_bytes(disk_super)); if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, @@ -504,13 +519,22 @@ struct btrfs_root *open_ctree(struct super_block *sb) printk("btrfs: valid FS not found on %s\n", sb->s_id); goto fail_sb_buffer; } + blocksize = btrfs_level_size(tree_root, + btrfs_super_root_level(disk_super)); tree_root->node = read_tree_block(tree_root, - btrfs_super_root(disk_super)); + btrfs_super_root(disk_super), + blocksize); if (!tree_root->node) goto fail_sb_buffer; +#if 0 + btrfs_print_leaf(tree_root, tree_root->node); + err = -EIO; + goto fail_tree_root; +#endif mutex_lock(&fs_info->fs_mutex); - ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, + + ret = find_and_setup_root(tree_root, fs_info, BTRFS_EXTENT_TREE_OBJECTID, extent_root); if (ret) { mutex_unlock(&fs_info->fs_mutex); @@ -611,11 +635,11 @@ int close_ctree(struct btrfs_root *root) free_extent_buffer(fs_info->tree_root->node); free_extent_buffer(fs_info->sb_buffer); - truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); - iput(fs_info->btree_inode); btrfs_free_block_groups(root->fs_info); del_fs_roots(fs_info); + 
truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); + iput(fs_info->btree_inode); kfree(fs_info->extent_root); kfree(fs_info->tree_root); return 0; @@ -642,7 +666,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) if (transid != root->fs_info->generation) { printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", - (unsigned long long)extent_buffer_blocknr(buf), + (unsigned long long)buf->start, transid, root->fs_info->generation); WARN_ON(1); } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 70d9413c599f..fd4db5f810cf 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -21,10 +21,11 @@ #define BTRFS_SUPER_INFO_OFFSET (16 * 1024) -struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr); -int readahead_tree_block(struct btrfs_root *root, u64 blocknr); +struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, + u32 blocksize); +int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, - u64 blocknr); + u64 bytenr, u32 blocksize); int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf); struct btrfs_root *open_ctree(struct super_block *sb); @@ -32,7 +33,7 @@ int close_ctree(struct btrfs_root *root); int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, - u64 blocknr); + u64 bytenr, u32 blocksize); struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_key *location, const char *name, int namelen); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 477466d167a4..1be8f9f04a13 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -129,7 +129,7 @@ err: struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, - u64 blocknr) + u64 bytenr) { struct extent_map_tree 
*block_group_cache; struct btrfs_block_group_cache *block_group = NULL; @@ -140,7 +140,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct block_group_cache = &info->block_group_cache; ret = find_first_extent_bit(block_group_cache, - blocknr, &start, &end, + bytenr, &start, &end, BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA); if (ret) { return NULL; @@ -152,7 +152,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct block_group = (struct btrfs_block_group_cache *)ptr; - if (block_group->key.objectid <= blocknr && blocknr <= + if (block_group->key.objectid <= bytenr && bytenr <= block_group->key.objectid + block_group->key.offset) return block_group; @@ -315,7 +315,7 @@ found: int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 blocknr, u64 num_blocks) + u64 bytenr, u64 num_bytes) { struct btrfs_path *path; int ret; @@ -324,13 +324,14 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_extent_item *item; u32 refs; + WARN_ON(num_bytes < root->sectorsize); path = btrfs_alloc_path(); if (!path) return -ENOMEM; - key.objectid = blocknr; + key.objectid = bytenr; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); - key.offset = num_blocks; + key.offset = num_bytes; ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 1); if (ret < 0) @@ -361,8 +362,8 @@ int btrfs_extent_post_op(struct btrfs_trans_handle *trans, } static int lookup_extent_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 blocknr, - u64 num_blocks, u32 *refs) + struct btrfs_root *root, u64 bytenr, + u64 num_bytes, u32 *refs) { struct btrfs_path *path; int ret; @@ -370,9 +371,10 @@ static int lookup_extent_ref(struct btrfs_trans_handle *trans, struct extent_buffer *l; struct btrfs_extent_item *item; + WARN_ON(num_bytes < root->sectorsize); path = btrfs_alloc_path(); - key.objectid = blocknr; - key.offset = num_blocks; + key.objectid = bytenr; + key.offset = num_bytes; 
btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 0); @@ -380,7 +382,7 @@ static int lookup_extent_ref(struct btrfs_trans_handle *trans, goto out; if (ret != 0) { btrfs_print_leaf(root, path->nodes[0]); - printk("failed to find block number %Lu\n", blocknr); + printk("failed to find block number %Lu\n", bytenr); BUG(); } l = path->nodes[0]; @@ -394,19 +396,19 @@ out: int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - return btrfs_inc_extent_ref(trans, root, - extent_buffer_blocknr(root->node), 1); + return btrfs_inc_extent_ref(trans, root, root->node->start, + root->node->len); } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf) { - u64 blocknr; + u64 bytenr; u32 nritems; struct btrfs_key key; struct btrfs_file_extent_item *fi; int i; - int leaf; + int level; int ret; int faili; int err; @@ -414,11 +416,11 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (!root->ref_cows) return 0; - leaf = btrfs_is_leaf(buf); + level = btrfs_header_level(buf); nritems = btrfs_header_nritems(buf); for (i = 0; i < nritems; i++) { - if (leaf) { - u64 disk_blocknr; + if (level == 0) { + u64 disk_bytenr; btrfs_item_key_to_cpu(buf, &key, i); if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) continue; @@ -427,18 +429,19 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (btrfs_file_extent_type(buf, fi) == BTRFS_FILE_EXTENT_INLINE) continue; - disk_blocknr = btrfs_file_extent_disk_blocknr(buf, fi); - if (disk_blocknr == 0) + disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); + if (disk_bytenr == 0) continue; - ret = btrfs_inc_extent_ref(trans, root, disk_blocknr, - btrfs_file_extent_disk_num_blocks(buf, fi)); + ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, + btrfs_file_extent_disk_num_bytes(buf, fi)); if (ret) { faili = i; goto fail; } } else { - blocknr 
= btrfs_node_blockptr(buf, i); - ret = btrfs_inc_extent_ref(trans, root, blocknr, 1); + bytenr = btrfs_node_blockptr(buf, i); + ret = btrfs_inc_extent_ref(trans, root, bytenr, + btrfs_level_size(root, level - 1)); if (ret) { faili = i; goto fail; @@ -449,8 +452,8 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, fail: WARN_ON(1); for (i =0; i < faili; i++) { - if (leaf) { - u64 disk_blocknr; + if (level == 0) { + u64 disk_bytenr; btrfs_item_key_to_cpu(buf, &key, i); if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) continue; @@ -459,16 +462,17 @@ fail: if (btrfs_file_extent_type(buf, fi) == BTRFS_FILE_EXTENT_INLINE) continue; - disk_blocknr = btrfs_file_extent_disk_blocknr(buf, fi); - if (disk_blocknr == 0) + disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); + if (disk_bytenr == 0) continue; - err = btrfs_free_extent(trans, root, disk_blocknr, - btrfs_file_extent_disk_num_blocks(buf, + err = btrfs_free_extent(trans, root, disk_bytenr, + btrfs_file_extent_disk_num_bytes(buf, fi), 0); BUG_ON(err); } else { - blocknr = btrfs_node_blockptr(buf, i); - err = btrfs_free_extent(trans, root, blocknr, 1, 0); + bytenr = btrfs_node_blockptr(buf, i); + err = btrfs_free_extent(trans, root, bytenr, + btrfs_level_size(root, level - 1), 0); BUG_ON(err); } } @@ -558,31 +562,31 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 blocknr, u64 num, int alloc, int mark_free, - int data) + u64 bytenr, u64 num_bytes, int alloc, + int mark_free, int data) { struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; - u64 total = num; + u64 total = num_bytes; u64 old_val; - u64 block_in_group; + u64 byte_in_group; u64 start; u64 end; while(total) { - cache = btrfs_lookup_block_group(info, blocknr); + cache = btrfs_lookup_block_group(info, bytenr); if (!cache) { return -1; } - block_in_group = blocknr - 
cache->key.objectid; - WARN_ON(block_in_group > cache->key.offset); + byte_in_group = bytenr - cache->key.objectid; + WARN_ON(byte_in_group > cache->key.offset); start = cache->key.objectid; end = start + cache->key.offset - 1; set_extent_bits(&info->block_group_cache, start, end, BLOCK_GROUP_DIRTY, GFP_NOFS); old_val = btrfs_block_group_used(&cache->item); - num = min(total, cache->key.offset - block_in_group); + num_bytes = min(total, cache->key.offset - byte_in_group); if (alloc) { if (cache->data != data && old_val < (cache->key.offset >> 1)) { @@ -608,18 +612,18 @@ static int update_block_group(struct btrfs_trans_handle *trans, start, end, bit_to_set, GFP_NOFS); } - old_val += num; + old_val += num_bytes; } else { - old_val -= num; + old_val -= num_bytes; if (mark_free) { set_extent_dirty(&info->free_space_cache, - blocknr, blocknr + num - 1, + bytenr, bytenr + num_bytes - 1, GFP_NOFS); } } btrfs_set_block_group_used(&cache->item, old_val); - total -= num; - blocknr += num; + total -= num_bytes; + bytenr += num_bytes; } return 0; } @@ -701,13 +705,14 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct return 0; } -static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) +static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, + int pending) { int err = 0; struct extent_buffer *buf; if (!pending) { - buf = btrfs_find_tree_block(root, blocknr); + buf = btrfs_find_tree_block(root, bytenr, num_bytes); if (buf) { if (btrfs_buffer_uptodate(buf)) { u64 transid = @@ -720,10 +725,11 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) free_extent_buffer(buf); } set_extent_dirty(&root->fs_info->pinned_extents, - blocknr, blocknr, GFP_NOFS); + bytenr, bytenr + num_bytes - 1, GFP_NOFS); } else { set_extent_bits(&root->fs_info->pending_del, - blocknr, blocknr, EXTENT_LOCKED, GFP_NOFS); + bytenr, bytenr + num_bytes - 1, + EXTENT_LOCKED, GFP_NOFS); } BUG_ON(err < 0); return 0; @@ 
-733,7 +739,7 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) * remove an extent from the root, returns 0 on success */ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u64 num_blocks, int pin, + *root, u64 bytenr, u64 num_bytes, int pin, int mark_free) { struct btrfs_path *path; @@ -745,9 +751,9 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_extent_item *ei; u32 refs; - key.objectid = blocknr; + key.objectid = bytenr; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); - key.offset = num_blocks; + key.offset = num_bytes; path = btrfs_alloc_path(); if (!path) @@ -768,28 +774,29 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_mark_buffer_dirty(leaf); if (refs == 0) { - u64 super_blocks_used, root_blocks_used; + u64 super_used; + u64 root_used; if (pin) { - ret = pin_down_block(root, blocknr, 0); + ret = pin_down_bytes(root, bytenr, num_bytes, 0); BUG_ON(ret); } /* block accounting for super block */ - super_blocks_used = btrfs_super_blocks_used(&info->super_copy); - btrfs_set_super_blocks_used(&info->super_copy, - super_blocks_used - num_blocks); + super_used = btrfs_super_bytes_used(&info->super_copy); + btrfs_set_super_bytes_used(&info->super_copy, + super_used - num_bytes); /* block accounting for root item */ - root_blocks_used = btrfs_root_used(&root->root_item); + root_used = btrfs_root_used(&root->root_item); btrfs_set_root_used(&root->root_item, - root_blocks_used - num_blocks); + root_used - num_bytes); ret = btrfs_del_item(trans, extent_root, path); if (ret) { return ret; } - ret = update_block_group(trans, root, blocknr, num_blocks, 0, + ret = update_block_group(trans, root, bytenr, num_bytes, 0, mark_free, 0); BUG_ON(ret); } @@ -836,17 +843,18 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct * remove an extent from the root, returns 0 on success */ int 
btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u64 num_blocks, int pin) + *root, u64 bytenr, u64 num_bytes, int pin) { struct btrfs_root *extent_root = root->fs_info->extent_root; int pending_ret; int ret; + WARN_ON(num_bytes < root->sectorsize); if (root == extent_root) { - pin_down_block(root, blocknr, 1); + pin_down_bytes(root, bytenr, num_bytes, 1); return 0; } - ret = __free_extent(trans, root, blocknr, num_blocks, pin, pin == 0); + ret = __free_extent(trans, root, bytenr, num_bytes, pin, pin == 0); pending_ret = del_pending_extents(trans, root->fs_info->extent_root); return ret ? ret : pending_ret; } @@ -860,8 +868,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root * Any available blocks before search_start are skipped. */ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *orig_root, u64 num_blocks, u64 empty_size, - u64 search_start, u64 search_end, u64 hint_block, + *orig_root, u64 num_bytes, u64 empty_size, + u64 search_start, u64 search_end, u64 hint_byte, struct btrfs_key *ins, u64 exclude_start, u64 exclude_nr, int data) { @@ -870,30 +878,29 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int ret; u64 hole_size = 0; int slot = 0; - u64 last_block = 0; + u64 last_byte = 0; u64 orig_search_start = search_start; int start_found; struct extent_buffer *l; struct btrfs_root * root = orig_root->fs_info->extent_root; struct btrfs_fs_info *info = root->fs_info; - int total_needed = num_blocks; + u64 total_needed = num_bytes; int level; struct btrfs_block_group_cache *block_group; int full_scan = 0; int wrapped = 0; - u64 cached_search_start = 0; - WARN_ON(num_blocks < 1); + WARN_ON(num_bytes < root->sectorsize); btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); level = btrfs_header_level(root->node); if (search_end == (u64)-1) - search_end = btrfs_super_total_blocks(&info->super_copy); - if (hint_block) { - block_group = 
btrfs_lookup_block_group(info, hint_block); + search_end = btrfs_super_total_bytes(&info->super_copy); + if (hint_byte) { + block_group = btrfs_lookup_block_group(info, hint_byte); block_group = btrfs_find_block_group(root, block_group, - hint_block, data, 1); + hint_byte, data, 1); } else { block_group = btrfs_find_block_group(root, trans->block_group, 0, @@ -906,7 +913,6 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root check_failed: search_start = find_search_start(root, &block_group, search_start, total_needed, data); - cached_search_start = search_start; btrfs_init_path(path); ins->objectid = search_start; @@ -958,27 +964,27 @@ check_failed: start_found = 1; goto check_pending; } - ins->objectid = last_block > search_start ? - last_block : search_start; + ins->objectid = last_byte > search_start ? + last_byte : search_start; ins->offset = search_end - ins->objectid; goto check_pending; } btrfs_item_key_to_cpu(l, &key, slot); - if (key.objectid >= search_start && key.objectid > last_block && + if (key.objectid >= search_start && key.objectid > last_byte && start_found) { - if (last_block < search_start) - last_block = search_start; - hole_size = key.objectid - last_block; - if (hole_size >= num_blocks) { - ins->objectid = last_block; + if (last_byte < search_start) + last_byte = search_start; + hole_size = key.objectid - last_byte; + if (hole_size >= num_bytes) { + ins->objectid = last_byte; ins->offset = hole_size; goto check_pending; } } if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) { if (!start_found) { - last_block = key.objectid; + last_byte = key.objectid; start_found = 1; } goto next; @@ -986,9 +992,9 @@ check_failed: start_found = 1; - last_block = key.objectid + key.offset; + last_byte = key.objectid + key.offset; - if (!full_scan && last_block >= block_group->key.objectid + + if (!full_scan && last_byte >= block_group->key.objectid + block_group->key.offset) { btrfs_release_path(root, path); search_start = 
block_group->key.objectid + @@ -1006,20 +1012,20 @@ check_pending: btrfs_release_path(root, path); BUG_ON(ins->objectid < search_start); - if (ins->objectid + num_blocks >= search_end) + if (ins->objectid + num_bytes >= search_end) goto enospc; if (test_range_bit(&info->extent_ins, ins->objectid, - ins->objectid + num_blocks -1, EXTENT_LOCKED, 0)) { - search_start = ins->objectid + num_blocks; + ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) { + search_start = ins->objectid + num_bytes; goto new_group; } if (test_range_bit(&info->pinned_extents, ins->objectid, - ins->objectid + num_blocks -1, EXTENT_DIRTY, 0)) { - search_start = ins->objectid + num_blocks; + ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) { + search_start = ins->objectid + num_bytes; goto new_group; } - if (exclude_nr > 0 && (ins->objectid + num_blocks > exclude_start && + if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start && ins->objectid < exclude_start + exclude_nr)) { search_start = exclude_start + exclude_nr; goto new_group; @@ -1029,12 +1035,12 @@ check_pending: if (block_group) trans->block_group = block_group; } - ins->offset = num_blocks; + ins->offset = num_bytes; btrfs_free_path(path); return 0; new_group: - if (search_start + num_blocks >= search_end) { + if (search_start + num_bytes >= search_end) { enospc: search_start = orig_search_start; if (full_scan) { @@ -1069,12 +1075,12 @@ error: */ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, - u64 num_blocks, u64 empty_size, u64 hint_block, + u64 num_bytes, u64 empty_size, u64 hint_byte, u64 search_end, struct btrfs_key *ins, int data) { int ret; int pending_ret; - u64 super_blocks_used, root_blocks_used; + u64 super_used, root_used; u64 search_start = 0; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; @@ -1083,9 +1089,9 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, btrfs_set_stack_extent_refs(&extent_item, 1); 
btrfs_set_stack_extent_owner(&extent_item, owner); - WARN_ON(num_blocks < 1); - ret = find_free_extent(trans, root, num_blocks, empty_size, - search_start, search_end, hint_block, ins, + WARN_ON(num_bytes < root->sectorsize); + ret = find_free_extent(trans, root, num_bytes, empty_size, + search_start, search_end, hint_byte, ins, trans->alloc_exclude_start, trans->alloc_exclude_nr, data); BUG_ON(ret); @@ -1093,21 +1099,18 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, return ret; /* block accounting for super block */ - super_blocks_used = btrfs_super_blocks_used(&info->super_copy); - btrfs_set_super_blocks_used(&info->super_copy, super_blocks_used + - num_blocks); + super_used = btrfs_super_bytes_used(&info->super_copy); + btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes); /* block accounting for root item */ - root_blocks_used = btrfs_root_used(&root->root_item); - btrfs_set_root_used(&root->root_item, root_blocks_used + - num_blocks); + root_used = btrfs_root_used(&root->root_item); + btrfs_set_root_used(&root->root_item, root_used + num_bytes); clear_extent_dirty(&root->fs_info->free_space_cache, ins->objectid, ins->objectid + ins->offset - 1, GFP_NOFS); if (root == extent_root) { - BUG_ON(num_blocks != 1); set_extent_bits(&root->fs_info->extent_ins, ins->objectid, ins->objectid + ins->offset - 1, EXTENT_LOCKED, GFP_NOFS); @@ -1146,7 +1149,8 @@ update_block: * returns the tree buffer or NULL. 
*/ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 hint, + struct btrfs_root *root, + u32 blocksize, u64 hint, u64 empty_size) { struct btrfs_key ins; @@ -1154,14 +1158,15 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct extent_buffer *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - 1, empty_size, hint, (u64)-1, &ins, 0); + blocksize, empty_size, hint, + (u64)-1, &ins, 0); if (ret) { BUG_ON(ret > 0); return ERR_PTR(ret); } - buf = btrfs_find_create_tree_block(root, ins.objectid); + buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize); if (!buf) { - btrfs_free_extent(trans, root, ins.objectid, 1, 0); + btrfs_free_extent(trans, root, ins.objectid, blocksize, 0); return ERR_PTR(-ENOMEM); } btrfs_set_buffer_uptodate(buf); @@ -1191,7 +1196,7 @@ static int drop_leaf_ref(struct btrfs_trans_handle *trans, BUG_ON(!btrfs_is_leaf(leaf)); nritems = btrfs_header_nritems(leaf); for (i = 0; i < nritems; i++) { - u64 disk_blocknr; + u64 disk_bytenr; btrfs_item_key_to_cpu(leaf, &key, i); if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) @@ -1204,11 +1209,11 @@ static int drop_leaf_ref(struct btrfs_trans_handle *trans, * FIXME make sure to insert a trans record that * repeats the snapshot del on crash */ - disk_blocknr = btrfs_file_extent_disk_blocknr(leaf, fi); - if (disk_blocknr == 0) + disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); + if (disk_bytenr == 0) continue; - ret = btrfs_free_extent(trans, root, disk_blocknr, - btrfs_file_extent_disk_num_blocks(leaf, fi), 0); + ret = btrfs_free_extent(trans, root, disk_bytenr, + btrfs_file_extent_disk_num_bytes(leaf, fi), 0); BUG_ON(ret); } return 0; @@ -1219,19 +1224,23 @@ static void reada_walk_down(struct btrfs_root *root, { int i; u32 nritems; - u64 blocknr; + u64 bytenr; int ret; u32 refs; + int level; + u32 blocksize; nritems = btrfs_header_nritems(node); + level = 
btrfs_header_level(node); for (i = 0; i < nritems; i++) { - blocknr = btrfs_node_blockptr(node, i); - ret = lookup_extent_ref(NULL, root, blocknr, 1, &refs); + bytenr = btrfs_node_blockptr(node, i); + blocksize = btrfs_level_size(root, level - 1); + ret = lookup_extent_ref(NULL, root, bytenr, blocksize, &refs); BUG_ON(ret); if (refs != 1) continue; mutex_unlock(&root->fs_info->fs_mutex); - ret = readahead_tree_block(root, blocknr); + ret = readahead_tree_block(root, bytenr, blocksize); cond_resched(); mutex_lock(&root->fs_info->fs_mutex); if (ret) @@ -1248,15 +1257,16 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root { struct extent_buffer *next; struct extent_buffer *cur; - u64 blocknr; + u64 bytenr; + u32 blocksize; int ret; u32 refs; WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); ret = lookup_extent_ref(trans, root, - extent_buffer_blocknr(path->nodes[*level]), - 1, &refs); + path->nodes[*level]->start, + path->nodes[*level]->len, &refs); BUG_ON(ret); if (refs > 1) goto out; @@ -1283,30 +1293,33 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(ret); break; } - blocknr = btrfs_node_blockptr(cur, path->slots[*level]); - ret = lookup_extent_ref(trans, root, blocknr, 1, &refs); + bytenr = btrfs_node_blockptr(cur, path->slots[*level]); + blocksize = btrfs_level_size(root, *level - 1); + ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs); BUG_ON(ret); if (refs != 1) { path->slots[*level]++; - ret = btrfs_free_extent(trans, root, blocknr, 1, 1); + ret = btrfs_free_extent(trans, root, bytenr, + blocksize, 1); BUG_ON(ret); continue; } - next = btrfs_find_tree_block(root, blocknr); + next = btrfs_find_tree_block(root, bytenr, blocksize); if (!next || !btrfs_buffer_uptodate(next)) { free_extent_buffer(next); mutex_unlock(&root->fs_info->fs_mutex); - next = read_tree_block(root, blocknr); + next = read_tree_block(root, bytenr, blocksize); mutex_lock(&root->fs_info->fs_mutex); /* we 
dropped the lock, check one more time */ - ret = lookup_extent_ref(trans, root, blocknr, 1, &refs); + ret = lookup_extent_ref(trans, root, bytenr, + blocksize, &refs); BUG_ON(ret); if (refs != 1) { path->slots[*level]++; free_extent_buffer(next); ret = btrfs_free_extent(trans, root, - blocknr, 1, 1); + bytenr, blocksize, 1); BUG_ON(ret); continue; } @@ -1321,8 +1334,8 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root out: WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); - ret = btrfs_free_extent(trans, root, - extent_buffer_blocknr(path->nodes[*level]), 1, 1); + ret = btrfs_free_extent(trans, root, path->nodes[*level]->start, + path->nodes[*level]->len, 1); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; @@ -1359,8 +1372,8 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } else { ret = btrfs_free_extent(trans, root, - extent_buffer_blocknr(path->nodes[*level]), - 1, 1); + path->nodes[*level]->start, + path->nodes[*level]->len, 1); BUG_ON(ret); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; @@ -1476,16 +1489,12 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; - u64 group_size_blocks; block_group_cache = &info->block_group_cache; - group_size_blocks = BTRFS_BLOCK_GROUP_SIZE >> - info->sb->s_blocksize_bits; - root = info->extent_root; key.objectid = 0; - key.offset = group_size_blocks; + key.offset = BTRFS_BLOCK_GROUP_SIZE; btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); path = btrfs_alloc_path(); @@ -1532,7 +1541,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) (u64)cache); if (key.objectid >= - btrfs_super_total_blocks(&info->super_copy)) + btrfs_super_total_bytes(&info->super_copy)) break; } diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index e081558d52ff..f658703c42e6 100644 --- a/fs/btrfs/extent_map.c +++ 
b/fs/btrfs/extent_map.c @@ -1963,18 +1963,27 @@ static inline struct page *extent_buffer_page(struct extent_buffer *eb, int i) struct page *p; if (i == 0) return eb->first_page; + i += eb->start >> PAGE_CACHE_SHIFT; + if (eb->last_page && eb->last_page->index == i) + return eb->last_page; + p = find_get_page(eb->first_page->mapping, i); page_cache_release(p); + eb->last_page = p; return p; } +static inline unsigned long num_extent_pages(u64 start, u64 len) +{ + return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - + (start >> PAGE_CACHE_SHIFT); +} struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, u64 start, unsigned long len, gfp_t mask) { - unsigned long num_pages = ((start + len - 1) >> PAGE_CACHE_SHIFT) - - (start >> PAGE_CACHE_SHIFT) + 1; + unsigned long num_pages = num_extent_pages(start, len); unsigned long i; unsigned long index = start >> PAGE_CACHE_SHIFT; struct extent_buffer *eb; @@ -1986,7 +1995,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, if (!eb || IS_ERR(eb)) return NULL; - eb->alloc_addr = __builtin_return_address(0); + eb->alloc_addr = (unsigned long)__builtin_return_address(0); eb->start = start; eb->len = len; atomic_set(&eb->refs, 1); @@ -1994,6 +2003,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, for (i = 0; i < num_pages; i++, index++) { p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); if (!p) { + WARN_ON(1); /* make sure the free only frees the pages we've * grabbed a reference on */ @@ -2021,8 +2031,7 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, u64 start, unsigned long len, gfp_t mask) { - unsigned long num_pages = ((start + len - 1) >> PAGE_CACHE_SHIFT) - - (start >> PAGE_CACHE_SHIFT) + 1; + unsigned long num_pages = num_extent_pages(start, len); unsigned long i; unsigned long index = start >> PAGE_CACHE_SHIFT; struct extent_buffer *eb; @@ -2033,7 +2042,7 @@ struct extent_buffer 
*find_extent_buffer(struct extent_map_tree *tree, if (!eb || IS_ERR(eb)) return NULL; - eb->alloc_addr = __builtin_return_address(0); + eb->alloc_addr = (unsigned long)__builtin_return_address(0); eb->start = start; eb->len = len; atomic_set(&eb->refs, 1); @@ -2070,8 +2079,7 @@ void free_extent_buffer(struct extent_buffer *eb) if (!atomic_dec_and_test(&eb->refs)) return; - num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - - (eb->start >> PAGE_CACHE_SHIFT) + 1; + num_pages = num_extent_pages(eb->start, eb->len); if (eb->first_page) page_cache_release(eb->first_page); @@ -2094,8 +2102,7 @@ int clear_extent_buffer_dirty(struct extent_map_tree *tree, u64 end = start + eb->len - 1; set = clear_extent_dirty(tree, start, end, GFP_NOFS); - num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - - (eb->start >> PAGE_CACHE_SHIFT) + 1; + num_pages = num_extent_pages(eb->start, eb->len); for (i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); @@ -2145,8 +2152,7 @@ int set_extent_buffer_uptodate(struct extent_map_tree *tree, struct page *page; unsigned long num_pages; - num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - - (eb->start >> PAGE_CACHE_SHIFT) + 1; + num_pages = num_extent_pages(eb->start, eb->len); set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); @@ -2191,8 +2197,7 @@ int read_extent_buffer_pages(struct extent_map_tree *tree, return 0; } - num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - - (eb->start >> PAGE_CACHE_SHIFT) + 1; + num_pages = num_extent_pages(eb->start, eb->len); for (i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); if (PageUptodate(page)) { @@ -2267,14 +2272,14 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, } EXPORT_SYMBOL(read_extent_buffer); -int map_extent_buffer(struct extent_buffer *eb, unsigned long start, - unsigned long min_len, - char **token, char **map, - unsigned long *map_start, - unsigned long *map_len, int km) +static int 
__map_extent_buffer(struct extent_buffer *eb, unsigned long start, + unsigned long min_len, char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km) { size_t offset = start & (PAGE_CACHE_SIZE - 1); char *kaddr; + struct page *p; size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; unsigned long end_i = (start_offset + start + min_len) >> @@ -2283,21 +2288,59 @@ int map_extent_buffer(struct extent_buffer *eb, unsigned long start, if (i != end_i) return -EINVAL; - WARN_ON(start > eb->len); + if (start >= eb->len) { + printk("bad start in map eb start %Lu len %lu caller start %lu min %lu\n", eb->start, eb->len, start, min_len); + WARN_ON(1); + } if (i == 0) { offset = start_offset; *map_start = 0; } else { + offset = 0; *map_start = (i << PAGE_CACHE_SHIFT) - start_offset; } - kaddr = kmap_atomic(extent_buffer_page(eb, i), km); + p = extent_buffer_page(eb, i); + WARN_ON(!PageUptodate(p)); + kaddr = kmap_atomic(p, km); *token = kaddr; *map = kaddr + offset; *map_len = PAGE_CACHE_SIZE - offset; return 0; } + +int map_extent_buffer(struct extent_buffer *eb, unsigned long start, + unsigned long min_len, + char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km) +{ + int err; + int save = 0; + if (eb->map_token) { + if (start >= eb->map_start && + start + min_len <= eb->map_start + eb->map_len) { + *token = eb->map_token; + *map = eb->kaddr; + *map_start = eb->map_start; + *map_len = eb->map_len; + return 0; + } + unmap_extent_buffer(eb, eb->map_token, km); + eb->map_token = NULL; + save = 1; + } + err = __map_extent_buffer(eb, start, min_len, token, map, + map_start, map_len, km); + if (!err && save) { + eb->map_token = *token; + eb->kaddr = *map; + eb->map_start = *map_start; + eb->map_len = *map_len; + } + return err; +} EXPORT_SYMBOL(map_extent_buffer); void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) @@ -2574,7 
+2617,6 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, ((unsigned long)PAGE_CACHE_SIZE - 1); src_off_in_page = src_end & ((unsigned long)PAGE_CACHE_SIZE - 1); - if (src_i == 0) src_off_in_page += start_offset; if (dst_i == 0) @@ -2582,14 +2624,13 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, cur = min(len, src_off_in_page + 1); cur = min(cur, dst_off_in_page + 1); - move_pages(extent_buffer_page(dst, dst_i), extent_buffer_page(dst, src_i), dst_off_in_page - cur + 1, src_off_in_page - cur + 1, cur); - dst_end -= cur - 1; - src_end -= cur - 1; + dst_end -= cur; + src_end -= cur; len -= cur; } } diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 75dc600dc0f9..4ef8bdd68348 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -70,7 +70,12 @@ struct extent_buffer { struct list_head list; struct list_head leak_list; struct page *first_page; + struct page *last_page; unsigned long alloc_addr; + char *map_token; + char *kaddr; + unsigned long map_start; + unsigned long map_len; }; typedef struct extent_map *(get_extent_t)(struct inode *inode, @@ -147,11 +152,6 @@ static inline void extent_buffer_get(struct extent_buffer *eb) atomic_inc(&eb->refs); } -static inline u64 extent_buffer_blocknr(struct extent_buffer *eb) -{ - return eb->start / 4096; -} - int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, unsigned long start, unsigned long len); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 226f6d028c3f..9a65e97a4e28 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -27,8 +27,8 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 pos, - u64 offset, u64 disk_num_blocks, - u64 num_blocks) + u64 offset, u64 disk_num_bytes, + u64 num_bytes) { int ret = 0; struct btrfs_file_extent_item *item; @@ -50,10 +50,10 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, leaf = 
path->nodes[0]; item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_disk_blocknr(leaf, item, offset); - btrfs_set_file_extent_disk_num_blocks(leaf, item, disk_num_blocks); + btrfs_set_file_extent_disk_bytenr(leaf, item, offset); + btrfs_set_file_extent_disk_num_bytes(leaf, item, disk_num_bytes); btrfs_set_file_extent_offset(leaf, item, 0); - btrfs_set_file_extent_num_blocks(leaf, item, num_blocks); + btrfs_set_file_extent_num_bytes(leaf, item, num_bytes); btrfs_set_file_extent_generation(leaf, item, trans->transid); btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG); btrfs_mark_buffer_dirty(leaf); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1734ca695555..844d8807e44a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -120,9 +120,9 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); ptr = btrfs_file_extent_inline_start(ei); - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page, KM_USER1); write_extent_buffer(leaf, kaddr + page_offset, ptr, size); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr, KM_USER1); btrfs_mark_buffer_dirty(leaf); fail: btrfs_free_path(path); @@ -142,11 +142,12 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, struct inode *inode = file->f_path.dentry->d_inode; struct extent_map *em; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - u64 hint_block; - u64 num_blocks; + u64 hint_byte; + u64 num_bytes; u64 start_pos; u64 end_of_last_block; u64 end_pos = pos + write_bytes; + u32 inline_size; loff_t isize = i_size_read(inode); em = alloc_extent_map(GFP_NOFS); @@ -156,11 +157,12 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, em->bdev = inode->i_sb->s_bdev; start_pos = pos & ~((u64)root->sectorsize - 1); - num_blocks = (write_bytes + pos - start_pos + root->sectorsize - 1) >> - inode->i_blkbits; + num_bytes = 
(write_bytes + pos - start_pos + + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); down_read(&BTRFS_I(inode)->root->snap_sem); - end_of_last_block = start_pos + (num_blocks << inode->i_blkbits) - 1; + end_of_last_block = start_pos + num_bytes - 1; + lock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); @@ -169,8 +171,8 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, goto out_unlock; } btrfs_set_trans_block_group(trans, inode); - inode->i_blocks += num_blocks << 3; - hint_block = 0; + inode->i_blocks += num_bytes >> 9; + hint_byte = 0; if ((end_of_last_block & 4095) == 0) { printk("strange end of last %Lu %zu %Lu\n", start_pos, write_bytes, end_of_last_block); @@ -191,11 +193,10 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, err = btrfs_drop_extents(trans, root, inode, last_pos_in_file, last_pos_in_file + hole_size, - &hint_block); + &hint_byte); if (err) goto failed; - hole_size >>= inode->i_blkbits; err = btrfs_insert_file_extent(trans, root, inode->i_ino, last_pos_in_file, @@ -209,8 +210,10 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, * either allocate an extent for the new bytes or setup the key * to show we are doing inline data in the extent */ + inline_size = end_pos - start_pos; if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || - pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { + inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root) || + inline_size >= PAGE_CACHE_SIZE) { u64 last_end; for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; @@ -224,10 +227,9 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, } else { struct page *p = pages[0]; /* step one, delete the existing extents in this range */ - /* FIXME blocksize != pagesize */ err = btrfs_drop_extents(trans, root, inode, start_pos, (pos + write_bytes + root->sectorsize -1) & - 
~((u64)root->sectorsize - 1), &hint_block); + ~((u64)root->sectorsize - 1), &hint_byte); if (err) goto failed; @@ -283,7 +285,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) */ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - u64 start, u64 end, u64 *hint_block) + u64 start, u64 end, u64 *hint_byte) { int ret; struct btrfs_key key; @@ -346,8 +348,7 @@ next_slot: found_type = btrfs_file_extent_type(leaf, extent); if (found_type == BTRFS_FILE_EXTENT_REG) { extent_end = key.offset + - (btrfs_file_extent_num_blocks(leaf, extent) << - inode->i_blkbits); + btrfs_file_extent_num_bytes(leaf, extent); found_extent = 1; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { struct btrfs_item *item; @@ -386,17 +387,17 @@ next_slot: if (end < extent_end && end >= key.offset) { if (found_extent) { - u64 disk_blocknr = - btrfs_file_extent_disk_blocknr(leaf,extent); - u64 disk_num_blocks = - btrfs_file_extent_disk_num_blocks(leaf, + u64 disk_bytenr = + btrfs_file_extent_disk_bytenr(leaf, extent); + u64 disk_num_bytes = + btrfs_file_extent_disk_num_bytes(leaf, extent); read_extent_buffer(leaf, &old, (unsigned long)extent, sizeof(old)); - if (disk_blocknr != 0) { + if (disk_bytenr != 0) { ret = btrfs_inc_extent_ref(trans, root, - disk_blocknr, disk_num_blocks); + disk_bytenr, disk_num_bytes); BUG_ON(ret); } } @@ -410,21 +411,19 @@ next_slot: keep = 1; WARN_ON(start & (root->sectorsize - 1)); if (found_extent) { - new_num = (start - key.offset) >> - inode->i_blkbits; - old_num = btrfs_file_extent_num_blocks(leaf, - extent); - *hint_block = - btrfs_file_extent_disk_blocknr(leaf, - extent); - if (btrfs_file_extent_disk_blocknr(leaf, - extent)) { + new_num = start - key.offset; + old_num = btrfs_file_extent_num_bytes(leaf, + extent); + *hint_byte = + btrfs_file_extent_disk_bytenr(leaf, + extent); + if (btrfs_file_extent_disk_bytenr(leaf, + extent)) { inode->i_blocks -= - (old_num - new_num) << 3; + (old_num 
- new_num) >> 9; } - btrfs_set_file_extent_num_blocks(leaf, - extent, - new_num); + btrfs_set_file_extent_num_bytes(leaf, extent, + new_num); btrfs_mark_buffer_dirty(leaf); } else { WARN_ON(1); @@ -432,33 +431,32 @@ next_slot: } /* delete the entire extent */ if (!keep) { - u64 disk_blocknr = 0; - u64 disk_num_blocks = 0; - u64 extent_num_blocks = 0; + u64 disk_bytenr = 0; + u64 disk_num_bytes = 0; + u64 extent_num_bytes = 0; if (found_extent) { - disk_blocknr = - btrfs_file_extent_disk_blocknr(leaf, + disk_bytenr = + btrfs_file_extent_disk_bytenr(leaf, extent); - disk_num_blocks = - btrfs_file_extent_disk_num_blocks(leaf, - extent); - extent_num_blocks = - btrfs_file_extent_num_blocks(leaf, - extent); - *hint_block = - btrfs_file_extent_disk_blocknr(leaf, + disk_num_bytes = + btrfs_file_extent_disk_num_bytes(leaf, extent); + extent_num_bytes = + btrfs_file_extent_num_bytes(leaf, extent); + *hint_byte = + btrfs_file_extent_disk_bytenr(leaf, + extent); } ret = btrfs_del_item(trans, root, path); /* TODO update progress marker and return */ BUG_ON(ret); btrfs_release_path(root, path); extent = NULL; - if (found_extent && disk_blocknr != 0) { - inode->i_blocks -= extent_num_blocks << 3; + if (found_extent && disk_bytenr != 0) { + inode->i_blocks -= extent_num_bytes >> 9; ret = btrfs_free_extent(trans, root, - disk_blocknr, - disk_num_blocks, 0); + disk_bytenr, + disk_num_bytes, 0); } BUG_ON(ret); @@ -491,20 +489,19 @@ next_slot: (unsigned long)extent, sizeof(old)); btrfs_set_file_extent_offset(leaf, extent, - le64_to_cpu(old.offset) + - ((end - key.offset) >> inode->i_blkbits)); - WARN_ON(le64_to_cpu(old.num_blocks) < - (extent_end - end) >> inode->i_blkbits); - btrfs_set_file_extent_num_blocks(leaf, extent, - (extent_end - end) >> inode->i_blkbits); - + le64_to_cpu(old.offset) + end - key.offset); + WARN_ON(le64_to_cpu(old.num_bytes) < + (extent_end - end)); + btrfs_set_file_extent_num_bytes(leaf, extent, + extent_end - end); btrfs_set_file_extent_type(leaf, extent, 
BTRFS_FILE_EXTENT_REG); + btrfs_mark_buffer_dirty(path->nodes[0]); - if (le64_to_cpu(old.disk_blocknr) != 0) { + if (le64_to_cpu(old.disk_bytenr) != 0) { inode->i_blocks += - btrfs_file_extent_num_blocks(leaf, - extent) << 3; + btrfs_file_extent_num_bytes(leaf, + extent) >> 9; } ret = 0; goto out; @@ -531,12 +528,9 @@ static int prepare_pages(struct btrfs_root *root, unsigned long index = pos >> PAGE_CACHE_SHIFT; struct inode *inode = file->f_path.dentry->d_inode; int err = 0; - u64 num_blocks; u64 start_pos; start_pos = pos & ~((u64)root->sectorsize - 1); - num_blocks = (write_bytes + pos - start_pos + root->sectorsize - 1) >> - inode->i_blkbits; memset(pages, 0, num_pages * sizeof(struct page *)); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fbe2836364e0..d6b3a55ed8e0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -77,19 +77,19 @@ static int run_delalloc_range(struct inode *inode, u64 start, u64 end) struct btrfs_trans_handle *trans; struct btrfs_key ins; u64 alloc_hint = 0; - u64 num_blocks; + u64 num_bytes; int ret; - u64 blocksize = 1 << inode->i_blkbits; + u64 blocksize = root->sectorsize; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); BUG_ON(!trans); - num_blocks = (end - start + blocksize) & ~(blocksize - 1); + num_bytes = (end - start + blocksize) & ~(blocksize - 1); ret = btrfs_drop_extents(trans, root, inode, - start, start + num_blocks, &alloc_hint); - num_blocks = num_blocks >> inode->i_blkbits; - ret = btrfs_alloc_extent(trans, root, inode->i_ino, num_blocks, 0, + start, start + num_bytes, &alloc_hint); + + ret = btrfs_alloc_extent(trans, root, inode->i_ino, num_bytes, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) { WARN_ON(1); @@ -186,7 +186,8 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) zeroit: printk("btrfs csum failed ino %lu off %llu\n", page->mapping->host->i_ino, (unsigned long long)start); - memset(kaddr + offset, 1, end - 
start + 1); flush_dcache_page(page); + memset(kaddr + offset, 1, end - start + 1); + flush_dcache_page(page); kunmap_atomic(kaddr, KM_IRQ0); return 0; } @@ -547,7 +548,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; struct btrfs_file_extent_item *fi; u64 extent_start = 0; - u64 extent_num_blocks = 0; + u64 extent_num_bytes = 0; u64 item_end = 0; int found_extent; int del_item; @@ -593,8 +594,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_INLINE) { item_end += - btrfs_file_extent_num_blocks(leaf, fi) << - inode->i_blkbits; + btrfs_file_extent_num_bytes(leaf, fi); } } if (found_type == BTRFS_CSUM_ITEM_KEY) { @@ -626,28 +626,27 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_INLINE) { u64 num_dec; - extent_start = btrfs_file_extent_disk_blocknr(leaf, fi); + extent_start = btrfs_file_extent_disk_bytenr(leaf, fi); if (!del_item) { - u64 orig_num_blocks = - btrfs_file_extent_num_blocks(leaf, fi); - extent_num_blocks = inode->i_size - + u64 orig_num_bytes = + btrfs_file_extent_num_bytes(leaf, fi); + extent_num_bytes = inode->i_size - found_key.offset + root->sectorsize - 1; - extent_num_blocks >>= inode->i_blkbits; - btrfs_set_file_extent_num_blocks(leaf, fi, - extent_num_blocks); - num_dec = (orig_num_blocks - - extent_num_blocks) << 3; + btrfs_set_file_extent_num_bytes(leaf, fi, + extent_num_bytes); + num_dec = (orig_num_bytes - + extent_num_bytes) >> 9; if (extent_start != 0) { inode->i_blocks -= num_dec; } btrfs_mark_buffer_dirty(leaf); } else { - extent_num_blocks = - btrfs_file_extent_disk_num_blocks(leaf, - fi); + extent_num_bytes = + btrfs_file_extent_disk_num_bytes(leaf, + fi); /* FIXME blocksize != 4096 */ - num_dec = btrfs_file_extent_num_blocks(leaf, - fi) << 3; + num_dec = btrfs_file_extent_num_bytes(leaf, + fi) >> 9; if (extent_start != 0) { 
found_extent = 1; inode->i_blocks -= num_dec; @@ -664,7 +663,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, btrfs_release_path(root, path); if (found_extent) { ret = btrfs_free_extent(trans, root, extent_start, - extent_num_blocks, 0); + extent_num_bytes, 0); BUG_ON(ret); } } @@ -709,7 +708,8 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page, static int btrfs_truncate_page(struct address_space *mapping, loff_t from) { struct inode *inode = mapping->host; - unsigned blocksize = 1 << inode->i_blkbits; + struct btrfs_root *root = BTRFS_I(inode)->root; + u32 blocksize = root->sectorsize; pgoff_t index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); struct page *page; @@ -719,7 +719,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) if ((offset & (blocksize - 1)) == 0) goto out; - down_read(&BTRFS_I(inode)->root->snap_sem); + down_read(&root->snap_sem); ret = -ENOMEM; page = grab_cache_page(mapping, index); if (!page) @@ -778,8 +778,6 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) err = btrfs_drop_extents(trans, root, inode, pos, pos + hole_size, &alloc_hint); - hole_size >>= inode->i_blkbits; - err = btrfs_insert_file_extent(trans, root, inode->i_ino, pos, 0, 0, hole_size); btrfs_end_transaction(trans, root); @@ -1490,7 +1488,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, { int ret; int err = 0; - u64 blocknr; + u64 bytenr; u64 extent_start = 0; u64 extent_end = 0; u64 objectid = inode->i_ino; @@ -1540,10 +1538,6 @@ again: leaf = path->nodes[0]; item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - - blocknr = btrfs_file_extent_disk_blocknr(leaf, item); - blocknr += btrfs_file_extent_offset(leaf, item); - /* are we inside the extent that was found? 
*/ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); found_type = btrfs_key_type(&found_key); @@ -1556,8 +1550,7 @@ again: extent_start = found_key.offset; if (found_type == BTRFS_FILE_EXTENT_REG) { extent_end = extent_start + - (btrfs_file_extent_num_blocks(leaf, item) << - inode->i_blkbits); + btrfs_file_extent_num_bytes(leaf, item); err = 0; if (start < extent_start || start >= extent_end) { em->start = start; @@ -1570,17 +1563,18 @@ again: } goto not_found_em; } - if (btrfs_file_extent_disk_blocknr(leaf, item) == 0) { + bytenr = btrfs_file_extent_disk_bytenr(leaf, item); + if (bytenr == 0) { em->start = extent_start; em->end = extent_end - 1; em->block_start = EXTENT_MAP_HOLE; em->block_end = EXTENT_MAP_HOLE; goto insert; } - em->block_start = blocknr << inode->i_blkbits; + bytenr += btrfs_file_extent_offset(leaf, item); + em->block_start = bytenr; em->block_end = em->block_start + - (btrfs_file_extent_num_blocks(leaf, item) << - inode->i_blkbits) - 1; + btrfs_file_extent_num_bytes(leaf, item) - 1; em->start = extent_start; em->end = extent_end - 1; goto insert; @@ -1592,7 +1586,8 @@ again: size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf, path->slots[0])); - extent_end = extent_start | ((u64)root->sectorsize - 1); + extent_end = (extent_start + size) | + ((u64)root->sectorsize - 1); if (start < extent_start || start >= extent_end) { em->start = start; if (start < extent_start) { @@ -1617,8 +1612,10 @@ again: ptr = btrfs_file_extent_inline_start(item); map = kmap(page); read_extent_buffer(leaf, map + page_offset, ptr, size); + /* memset(map + page_offset + size, 0, root->sectorsize - (page_offset + size)); + */ flush_dcache_page(page); kunmap(page); set_extent_uptodate(em_tree, extent_start, @@ -1836,13 +1833,13 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); - leaf = btrfs_alloc_free_block(trans, root, 0, 0); + leaf = btrfs_alloc_free_block(trans, root, 
root->leafsize, 0, 0); if (IS_ERR(leaf)) return PTR_ERR(leaf); btrfs_set_header_nritems(leaf, 0); btrfs_set_header_level(leaf, 0); - btrfs_set_header_blocknr(leaf, extent_buffer_blocknr(leaf)); + btrfs_set_header_bytenr(leaf, leaf->start); btrfs_set_header_generation(leaf, trans->transid); btrfs_set_header_owner(leaf, root->root_key.objectid); write_extent_buffer(leaf, root->fs_info->fsid, @@ -1858,7 +1855,8 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) inode_item->nblocks = cpu_to_le64(1); inode_item->mode = cpu_to_le32(S_IFDIR | 0755); - btrfs_set_root_blocknr(&root_item, extent_buffer_blocknr(leaf)); + btrfs_set_root_bytenr(&root_item, leaf->start); + btrfs_set_root_level(&root_item, 0); btrfs_set_root_refs(&root_item, 1); btrfs_set_root_used(&root_item, 0); @@ -1971,8 +1969,8 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); - btrfs_set_root_blocknr(&new_root_item, - extent_buffer_blocknr(root->node)); + btrfs_set_root_bytenr(&new_root_item, root->node->start); + btrfs_set_root_level(&new_root_item, btrfs_header_level(root->node)); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &new_root_item); diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index a825ce078a54..9f8696c8a8e8 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -36,7 +36,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) u32 type; printk("leaf %llu total ptrs %d free space %d\n", - (unsigned long long)btrfs_header_blocknr(l), nr, + (unsigned long long)btrfs_header_bytenr(l), nr, btrfs_leaf_free_space(root, l)); for (i = 0 ; i < nr ; i++) { item = btrfs_item_nr(l, i); @@ -65,8 +65,8 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) break; case BTRFS_ROOT_ITEM_KEY: ri = btrfs_item_ptr(l, i, struct btrfs_root_item); - printk("\t\troot data blocknr 
%llu refs %u\n", - (unsigned long long)btrfs_disk_root_blocknr(l, ri), + printk("\t\troot data bytenr %llu refs %u\n", + (unsigned long long)btrfs_disk_root_bytenr(l, ri), btrfs_disk_root_refs(l, ri)); break; case BTRFS_EXTENT_ITEM_KEY: @@ -84,12 +84,12 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) btrfs_file_extent_inline_len(l, item)); break; } - printk("\t\textent data disk block %llu nr %llu\n", - (unsigned long long)btrfs_file_extent_disk_blocknr(l, fi), - (unsigned long long)btrfs_file_extent_disk_num_blocks(l, fi)); + printk("\t\textent data disk bytenr %llu nr %llu\n", + (unsigned long long)btrfs_file_extent_disk_bytenr(l, fi), + (unsigned long long)btrfs_file_extent_disk_num_bytes(l, fi)); printk("\t\textent data offset %llu nr %llu\n", (unsigned long long)btrfs_file_extent_offset(l, fi), - (unsigned long long)btrfs_file_extent_num_blocks(l, fi)); + (unsigned long long)btrfs_file_extent_num_bytes(l, fi)); break; case BTRFS_BLOCK_GROUP_ITEM_KEY: bi = btrfs_item_ptr(l, i, @@ -106,16 +106,18 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) int i; u32 nr; struct btrfs_key key; + int level; if (!c) return; nr = btrfs_header_nritems(c); - if (btrfs_is_leaf(c)) { + level = btrfs_header_level(c); + if (level == 0) { btrfs_print_leaf(root, c); return; } printk("node %llu level %d total ptrs %d free spc %u\n", - (unsigned long long)btrfs_header_blocknr(c), + (unsigned long long)btrfs_header_bytenr(c), btrfs_header_level(c), nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); for (i = 0; i < nr; i++) { @@ -129,7 +131,8 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) } for (i = 0; i < nr; i++) { struct extent_buffer *next = read_tree_block(root, - btrfs_node_blockptr(c, i)); + btrfs_node_blockptr(c, i), + btrfs_level_size(root, level - 1)); if (btrfs_is_leaf(next) && btrfs_header_level(c) != 1) BUG(); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 39a1435c68f1..5c4370f3a5b8 100644 --- 
a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -303,10 +303,12 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct btrfs_root *root = btrfs_sb(dentry->d_sb); struct btrfs_super_block *disk_super = &root->fs_info->super_copy; + int bits = dentry->d_sb->s_blocksize_bits; buf->f_namelen = BTRFS_NAME_LEN; - buf->f_blocks = btrfs_super_total_blocks(disk_super); - buf->f_bfree = buf->f_blocks - btrfs_super_blocks_used(disk_super); + buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; + buf->f_bfree = buf->f_blocks - + (btrfs_super_bytes_used(disk_super) >> bits); buf->f_bavail = buf->f_bfree; buf->f_bsize = dentry->d_sb->s_blocksize; buf->f_type = BTRFS_SUPER_MAGIC; diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 9654e90eec89..0bd1fd3d29df 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -42,14 +42,15 @@ static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf) static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf) { + return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_super_blocks_used(&fs->super_copy)); + (unsigned long long)btrfs_super_bytes_used(&fs->super_copy)); } static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_super_total_blocks(&fs->super_copy)); + (unsigned long long)btrfs_super_total_bytes(&fs->super_copy)); } static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 55289b71056e..60f61345a8d0 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -205,12 +205,13 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, btrfs_write_dirty_block_groups(trans, extent_root); while(1) { - old_extent_block = btrfs_root_blocknr(&extent_root->root_item); - if (old_extent_block == - extent_buffer_blocknr(extent_root->node)) + old_extent_block = 
btrfs_root_bytenr(&extent_root->root_item); + if (old_extent_block == extent_root->node->start) break; - btrfs_set_root_blocknr(&extent_root->root_item, - extent_buffer_blocknr(extent_root->node)); + btrfs_set_root_bytenr(&extent_root->root_item, + extent_root->node->start); + btrfs_set_root_level(&extent_root->root_item, + btrfs_header_level(extent_root->node)); ret = btrfs_update_root(trans, tree_root, &extent_root->root_key, &extent_root->root_item); @@ -284,8 +285,8 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); if (root->commit_root == root->node) { - WARN_ON(extent_buffer_blocknr(root->node) != - btrfs_root_blocknr(&root->root_item)); + WARN_ON(root->node->start != + btrfs_root_bytenr(&root->root_item)); free_extent_buffer(root->commit_root); root->commit_root = NULL; @@ -314,8 +315,10 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, root->commit_root = NULL; root->root_key.offset = root->fs_info->generation; - btrfs_set_root_blocknr(&root->root_item, - extent_buffer_blocknr(root->node)); + btrfs_set_root_bytenr(&root->root_item, + root->node->start); + btrfs_set_root_level(&root->root_item, + btrfs_header_level(root->node)); err = btrfs_insert_root(trans, root->fs_info->tree_root, &root->root_key, &root->root_item); @@ -407,8 +410,8 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, struct dirty_root *dirty; struct btrfs_trans_handle *trans; unsigned long nr; - u64 num_blocks; - u64 blocks_used; + u64 num_bytes; + u64 bytes_used; int ret = 0; int err; @@ -419,7 +422,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, dirty = list_entry(list->next, struct dirty_root, list); list_del_init(&dirty->list); - num_blocks = btrfs_root_used(&dirty->root->root_item); + num_bytes = btrfs_root_used(&dirty->root->root_item); root = dirty->latest_root; while(1) { @@ -446,12 +449,12 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, } BUG_ON(ret); - 
num_blocks -= btrfs_root_used(&dirty->root->root_item); - blocks_used = btrfs_root_used(&root->root_item); - if (num_blocks) { + num_bytes -= btrfs_root_used(&dirty->root->root_item); + bytes_used = btrfs_root_used(&root->root_item); + if (num_bytes) { record_root_in_trans(root); btrfs_set_root_used(&root->root_item, - blocks_used - num_blocks); + bytes_used - num_bytes); } ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key); if (ret) { @@ -560,7 +563,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_set_super_generation(&root->fs_info->super_copy, cur_trans->transid); btrfs_set_super_root(&root->fs_info->super_copy, - extent_buffer_blocknr(root->fs_info->tree_root->node)); + root->fs_info->tree_root->node->start); + btrfs_set_super_root_level(&root->fs_info->super_copy, + btrfs_header_level(root->fs_info->tree_root->node)); write_extent_buffer(root->fs_info->sb_buffer, &root->fs_info->super_copy, 0, diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index daf019afa0a1..3feac2f28628 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -27,13 +27,15 @@ static void reada_defrag(struct btrfs_root *root, { int i; u32 nritems; - u64 blocknr; + u64 bytenr; + u32 blocksize; int ret; + blocksize = btrfs_level_size(root, btrfs_header_level(node) - 1); nritems = btrfs_header_nritems(node); for (i = 0; i < nritems; i++) { - blocknr = btrfs_node_blockptr(node, i); - ret = readahead_tree_block(root, blocknr); + bytenr = btrfs_node_blockptr(node, i); + ret = readahead_tree_block(root, bytenr, blocksize); if (ret) break; } @@ -46,7 +48,7 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, { struct extent_buffer *next; struct extent_buffer *cur; - u64 blocknr; + u64 bytenr; int ret = 0; int is_extent = 0; @@ -80,10 +82,11 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, break; } - blocknr = btrfs_node_blockptr(cur, path->slots[*level]); + bytenr = btrfs_node_blockptr(cur, path->slots[*level]); 
if (cache_only) { - next = btrfs_find_tree_block(root, blocknr); + next = btrfs_find_tree_block(root, bytenr, + btrfs_level_size(root, *level - 1)); /* FIXME, test for defrag */ if (!next || !btrfs_buffer_uptodate(next)) { free_extent_buffer(next); @@ -91,7 +94,8 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, continue; } } else { - next = read_tree_block(root, blocknr); + next = read_tree_block(root, bytenr, + btrfs_level_size(root, *level - 1)); } ret = btrfs_cow_block(trans, root, next, path->nodes[*level], path->slots[*level], &next); -- cgit v1.2.3-70-g09d2 From 14048ed0c415b8729b194e92c16d31c61628d216 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:16:28 -0400 Subject: Btrfs: Cache extent buffer mappings Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 40 ++++++++++++++++++++++++++++++++++++---- fs/btrfs/extent_map.c | 15 +++++++++++++-- 2 files changed, 49 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4d05456ec32f..54c754dd9a14 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -436,10 +436,18 @@ static inline u##bits btrfs_##name(struct extent_buffer *eb, \ unsigned long map_len; \ unsigned long offset = (unsigned long)s + \ offsetof(type, member); \ - err = map_extent_buffer(eb, offset, \ + if (eb->map_token && offset >= eb->map_start && \ + offset + sizeof(((type *)0)->member) <= eb->map_start + \ + eb->map_len) { \ + kaddr = eb->kaddr; \ + map_start = eb->map_start; \ + err = 0; \ + } else { \ + err = map_extent_buffer(eb, offset, \ sizeof(((type *)0)->member), \ &map_token, &kaddr, \ &map_start, &map_len, KM_USER1); \ + } \ if (!err) { \ __le##bits *tmp = (__le##bits *)(kaddr + offset - \ map_start); \ @@ -464,10 +472,18 @@ static inline void btrfs_set_##name(struct extent_buffer *eb, \ int unmap_on_exit = (eb->map_token == NULL); \ unsigned long offset = (unsigned long)s + \ offsetof(type, member); \ - err = map_extent_buffer(eb, 
offset, \ + if (eb->map_token && offset >= eb->map_start && \ + offset + sizeof(((type *)0)->member) <= eb->map_start + \ + eb->map_len) { \ + kaddr = eb->kaddr; \ + map_start = eb->map_start; \ + err = 0; \ + } else { \ + err = map_extent_buffer(eb, offset, \ sizeof(((type *)0)->member), \ &map_token, &kaddr, \ &map_start, &map_len, KM_USER1); \ + } \ if (!err) { \ __le##bits *tmp = (__le##bits *)(kaddr + offset - \ map_start); \ @@ -490,10 +506,18 @@ static inline u##bits btrfs_##name(struct extent_buffer *eb) \ unsigned long map_len; \ unsigned long offset = offsetof(type, member); \ int unmap_on_exit = (eb->map_token == NULL); \ - err = map_extent_buffer(eb, offset, \ + if (eb->map_token && offset >= eb->map_start && \ + offset + sizeof(((type *)0)->member) <= eb->map_start + \ + eb->map_len) { \ + kaddr = eb->kaddr; \ + map_start = eb->map_start; \ + err = 0; \ + } else { \ + err = map_extent_buffer(eb, offset, \ sizeof(((type *)0)->member), \ &map_token, &kaddr, \ &map_start, &map_len, KM_USER1); \ + } \ if (!err) { \ __le##bits *tmp = (__le##bits *)(kaddr + offset - \ map_start); \ @@ -517,10 +541,18 @@ static inline void btrfs_set_##name(struct extent_buffer *eb, \ unsigned long map_len; \ unsigned long offset = offsetof(type, member); \ int unmap_on_exit = (eb->map_token == NULL); \ - err = map_extent_buffer(eb, offset, \ + if (eb->map_token && offset >= eb->map_start && \ + offset + sizeof(((type *)0)->member) <= eb->map_start + \ + eb->map_len) { \ + kaddr = eb->kaddr; \ + map_start = eb->map_start; \ + err = 0; \ + } else { \ + err = map_extent_buffer(eb, offset, \ sizeof(((type *)0)->member), \ &map_token, &kaddr, \ &map_start, &map_len, KM_USER1); \ + } \ if (!err) { \ __le##bits *tmp = (__le##bits *)(kaddr + offset - \ map_start); \ diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index f658703c42e6..ea6ee68ef53c 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -2037,6 +2037,7 @@ struct extent_buffer 
*find_extent_buffer(struct extent_map_tree *tree, struct extent_buffer *eb; struct page *p; struct address_space *mapping = tree->mapping; + int uptodate = 1; eb = __alloc_extent_buffer(mask); if (!eb || IS_ERR(eb)) @@ -2048,7 +2049,7 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, atomic_set(&eb->refs, 1); for (i = 0; i < num_pages; i++, index++) { - p = find_get_page(mapping, index); + p = find_lock_page(mapping, index); if (!p) { /* make sure the free only frees the pages we've * grabbed a reference on @@ -2060,7 +2061,12 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, set_page_extent_mapped(p); if (i == 0) eb->first_page = p; + if (!PageUptodate(p)) + uptodate = 0; + unlock_page(p); } + if (uptodate) + eb->flags |= EXTENT_UPTODATE; return eb; fail: free_extent_buffer(eb); @@ -2192,7 +2198,7 @@ int read_extent_buffer_pages(struct extent_map_tree *tree, if (eb->flags & EXTENT_UPTODATE) return 0; - if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, + if (0 && test_range_bit(tree, eb->start, eb->start + eb->len - 1, EXTENT_UPTODATE, 1)) { return 0; } @@ -2247,6 +2253,7 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, char *dst = (char *)dstv; size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + unsigned long num_pages = num_extent_pages(eb->start, eb->len); WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); @@ -2257,6 +2264,10 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, while(len > 0) { page = extent_buffer_page(eb, i); + if (!PageUptodate(page)) { + printk("page %lu not up to date i %lu, total %lu, len %lu\n", page->index, i, num_pages, eb->len); + WARN_ON(1); + } WARN_ON(!PageUptodate(page)); cur = min(len, (PAGE_CACHE_SIZE - offset)); -- cgit v1.2.3-70-g09d2 From 09e71a326341f40111400c88aaf0498ef622824b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 
16:17:04 -0400 Subject: Btrfs: Use an array of pages in the extent buffers to reduce the cost of find_get_page Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 6 +++--- fs/btrfs/extent_map.c | 22 ++++++++-------------- fs/btrfs/extent_map.h | 4 ++-- 3 files changed, 13 insertions(+), 19 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fad9298c6962..0636f79672e9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -647,20 +647,20 @@ int close_ctree(struct btrfs_root *root) int btrfs_buffer_uptodate(struct extent_buffer *buf) { - struct inode *btree_inode = buf->first_page->mapping->host; + struct inode *btree_inode = buf->pages[0]->mapping->host; return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); } int btrfs_set_buffer_uptodate(struct extent_buffer *buf) { - struct inode *btree_inode = buf->first_page->mapping->host; + struct inode *btree_inode = buf->pages[0]->mapping->host; return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); } void btrfs_mark_buffer_dirty(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; u64 transid = btrfs_header_generation(buf); struct inode *btree_inode = root->fs_info->btree_inode; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index ea6ee68ef53c..7ef3397a266d 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1961,16 +1961,12 @@ static void __free_extent_buffer(struct extent_buffer *eb) static inline struct page *extent_buffer_page(struct extent_buffer *eb, int i) { struct page *p; - if (i == 0) - return eb->first_page; + if (i < EXTENT_INLINE_PAGES) + return eb->pages[i]; i += eb->start >> PAGE_CACHE_SHIFT; - if (eb->last_page && eb->last_page->index == i) - return eb->last_page; - - p = find_get_page(eb->first_page->mapping, i); + p = find_get_page(eb->pages[0]->mapping, i); 
page_cache_release(p); - eb->last_page = p; return p; } @@ -2012,8 +2008,8 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, goto fail; } set_page_extent_mapped(p); - if (i == 0) - eb->first_page = p; + if (i < EXTENT_INLINE_PAGES) + eb->pages[i] = p; if (!PageUptodate(p)) uptodate = 0; unlock_page(p); @@ -2059,8 +2055,8 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, goto fail; } set_page_extent_mapped(p); - if (i == 0) - eb->first_page = p; + if (i < EXTENT_INLINE_PAGES) + eb->pages[i] = p; if (!PageUptodate(p)) uptodate = 0; unlock_page(p); @@ -2087,9 +2083,7 @@ void free_extent_buffer(struct extent_buffer *eb) num_pages = num_extent_pages(eb->start, eb->len); - if (eb->first_page) - page_cache_release(eb->first_page); - for (i = 1; i < num_pages; i++) { + for (i = 0; i < num_pages; i++) { page_cache_release(extent_buffer_page(eb, i)); } __free_extent_buffer(eb); diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 4ef8bdd68348..d74a2b3e3b5d 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -62,6 +62,7 @@ struct extent_state { struct list_head list; }; +#define EXTENT_INLINE_PAGES 32 struct extent_buffer { u64 start; unsigned long len; @@ -69,13 +70,12 @@ struct extent_buffer { int flags; struct list_head list; struct list_head leak_list; - struct page *first_page; - struct page *last_page; unsigned long alloc_addr; char *map_token; char *kaddr; unsigned long map_start; unsigned long map_len; + struct page *pages[EXTENT_INLINE_PAGES]; }; typedef struct extent_map *(get_extent_t)(struct inode *inode, -- cgit v1.2.3-70-g09d2 From e19caa5f0e34b571ed0c2617554af5c43cb124d1 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:17:44 -0400 Subject: Btrfs: Fix allocation routines to avoid intermixing data and metadata allocations Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 38 ++++++++++++++++++++++---------------- fs/btrfs/extent_map.c | 6 +++--- 2 files changed, 25 
insertions(+), 19 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0b0c947194ae..72e6b274a450 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -165,7 +165,7 @@ static u64 find_search_start(struct btrfs_root *root, { int ret; struct btrfs_block_group_cache *cache = *cache_ret; - u64 last = max(search_start, cache->key.objectid); + u64 last; u64 start = 0; u64 end = 0; @@ -173,11 +173,14 @@ again: ret = cache_block_group(root, cache); if (ret) goto out; + last = max(search_start, cache->key.objectid); + while(1) { ret = find_first_extent_bit(&root->fs_info->free_space_cache, last, &start, &end, EXTENT_DIRTY); - if (ret) - goto out; + if (ret) { + goto new_group; + } start = max(last, start); last = end + 1; @@ -191,15 +194,13 @@ out: return search_start; new_group: - cache = btrfs_lookup_block_group(root->fs_info, - last + cache->key.offset - 1); + last = cache->key.objectid + cache->key.offset; + cache = btrfs_lookup_block_group(root->fs_info, last); if (!cache) { return search_start; } - cache = btrfs_find_block_group(root, cache, - last + cache->key.offset - 1, data, 0); + cache = btrfs_find_block_group(root, cache, last, data, 0); *cache_ret = cache; - last = min(cache->key.objectid, last); goto again; } @@ -257,12 +258,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, if (used < div_factor(hint->key.offset, factor)) { return hint; } - last = hint->key.offset * 3; - if (hint->key.objectid >= last) - last = max(search_start + hint->key.offset - 1, - hint->key.objectid - last); - else - last = hint->key.objectid + hint->key.offset; + last = hint->key.objectid + hint->key.offset; hint_last = last; } else { if (hint) @@ -913,7 +909,6 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root check_failed: search_start = find_search_start(root, &block_group, search_start, total_needed, data); - btrfs_init_path(path); 
ins->objectid = search_start; ins->offset = 0; @@ -958,6 +953,9 @@ check_failed: continue; if (ret < 0) goto error; + + search_start = max(search_start, + block_group->key.objectid); if (!start_found) { ins->objectid = search_start; ins->offset = search_end - search_start; @@ -967,6 +965,7 @@ check_failed: ins->objectid = last_byte > search_start ? last_byte : search_start; ins->offset = search_end - ins->objectid; + BUG_ON(ins->objectid >= search_end); goto check_pending; } btrfs_item_key_to_cpu(l, &key, slot); @@ -998,7 +997,7 @@ check_failed: block_group->key.offset) { btrfs_release_path(root, path); search_start = block_group->key.objectid + - block_group->key.offset * 2; + block_group->key.offset; goto new_group; } next: @@ -1015,6 +1014,12 @@ check_pending: if (ins->objectid + num_bytes >= search_end) goto enospc; + if (!full_scan && ins->objectid + num_bytes >= block_group-> + key.objectid + block_group->key.offset) { + search_start = block_group->key.objectid + + block_group->key.offset; + goto new_group; + } if (test_range_bit(&info->extent_ins, ins->objectid, ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) { search_start = ins->objectid + num_bytes; @@ -1114,6 +1119,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, set_extent_bits(&root->fs_info->extent_ins, ins->objectid, ins->objectid + ins->offset - 1, EXTENT_LOCKED, GFP_NOFS); + WARN_ON(data == 1); goto update_block; } diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 7ef3397a266d..e241699024da 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -981,7 +981,7 @@ int find_first_extent_bit(struct extent_map_tree *tree, u64 start, struct extent_state *state; int ret = 1; - write_lock_irq(&tree->lock); + read_lock_irq(&tree->lock); /* * this search will find all the extents that end after * our range starts. 
@@ -993,7 +993,7 @@ int find_first_extent_bit(struct extent_map_tree *tree, u64 start, while(1) { state = rb_entry(node, struct extent_state, rb_node); - if (state->state & bits) { + if (state->end >= start && (state->state & bits)) { *start_ret = state->start; *end_ret = state->end; ret = 0; @@ -1004,7 +1004,7 @@ int find_first_extent_bit(struct extent_map_tree *tree, u64 start, break; } out: - write_unlock_irq(&tree->lock); + read_unlock_irq(&tree->lock); return ret; } EXPORT_SYMBOL(find_first_extent_bit); -- cgit v1.2.3-70-g09d2 From 4dc119046d0d8501afa4346472917fb05586ad9c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:18:14 -0400 Subject: Btrfs: Add an extent buffer LRU to reduce radix tree hits Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 3 - fs/btrfs/disk-io.c | 25 +++---- fs/btrfs/extent-tree.c | 1 - fs/btrfs/extent_map.c | 183 ++++++++++++++++++++++++++++--------------------- fs/btrfs/extent_map.h | 16 +++-- fs/btrfs/transaction.c | 5 +- 6 files changed, 128 insertions(+), 105 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 08ddf1873ab1..bef61ee883e1 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -88,8 +88,6 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, if (IS_ERR(cow)) return PTR_ERR(cow); - cow->alloc_addr = (unsigned long)__builtin_return_address(0); - copy_extent_buffer(cow, buf, 0, 0, cow->len); btrfs_set_header_bytenr(cow, cow->start); btrfs_set_header_generation(cow, trans->transid); @@ -151,7 +149,6 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, search_start = buf->start & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1); ret = __btrfs_cow_block(trans, root, buf, parent, parent_slot, cow_ret, search_start, 0); - (*cow_ret)->alloc_addr = (unsigned long)__builtin_return_address(0); return ret; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8e606e6658aa..fd7e6c182b9d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ 
-50,8 +50,6 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, struct extent_buffer *eb; eb = find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, bytenr, blocksize, GFP_NOFS); - if (eb) - eb->alloc_addr = (unsigned long)__builtin_return_address(0); return eb; } @@ -63,7 +61,6 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, bytenr, blocksize, GFP_NOFS); - eb->alloc_addr = (unsigned long)__builtin_return_address(0); return eb; } @@ -234,7 +231,6 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, return NULL; read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, buf, 1); - buf->alloc_addr = (unsigned long)__builtin_return_address(0); return buf; } @@ -638,6 +634,7 @@ int close_ctree(struct btrfs_root *root) btrfs_free_block_groups(root->fs_info); del_fs_roots(fs_info); + extent_map_tree_cleanup(&BTRFS_I(fs_info->btree_inode)->extent_tree); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); iput(fs_info->btree_inode); kfree(fs_info->extent_root); @@ -647,20 +644,20 @@ int close_ctree(struct btrfs_root *root) int btrfs_buffer_uptodate(struct extent_buffer *buf) { - struct inode *btree_inode = buf->pages[0]->mapping->host; + struct inode *btree_inode = buf->last_page->mapping->host; return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); } int btrfs_set_buffer_uptodate(struct extent_buffer *buf) { - struct inode *btree_inode = buf->pages[0]->mapping->host; + struct inode *btree_inode = buf->last_page->mapping->host; return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); } void btrfs_mark_buffer_dirty(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; u64 transid = btrfs_header_generation(buf); struct inode *btree_inode = root->fs_info->btree_inode; @@ -681,7 
+678,7 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) void btrfs_set_buffer_defrag(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS); @@ -689,7 +686,7 @@ void btrfs_set_buffer_defrag(struct extent_buffer *buf) void btrfs_set_buffer_defrag_done(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, @@ -698,7 +695,7 @@ void btrfs_set_buffer_defrag_done(struct extent_buffer *buf) int btrfs_buffer_defrag(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0); @@ -706,7 +703,7 @@ int btrfs_buffer_defrag(struct extent_buffer *buf) int btrfs_buffer_defrag_done(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, @@ -715,7 +712,7 @@ int btrfs_buffer_defrag_done(struct extent_buffer *buf) int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf) { - struct btrfs_root *root = 
BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, @@ -724,7 +721,7 @@ int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf) int btrfs_clear_buffer_defrag(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, @@ -733,7 +730,7 @@ int btrfs_clear_buffer_defrag(struct extent_buffer *buf) int btrfs_read_buffer(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, buf, 1); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 72e6b274a450..525fa845d613 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1176,7 +1176,6 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return ERR_PTR(-ENOMEM); } btrfs_set_buffer_uptodate(buf); - buf->alloc_addr = (unsigned long)__builtin_return_address(0); set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); btrfs_set_buffer_defrag(buf); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index e241699024da..85b28a6a4e05 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "extent_map.h" /* temporary define until extent_map moves out of btrfs */ @@ -20,14 +21,11 @@ static struct kmem_cache *extent_map_cache; 
static struct kmem_cache *extent_state_cache; static struct kmem_cache *extent_buffer_cache; -static LIST_HEAD(extent_buffers); static LIST_HEAD(buffers); static LIST_HEAD(states); -static spinlock_t extent_buffers_lock; static spinlock_t state_lock = SPIN_LOCK_UNLOCKED; -static int nr_extent_buffers; -#define MAX_EXTENT_BUFFER_CACHE 128 +#define BUFFER_LRU_MAX 64 struct tree_entry { u64 start; @@ -47,20 +45,12 @@ void __init extent_map_init(void) extent_buffer_cache = btrfs_cache_create("extent_buffers", sizeof(struct extent_buffer), 0, NULL); - spin_lock_init(&extent_buffers_lock); } void __exit extent_map_exit(void) { - struct extent_buffer *eb; struct extent_state *state; - while (!list_empty(&extent_buffers)) { - eb = list_entry(extent_buffers.next, - struct extent_buffer, list); - list_del(&eb->list); - kmem_cache_free(extent_buffer_cache, eb); - } while (!list_empty(&states)) { state = list_entry(states.next, struct extent_state, list); printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs)); @@ -68,14 +58,6 @@ void __exit extent_map_exit(void) kmem_cache_free(extent_state_cache, state); } - while (!list_empty(&buffers)) { - eb = list_entry(buffers.next, - struct extent_buffer, leak_list); - printk("buffer leak start %Lu len %lu return %lX\n", eb->start, eb->len, eb->alloc_addr); - list_del(&eb->leak_list); - kmem_cache_free(extent_buffer_cache, eb); - } - if (extent_map_cache) kmem_cache_destroy(extent_map_cache); @@ -92,10 +74,25 @@ void extent_map_tree_init(struct extent_map_tree *tree, tree->state.rb_node = NULL; tree->ops = NULL; rwlock_init(&tree->lock); + spin_lock_init(&tree->lru_lock); tree->mapping = mapping; + INIT_LIST_HEAD(&tree->buffer_lru); + tree->lru_size = 0; } EXPORT_SYMBOL(extent_map_tree_init); +void extent_map_tree_cleanup(struct extent_map_tree *tree) +{ + struct extent_buffer *eb; + while(!list_empty(&tree->buffer_lru)) { + eb = 
list_entry(tree->buffer_lru.next, struct extent_buffer, + lru); + list_del(&eb->lru); + free_extent_buffer(eb); + } +} +EXPORT_SYMBOL(extent_map_tree_cleanup); + struct extent_map *alloc_extent_map(gfp_t mask) { struct extent_map *em; @@ -1915,66 +1912,99 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock, return (em->block_start + start - em->start) >> inode->i_blkbits; } -static struct extent_buffer *__alloc_extent_buffer(gfp_t mask) +static int add_lru(struct extent_map_tree *tree, struct extent_buffer *eb) { - struct extent_buffer *eb = NULL; - - spin_lock(&extent_buffers_lock); - if (!list_empty(&extent_buffers)) { - eb = list_entry(extent_buffers.next, struct extent_buffer, - list); - list_del(&eb->list); - WARN_ON(nr_extent_buffers == 0); - nr_extent_buffers--; - } - spin_unlock(&extent_buffers_lock); + if (list_empty(&eb->lru)) { + extent_buffer_get(eb); + list_add(&eb->lru, &tree->buffer_lru); + tree->lru_size++; + if (tree->lru_size >= BUFFER_LRU_MAX) { + struct extent_buffer *rm; + rm = list_entry(tree->buffer_lru.prev, + struct extent_buffer, lru); + tree->lru_size--; + list_del(&rm->lru); + free_extent_buffer(rm); + } + } else + list_move(&eb->lru, &tree->buffer_lru); + return 0; +} +static struct extent_buffer *find_lru(struct extent_map_tree *tree, + u64 start, unsigned long len) +{ + struct list_head *lru = &tree->buffer_lru; + struct list_head *cur = lru->next; + struct extent_buffer *eb; - if (eb) { - memset(eb, 0, sizeof(*eb)); - } else { - eb = kmem_cache_zalloc(extent_buffer_cache, mask); - } - spin_lock(&extent_buffers_lock); - list_add(&eb->leak_list, &buffers); - spin_unlock(&extent_buffers_lock); + if (list_empty(lru)) + return NULL; - return eb; + do { + eb = list_entry(cur, struct extent_buffer, lru); + if (eb->start == start && eb->len == len) { + extent_buffer_get(eb); + return eb; + } + cur = cur->next; + } while (cur != lru); + return NULL; } -static void __free_extent_buffer(struct extent_buffer *eb) +static 
inline unsigned long num_extent_pages(u64 start, u64 len) { - - spin_lock(&extent_buffers_lock); - list_del_init(&eb->leak_list); - spin_unlock(&extent_buffers_lock); - - if (nr_extent_buffers >= MAX_EXTENT_BUFFER_CACHE) { - kmem_cache_free(extent_buffer_cache, eb); - } else { - spin_lock(&extent_buffers_lock); - list_add(&eb->list, &extent_buffers); - nr_extent_buffers++; - spin_unlock(&extent_buffers_lock); - } + return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - + (start >> PAGE_CACHE_SHIFT); } -static inline struct page *extent_buffer_page(struct extent_buffer *eb, int i) +static inline struct page *extent_buffer_page(struct extent_buffer *eb, + unsigned long i) { struct page *p; - if (i < EXTENT_INLINE_PAGES) - return eb->pages[i]; + if (i == 0) + return eb->last_page; i += eb->start >> PAGE_CACHE_SHIFT; - p = find_get_page(eb->pages[0]->mapping, i); + p = find_get_page(eb->last_page->mapping, i); page_cache_release(p); return p; } -static inline unsigned long num_extent_pages(u64 start, u64 len) +static struct extent_buffer *__alloc_extent_buffer(struct extent_map_tree *tree, + u64 start, + unsigned long len, + gfp_t mask) { - return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - - (start >> PAGE_CACHE_SHIFT); + struct extent_buffer *eb = NULL; + + spin_lock(&tree->lru_lock); + eb = find_lru(tree, start, len); + if (eb) + goto lru_add; + spin_unlock(&tree->lru_lock); + + if (eb) { + memset(eb, 0, sizeof(*eb)); + } else { + eb = kmem_cache_zalloc(extent_buffer_cache, mask); + } + INIT_LIST_HEAD(&eb->lru); + eb->start = start; + eb->len = len; + atomic_set(&eb->refs, 1); + + spin_lock(&tree->lru_lock); +lru_add: + add_lru(tree, eb); + spin_unlock(&tree->lru_lock); + return eb; +} + +static void __free_extent_buffer(struct extent_buffer *eb) +{ + kmem_cache_free(extent_buffer_cache, eb); } + struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, u64 start, unsigned long len, gfp_t mask) @@ -1987,14 +2017,12 @@ struct 
extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, struct address_space *mapping = tree->mapping; int uptodate = 0; - eb = __alloc_extent_buffer(mask); + eb = __alloc_extent_buffer(tree, start, len, mask); if (!eb || IS_ERR(eb)) return NULL; - eb->alloc_addr = (unsigned long)__builtin_return_address(0); - eb->start = start; - eb->len = len; - atomic_set(&eb->refs, 1); + if (eb->flags & EXTENT_BUFFER_FILLED) + return eb; for (i = 0; i < num_pages; i++, index++) { p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); @@ -2008,14 +2036,15 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, goto fail; } set_page_extent_mapped(p); - if (i < EXTENT_INLINE_PAGES) - eb->pages[i] = p; + if (i == 0) + eb->last_page = p; if (!PageUptodate(p)) uptodate = 0; unlock_page(p); } if (uptodate) eb->flags |= EXTENT_UPTODATE; + eb->flags |= EXTENT_BUFFER_FILLED; return eb; fail: free_extent_buffer(eb); @@ -2035,14 +2064,12 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, struct address_space *mapping = tree->mapping; int uptodate = 1; - eb = __alloc_extent_buffer(mask); + eb = __alloc_extent_buffer(tree, start, len, mask); if (!eb || IS_ERR(eb)) return NULL; - eb->alloc_addr = (unsigned long)__builtin_return_address(0); - eb->start = start; - eb->len = len; - atomic_set(&eb->refs, 1); + if (eb->flags & EXTENT_BUFFER_FILLED) + return eb; for (i = 0; i < num_pages; i++, index++) { p = find_lock_page(mapping, index); @@ -2055,14 +2082,15 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, goto fail; } set_page_extent_mapped(p); - if (i < EXTENT_INLINE_PAGES) - eb->pages[i] = p; + if (i == 0) + eb->last_page = p; if (!PageUptodate(p)) uptodate = 0; unlock_page(p); } if (uptodate) eb->flags |= EXTENT_UPTODATE; + eb->flags |= EXTENT_BUFFER_FILLED; return eb; fail: free_extent_buffer(eb); @@ -2231,7 +2259,8 @@ int read_extent_buffer_pages(struct extent_map_tree *tree, ret = -EIO; } } - eb->flags |= 
EXTENT_UPTODATE; + if (!ret) + eb->flags |= EXTENT_UPTODATE; return ret; } EXPORT_SYMBOL(read_extent_buffer_pages); diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 8409b5cbeda7..52a8b9394fc6 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -16,6 +16,7 @@ #define EXTENT_DELALLOC (1 << 5) #define EXTENT_DEFRAG (1 << 6) #define EXTENT_DEFRAG_DONE (1 << 7) +#define EXTENT_BUFFER_FILLED (1 << 8) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) @@ -33,6 +34,9 @@ struct extent_map_tree { struct address_space *mapping; rwlock_t lock; struct extent_map_ops *ops; + spinlock_t lru_lock; + struct list_head buffer_lru; + int lru_size; }; /* note, this must start with the same fields as fs/extent_map.c:tree_entry */ @@ -64,20 +68,17 @@ struct extent_state { struct list_head list; }; -#define EXTENT_INLINE_PAGES 32 struct extent_buffer { u64 start; unsigned long len; - atomic_t refs; - int flags; - struct list_head list; - struct list_head leak_list; - unsigned long alloc_addr; char *map_token; char *kaddr; unsigned long map_start; unsigned long map_len; - struct page *pages[EXTENT_INLINE_PAGES]; + struct page *last_page; + struct list_head lru; + atomic_t refs; + int flags; }; typedef struct extent_map *(get_extent_t)(struct inode *inode, @@ -88,6 +89,7 @@ typedef struct extent_map *(get_extent_t)(struct inode *inode, void extent_map_tree_init(struct extent_map_tree *tree, struct address_space *mapping, gfp_t mask); +void extent_map_tree_cleanup(struct extent_map_tree *tree); struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, u64 start, u64 end); int add_extent_mapping(struct extent_map_tree *tree, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 87456ab74275..67e4aca36a62 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -443,8 +443,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, BUG_ON(ret); mutex_unlock(&tree_root->fs_info->fs_mutex); 
btrfs_btree_balance_dirty(tree_root, nr); - schedule(); - + cond_resched(); mutex_lock(&tree_root->fs_info->fs_mutex); } BUG_ON(ret); @@ -471,7 +470,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, mutex_unlock(&tree_root->fs_info->fs_mutex); btrfs_btree_balance_dirty(tree_root, nr); - schedule(); + cond_resched(); } return ret; } -- cgit v1.2.3-70-g09d2 From 810191ff3087e8143b41a944fcf4fd8c693f00e3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:18:55 -0400 Subject: Btrfs: extent_map optimizations to cut down on CPU usage Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 1 + fs/btrfs/ctree.h | 74 ++++++++++----------------------------------------- fs/btrfs/disk-io.c | 20 +++++++------- fs/btrfs/extent_map.c | 33 ++++++++++------------- fs/btrfs/extent_map.h | 2 +- fs/btrfs/file.c | 1 - 6 files changed, 40 insertions(+), 91 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index f691542dfa15..54a5d006c562 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -463,6 +463,7 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, static int check_block(struct btrfs_root *root, struct btrfs_path *path, int level) { + return 0; #if 0 struct extent_buffer *buf = path->nodes[level]; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 18994c53106c..a942a2427228 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -19,6 +19,8 @@ #ifndef __BTRFS__ #define __BTRFS__ +#include +#include #include #include #include @@ -499,70 +501,22 @@ static inline void btrfs_set_##name(struct extent_buffer *eb, \ #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ static inline u##bits btrfs_##name(struct extent_buffer *eb) \ { \ - int err; \ - char *map_token; \ - char *kaddr; \ - unsigned long map_start; \ - unsigned long map_len; \ + char *kaddr = kmap_atomic(eb->first_page, KM_USER0); \ unsigned long offset = offsetof(type, member); \ - int unmap_on_exit = 
(eb->map_token == NULL); \ - if (eb->map_token && offset >= eb->map_start && \ - offset + sizeof(((type *)0)->member) <= eb->map_start + \ - eb->map_len) { \ - kaddr = eb->kaddr; \ - map_start = eb->map_start; \ - err = 0; \ - } else { \ - err = map_extent_buffer(eb, offset, \ - sizeof(((type *)0)->member), \ - &map_token, &kaddr, \ - &map_start, &map_len, KM_USER1); \ - } \ - if (!err) { \ - __le##bits *tmp = (__le##bits *)(kaddr + offset - \ - map_start); \ - u##bits res = le##bits##_to_cpu(*tmp); \ - if (unmap_on_exit) \ - unmap_extent_buffer(eb, map_token, KM_USER1); \ - return res; \ - } else { \ - __le##bits res; \ - read_eb_member(eb, NULL, type, member, &res); \ - return le##bits##_to_cpu(res); \ - } \ + u##bits res; \ + __le##bits *tmp = (__le##bits *)(kaddr + offset); \ + res = le##bits##_to_cpu(*tmp); \ + kunmap_atomic(kaddr, KM_USER0); \ + return res; \ } \ static inline void btrfs_set_##name(struct extent_buffer *eb, \ u##bits val) \ { \ - int err; \ - char *map_token; \ - char *kaddr; \ - unsigned long map_start; \ - unsigned long map_len; \ + char *kaddr = kmap_atomic(eb->first_page, KM_USER0); \ unsigned long offset = offsetof(type, member); \ - int unmap_on_exit = (eb->map_token == NULL); \ - if (eb->map_token && offset >= eb->map_start && \ - offset + sizeof(((type *)0)->member) <= eb->map_start + \ - eb->map_len) { \ - kaddr = eb->kaddr; \ - map_start = eb->map_start; \ - err = 0; \ - } else { \ - err = map_extent_buffer(eb, offset, \ - sizeof(((type *)0)->member), \ - &map_token, &kaddr, \ - &map_start, &map_len, KM_USER1); \ - } \ - if (!err) { \ - __le##bits *tmp = (__le##bits *)(kaddr + offset - \ - map_start); \ - *tmp = cpu_to_le##bits(val); \ - if (unmap_on_exit) \ - unmap_extent_buffer(eb, map_token, KM_USER1); \ - } else { \ - val = cpu_to_le##bits(val); \ - write_eb_member(eb, NULL, type, member, &val); \ - } \ + __le##bits *tmp = (__le##bits *)(kaddr + offset); \ + *tmp = cpu_to_le##bits(val); \ + kunmap_atomic(kaddr, KM_USER0); \ } 
#define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ @@ -659,13 +613,13 @@ static inline void btrfs_set_node_blockptr(struct extent_buffer *eb, btrfs_set_key_blockptr(eb, (struct btrfs_key_ptr *)ptr, val); } -static unsigned long btrfs_node_key_ptr_offset(int nr) +static inline unsigned long btrfs_node_key_ptr_offset(int nr) { return offsetof(struct btrfs_node, ptrs) + sizeof(struct btrfs_key_ptr) * nr; } -static void btrfs_node_key(struct extent_buffer *eb, +static inline void btrfs_node_key(struct extent_buffer *eb, struct btrfs_disk_key *disk_key, int nr) { unsigned long ptr; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fd7e6c182b9d..16f0260fca66 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -644,20 +644,20 @@ int close_ctree(struct btrfs_root *root) int btrfs_buffer_uptodate(struct extent_buffer *buf) { - struct inode *btree_inode = buf->last_page->mapping->host; + struct inode *btree_inode = buf->first_page->mapping->host; return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); } int btrfs_set_buffer_uptodate(struct extent_buffer *buf) { - struct inode *btree_inode = buf->last_page->mapping->host; + struct inode *btree_inode = buf->first_page->mapping->host; return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); } void btrfs_mark_buffer_dirty(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; u64 transid = btrfs_header_generation(buf); struct inode *btree_inode = root->fs_info->btree_inode; @@ -678,7 +678,7 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) void btrfs_set_buffer_defrag(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; 
set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS); @@ -686,7 +686,7 @@ void btrfs_set_buffer_defrag(struct extent_buffer *buf) void btrfs_set_buffer_defrag_done(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, @@ -695,7 +695,7 @@ void btrfs_set_buffer_defrag_done(struct extent_buffer *buf) int btrfs_buffer_defrag(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0); @@ -703,7 +703,7 @@ int btrfs_buffer_defrag(struct extent_buffer *buf) int btrfs_buffer_defrag_done(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, @@ -712,7 +712,7 @@ int btrfs_buffer_defrag_done(struct extent_buffer *buf) int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, @@ -721,7 +721,7 @@ int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf) int 
btrfs_clear_buffer_defrag(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, @@ -730,7 +730,7 @@ int btrfs_clear_buffer_defrag(struct extent_buffer *buf) int btrfs_read_buffer(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, buf, 1); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 85b28a6a4e05..f8aaba8a30a2 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1963,9 +1963,9 @@ static inline struct page *extent_buffer_page(struct extent_buffer *eb, struct page *p; if (i == 0) - return eb->last_page; + return eb->first_page; i += eb->start >> PAGE_CACHE_SHIFT; - p = find_get_page(eb->last_page->mapping, i); + p = find_get_page(eb->first_page->mapping, i); page_cache_release(p); return p; } @@ -2037,7 +2037,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, } set_page_extent_mapped(p); if (i == 0) - eb->last_page = p; + eb->first_page = p; if (!PageUptodate(p)) uptodate = 0; unlock_page(p); @@ -2083,7 +2083,7 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, } set_page_extent_mapped(p); if (i == 0) - eb->last_page = p; + eb->first_page = p; if (!PageUptodate(p)) uptodate = 0; unlock_page(p); @@ -2169,7 +2169,15 @@ EXPORT_SYMBOL(wait_on_extent_buffer_writeback); int set_extent_buffer_dirty(struct extent_map_tree *tree, struct extent_buffer *eb) { - return set_range_dirty(tree, eb->start, eb->start + eb->len - 1); + unsigned long i; + unsigned long num_pages; + + 
num_pages = num_extent_pages(eb->start, eb->len); + for (i = 0; i < num_pages; i++) { + __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); + } + return set_extent_dirty(tree, eb->start, + eb->start + eb->len - 1, GFP_NOFS); } EXPORT_SYMBOL(set_extent_buffer_dirty); @@ -2317,16 +2325,11 @@ static int __map_extent_buffer(struct extent_buffer *eb, unsigned long start, size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; unsigned long end_i = (start_offset + start + min_len) >> - PAGE_CACHE_SHIFT; + PAGE_CACHE_SHIFT; if (i != end_i) return -EINVAL; - if (start >= eb->len) { - printk("bad start in map eb start %Lu len %lu caller start %lu min %lu\n", eb->start, eb->len, start, min_len); - WARN_ON(1); - } - if (i == 0) { offset = start_offset; *map_start = 0; @@ -2353,14 +2356,6 @@ int map_extent_buffer(struct extent_buffer *eb, unsigned long start, int err; int save = 0; if (eb->map_token) { - if (start >= eb->map_start && - start + min_len <= eb->map_start + eb->map_len) { - *token = eb->map_token; - *map = eb->kaddr; - *map_start = eb->map_start; - *map_len = eb->map_len; - return 0; - } unmap_extent_buffer(eb, eb->map_token, km); eb->map_token = NULL; save = 1; diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 52a8b9394fc6..f1dc28d260eb 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -75,7 +75,7 @@ struct extent_buffer { char *kaddr; unsigned long map_start; unsigned long map_len; - struct page *last_page; + struct page *first_page; struct list_head lru; atomic_t refs; int flags; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1af2b6534dad..fe28404ae7f4 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -289,7 +289,6 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, */ inline_size = end_pos; if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) || - inline_size > 16384 || inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { u64 last_end; -- 
cgit v1.2.3-70-g09d2 From 19c00ddcc31ad4bdfb86b57085e06d6135b9b1d7 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:19:22 -0400 Subject: Btrfs: Add back metadata checksumming Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 + fs/btrfs/disk-io.c | 215 +++++++++++++++++++++++++++++++++++-------------- fs/btrfs/extent-tree.c | 4 + fs/btrfs/extent_map.c | 83 +++++++++++++++---- fs/btrfs/extent_map.h | 18 ++++- fs/btrfs/inode.c | 4 - 6 files changed, 245 insertions(+), 81 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d1c6f023a302..1af0a966f85b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -310,9 +310,11 @@ struct btrfs_fs_info { struct extent_buffer *sb_buffer; struct super_block *sb; struct inode *btree_inode; + spinlock_t hash_lock; struct mutex trans_mutex; struct mutex fs_mutex; struct list_head trans_list; + struct list_head hashers; struct list_head dead_roots; struct delayed_work trans_work; struct kobject super_kobj; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 16f0260fca66..1176e5420c56 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -60,7 +60,7 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, struct extent_buffer *eb; eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, - bytenr, blocksize, GFP_NOFS); + bytenr, blocksize, NULL, GFP_NOFS); return eb; } @@ -99,10 +99,102 @@ out: return em; } +u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len) +{ + return crc32c(seed, data, len); +} + +void btrfs_csum_final(u32 crc, char *result) +{ + *(__le32 *)result = ~cpu_to_le32(crc); +} + +static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, + int verify) +{ + char result[BTRFS_CRC32_SIZE]; + unsigned long len; + unsigned long cur_len; + unsigned long offset = BTRFS_CSUM_SIZE; + char *map_token = NULL; + char *kaddr; + unsigned long map_start; + unsigned long map_len; 
+ int err; + u32 crc = ~(u32)0; + + len = buf->len - offset; + while(len > 0) { + err = map_private_extent_buffer(buf, offset, 32, + &map_token, &kaddr, + &map_start, &map_len, KM_USER0); + if (err) { + printk("failed to map extent buffer! %lu\n", + offset); + return 1; + } + cur_len = min(len, map_len - (offset - map_start)); + crc = btrfs_csum_data(root, kaddr + offset - map_start, + crc, cur_len); + len -= cur_len; + offset += cur_len; + unmap_extent_buffer(buf, map_token, KM_USER0); + } + btrfs_csum_final(crc, result); + + if (verify) { + if (memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) { + printk("btrfs: %s checksum verify failed on %llu\n", + root->fs_info->sb->s_id, + buf->start); + return 1; + } + } else { + write_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE); + } + return 0; +} + + +int csum_dirty_buffer(struct btrfs_root *root, struct page *page) +{ + struct extent_map_tree *tree; + u64 start = page->index << PAGE_CACHE_SHIFT; + u64 found_start; + int found_level; + unsigned long len; + struct extent_buffer *eb; + tree = &BTRFS_I(page->mapping->host)->extent_tree; + + if (page->private == EXTENT_PAGE_PRIVATE) + goto out; + if (!page->private) + goto out; + len = page->private >> 2; + if (len == 0) { + WARN_ON(1); + } + eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); + read_extent_buffer_pages(tree, eb, start + PAGE_CACHE_SIZE, 1); + found_start = btrfs_header_bytenr(eb); + if (found_start != start) { + printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n", + start, found_start, len); + } + found_level = btrfs_header_level(eb); + csum_tree_block(root, eb, 0); + free_extent_buffer(eb); +out: + return 0; +} + static int btree_writepage(struct page *page, struct writeback_control *wbc) { struct extent_map_tree *tree; + struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; tree = &BTRFS_I(page->mapping->host)->extent_tree; + + csum_dirty_buffer(root, page); return extent_write_full_page(tree, page, btree_get_extent, 
wbc); } int btree_readpage(struct file *file, struct page *page) @@ -117,7 +209,6 @@ static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags) struct extent_map_tree *tree; int ret; - BUG_ON(page->private != 1); tree = &BTRFS_I(page->mapping->host)->extent_tree; ret = try_release_extent_mapping(tree, page); if (ret == 1) { @@ -136,46 +227,6 @@ static void btree_invalidatepage(struct page *page, unsigned long offset) btree_releasepage(page, GFP_NOFS); } -int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, - char *result) -{ - return 0; -#if 0 - u32 crc; - crc = crc32c(0, data, len); - memcpy(result, &crc, BTRFS_CRC32_SIZE); - return 0; -#endif -} - -#if 0 -static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, - int verify) -{ - return 0; - char result[BTRFS_CRC32_SIZE]; - int ret; - struct btrfs_node *node; - - ret = btrfs_csum_data(root, bh->b_data + BTRFS_CSUM_SIZE, - bh->b_size - BTRFS_CSUM_SIZE, result); - if (ret) - return ret; - if (verify) { - if (memcmp(bh->b_data, result, BTRFS_CRC32_SIZE)) { - printk("btrfs: %s checksum verify failed on %llu\n", - root->fs_info->sb->s_id, - (unsigned long long)bh_blocknr(bh)); - return 1; - } - } else { - node = btrfs_buffer_node(bh); - memcpy(node->header.csum, result, BTRFS_CRC32_SIZE); - } - return 0; -} -#endif - #if 0 static int btree_writepage(struct page *page, struct writeback_control *wbc) { @@ -215,7 +266,7 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) if (!buf) return 0; read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, - buf, 0); + buf, 0, 0); free_extent_buffer(buf); return ret; } @@ -225,12 +276,29 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, { struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; + struct extent_map_tree *extent_tree; + int ret; + + extent_tree = &BTRFS_I(btree_inode)->extent_tree; buf = btrfs_find_create_tree_block(root, 
bytenr, blocksize); if (!buf) return NULL; read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, - buf, 1); + buf, 0, 1); + if (buf->flags & EXTENT_CSUM) { + return buf; + } + if (test_range_bit(extent_tree, buf->start, buf->start + buf->len - 1, + EXTENT_CSUM, 1)) { + buf->flags |= EXTENT_CSUM; + return buf; + } + ret = csum_tree_block(root, buf, 1); + set_extent_bits(extent_tree, buf->start, + buf->start + buf->len - 1, + EXTENT_CSUM, GFP_NOFS); + buf->flags |= EXTENT_CSUM; return buf; } @@ -251,13 +319,6 @@ int wait_on_tree_block_writeback(struct btrfs_root *root, return 0; } -int set_tree_block_dirty(struct btrfs_root *root, struct extent_buffer *buf) -{ - struct inode *btree_inode = root->fs_info->btree_inode; - set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); - return 0; -} - static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, struct btrfs_root *root, struct btrfs_fs_info *fs_info, @@ -416,7 +477,24 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, return root; } - +#if 0 +static int add_hasher(struct btrfs_fs_info *info, char *type) { + struct btrfs_hasher *hasher; + + hasher = kmalloc(sizeof(*hasher), GFP_NOFS); + if (!hasher) + return -ENOMEM; + hasher->hash_tfm = crypto_alloc_hash(type, 0, CRYPTO_ALG_ASYNC); + if (!hasher->hash_tfm) { + kfree(hasher); + return -EINVAL; + } + spin_lock(&info->hash_lock); + list_add(&hasher->list, &info->hashers); + spin_unlock(&info->hash_lock); + return 0; +} +#endif struct btrfs_root *open_ctree(struct super_block *sb) { u32 sectorsize; @@ -440,6 +518,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); + INIT_LIST_HEAD(&fs_info->hashers); + spin_lock_init(&fs_info->hash_lock); + memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj)); init_completion(&fs_info->kobj_unregister); sb_set_blocksize(sb, 4096); @@ -479,6 
+560,14 @@ struct btrfs_root *open_ctree(struct super_block *sb) mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); +#if 0 + ret = add_hasher(fs_info, "crc32c"); + if (ret) { + printk("btrfs: failed hash setup, modprobe cryptomgr?\n"); + err = -ENOMEM; + goto fail_iput; + } +#endif __setup_root(512, 512, 512, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); @@ -509,25 +598,21 @@ struct btrfs_root *open_ctree(struct super_block *sb) i_size_write(fs_info->btree_inode, btrfs_super_total_bytes(disk_super)); - if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, sizeof(disk_super->magic))) { printk("btrfs: valid FS not found on %s\n", sb->s_id); goto fail_sb_buffer; } + blocksize = btrfs_level_size(tree_root, btrfs_super_root_level(disk_super)); + tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super), blocksize); if (!tree_root->node) goto fail_sb_buffer; -#if 0 - btrfs_print_leaf(tree_root, tree_root->node); - err = -EIO; - goto fail_tree_root; -#endif mutex_lock(&fs_info->fs_mutex); ret = find_and_setup_root(tree_root, fs_info, @@ -634,9 +719,19 @@ int close_ctree(struct btrfs_root *root) btrfs_free_block_groups(root->fs_info); del_fs_roots(fs_info); - extent_map_tree_cleanup(&BTRFS_I(fs_info->btree_inode)->extent_tree); + extent_map_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->extent_tree); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); iput(fs_info->btree_inode); +#if 0 + while(!list_empty(&fs_info->hashers)) { + struct btrfs_hasher *hasher; + hasher = list_entry(fs_info->hashers.next, struct btrfs_hasher, + hashers); + list_del(&hasher->hashers); + crypto_free_hash(&fs_info->hash_tfm); + kfree(hasher); + } +#endif kfree(fs_info->extent_root); kfree(fs_info->tree_root); return 0; @@ -733,5 +828,5 @@ int btrfs_read_buffer(struct extent_buffer *buf) struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return 
read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, - buf, 1); + buf, 0, 1); } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e4e68ea96483..6b2f2b414fa1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1178,6 +1178,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, btrfs_set_buffer_uptodate(buf); set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); + set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree, + buf->start, buf->start + buf->len - 1, + EXTENT_CSUM, GFP_NOFS); + buf->flags |= EXTENT_CSUM; btrfs_set_buffer_defrag(buf); trans->blocks_used++; return buf; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index f8aaba8a30a2..2a8bc4bd43a9 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -81,7 +81,7 @@ void extent_map_tree_init(struct extent_map_tree *tree, } EXPORT_SYMBOL(extent_map_tree_init); -void extent_map_tree_cleanup(struct extent_map_tree *tree) +void extent_map_tree_empty_lru(struct extent_map_tree *tree) { struct extent_buffer *eb; while(!list_empty(&tree->buffer_lru)) { @@ -91,7 +91,7 @@ void extent_map_tree_cleanup(struct extent_map_tree *tree) free_extent_buffer(eb); } } -EXPORT_SYMBOL(extent_map_tree_cleanup); +EXPORT_SYMBOL(extent_map_tree_empty_lru); struct extent_map *alloc_extent_map(gfp_t mask) { @@ -1464,7 +1464,7 @@ void set_page_extent_mapped(struct page *page) if (!PagePrivate(page)) { SetPagePrivate(page); WARN_ON(!page->mapping->a_ops->invalidatepage); - set_page_private(page, 1); + set_page_private(page, EXTENT_PAGE_PRIVATE); page_cache_get(page); } } @@ -1979,8 +1979,9 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_map_tree *tree, spin_lock(&tree->lru_lock); eb = find_lru(tree, start, len); - if (eb) + if (eb) { goto lru_add; + } spin_unlock(&tree->lru_lock); if (eb) { @@ -2007,6 +2008,7 @@ static void __free_extent_buffer(struct extent_buffer *eb) 
struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, u64 start, unsigned long len, + struct page *page0, gfp_t mask) { unsigned long num_pages = num_extent_pages(start, len); @@ -2024,7 +2026,18 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, if (eb->flags & EXTENT_BUFFER_FILLED) return eb; - for (i = 0; i < num_pages; i++, index++) { + if (page0) { + eb->first_page = page0; + i = 1; + index++; + page_cache_get(page0); + set_page_extent_mapped(page0); + set_page_private(page0, EXTENT_PAGE_PRIVATE_FIRST_PAGE | + len << 2); + } else { + i = 0; + } + for (; i < num_pages; i++, index++) { p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); if (!p) { WARN_ON(1); @@ -2036,8 +2049,13 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, goto fail; } set_page_extent_mapped(p); - if (i == 0) + if (i == 0) { eb->first_page = p; + set_page_private(p, EXTENT_PAGE_PRIVATE_FIRST_PAGE | + len << 2); + } else { + set_page_private(p, EXTENT_PAGE_PRIVATE); + } if (!PageUptodate(p)) uptodate = 0; unlock_page(p); @@ -2057,8 +2075,7 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, gfp_t mask) { unsigned long num_pages = num_extent_pages(start, len); - unsigned long i; - unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long i; unsigned long index = start >> PAGE_CACHE_SHIFT; struct extent_buffer *eb; struct page *p; struct address_space *mapping = tree->mapping; @@ -2082,8 +2099,15 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, goto fail; } set_page_extent_mapped(p); - if (i == 0) + + if (i == 0) { eb->first_page = p; + set_page_private(p, EXTENT_PAGE_PRIVATE_FIRST_PAGE | + len << 2); + } else { + set_page_private(p, EXTENT_PAGE_PRIVATE); + } + if (!PageUptodate(p)) uptodate = 0; unlock_page(p); @@ -2174,7 +2198,21 @@ int set_extent_buffer_dirty(struct extent_map_tree *tree, num_pages = num_extent_pages(eb->start, eb->len); for (i = 0; i < 
num_pages; i++) { + struct page *page = extent_buffer_page(eb, i); + /* writepage may need to do something special for the + * first page, we have to make sure page->private is + * properly set. releasepage may drop page->private + * on us if the page isn't already dirty. + */ + if (i == 0) { + lock_page(page); + set_page_private(page, + EXTENT_PAGE_PRIVATE_FIRST_PAGE | + eb->len << 2); + } __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); + if (i == 0) + unlock_page(page); } return set_extent_dirty(tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); @@ -2217,9 +2255,12 @@ int extent_buffer_uptodate(struct extent_map_tree *tree, EXPORT_SYMBOL(extent_buffer_uptodate); int read_extent_buffer_pages(struct extent_map_tree *tree, - struct extent_buffer *eb, int wait) + struct extent_buffer *eb, + u64 start, + int wait) { unsigned long i; + unsigned long start_i; struct page *page; int err; int ret = 0; @@ -2232,9 +2273,16 @@ int read_extent_buffer_pages(struct extent_map_tree *tree, EXTENT_UPTODATE, 1)) { return 0; } + if (start) { + WARN_ON(start < eb->start); + start_i = (start >> PAGE_CACHE_SHIFT) - + (eb->start >> PAGE_CACHE_SHIFT); + } else { + start_i = 0; + } num_pages = num_extent_pages(eb->start, eb->len); - for (i = 0; i < num_pages; i++) { + for (i = start_i; i < num_pages; i++) { page = extent_buffer_page(eb, i); if (PageUptodate(page)) { continue; @@ -2260,7 +2308,7 @@ int read_extent_buffer_pages(struct extent_map_tree *tree, return ret; } - for (i = 0; i < num_pages; i++) { + for (i = start_i; i < num_pages; i++) { page = extent_buffer_page(eb, i); wait_on_page_locked(page); if (!PageUptodate(page)) { @@ -2314,7 +2362,7 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, } EXPORT_SYMBOL(read_extent_buffer); -static int __map_extent_buffer(struct extent_buffer *eb, unsigned long start, +int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, unsigned long min_len, char **token, char **map, unsigned long *map_start, 
unsigned long *map_len, int km) @@ -2337,6 +2385,10 @@ static int __map_extent_buffer(struct extent_buffer *eb, unsigned long start, offset = 0; *map_start = (i << PAGE_CACHE_SHIFT) - start_offset; } + if (start + min_len >= eb->len) { +printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len); + WARN_ON(1); + } p = extent_buffer_page(eb, i); WARN_ON(!PageUptodate(p)); @@ -2346,6 +2398,7 @@ static int __map_extent_buffer(struct extent_buffer *eb, unsigned long start, *map_len = PAGE_CACHE_SIZE - offset; return 0; } +EXPORT_SYMBOL(map_private_extent_buffer); int map_extent_buffer(struct extent_buffer *eb, unsigned long start, unsigned long min_len, @@ -2360,8 +2413,8 @@ int map_extent_buffer(struct extent_buffer *eb, unsigned long start, eb->map_token = NULL; save = 1; } - err = __map_extent_buffer(eb, start, min_len, token, map, - map_start, map_len, km); + err = map_private_extent_buffer(eb, start, min_len, token, map, + map_start, map_len, km); if (!err && save) { eb->map_token = *token; eb->kaddr = *map; diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index f1dc28d260eb..39d78d31b6a2 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -17,8 +17,17 @@ #define EXTENT_DEFRAG (1 << 6) #define EXTENT_DEFRAG_DONE (1 << 7) #define EXTENT_BUFFER_FILLED (1 << 8) +#define EXTENT_CSUM (1 << 9) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) +/* + * page->private values. Every page that is controlled by the extent + * map has page->private set to one. 
+ */ + +#define EXTENT_PAGE_PRIVATE 1 +#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3 + struct extent_map_ops { int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); @@ -89,7 +98,7 @@ typedef struct extent_map *(get_extent_t)(struct inode *inode, void extent_map_tree_init(struct extent_map_tree *tree, struct address_space *mapping, gfp_t mask); -void extent_map_tree_cleanup(struct extent_map_tree *tree); +void extent_map_tree_empty_lru(struct extent_map_tree *tree); struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, u64 start, u64 end); int add_extent_mapping(struct extent_map_tree *tree, @@ -143,13 +152,14 @@ void set_page_extent_mapped(struct page *page); struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, u64 start, unsigned long len, + struct page *page0, gfp_t mask); struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, u64 start, unsigned long len, gfp_t mask); void free_extent_buffer(struct extent_buffer *eb); int read_extent_buffer_pages(struct extent_map_tree *tree, - struct extent_buffer *eb, int wait); + struct extent_buffer *eb, u64 start, int wait); static inline void extent_buffer_get(struct extent_buffer *eb) { @@ -187,5 +197,9 @@ int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, unsigned long min_len, char **token, char **map, unsigned long *map_start, unsigned long *map_len, int km); +int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, + unsigned long min_len, char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km); void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); #endif diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 84f496c838c9..80f84b48a70f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1713,10 +1713,6 @@ static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags) struct extent_map_tree *tree; int ret; - if (page->private != 1) { - WARN_ON(1); - return 
try_to_free_buffers(page); - } tree = &BTRFS_I(page->mapping->host)->extent_tree; ret = try_release_extent_mapping(tree, page); if (ret == 1) { -- cgit v1.2.3-70-g09d2 From ff79f8190b6e955ff7a71faf804a3017d526e657 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:22:25 -0400 Subject: Btrfs: Add back file data checksumming Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 + fs/btrfs/disk-io.h | 2 ++ fs/btrfs/extent_map.c | 3 +++ fs/btrfs/file-item.c | 17 +++++++++-------- fs/btrfs/file.c | 1 + fs/btrfs/inode.c | 21 ++++++++++----------- 6 files changed, 26 insertions(+), 19 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3878772fcffd..5accced2858a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -594,6 +594,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) tree_root->nodesize = nodesize; tree_root->leafsize = leafsize; tree_root->sectorsize = sectorsize; + sb_set_blocksize(sb, sectorsize); i_size_write(fs_info->btree_inode, btrfs_super_total_bytes(disk_super)); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 190b07bc642b..33d2ccfd74d8 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -58,4 +58,6 @@ int btrfs_buffer_defrag_done(struct extent_buffer *buf); int btrfs_clear_buffer_defrag(struct extent_buffer *buf); int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf); int btrfs_read_buffer(struct extent_buffer *buf); +u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len); +void btrfs_csum_final(u32 crc, char *result); #endif diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 2a8bc4bd43a9..e87e476dca92 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -2031,6 +2031,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, i = 1; index++; page_cache_get(page0); + mark_page_accessed(page0); set_page_extent_mapped(page0); set_page_private(page0, 
EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); @@ -2049,6 +2050,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, goto fail; } set_page_extent_mapped(p); + mark_page_accessed(p); if (i == 0) { eb->first_page = p; set_page_private(p, EXTENT_PAGE_PRIVATE_FIRST_PAGE | @@ -2099,6 +2101,7 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, goto fail; } set_page_extent_mapped(p); + mark_page_accessed(p); if (i == 0) { eb->first_page = p; diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 9a65e97a4e28..10a4c7408600 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -136,27 +136,27 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, u64 objectid, u64 offset, char *data, size_t len) { - return 0; -#if 0 int ret; struct btrfs_key file_key; struct btrfs_key found_key; struct btrfs_path *path; struct btrfs_csum_item *item; - struct extent_buffer *leaf; + struct extent_buffer *leaf = NULL; u64 csum_offset; + u32 csum_result = ~(u32)0; path = btrfs_alloc_path(); BUG_ON(!path); file_key.objectid = objectid; file_key.offset = offset; - file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); item = btrfs_lookup_csum(trans, root, path, objectid, offset, 1); - if (!IS_ERR(item)) + if (!IS_ERR(item)) { + leaf = path->nodes[0]; goto found; + } ret = PTR_ERR(item); if (ret == -EFBIG) { u32 item_size; @@ -226,14 +226,15 @@ csum: item = (struct btrfs_csum_item *)((unsigned char *)item + csum_offset * BTRFS_CRC32_SIZE); found: - /* FIXME!!!!!!!!!!!! 
*/ - ret = btrfs_csum_data(root, data, len, &item->csum); + csum_result = btrfs_csum_data(root, data, csum_result, len); + btrfs_csum_final(csum_result, (char *)&csum_result); + write_extent_buffer(leaf, &csum_result, (unsigned long)item, + BTRFS_CRC32_SIZE); btrfs_mark_buffer_dirty(path->nodes[0]); fail: btrfs_release_path(root, path); btrfs_free_path(path); return ret; -#endif } int btrfs_csum_truncate(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index fe28404ae7f4..1af2b6534dad 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -289,6 +289,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, */ inline_size = end_pos; if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) || + inline_size > 16384 || inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { u64 last_end; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 80f84b48a70f..ac69f6926afc 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -135,7 +135,7 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct btrfs_csum_item *item; struct btrfs_path *path = NULL; - u64 private; + u32 csum; mutex_lock(&root->fs_info->fs_mutex); path = btrfs_alloc_path(); @@ -145,11 +145,12 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) /* a csum that isn't present is a preallocated region. 
*/ if (ret == -ENOENT || ret == -EFBIG) ret = 0; - private = 0; + csum = 0; goto out; } - memcpy((char *)&private, &item->csum, BTRFS_CRC32_SIZE); - set_state_private(em_tree, start, private); + read_extent_buffer(path->nodes[0], &csum, (unsigned long)item, + BTRFS_CRC32_SIZE); + set_state_private(em_tree, start, csum); out: if (path) btrfs_free_path(path); @@ -165,21 +166,19 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) char *kaddr; u64 private; int ret; + struct btrfs_root *root = BTRFS_I(inode)->root; + u32 csum = ~(u32)0; ret = get_state_private(em_tree, start, &private); kaddr = kmap_atomic(page, KM_IRQ0); if (ret) { goto zeroit; } - /* - struct btrfs_root *root = BTRFS_I(inode)->root; - char csum[BTRFS_CRC32_SIZE]; - ret = btrfs_csum_data(root, kaddr + offset, end - start + 1, csum); - BUG_ON(ret); - if (memcmp(csum, &private, BTRFS_CRC32_SIZE)) { + csum = btrfs_csum_data(root, kaddr + offset, csum, end - start + 1); + btrfs_csum_final(csum, (char *)&csum); + if (csum != private) { goto zeroit; } - */ kunmap_atomic(kaddr, KM_IRQ0); return 0; -- cgit v1.2.3-70-g09d2 From ae2f5411c4ce7180cca8418853db50c8e52d40db Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 19 Oct 2007 09:22:59 -0400 Subject: btrfs: 32-bit type problems An assorted set of casts to get rid of the warnings on 32-bit archs. 
Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 2 +- fs/btrfs/extent-tree.c | 8 ++++---- fs/btrfs/extent_map.c | 6 +++--- fs/btrfs/file.c | 2 +- fs/btrfs/inode.c | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 54967099a8eb..17a322663f3b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1389,7 +1389,7 @@ int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf) ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems); if (ret < 0) { printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n", - ret, BTRFS_LEAF_DATA_SIZE(root), + ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root), leaf_space_used(leaf, 0, nritems), nritems); } return ret; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6b2f2b414fa1..7de7707d939c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -149,7 +149,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct if (ret) return NULL; - block_group = (struct btrfs_block_group_cache *)ptr; + block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr; if (block_group->key.objectid <= bytenr && bytenr <= @@ -279,7 +279,7 @@ again: if (ret) break; - cache = (struct btrfs_block_group_cache *)ptr; + cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; last = cache->key.objectid + cache->key.offset; used = btrfs_block_group_used(&cache->item); @@ -537,7 +537,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, if (ret) break; - cache = (struct btrfs_block_group_cache *)ptr; + cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; err = write_one_cache_group(trans, root, path, cache); /* @@ -1541,7 +1541,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) found_key.objectid + found_key.offset - 1, bit | EXTENT_LOCKED, GFP_NOFS); set_state_private(block_group_cache, found_key.objectid, - (u64)cache); + 
(unsigned long)cache); if (key.objectid >= btrfs_super_total_bytes(&info->super_copy)) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index e87e476dca92..89d0ebd0ea83 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -2654,8 +2654,8 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - src_off_in_page)); - cur = min(cur, (unsigned long)(PAGE_CACHE_SIZE - - dst_off_in_page)); + cur = min_t(unsigned long, cur, + (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page)); copy_pages(extent_buffer_page(dst, dst_i), extent_buffer_page(dst, src_i), @@ -2707,7 +2707,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, if (dst_i == 0) dst_off_in_page += start_offset; - cur = min(len, src_off_in_page + 1); + cur = min_t(unsigned long, len, src_off_in_page + 1); cur = min(cur, dst_off_in_page + 1); move_pages(extent_buffer_page(dst, dst_i), extent_buffer_page(dst, src_i), diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 4aacf99bd97f..96df1b10cb60 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -188,7 +188,7 @@ insert: while (size > 0) { page = pages[i]; kaddr = kmap_atomic(page, KM_USER0); - cur_size = min(PAGE_CACHE_SIZE - page_offset, size); + cur_size = min_t(size_t, PAGE_CACHE_SIZE - page_offset, size); write_extent_buffer(leaf, kaddr + page_offset, ptr, cur_size); kunmap_atomic(kaddr, KM_USER0); page_offset = 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ac69f6926afc..0e1ca423e22d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1606,7 +1606,7 @@ again: extent_start; ptr = btrfs_file_extent_inline_start(item) + extent_offset; map = kmap(page); - copy_size = min(PAGE_CACHE_SIZE - page_offset, + copy_size = min_t(u64, PAGE_CACHE_SIZE - page_offset, size - extent_offset); em->block_start = EXTENT_MAP_INLINE; -- cgit v1.2.3-70-g09d2 From 0a2118dfd4c328e8823cdcfebf839dff73ac21e9 Mon Sep 17 00:00:00 2001 From: Jens 
Axboe Date: Fri, 19 Oct 2007 09:23:05 -0400 Subject: Btrfs: Fix bi_end_io() functions on > 2.6.23 kernels It now returns void and it is never called for partial completions, so the bio->bi_size check must go. Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 89d0ebd0ea83..c0f834563ced 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "extent_map.h" /* temporary define until extent_map moves out of btrfs */ @@ -1274,8 +1275,12 @@ static int check_page_writeback(struct extent_map_tree *tree, * Scheduling is not allowed, so the extent state tree is expected * to have one and only one object corresponding to this IO. */ +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) +static void end_bio_extent_writepage(struct bio *bio, int err) +#else static int end_bio_extent_writepage(struct bio *bio, unsigned int bytes_done, int err) +#endif { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; @@ -1284,8 +1289,10 @@ static int end_bio_extent_writepage(struct bio *bio, u64 end; int whole_page; +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) if (bio->bi_size) return 1; +#endif do { struct page *page = bvec->bv_page; @@ -1316,7 +1323,9 @@ static int end_bio_extent_writepage(struct bio *bio, } while (bvec >= bio->bi_io_vec); bio_put(bio); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) return 0; +#endif } /* @@ -1330,8 +1339,12 @@ static int end_bio_extent_writepage(struct bio *bio, * Scheduling is not allowed, so the extent state tree is expected * to have one and only one object corresponding to this IO. 
*/ +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) +static void end_bio_extent_readpage(struct bio *bio, int err) +#else static int end_bio_extent_readpage(struct bio *bio, unsigned int bytes_done, int err) +#endif { int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; @@ -1341,8 +1354,10 @@ static int end_bio_extent_readpage(struct bio *bio, int whole_page; int ret; +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) if (bio->bi_size) return 1; +#endif do { struct page *page = bvec->bv_page; @@ -1382,7 +1397,9 @@ static int end_bio_extent_readpage(struct bio *bio, } while (bvec >= bio->bi_io_vec); bio_put(bio); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) return 0; +#endif } /* @@ -1390,8 +1407,12 @@ static int end_bio_extent_readpage(struct bio *bio, * the structs in the extent tree when done, and set the uptodate bits * as appropriate. */ +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) +static void end_bio_extent_preparewrite(struct bio *bio, int err) +#else static int end_bio_extent_preparewrite(struct bio *bio, unsigned int bytes_done, int err) +#endif { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; @@ -1399,8 +1420,10 @@ static int end_bio_extent_preparewrite(struct bio *bio, u64 start; u64 end; +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) if (bio->bi_size) return 1; +#endif do { struct page *page = bvec->bv_page; @@ -1422,7 +1445,9 @@ static int end_bio_extent_preparewrite(struct bio *bio, } while (bvec >= bio->bi_io_vec); bio_put(bio); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) return 0; +#endif } static int submit_extent_page(int rw, struct extent_map_tree *tree, -- cgit v1.2.3-70-g09d2 From 59d169e2b3d69ba0beca0fa0936789080c8d61a9 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 19 Oct 2007 09:23:09 -0400 Subject: Btrfs: Fix read/write_extent_buffer to use KM_USER1 instead of KM_USER0 This avoids 
recursive use of KM_USER0 during btrfs_file_write Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index c0f834563ced..3c81f5eab155 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -2378,9 +2378,9 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, WARN_ON(!PageUptodate(page)); cur = min(len, (PAGE_CACHE_SIZE - offset)); - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page, KM_USER1); memcpy(dst, kaddr + offset, cur); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr, KM_USER1); dst += cur; len -= cur; @@ -2523,9 +2523,9 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv, WARN_ON(!PageUptodate(page)); cur = min(len, PAGE_CACHE_SIZE - offset); - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page, KM_USER1); memcpy(kaddr + offset, src, cur); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr, KM_USER1); src += cur; len -= cur; -- cgit v1.2.3-70-g09d2 From 3685f791659c9f21b763ee1702ac8ca58bc20f81 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 19 Oct 2007 09:23:27 -0400 Subject: Btrfs: CPU usage optimizations in push and the extent_map code Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 44 ++++++++++++++++++++++++-------------------- fs/btrfs/extent_map.c | 45 +++++++++++++++------------------------------ 2 files changed, 39 insertions(+), 50 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 17a322663f3b..e8466940fa30 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1588,6 +1588,11 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root if (!path->nodes[1]) return 1; + right_nritems = btrfs_header_nritems(right); + if (right_nritems == 0) { + return 1; + } + left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1], slot - 
1), root->leafsize); free_space = btrfs_leaf_free_space(root, left); @@ -1604,18 +1609,13 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root free_extent_buffer(left); return 1; } + free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { free_extent_buffer(left); return 1; } - right_nritems = btrfs_header_nritems(right); - if (right_nritems == 0) { - free_extent_buffer(left); - return 1; - } - for (i = 0; i < right_nritems - 1; i++) { item = btrfs_item_nr(right, i); if (!right->map_token) { @@ -1772,21 +1772,25 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_disk_key disk_key; /* first try to make some room by pushing left and right */ - wret = push_leaf_left(trans, root, path, data_size); - if (wret < 0) { - return wret; - } - if (wret) { + if (ins_key->type != BTRFS_DIR_ITEM_KEY) { wret = push_leaf_right(trans, root, path, data_size); - if (wret < 0) + if (wret < 0) { return wret; - } - l = path->nodes[0]; + } + if (wret) { + wret = push_leaf_left(trans, root, path, data_size); + if (wret < 0) + return wret; + } + l = path->nodes[0]; - /* did the pushes work? */ - if (btrfs_leaf_free_space(root, l) >= - sizeof(struct btrfs_item) + data_size) { - return 0; + /* did the pushes work? 
*/ + if (btrfs_leaf_free_space(root, l) >= + sizeof(struct btrfs_item) + data_size) { + return 0; + } + } else { + l = path->nodes[0]; } if (!path->nodes[1]) { @@ -2388,13 +2392,13 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, slot = path->slots[1]; extent_buffer_get(leaf); - wret = push_leaf_left(trans, root, path, 1); + wret = push_leaf_right(trans, root, path, 1); if (wret < 0 && wret != -ENOSPC) ret = wret; if (path->nodes[0] == leaf && btrfs_header_nritems(leaf)) { - wret = push_leaf_right(trans, root, path, 1); + wret = push_leaf_left(trans, root, path, 1); if (wret < 0 && wret != -ENOSPC) ret = wret; } diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 3c81f5eab155..caaf0bf0e059 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1986,12 +1986,15 @@ static inline struct page *extent_buffer_page(struct extent_buffer *eb, unsigned long i) { struct page *p; + struct address_space *mapping; if (i == 0) return eb->first_page; i += eb->start >> PAGE_CACHE_SHIFT; - p = find_get_page(eb->first_page->mapping, i); - page_cache_release(p); + mapping = eb->first_page->mapping; + read_lock_irq(&mapping->tree_lock); + p = radix_tree_lookup(&mapping->page_tree, i); + read_unlock_irq(&mapping->tree_lock); return p; } @@ -2365,9 +2368,7 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); - if (i == 0) - offset += start_offset; + offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); while(len > 0) { page = extent_buffer_page(eb, i); @@ -2475,9 +2476,7 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); - if (i == 0) - offset += start_offset; + offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); 
while(len > 0) { page = extent_buffer_page(eb, i); @@ -2514,9 +2513,7 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); - if (i == 0) - offset += start_offset; + offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); while(len > 0) { page = extent_buffer_page(eb, i); @@ -2548,9 +2545,7 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); - if (i == 0) - offset += start_offset; + offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); while(len > 0) { page = extent_buffer_page(eb, i); @@ -2582,9 +2577,8 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, WARN_ON(src->len != dst_len); - offset = dst_offset & ((unsigned long)PAGE_CACHE_SIZE - 1); - if (i == 0) - offset += start_offset; + offset = (start_offset + dst_offset) & + ((unsigned long)PAGE_CACHE_SIZE - 1); while(len > 0) { page = extent_buffer_page(dst, i); @@ -2664,19 +2658,14 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, } while(len > 0) { - dst_off_in_page = dst_offset & + dst_off_in_page = (start_offset + dst_offset) & ((unsigned long)PAGE_CACHE_SIZE - 1); - src_off_in_page = src_offset & + src_off_in_page = (start_offset + src_offset) & ((unsigned long)PAGE_CACHE_SIZE - 1); dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT; - if (src_i == 0) - src_off_in_page += start_offset; - if (dst_i == 0) - dst_off_in_page += start_offset; - cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - src_off_in_page)); cur = min_t(unsigned long, cur, @@ -2723,14 +2712,10 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, dst_i = (start_offset + dst_end) >> 
PAGE_CACHE_SHIFT; src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; - dst_off_in_page = dst_end & + dst_off_in_page = (start_offset + dst_end) & ((unsigned long)PAGE_CACHE_SIZE - 1); - src_off_in_page = src_end & + src_off_in_page = (start_offset + src_end) & ((unsigned long)PAGE_CACHE_SIZE - 1); - if (src_i == 0) - src_off_in_page += start_offset; - if (dst_i == 0) - dst_off_in_page += start_offset; cur = min_t(unsigned long, len, src_off_in_page + 1); cur = min(cur, dst_off_in_page + 1); -- cgit v1.2.3-70-g09d2 From ff190c0c004d8e51195c7bcf5a8490aeefccbce0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 19 Oct 2007 10:39:41 -0400 Subject: Btrfs: Avoid recursive KM_USER1 mappings in copy_extent_buffer Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index caaf0bf0e059..a61379230dd2 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -2586,9 +2586,9 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); - kaddr = kmap_atomic(page, KM_USER1); + kaddr = kmap_atomic(page, KM_USER0); read_extent_buffer(src, kaddr + offset, src_offset, cur); - kunmap_atomic(kaddr, KM_USER1); + kunmap_atomic(kaddr, KM_USER0); src_offset += cur; len -= cur; -- cgit v1.2.3-70-g09d2 From 65555a06b4d1ae116ce223dc4b82d6068b36df96 Mon Sep 17 00:00:00 2001 From: Yan Date: Thu, 25 Oct 2007 15:42:57 -0400 Subject: Btrfs: Off by one fixes in extent_map.c Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 10 +++++----- fs/btrfs/inode.c | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index a61379230dd2..c44989a1e520 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -2045,7 +2045,7 @@ struct extent_buffer 
*alloc_extent_buffer(struct extent_map_tree *tree, struct extent_buffer *eb; struct page *p; struct address_space *mapping = tree->mapping; - int uptodate = 0; + int uptodate = 1; eb = __alloc_extent_buffer(tree, start, len, mask); if (!eb || IS_ERR(eb)) @@ -2197,7 +2197,7 @@ int clear_extent_buffer_dirty(struct extent_map_tree *tree, */ if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || ((i == num_pages - 1) && - ((eb->start + eb->len - 1) & (PAGE_CACHE_SIZE - 1)))) { + ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { start = page->index << PAGE_CACHE_SHIFT; end = start + PAGE_CACHE_SIZE - 1; if (test_range_bit(tree, start, end, @@ -2265,7 +2265,7 @@ int set_extent_buffer_uptodate(struct extent_map_tree *tree, page = extent_buffer_page(eb, i); if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || ((i == num_pages - 1) && - ((eb->start + eb->len - 1) & (PAGE_CACHE_SIZE - 1)))) { + ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { check_page_uptodate(tree, page); continue; } @@ -2401,7 +2401,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, struct page *p; size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; - unsigned long end_i = (start_offset + start + min_len) >> + unsigned long end_i = (start_offset + start + min_len - 1) >> PAGE_CACHE_SHIFT; if (i != end_i) @@ -2414,7 +2414,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, offset = 0; *map_start = (i << PAGE_CACHE_SHIFT) - start_offset; } - if (start + min_len >= eb->len) { + if (start + min_len > eb->len) { printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len); WARN_ON(1); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7efc0470abf1..c07433072322 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -616,6 +616,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, break; } 
btrfs_set_key_type(&key, found_type); + btrfs_release_path(root, path); continue; } if (found_key.offset >= inode->i_size) -- cgit v1.2.3-70-g09d2 From c67cda1758130f3e437744c4c2ec4a7cc25564e3 Mon Sep 17 00:00:00 2001 From: Yan Date: Mon, 29 Oct 2007 11:41:05 -0400 Subject: Btrfs: Fix extent_map leak in extent_bmap Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index c44989a1e520..e4ee980cc138 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1924,6 +1924,7 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock, struct inode *inode = mapping->host; u64 start = iblock << inode->i_blkbits; u64 end = start + (1 << inode->i_blkbits) - 1; + sector_t sector = 0; struct extent_map *em; em = get_extent(inode, NULL, 0, start, end, 0); @@ -1932,9 +1933,12 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock, if (em->block_start == EXTENT_MAP_INLINE || em->block_start == EXTENT_MAP_HOLE) - return 0; + goto out; - return (em->block_start + start - em->start) >> inode->i_blkbits; + sector = (em->block_start + start - em->start) >> inode->i_blkbits; +out: + free_extent_map(em); + return sector; } static int add_lru(struct extent_map_tree *tree, struct extent_buffer *eb) -- cgit v1.2.3-70-g09d2 From 35ebb934bd7fcc7ca991b155b7980c3c4ff9f1a5 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 30 Oct 2007 16:56:53 -0400 Subject: Btrfs: Fix PAGE_CACHE_SHIFT shifts on 32 bit machines Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent_map.c | 27 +++++++++++++++------------ fs/btrfs/file.c | 3 ++- fs/btrfs/inode.c | 14 +++++++------- fs/btrfs/transaction.c | 2 +- 5 files changed, 26 insertions(+), 22 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5a7e60d7f443..8935eec31ee2 100644 --- 
a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -159,7 +159,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, int csum_dirty_buffer(struct btrfs_root *root, struct page *page) { struct extent_map_tree *tree; - u64 start = page->index << PAGE_CACHE_SHIFT; + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 found_start; int found_level; unsigned long len; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index e4ee980cc138..238cb1d81d56 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1229,7 +1229,7 @@ EXPORT_SYMBOL(test_range_bit); static int check_page_uptodate(struct extent_map_tree *tree, struct page *page) { - u64 start = page->index << PAGE_CACHE_SHIFT; + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 end = start + PAGE_CACHE_SIZE - 1; if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1)) SetPageUptodate(page); @@ -1243,7 +1243,7 @@ static int check_page_uptodate(struct extent_map_tree *tree, static int check_page_locked(struct extent_map_tree *tree, struct page *page) { - u64 start = page->index << PAGE_CACHE_SHIFT; + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 end = start + PAGE_CACHE_SIZE - 1; if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0)) unlock_page(page); @@ -1257,7 +1257,7 @@ static int check_page_locked(struct extent_map_tree *tree, static int check_page_writeback(struct extent_map_tree *tree, struct page *page) { - u64 start = page->index << PAGE_CACHE_SHIFT; + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 end = start + PAGE_CACHE_SIZE - 1; if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0)) end_page_writeback(page); @@ -1296,7 +1296,8 @@ static int end_bio_extent_writepage(struct bio *bio, do { struct page *page = bvec->bv_page; - start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; + start = ((u64)page->index << PAGE_CACHE_SHIFT) + + bvec->bv_offset; end = start + bvec->bv_len - 1; if (bvec->bv_offset == 0 && bvec->bv_len == 
PAGE_CACHE_SIZE) @@ -1361,7 +1362,8 @@ static int end_bio_extent_readpage(struct bio *bio, do { struct page *page = bvec->bv_page; - start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; + start = ((u64)page->index << PAGE_CACHE_SHIFT) + + bvec->bv_offset; end = start + bvec->bv_len - 1; if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) @@ -1427,7 +1429,8 @@ static int end_bio_extent_preparewrite(struct bio *bio, do { struct page *page = bvec->bv_page; - start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; + start = ((u64)page->index << PAGE_CACHE_SHIFT) + + bvec->bv_offset; end = start + bvec->bv_len - 1; if (--bvec >= bio->bi_io_vec) @@ -1503,7 +1506,7 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page, get_extent_t *get_extent) { struct inode *inode = page->mapping->host; - u64 start = page->index << PAGE_CACHE_SHIFT; + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 page_end = start + PAGE_CACHE_SIZE - 1; u64 end; u64 cur = start; @@ -1608,7 +1611,7 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, struct writeback_control *wbc) { struct inode *inode = page->mapping->host; - u64 start = page->index << PAGE_CACHE_SHIFT; + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 page_end = start + PAGE_CACHE_SIZE - 1; u64 end; u64 cur = start; @@ -1750,7 +1753,7 @@ EXPORT_SYMBOL(extent_write_full_page); int extent_invalidatepage(struct extent_map_tree *tree, struct page *page, unsigned long offset) { - u64 start = (page->index << PAGE_CACHE_SHIFT); + u64 start = ((u64)page->index << PAGE_CACHE_SHIFT); u64 end = start + PAGE_CACHE_SIZE - 1; size_t blocksize = page->mapping->host->i_sb->s_blocksize; @@ -1792,7 +1795,7 @@ int extent_prepare_write(struct extent_map_tree *tree, struct inode *inode, struct page *page, unsigned from, unsigned to, get_extent_t *get_extent) { - u64 page_start = page->index << PAGE_CACHE_SHIFT; + u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; u64 
page_end = page_start + PAGE_CACHE_SIZE - 1; u64 block_start; u64 orig_block_start; @@ -1890,7 +1893,7 @@ EXPORT_SYMBOL(extent_prepare_write); int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page) { struct extent_map *em; - u64 start = page->index << PAGE_CACHE_SHIFT; + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 end = start + PAGE_CACHE_SIZE - 1; u64 orig_start = start; int ret = 1; @@ -2202,7 +2205,7 @@ int clear_extent_buffer_dirty(struct extent_map_tree *tree, if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || ((i == num_pages - 1) && ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { - start = page->index << PAGE_CACHE_SHIFT; + start = (u64)page->index << PAGE_CACHE_SHIFT; end = start + PAGE_CACHE_SIZE - 1; if (test_range_bit(tree, start, end, EXTENT_DIRTY, 0)) { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 843e920388ed..abdd9caad94e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -301,7 +301,8 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, SetPageUptodate(p); set_page_dirty(p); } - last_end = pages[num_pages -1]->index << PAGE_CACHE_SHIFT; + last_end = (u64)(pages[num_pages -1]->index) << + PAGE_CACHE_SHIFT; last_end += PAGE_CACHE_SIZE - 1; set_extent_delalloc(em_tree, start_pos, end_of_last_block, GFP_NOFS); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0ba1cdbdf112..5d10b64e42df 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -111,7 +111,7 @@ int btrfs_writepage_io_hook(struct page *page, u64 start, u64 end) struct btrfs_trans_handle *trans; char *kaddr; int ret; - u64 page_start = page->index << PAGE_CACHE_SHIFT; + u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; size_t offset = start - page_start; mutex_lock(&root->fs_info->fs_mutex); @@ -160,7 +160,7 @@ out: int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) { - size_t offset = start - (page->index << PAGE_CACHE_SHIFT); + size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT); 
struct inode *inode = page->mapping->host; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; char *kaddr; @@ -688,7 +688,7 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page, char *kaddr; int ret = 0; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - u64 page_start = page->index << PAGE_CACHE_SHIFT; + u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; u64 page_end = page_start + PAGE_CACHE_SIZE - 1; set_page_extent_mapped(page); @@ -739,7 +739,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) goto out; } } - page_start = page->index << PAGE_CACHE_SHIFT; + page_start = (u64)page->index << PAGE_CACHE_SHIFT; ret = btrfs_cow_one_page(inode, page, offset); @@ -1618,7 +1618,7 @@ again: goto out; } - extent_offset = (page->index << PAGE_CACHE_SHIFT) - + extent_offset = ((u64)page->index << PAGE_CACHE_SHIFT) - extent_start + page_offset; copy_size = min_t(u64, PAGE_CACHE_SIZE - page_offset, size - extent_offset); @@ -1769,7 +1769,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) lock_page(page); wait_on_page_writeback(page); size = i_size_read(inode); - page_start = page->index << PAGE_CACHE_SHIFT; + page_start = (u64)page->index << PAGE_CACHE_SHIFT; if ((page->mapping != inode->i_mapping) || (page_start > size)) { @@ -2065,7 +2065,7 @@ int btrfs_defrag_file(struct file *file) { goto out_unlock; } } - page_start = page->index << PAGE_CACHE_SHIFT; + page_start = (u64)page->index << PAGE_CACHE_SHIFT; page_end = page_start + PAGE_CACHE_SIZE - 1; lock_extent(em_tree, page_start, page_end, GFP_NOFS); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index bdfe05cf26c6..4af1c0dab7f8 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -169,7 +169,7 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, clear_extent_dirty(dirty_pages, start, end, GFP_NOFS); while(start <= end) { index = start >> PAGE_CACHE_SHIFT; - start = 
(index + 1) << PAGE_CACHE_SHIFT; + start = (u64)(index + 1) << PAGE_CACHE_SHIFT; page = find_lock_page(btree_inode->i_mapping, index); if (!page) continue; -- cgit v1.2.3-70-g09d2 From 179e29e488cc74f1e9bd67bc45f70b832740e9ec Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 1 Nov 2007 11:28:41 -0400 Subject: Btrfs: Fix a number of inline extent problems that Yan Zheng reported. The fixes do a number of things: 1) Most btrfs_drop_extent callers will try to leave the inline extents in place. It can truncate bytes off the beginning of the inline extent if required. 2) writepage can now update the inline extent, allowing mmap writes to go directly into the inline extent. 3) btrfs_truncate_in_transaction truncates inline extents 4) extent_map.c fixed to not merge inline extent mappings and hole mappings together Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 52 ++++++++++++++++++++++++++++++++++----- fs/btrfs/ctree.h | 2 +- fs/btrfs/dir-item.c | 2 +- fs/btrfs/extent_map.c | 11 ++++++--- fs/btrfs/extent_map.h | 1 - fs/btrfs/file-item.c | 2 +- fs/btrfs/file.c | 36 ++++++++++++++++++++++----- fs/btrfs/inode.c | 67 ++++++++++++++++++++++++++++++++++++--------------- 8 files changed, 135 insertions(+), 38 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 516b90224a1b..eef9c92f86d5 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1930,7 +1930,7 @@ again: int btrfs_truncate_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, - u32 new_size) + u32 new_size, int from_end) { int ret = 0; int slot; @@ -1946,13 +1946,17 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, slot_orig = path->slots[0]; leaf = path->nodes[0]; + slot = path->slots[0]; + + old_size = btrfs_item_size_nr(leaf, slot); + if (old_size == new_size) + return 0; nritems = btrfs_header_nritems(leaf); data_end = leaf_data_end(root, leaf); - slot = path->slots[0]; old_data_start = 
btrfs_item_offset_nr(leaf, slot); - old_size = btrfs_item_size_nr(leaf, slot); BUG_ON(old_size <= new_size); + size_diff = old_size - new_size; BUG_ON(slot < 0); @@ -1984,9 +1988,45 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, } /* shift the data */ - memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + - data_end + size_diff, btrfs_leaf_data(leaf) + - data_end, old_data_start + new_size - data_end); + if (from_end) { + memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + + data_end + size_diff, btrfs_leaf_data(leaf) + + data_end, old_data_start + new_size - data_end); + } else { + struct btrfs_disk_key disk_key; + u64 offset; + + btrfs_item_key(leaf, &disk_key, slot); + + if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) { + unsigned long ptr; + struct btrfs_file_extent_item *fi; + + fi = btrfs_item_ptr(leaf, slot, + struct btrfs_file_extent_item); + fi = (struct btrfs_file_extent_item *)( + (unsigned long)fi - size_diff); + + if (btrfs_file_extent_type(leaf, fi) == + BTRFS_FILE_EXTENT_INLINE) { + ptr = btrfs_item_ptr_offset(leaf, slot); + memmove_extent_buffer(leaf, ptr, + (unsigned long)fi, + offsetof(struct btrfs_file_extent_item, + disk_bytenr)); + } + } + + memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + + data_end + size_diff, btrfs_leaf_data(leaf) + + data_end, old_data_start - data_end); + + offset = btrfs_disk_key_offset(&disk_key); + btrfs_set_disk_key_offset(&disk_key, offset + size_diff); + btrfs_set_item_key(leaf, &disk_key, slot); + if (slot == 0) + fixup_low_keys(trans, root, path, &disk_key, 1); + } item = btrfs_item_nr(leaf, slot); btrfs_set_item_size(leaf, item, new_size); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 70e143b13577..d82afb618bf1 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -907,7 +907,7 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_truncate_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, - u32 
new_size); + u32 new_size, int from_end); int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 6f19de41b878..514a1dc337a8 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -249,7 +249,7 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, item_len - (ptr + sub_item_len - start)); ret = btrfs_truncate_item(trans, root, path, - item_len - sub_item_len); + item_len - sub_item_len, 1); } return 0; } diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 238cb1d81d56..44be9cfd30ee 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -263,7 +263,12 @@ int add_extent_mapping(struct extent_map_tree *tree, if (prev && prev->end + 1 == em->start && ((em->block_start == EXTENT_MAP_HOLE && prev->block_start == EXTENT_MAP_HOLE) || - (em->block_start == prev->block_end + 1))) { + (em->block_start == EXTENT_MAP_INLINE && + prev->block_start == EXTENT_MAP_INLINE) || + (em->block_start == EXTENT_MAP_DELALLOC && + prev->block_start == EXTENT_MAP_DELALLOC) || + (em->block_start < EXTENT_MAP_DELALLOC - 1 && + em->block_start == prev->block_end + 1))) { em->start = prev->start; em->block_start = prev->block_start; rb_erase(&prev->rb_node, &tree->map); @@ -1618,13 +1623,13 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, u64 extent_offset; u64 last_byte = i_size_read(inode); u64 block_start; + u64 iosize; sector_t sector; struct extent_map *em; struct block_device *bdev; int ret; int nr = 0; size_t page_offset = 0; - size_t iosize; size_t blocksize; loff_t i_size = i_size_read(inode); unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; @@ -1684,7 +1689,7 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, clear_extent_dirty(tree, cur, page_end, GFP_NOFS); break; } - em = 
get_extent(inode, page, page_offset, cur, end, 0); + em = get_extent(inode, page, page_offset, cur, end, 1); if (IS_ERR(em) || !em) { SetPageError(page); break; diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 39d78d31b6a2..b6f6519f9ba3 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -24,7 +24,6 @@ * page->private values. Every page that is controlled by the extent * map has page->private set to one. */ - #define EXTENT_PAGE_PRIVATE 1 #define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3 diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 7eb9a5412e2f..614176e52851 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -291,7 +291,7 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, new_item_size = blocks * BTRFS_CRC32_SIZE; if (new_item_size >= btrfs_item_size_nr(leaf, slot)) return 0; - ret = btrfs_truncate_item(trans, root, path, new_item_size); + ret = btrfs_truncate_item(trans, root, path, new_item_size, 1); BUG_ON(ret); return ret; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index abdd9caad94e..4e52f7ec1cbe 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -115,8 +115,20 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, goto fail; } if (ret == 1) { + struct btrfs_key found_key; + + if (path->slots[0] == 0) + goto insert; + path->slots[0]--; leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + + if (found_key.objectid != inode->i_ino) + goto insert; + + if (found_key.type != BTRFS_EXTENT_DATA_KEY) + goto insert; ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); @@ -152,6 +164,7 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, root, &key, path, offset + size - found_end, 1); BUG_ON(ret != 0); + ret = btrfs_extend_item(trans, root, path, offset + size - found_end); if (ret) { @@ -292,7 +305,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, */ inline_size = 
end_pos; if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) || - inline_size > 8192 || + inline_size > 32768 || inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { u64 last_end; @@ -312,7 +325,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, aligned_end = (pos + write_bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); err = btrfs_drop_extents(trans, root, inode, start_pos, - aligned_end, end_pos, &hint_byte); + aligned_end, aligned_end, &hint_byte); if (err) goto failed; err = insert_inline_extent(trans, root, inode, start_pos, @@ -456,13 +469,15 @@ next_slot: goto next_slot; } - /* FIXME, there's only one inline extent allowed right now */ if (found_inline) { u64 mask = root->sectorsize - 1; search_start = (extent_end + mask) & ~mask; } else search_start = extent_end; + if (end <= extent_end && start >= key.offset && found_inline) { + *hint_byte = EXTENT_MAP_INLINE; + } if (end < extent_end && end >= key.offset) { if (found_extent) { u64 disk_bytenr = @@ -479,8 +494,10 @@ next_slot: BUG_ON(ret); } } - if (!found_inline) - bookend = 1; + bookend = 1; + if (found_inline && start <= key.offset && + inline_end < extent_end) + keep = 1; } /* truncate existing extent */ if (start > key.offset) { @@ -510,7 +527,7 @@ next_slot: new_size = btrfs_file_extent_calc_inline_size( inline_end - key.offset); btrfs_truncate_item(trans, root, path, - new_size); + new_size, 1); } } /* delete the entire extent */ @@ -551,6 +568,13 @@ next_slot: if (!bookend) continue; } + if (bookend && found_inline && start <= key.offset && + inline_end < extent_end) { + u32 new_size; + new_size = btrfs_file_extent_calc_inline_size( + extent_end - inline_end); + btrfs_truncate_item(trans, root, path, new_size, 0); + } /* create bookend, splitting the extent in two */ if (bookend && found_extent) { struct btrfs_key ins; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5d10b64e42df..0c65141b9993 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -89,6 +89,9 @@ 
static int run_delalloc_range(struct inode *inode, u64 start, u64 end) ret = btrfs_drop_extents(trans, root, inode, start, start + num_bytes, start, &alloc_hint); + if (alloc_hint == EXTENT_MAP_INLINE) + goto out; + ret = btrfs_alloc_extent(trans, root, inode->i_ino, num_bytes, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) { @@ -558,6 +561,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, u64 item_end = 0; int found_extent; int del_item; + int extent_type = -1; btrfs_drop_extent_cache(inode, inode->i_size, (u64)-1); path = btrfs_alloc_path(); @@ -597,10 +601,15 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, if (found_type == BTRFS_EXTENT_DATA_KEY) { fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - if (btrfs_file_extent_type(leaf, fi) != - BTRFS_FILE_EXTENT_INLINE) { + extent_type = btrfs_file_extent_type(leaf, fi); + if (extent_type != BTRFS_FILE_EXTENT_INLINE) { item_end += btrfs_file_extent_num_bytes(leaf, fi); + } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { + struct btrfs_item *item = btrfs_item_nr(leaf, + path->slots[0]); + item_end += btrfs_file_extent_inline_len(leaf, + item); } } if (found_type == BTRFS_CSUM_ITEM_KEY) { @@ -608,7 +617,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, inode->i_size); BUG_ON(ret); } - if (item_end < inode->i_size) { + if (item_end <= inode->i_size) { if (found_type == BTRFS_DIR_ITEM_KEY) { found_type = BTRFS_INODE_ITEM_KEY; } else if (found_type == BTRFS_EXTENT_ITEM_KEY) { @@ -629,9 +638,10 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, found_extent = 0; /* FIXME, shrink the extent if the ref count is only 1 */ - if (found_type == BTRFS_EXTENT_DATA_KEY && - btrfs_file_extent_type(leaf, fi) != - BTRFS_FILE_EXTENT_INLINE) { + if (found_type != BTRFS_EXTENT_DATA_KEY) + goto delete; + + if (extent_type != BTRFS_FILE_EXTENT_INLINE) { u64 num_dec; extent_start = btrfs_file_extent_disk_bytenr(leaf, fi); if 
(!del_item) { @@ -659,7 +669,15 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, inode->i_blocks -= num_dec; } } + } else if (extent_type == BTRFS_FILE_EXTENT_INLINE && + !del_item) { + u32 newsize = inode->i_size - found_key.offset; + newsize = btrfs_file_extent_calc_inline_size(newsize); + ret = btrfs_truncate_item(trans, root, path, + newsize, 1); + BUG_ON(ret); } +delete: if (del_item) { ret = btrfs_del_item(trans, root, path); if (ret) @@ -769,7 +787,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) u64 pos = (inode->i_size + mask) & ~mask; u64 block_end = attr->ia_size | mask; u64 hole_size; - u64 alloc_hint; + u64 alloc_hint = 0; if (attr->ia_size <= pos) goto out; @@ -786,8 +804,11 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) pos, pos + hole_size, pos, &alloc_hint); - err = btrfs_insert_file_extent(trans, root, inode->i_ino, - pos, 0, 0, hole_size); + if (alloc_hint != EXTENT_MAP_INLINE) { + err = btrfs_insert_file_extent(trans, root, + inode->i_ino, + pos, 0, 0, hole_size); + } btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); unlock_extent(em_tree, pos, block_end, GFP_NOFS); @@ -1531,8 +1552,8 @@ again: em->end = EXTENT_MAP_HOLE; } em->bdev = inode->i_sb->s_bdev; - ret = btrfs_lookup_file_extent(NULL, root, path, - objectid, start, 0); + ret = btrfs_lookup_file_extent(trans, root, path, + objectid, start, trans != NULL); if (ret < 0) { err = ret; goto out; @@ -1627,15 +1648,23 @@ again: ((u64)root->sectorsize -1); map = kmap(page); ptr = btrfs_file_extent_inline_start(item) + extent_offset; - read_extent_buffer(leaf, map + page_offset, ptr, copy_size); - - if (em->start + copy_size <= em->end) { - size = min_t(u64, em->end + 1 - em->start, - PAGE_CACHE_SIZE - page_offset) - copy_size; - memset(map + page_offset + copy_size, 0, size); + if (create == 0 && !PageUptodate(page)) { + read_extent_buffer(leaf, map + page_offset, ptr, + copy_size); + 
flush_dcache_page(page); + } else if (create && PageUptodate(page)) { + if (!trans) { + kunmap(page); + free_extent_map(em); + em = NULL; + btrfs_release_path(root, path); + trans = btrfs_start_transaction(root, 1); + goto again; + } + write_extent_buffer(leaf, map + page_offset, ptr, + copy_size); + btrfs_mark_buffer_dirty(leaf); } - - flush_dcache_page(page); kunmap(page); set_extent_uptodate(em_tree, em->start, em->end, GFP_NOFS); goto insert; -- cgit v1.2.3-70-g09d2 From 944746ec7575258475958d858cdf3a0e4c8f7b9e Mon Sep 17 00:00:00 2001 From: Yan Date: Thu, 1 Nov 2007 11:28:42 -0400 Subject: Btrfs: small fixes for find_lock_delalloc_range. There is a 'finish_wait', but no 'prepare_to_wait' . So I think that the 'prepare_to_wait' is missing. The second change is according to the name of variable. Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 44be9cfd30ee..ff8881fb56d6 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1044,6 +1044,8 @@ search_again: if (state->state & EXTENT_LOCKED) { DEFINE_WAIT(wait); atomic_inc(&state->refs); + prepare_to_wait(&state->wq, &wait, + TASK_UNINTERRUPTIBLE); write_unlock_irq(&tree->lock); schedule(); write_lock_irq(&tree->lock); @@ -1059,7 +1061,7 @@ search_again: node = rb_next(node); if (!node) break; - total_bytes = state->end - state->start + 1; + total_bytes += state->end - state->start + 1; if (total_bytes >= max_bytes) break; } -- cgit v1.2.3-70-g09d2 From b293f02e1423f2099744f3ade23ddd83b65321fc Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 1 Nov 2007 19:45:34 -0400 Subject: Btrfs: Add writepages support Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 135 ++++++++++++++++++++++++++++++++++++++++---------- fs/btrfs/extent_map.h | 4 ++ fs/btrfs/inode.c | 10 ++++ 3 files changed, 124 insertions(+), 25 deletions(-) (limited to 
'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index ff8881fb56d6..b0677c84bb75 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "extent_map.h" /* temporary define until extent_map moves out of btrfs */ @@ -35,6 +36,12 @@ struct tree_entry { struct rb_node rb_node; }; +struct extent_page_data { + struct bio *bio; + struct extent_map_tree *tree; + get_extent_t *get_extent; +}; + void __init extent_map_init(void) { extent_map_cache = btrfs_cache_create("extent_map", @@ -1460,40 +1467,76 @@ static int end_bio_extent_preparewrite(struct bio *bio, #endif } -static int submit_extent_page(int rw, struct extent_map_tree *tree, - struct page *page, sector_t sector, - size_t size, unsigned long offset, - struct block_device *bdev, - bio_end_io_t end_io_func) +static struct bio * +extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, + gfp_t gfp_flags) { struct bio *bio; - int ret = 0; - bio = bio_alloc(GFP_NOIO, 1); + bio = bio_alloc(gfp_flags, nr_vecs); - bio->bi_sector = sector; - bio->bi_bdev = bdev; - bio->bi_io_vec[0].bv_page = page; - bio->bi_io_vec[0].bv_len = size; - bio->bi_io_vec[0].bv_offset = offset; - - bio->bi_vcnt = 1; - bio->bi_idx = 0; - bio->bi_size = size; + if (bio == NULL && (current->flags & PF_MEMALLOC)) { + while (!bio && (nr_vecs /= 2)) + bio = bio_alloc(gfp_flags, nr_vecs); + } - bio->bi_end_io = end_io_func; - bio->bi_private = tree; + if (bio) { + bio->bi_bdev = bdev; + bio->bi_sector = first_sector; + } + return bio; +} +static int submit_one_bio(int rw, struct bio *bio) +{ + int ret = 0; bio_get(bio); submit_bio(rw, bio); - if (bio_flagged(bio, BIO_EOPNOTSUPP)) ret = -EOPNOTSUPP; - bio_put(bio); return ret; } +static int submit_extent_page(int rw, struct extent_map_tree *tree, + struct page *page, sector_t sector, + size_t size, unsigned long offset, + struct block_device *bdev, + struct bio **bio_ret, + int 
max_pages, + bio_end_io_t end_io_func) +{ + int ret = 0; + struct bio *bio; + int nr; + + if (bio_ret && *bio_ret) { + bio = *bio_ret; + if (bio->bi_sector + (bio->bi_size >> 9) != sector || + bio_add_page(bio, page, size, offset) < size) { + ret = submit_one_bio(rw, bio); + bio = NULL; + } else { + return 0; + } + } + nr = min(max_pages, bio_get_nr_vecs(bdev)); + bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); + if (!bio) { + printk("failed to allocate bio nr %d\n", nr); + } + bio_add_page(bio, page, size, offset); + bio->bi_end_io = end_io_func; + bio->bi_private = tree; + if (bio_ret) { + *bio_ret = bio; + } else { + ret = submit_one_bio(rw, bio); + } + + return ret; +} + void set_page_extent_mapped(struct page *page) { if (!PagePrivate(page)) { @@ -1590,7 +1633,8 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page, if (!ret) { ret = submit_extent_page(READ, tree, page, sector, iosize, page_offset, - bdev, end_bio_extent_readpage); + bdev, NULL, 1, + end_bio_extent_readpage); } if (ret) SetPageError(page); @@ -1613,11 +1657,12 @@ EXPORT_SYMBOL(extent_read_full_page); * are found, they are marked writeback. 
Then the lock bits are removed * and the end_io handler clears the writeback ranges */ -int extent_write_full_page(struct extent_map_tree *tree, struct page *page, - get_extent_t *get_extent, - struct writeback_control *wbc) +static int __extent_writepage(struct page *page, struct writeback_control *wbc, + void *data) { struct inode *inode = page->mapping->host; + struct extent_page_data *epd = data; + struct extent_map_tree *tree = epd->tree; u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 page_end = start + PAGE_CACHE_SIZE - 1; u64 end; @@ -1691,7 +1736,7 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, clear_extent_dirty(tree, cur, page_end, GFP_NOFS); break; } - em = get_extent(inode, page, page_offset, cur, end, 1); + em = epd->get_extent(inode, page, page_offset, cur, end, 1); if (IS_ERR(em) || !em) { SetPageError(page); break; @@ -1734,9 +1779,12 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, if (ret) SetPageError(page); else { + unsigned long nr = end_index + 1; set_range_writeback(tree, cur, cur + iosize - 1); + ret = submit_extent_page(WRITE, tree, page, sector, iosize, page_offset, bdev, + &epd->bio, nr, end_bio_extent_writepage); if (ret) SetPageError(page); @@ -1750,8 +1798,44 @@ done: unlock_page(page); return 0; } + +int extent_write_full_page(struct extent_map_tree *tree, struct page *page, + get_extent_t *get_extent, + struct writeback_control *wbc) +{ + int ret; + struct extent_page_data epd = { + .bio = NULL, + .tree = tree, + .get_extent = get_extent, + }; + + ret = __extent_writepage(page, wbc, &epd); + if (epd.bio) + submit_one_bio(WRITE, epd.bio); + return ret; +} EXPORT_SYMBOL(extent_write_full_page); +int extent_writepages(struct extent_map_tree *tree, + struct address_space *mapping, + get_extent_t *get_extent, + struct writeback_control *wbc) +{ + int ret; + struct extent_page_data epd = { + .bio = NULL, + .tree = tree, + .get_extent = get_extent, + }; + + ret = 
write_cache_pages(mapping, wbc, __extent_writepage, &epd); + if (epd.bio) + submit_one_bio(WRITE, epd.bio); + return ret; +} +EXPORT_SYMBOL(extent_writepages); + /* * basic invalidatepage code, this waits on any locked or writeback * ranges corresponding to the page, and then deletes any extent state @@ -1869,6 +1953,7 @@ int extent_prepare_write(struct extent_map_tree *tree, EXTENT_LOCKED, 0, NULL, GFP_NOFS); ret = submit_extent_page(READ, tree, page, sector, iosize, page_offset, em->bdev, + NULL, 1, end_bio_extent_preparewrite); iocount++; block_start = block_start + iosize; diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index b6f6519f9ba3..0b0dcf19fe45 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -136,6 +136,10 @@ int extent_invalidatepage(struct extent_map_tree *tree, int extent_write_full_page(struct extent_map_tree *tree, struct page *page, get_extent_t *get_extent, struct writeback_control *wbc); +int extent_writepages(struct extent_map_tree *tree, + struct address_space *mapping, + get_extent_t *get_extent, + struct writeback_control *wbc); int extent_prepare_write(struct extent_map_tree *tree, struct inode *inode, struct page *page, unsigned from, unsigned to, get_extent_t *get_extent); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0c65141b9993..25f32d7c7ee8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1747,6 +1747,15 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc) return extent_write_full_page(tree, page, btrfs_get_extent, wbc); } + +static int btrfs_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct extent_map_tree *tree; + tree = &BTRFS_I(mapping->host)->extent_tree; + return extent_writepages(tree, mapping, btrfs_get_extent, wbc); +} + static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags) { struct extent_map_tree *tree; @@ -2526,6 +2535,7 @@ static struct extent_map_ops btrfs_extent_map_ops = { static struct 
address_space_operations btrfs_aops = { .readpage = btrfs_readpage, .writepage = btrfs_writepage, + .writepages = btrfs_writepages, .sync_page = block_sync_page, .prepare_write = btrfs_prepare_write, .commit_write = btrfs_commit_write, -- cgit v1.2.3-70-g09d2 From 09be207d1ba224531a61de9afdc07a125e45318c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 7 Nov 2007 21:08:16 -0500 Subject: Btrfs: Fix failure cleanups when allocating extent buffers fail Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 56 +++++++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 26 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index b0677c84bb75..c976615dcda3 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -2106,25 +2106,17 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_map_tree *tree, spin_lock(&tree->lru_lock); eb = find_lru(tree, start, len); - if (eb) { - goto lru_add; - } spin_unlock(&tree->lru_lock); - if (eb) { - memset(eb, 0, sizeof(*eb)); - } else { - eb = kmem_cache_zalloc(extent_buffer_cache, mask); + return eb; } + + eb = kmem_cache_zalloc(extent_buffer_cache, mask); INIT_LIST_HEAD(&eb->lru); eb->start = start; eb->len = len; atomic_set(&eb->refs, 1); - spin_lock(&tree->lru_lock); -lru_add: - add_lru(tree, eb); - spin_unlock(&tree->lru_lock); return eb; } @@ -2151,7 +2143,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, return NULL; if (eb->flags & EXTENT_BUFFER_FILLED) - return eb; + goto lru_add; if (page0) { eb->first_page = page0; @@ -2169,11 +2161,6 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); if (!p) { WARN_ON(1); - /* make sure the free only frees the pages we've - * grabbed a reference on - */ - eb->len = i << PAGE_CACHE_SHIFT; - eb->start &= ~((u64)PAGE_CACHE_SIZE - 1); goto fail; } set_page_extent_mapped(p); 
@@ -2192,9 +2179,20 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, if (uptodate) eb->flags |= EXTENT_UPTODATE; eb->flags |= EXTENT_BUFFER_FILLED; + +lru_add: + spin_lock(&tree->lru_lock); + add_lru(tree, eb); + spin_unlock(&tree->lru_lock); return eb; + fail: - free_extent_buffer(eb); + if (!atomic_dec_and_test(&eb->refs)) + return NULL; + for (index = 0; index < i; index++) { + page_cache_release(extent_buffer_page(eb, index)); + } + __free_extent_buffer(eb); return NULL; } EXPORT_SYMBOL(alloc_extent_buffer); @@ -2204,7 +2202,8 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, gfp_t mask) { unsigned long num_pages = num_extent_pages(start, len); - unsigned long i; unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long i; + unsigned long index = start >> PAGE_CACHE_SHIFT; struct extent_buffer *eb; struct page *p; struct address_space *mapping = tree->mapping; @@ -2215,16 +2214,11 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, return NULL; if (eb->flags & EXTENT_BUFFER_FILLED) - return eb; + goto lru_add; for (i = 0; i < num_pages; i++, index++) { p = find_lock_page(mapping, index); if (!p) { - /* make sure the free only frees the pages we've - * grabbed a reference on - */ - eb->len = i << PAGE_CACHE_SHIFT; - eb->start &= ~((u64)PAGE_CACHE_SIZE - 1); goto fail; } set_page_extent_mapped(p); @@ -2245,9 +2239,19 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, if (uptodate) eb->flags |= EXTENT_UPTODATE; eb->flags |= EXTENT_BUFFER_FILLED; + +lru_add: + spin_lock(&tree->lru_lock); + add_lru(tree, eb); + spin_unlock(&tree->lru_lock); return eb; fail: - free_extent_buffer(eb); + if (!atomic_dec_and_test(&eb->refs)) + return NULL; + for (index = 0; index < i; index++) { + page_cache_release(extent_buffer_page(eb, index)); + } + __free_extent_buffer(eb); return NULL; } EXPORT_SYMBOL(find_extent_buffer); -- cgit v1.2.3-70-g09d2 From 
856bf3e592f917e7d663cb7fa93e83fe795e8f4e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 8 Nov 2007 10:59:05 -0500 Subject: Btrfs: Avoid extent_buffer lru corruption Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index c976615dcda3..754bc42c162a 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -2047,7 +2047,7 @@ static int add_lru(struct extent_map_tree *tree, struct extent_buffer *eb) rm = list_entry(tree->buffer_lru.prev, struct extent_buffer, lru); tree->lru_size--; - list_del(&rm->lru); + list_del_init(&rm->lru); free_extent_buffer(rm); } } else @@ -2187,6 +2187,9 @@ lru_add: return eb; fail: + spin_lock(&tree->lru_lock); + list_del_init(&eb->lru); + spin_unlock(&tree->lru_lock); if (!atomic_dec_and_test(&eb->refs)) return NULL; for (index = 0; index < i; index++) { @@ -2246,6 +2249,9 @@ lru_add: spin_unlock(&tree->lru_lock); return eb; fail: + spin_lock(&tree->lru_lock); + list_del_init(&eb->lru); + spin_unlock(&tree->lru_lock); if (!atomic_dec_and_test(&eb->refs)) return NULL; for (index = 0; index < i; index++) { -- cgit v1.2.3-70-g09d2 From 3ab2fb5a8cb003897016b6eb38ddad916226c1b2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 8 Nov 2007 10:59:22 -0500 Subject: Btrfs: Add readpages support Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 70 +++++++++++++++++++++++++++++++++++++++++++++------ fs/btrfs/extent_map.h | 4 +++ fs/btrfs/inode.c | 12 ++++++++- 3 files changed, 78 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 754bc42c162a..0077c6c1d9f9 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "extent_map.h" /* temporary define until extent_map moves out of btrfs */ @@ -1503,7 +1504,7 @@ static int 
submit_extent_page(int rw, struct extent_map_tree *tree, size_t size, unsigned long offset, struct block_device *bdev, struct bio **bio_ret, - int max_pages, + unsigned long max_pages, bio_end_io_t end_io_func) { int ret = 0; @@ -1520,7 +1521,7 @@ static int submit_extent_page(int rw, struct extent_map_tree *tree, return 0; } } - nr = min(max_pages, bio_get_nr_vecs(bdev)); + nr = min_t(int, max_pages, bio_get_nr_vecs(bdev)); bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); if (!bio) { printk("failed to allocate bio nr %d\n", nr); @@ -1552,8 +1553,10 @@ void set_page_extent_mapped(struct page *page) * into the tree that are removed when the IO is done (by the end_io * handlers) */ -int extent_read_full_page(struct extent_map_tree *tree, struct page *page, - get_extent_t *get_extent) +static int __extent_read_full_page(struct extent_map_tree *tree, + struct page *page, + get_extent_t *get_extent, + struct bio **bio) { struct inode *inode = page->mapping->host; u64 start = (u64)page->index << PAGE_CACHE_SHIFT; @@ -1631,10 +1634,12 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page, cur + iosize - 1); } if (!ret) { + unsigned long nr = (last_byte >> PAGE_CACHE_SHIFT) + 1; + nr -= page->index; ret = submit_extent_page(READ, tree, page, - sector, iosize, page_offset, - bdev, NULL, 1, - end_bio_extent_readpage); + sector, iosize, page_offset, + bdev, bio, nr, + end_bio_extent_readpage); } if (ret) SetPageError(page); @@ -1649,6 +1654,18 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page, } return 0; } + +int extent_read_full_page(struct extent_map_tree *tree, struct page *page, + get_extent_t *get_extent) +{ + struct bio *bio = NULL; + int ret; + + ret = __extent_read_full_page(tree, page, get_extent, &bio); + if (bio) + submit_one_bio(READ, bio); + return ret; +} EXPORT_SYMBOL(extent_read_full_page); /* @@ -1836,6 +1853,45 @@ int extent_writepages(struct extent_map_tree *tree, } 
EXPORT_SYMBOL(extent_writepages); +int extent_readpages(struct extent_map_tree *tree, + struct address_space *mapping, + struct list_head *pages, unsigned nr_pages, + get_extent_t get_extent) +{ + struct bio *bio = NULL; + unsigned page_idx; + struct pagevec pvec; + + pagevec_init(&pvec, 0); + for (page_idx = 0; page_idx < nr_pages; page_idx++) { + struct page *page = list_entry(pages->prev, struct page, lru); + + prefetchw(&page->flags); + list_del(&page->lru); + /* + * what we want to do here is call add_to_page_cache_lru, + * but that isn't exported, so we reproduce it here + */ + if (!add_to_page_cache(page, mapping, + page->index, GFP_KERNEL)) { + + /* open coding of lru_cache_add, also not exported */ + page_cache_get(page); + if (!pagevec_add(&pvec, page)) + __pagevec_lru_add(&pvec); + __extent_read_full_page(tree, page, get_extent, &bio); + } + page_cache_release(page); + } + if (pagevec_count(&pvec)) + __pagevec_lru_add(&pvec); + BUG_ON(!list_empty(pages)); + if (bio) + submit_one_bio(READ, bio); + return 0; +} +EXPORT_SYMBOL(extent_readpages); + /* * basic invalidatepage code, this waits on any locked or writeback * ranges corresponding to the page, and then deletes any extent state diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 0b0dcf19fe45..fbd23a72f53b 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -140,6 +140,10 @@ int extent_writepages(struct extent_map_tree *tree, struct address_space *mapping, get_extent_t *get_extent, struct writeback_control *wbc); +int extent_readpages(struct extent_map_tree *tree, + struct address_space *mapping, + struct list_head *pages, unsigned nr_pages, + get_extent_t get_extent); int extent_prepare_write(struct extent_map_tree *tree, struct inode *inode, struct page *page, unsigned from, unsigned to, get_extent_t *get_extent); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 90cb71d68294..d6d32465bc9a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1748,7 +1748,6 @@ 
static int btrfs_writepage(struct page *page, struct writeback_control *wbc) return extent_write_full_page(tree, page, btrfs_get_extent, wbc); } - static int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { @@ -1757,6 +1756,16 @@ static int btrfs_writepages(struct address_space *mapping, return extent_writepages(tree, mapping, btrfs_get_extent, wbc); } +static int +btrfs_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + struct extent_map_tree *tree; + tree = &BTRFS_I(mapping->host)->extent_tree; + return extent_readpages(tree, mapping, pages, nr_pages, + btrfs_get_extent); +} + static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags) { struct extent_map_tree *tree; @@ -2537,6 +2546,7 @@ static struct address_space_operations btrfs_aops = { .readpage = btrfs_readpage, .writepage = btrfs_writepage, .writepages = btrfs_writepages, + .readpages = btrfs_readpages, .sync_page = block_sync_page, .prepare_write = btrfs_prepare_write, .commit_write = btrfs_commit_write, -- cgit v1.2.3-70-g09d2 From 0591fb56fb732c7bf500d1446d0bf927046d7002 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sun, 11 Nov 2007 08:22:00 -0500 Subject: Btrfs: Fix extent bit range testing It could return the bit as set when there was actually a hole at the very end of the range. 
Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 0077c6c1d9f9..b3ff63c4bf2a 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -96,7 +96,7 @@ void extent_map_tree_empty_lru(struct extent_map_tree *tree) while(!list_empty(&tree->buffer_lru)) { eb = list_entry(tree->buffer_lru.next, struct extent_buffer, lru); - list_del(&eb->lru); + list_del_init(&eb->lru); free_extent_buffer(eb); } } @@ -1212,13 +1212,15 @@ int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, node = tree_search(&tree->state, start); while (node && start <= end) { state = rb_entry(node, struct extent_state, rb_node); - if (state->start > end) - break; if (filled && state->start > start) { bitset = 0; break; } + + if (state->start > end) + break; + if (state->state & bits) { bitset = 1; if (!filled) @@ -2208,6 +2210,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, page_cache_get(page0); mark_page_accessed(page0); set_page_extent_mapped(page0); + WARN_ON(!PageUptodate(page0)); set_page_private(page0, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); } else { @@ -2248,9 +2251,11 @@ fail: spin_unlock(&tree->lru_lock); if (!atomic_dec_and_test(&eb->refs)) return NULL; - for (index = 0; index < i; index++) { + for (index = 1; index < i; index++) { page_cache_release(extent_buffer_page(eb, index)); } + if (i > 0) + page_cache_release(extent_buffer_page(eb, 0)); __free_extent_buffer(eb); return NULL; } @@ -2310,9 +2315,11 @@ fail: spin_unlock(&tree->lru_lock); if (!atomic_dec_and_test(&eb->refs)) return NULL; - for (index = 0; index < i; index++) { + for (index = 1; index < i; index++) { page_cache_release(extent_buffer_page(eb, index)); } + if (i > 0) + page_cache_release(extent_buffer_page(eb, 0)); __free_extent_buffer(eb); return NULL; } @@ -2329,11 +2336,13 @@ void 
free_extent_buffer(struct extent_buffer *eb) if (!atomic_dec_and_test(&eb->refs)) return; + WARN_ON(!list_empty(&eb->lru)); num_pages = num_extent_pages(eb->start, eb->len); - for (i = 0; i < num_pages; i++) { + for (i = 1; i < num_pages; i++) { page_cache_release(extent_buffer_page(eb, i)); } + page_cache_release(extent_buffer_page(eb, 0)); __free_extent_buffer(eb); } EXPORT_SYMBOL(free_extent_buffer); @@ -2469,6 +2478,7 @@ int read_extent_buffer_pages(struct extent_map_tree *tree, EXTENT_UPTODATE, 1)) { return 0; } + if (start) { WARN_ON(start < eb->start); start_i = (start >> PAGE_CACHE_SHIFT) - @@ -2577,7 +2587,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, *map_start = 0; } else { offset = 0; - *map_start = (i << PAGE_CACHE_SHIFT) - start_offset; + *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset; } if (start + min_len > eb->len) { printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len); -- cgit v1.2.3-70-g09d2 From 2f4cbe6442d3be7b5e4cf1607a5ab33995fe2d25 Mon Sep 17 00:00:00 2001 From: Wyatt Banks Date: Mon, 19 Nov 2007 10:22:33 -0500 Subject: Btrfs: Return value checking in module init Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 16 ++++++++++++++-- fs/btrfs/extent_map.h | 2 +- fs/btrfs/super.c | 21 ++++++++++++++++++--- 3 files changed, 33 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index b3ff63c4bf2a..f91f28efdb59 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -42,18 +42,30 @@ struct extent_page_data { struct extent_map_tree *tree; get_extent_t *get_extent; }; - -void __init extent_map_init(void) +int __init extent_map_init(void) { extent_map_cache = btrfs_cache_create("extent_map", sizeof(struct extent_map), 0, NULL); + if (!extent_map_cache) + return -ENOMEM; extent_state_cache = btrfs_cache_create("extent_state", sizeof(struct extent_state), 0, NULL); + if 
(!extent_state_cache) + goto free_map_cache; extent_buffer_cache = btrfs_cache_create("extent_buffers", sizeof(struct extent_buffer), 0, NULL); + if (!extent_buffer_cache) + goto free_state_cache; + return 0; + +free_state_cache: + kmem_cache_destroy(extent_state_cache); +free_map_cache: + kmem_cache_destroy(extent_map_cache); + return -ENOMEM; } void __exit extent_map_exit(void) diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index fbd23a72f53b..24ddc8c93461 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -110,7 +110,7 @@ struct extent_map *alloc_extent_map(gfp_t mask); void free_extent_map(struct extent_map *em); int extent_read_full_page(struct extent_map_tree *tree, struct page *page, get_extent_t *get_extent); -void __init extent_map_init(void); +int __init extent_map_init(void); void __exit extent_map_exit(void); int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index c46bc3911798..ffa02872c4d6 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -347,9 +347,24 @@ static int __init init_btrfs_fs(void) btrfs_init_transaction_sys(); err = btrfs_init_cachep(); if (err) - return err; - extent_map_init(); - return register_filesystem(&btrfs_fs_type); + goto free_transaction_sys; + err = extent_map_init(); + if (err) + goto free_cachep; + + err = register_filesystem(&btrfs_fs_type); + if (err) + goto free_extent_map; + return 0; + +free_extent_map: + extent_map_exit(); +free_cachep: + btrfs_destroy_cachep(); +free_transaction_sys: + btrfs_exit_transaction_sys(); + btrfs_exit_sysfs(); + return err; } static void __exit exit_btrfs_fs(void) -- cgit v1.2.3-70-g09d2 From 3e9fd94ff0028a044d55690eb0a801fd1472e3c6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 20 Nov 2007 10:47:25 -0500 Subject: Btrfs: Avoid fragmentation from parallel delalloc filling Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 83 ++++++++++++++++++++++++++++++--------------------- 1 file 
changed, 49 insertions(+), 34 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index f91f28efdb59..7fd4eb7a8f03 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1033,11 +1033,11 @@ out: EXPORT_SYMBOL(find_first_extent_bit); u64 find_lock_delalloc_range(struct extent_map_tree *tree, - u64 start, u64 lock_start, u64 *end, u64 max_bytes) + u64 *start, u64 *end, u64 max_bytes) { struct rb_node *node; struct extent_state *state; - u64 cur_start = start; + u64 cur_start = *start; u64 found = 0; u64 total_bytes = 0; @@ -1054,27 +1054,43 @@ search_again: while(1) { state = rb_entry(node, struct extent_state, rb_node); - if (state->start != cur_start) { + if (found && state->start != cur_start) { goto out; } if (!(state->state & EXTENT_DELALLOC)) { goto out; } - if (state->start >= lock_start) { - if (state->state & EXTENT_LOCKED) { - DEFINE_WAIT(wait); - atomic_inc(&state->refs); - prepare_to_wait(&state->wq, &wait, - TASK_UNINTERRUPTIBLE); - write_unlock_irq(&tree->lock); - schedule(); - write_lock_irq(&tree->lock); - finish_wait(&state->wq, &wait); - free_extent_state(state); - goto search_again; + if (!found) { + struct extent_state *prev_state; + struct rb_node *prev_node = node; + while(1) { + prev_node = rb_prev(prev_node); + if (!prev_node) + break; + prev_state = rb_entry(prev_node, + struct extent_state, + rb_node); + if (!(prev_state->state & EXTENT_DELALLOC)) + break; + state = prev_state; + node = prev_node; } - state->state |= EXTENT_LOCKED; } + if (state->state & EXTENT_LOCKED) { + DEFINE_WAIT(wait); + atomic_inc(&state->refs); + prepare_to_wait(&state->wq, &wait, + TASK_UNINTERRUPTIBLE); + write_unlock_irq(&tree->lock); + schedule(); + write_lock_irq(&tree->lock); + finish_wait(&state->wq, &wait); + free_extent_state(state); + goto search_again; + } + state->state |= EXTENT_LOCKED; + if (!found) + *start = state->start; found++; *end = state->end; cur_start = state->end + 1; @@ 
-1695,6 +1711,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, struct extent_page_data *epd = data; struct extent_map_tree *tree = epd->tree; u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 delalloc_start; u64 page_end = start + PAGE_CACHE_SIZE - 1; u64 end; u64 cur = start; @@ -1729,25 +1746,23 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, set_page_extent_mapped(page); - lock_extent(tree, start, page_end, GFP_NOFS); - nr_delalloc = find_lock_delalloc_range(tree, start, page_end + 1, - &delalloc_end, - 128 * 1024 * 1024); - if (nr_delalloc) { - tree->ops->fill_delalloc(inode, start, delalloc_end); - if (delalloc_end >= page_end + 1) { - clear_extent_bit(tree, page_end + 1, delalloc_end, - EXTENT_LOCKED | EXTENT_DELALLOC, - 1, 0, GFP_NOFS); - } - clear_extent_bit(tree, start, page_end, EXTENT_DELALLOC, - 0, 0, GFP_NOFS); - if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { - printk("found delalloc bits after clear extent_bit\n"); - } - } else if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { - printk("found delalloc bits after find_delalloc_range returns 0\n"); + delalloc_start = start; + delalloc_end = 0; + while(delalloc_end < page_end) { + nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start, + &delalloc_end, + 128 * 1024 * 1024); + if (nr_delalloc <= 0) + break; + tree->ops->fill_delalloc(inode, delalloc_start, + delalloc_end); + clear_extent_bit(tree, delalloc_start, + delalloc_end, + EXTENT_LOCKED | EXTENT_DELALLOC, + 1, 0, GFP_NOFS); + delalloc_start = delalloc_end + 1; } + lock_extent(tree, start, page_end, GFP_NOFS); end = page_end; if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { -- cgit v1.2.3-70-g09d2 From 7073c8e852946274e4d50fdf072438612f9dc845 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 20 Nov 2007 13:44:45 -0500 Subject: Btrfs: Make sure page mapping dirty tag is properly cleared Signed-off-by: Chris Mason 
--- fs/btrfs/extent_map.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 7fd4eb7a8f03..a4e9096754fc 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1825,12 +1825,18 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, if (ret) SetPageError(page); else { - unsigned long nr = end_index + 1; + unsigned long max_nr = end_index + 1; set_range_writeback(tree, cur, cur + iosize - 1); + if (!PageWriteback(page)) { + printk("warning page %lu not writeback, " + "cur %llu end %llu\n", page->index, + (unsigned long long)cur, + (unsigned long long)end); + } ret = submit_extent_page(WRITE, tree, page, sector, iosize, page_offset, bdev, - &epd->bio, nr, + &epd->bio, max_nr, end_bio_extent_writepage); if (ret) SetPageError(page); @@ -1840,6 +1846,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, nr++; } done: + if (nr == 0) { + /* make sure the mapping tag for page dirty gets cleared */ + set_page_writeback(page); + end_page_writeback(page); + } unlock_extent(tree, start, page_end, GFP_NOFS); unlock_page(page); return 0; @@ -2408,6 +2419,13 @@ int clear_extent_buffer_dirty(struct extent_map_tree *tree, } } clear_page_dirty_for_io(page); + write_lock_irq(&page->mapping->tree_lock); + if (!PageDirty(page)) { + radix_tree_tag_clear(&page->mapping->page_tree, + page_index(page), + PAGECACHE_TAG_DIRTY); + } + write_unlock_irq(&page->mapping->tree_lock); unlock_page(page); } return 0; -- cgit v1.2.3-70-g09d2 From 015a739c7c238768fbfa4eea8ea2ebc1a35e7bb1 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 26 Nov 2007 16:15:16 -0800 Subject: Btrfs: Handle writeback under high memory pressure better Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/extent_map.c | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 
'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 39be6baccc36..3c00f967eccb 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -961,7 +961,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root level = btrfs_header_level(root->node); - if (num_bytes >= 96 * 1024 * 1024 && hint_byte) { + if (num_bytes >= 32 * 1024 * 1024 && hint_byte) { data = BTRFS_BLOCK_GROUP_MIXED; } diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index a4e9096754fc..55f272c335c6 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1861,13 +1861,25 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, struct writeback_control *wbc) { int ret; + struct address_space *mapping = page->mapping; struct extent_page_data epd = { .bio = NULL, .tree = tree, .get_extent = get_extent, }; + struct writeback_control wbc_writepages = { + .bdi = wbc->bdi, + .sync_mode = WB_SYNC_NONE, + .older_than_this = NULL, + .nr_to_write = 64, + .range_start = page_offset(page) + PAGE_CACHE_SIZE, + .range_end = (loff_t)-1, + }; + ret = __extent_writepage(page, wbc, &epd); + + write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd); if (epd.bio) submit_one_bio(WRITE, epd.bio); return ret; -- cgit v1.2.3-70-g09d2 From 793955bca66c99defdffc857ae6eb7e8431d6bbe Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 26 Nov 2007 16:34:41 -0800 Subject: Btrfs: Limit btree writeback to prevent seeks Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 9 +++++++++ fs/btrfs/extent_map.c | 39 +++++++++++++++++++++++++++++++++++++++ fs/btrfs/extent_map.h | 3 +++ 3 files changed, 51 insertions(+) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3e16cca72b49..28e9ef8a3253 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -210,6 +210,15 @@ static int btree_writepages(struct address_space *mapping, { struct extent_map_tree *tree; tree 
= &BTRFS_I(mapping->host)->extent_tree; + if (wbc->sync_mode == WB_SYNC_NONE) { + u64 num_dirty; + u64 start = 0; + unsigned long thresh = 96 * 1024 * 1024; + num_dirty = count_range_bits(tree, &start, thresh, EXTENT_DIRTY); + if (num_dirty < thresh) { + return 0; + } + } return extent_writepages(tree, mapping, btree_get_extent, wbc); } diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 55f272c335c6..b6a4974ecc23 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1106,6 +1106,45 @@ out: return found; } +u64 count_range_bits(struct extent_map_tree *tree, + u64 *start, u64 max_bytes, unsigned long bits) +{ + struct rb_node *node; + struct extent_state *state; + u64 cur_start = *start; + u64 total_bytes = 0; + int found = 0; + + write_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(&tree->state, cur_start); + if (!node || IS_ERR(node)) { + goto out; + } + + while(1) { + state = rb_entry(node, struct extent_state, rb_node); + if ((state->state & bits)) { + total_bytes += state->end - state->start + 1; + if (total_bytes >= max_bytes) + break; + if (!found) { + *start = state->start; + found = 1; + } + } + node = rb_next(node); + if (!node) + break; + } +out: + write_unlock_irq(&tree->lock); + return total_bytes; +} + /* * helper function to lock both pages and extents in the tree. * pages must be locked first. 
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 24ddc8c93461..13c562f7cb6f 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -113,6 +113,9 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page, int __init extent_map_init(void); void __exit extent_map_exit(void); +u64 count_range_bits(struct extent_map_tree *tree, + u64 *start, u64 max_bytes, unsigned long bits); + int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits, int filled); int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, -- cgit v1.2.3-70-g09d2 From ca6646264b7dab662d84435441164bb2a8e8885a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 27 Nov 2007 11:16:35 -0500 Subject: Btrfs: Add efficient dirty accounting to the extent_map tree Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 7 ++++++- fs/btrfs/extent_map.c | 32 +++++++++++++++++++++++++++++--- fs/btrfs/extent_map.h | 1 + 3 files changed, 36 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index de0552532d23..eef4ab56b9ca 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -210,7 +210,7 @@ static int btree_writepages(struct address_space *mapping, { struct extent_map_tree *tree; tree = &BTRFS_I(mapping->host)->extent_tree; - if (wbc->sync_mode == WB_SYNC_NONE && current_is_pdflush()) { + if (wbc->sync_mode == WB_SYNC_NONE) { u64 num_dirty; u64 start = 0; unsigned long thresh = 96 * 1024 * 1024; @@ -218,6 +218,11 @@ static int btree_writepages(struct address_space *mapping, if (wbc->for_kupdate) return 0; + if (current_is_pdflush()) { + thresh = 96 * 1024 * 1024; + } else { + thresh = 8 * 1024 * 1024; + } num_dirty = count_range_bits(tree, &start, thresh, EXTENT_DIRTY); if (num_dirty < thresh) { return 0; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index b6a4974ecc23..06e437723dc3 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -42,6 
+42,7 @@ struct extent_page_data { struct extent_map_tree *tree; get_extent_t *get_extent; }; + int __init extent_map_init(void) { extent_map_cache = btrfs_cache_create("extent_map", @@ -94,6 +95,7 @@ void extent_map_tree_init(struct extent_map_tree *tree, tree->map.rb_node = NULL; tree->state.rb_node = NULL; tree->ops = NULL; + tree->dirty_bytes = 0; rwlock_init(&tree->lock); spin_lock_init(&tree->lru_lock); tree->mapping = mapping; @@ -414,6 +416,8 @@ static int insert_state(struct extent_map_tree *tree, printk("end < start %Lu %Lu\n", end, start); WARN_ON(1); } + if (bits & EXTENT_DIRTY) + tree->dirty_bytes += end - start + 1; state->state |= bits; state->start = start; state->end = end; @@ -476,6 +480,12 @@ static int clear_state_bit(struct extent_map_tree *tree, int delete) { int ret = state->state & bits; + + if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { + u64 range = state->end - state->start + 1; + WARN_ON(range > tree->dirty_bytes); + tree->dirty_bytes -= range; + } state->state &= ~bits; if (wake) wake_up(&state->wq); @@ -668,6 +678,17 @@ out: } EXPORT_SYMBOL(wait_extent_bit); +static void set_state_bits(struct extent_map_tree *tree, + struct extent_state *state, + int bits) +{ + if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { + u64 range = state->end - state->start + 1; + tree->dirty_bytes += range; + } + state->state |= bits; +} + /* * set some bits on a range in the tree. This may require allocations * or sleeping, so the gfp mask is used to indicate what is allowed. 
@@ -727,7 +748,7 @@ again: err = -EEXIST; goto out; } - state->state |= bits; + set_state_bits(tree, state, bits); start = state->end + 1; merge_state(tree, state); goto search_again; @@ -762,7 +783,7 @@ again: if (err) goto out; if (state->end <= end) { - state->state |= bits; + set_state_bits(tree, state, bits); start = state->end + 1; merge_state(tree, state); } else { @@ -808,7 +829,7 @@ again: err = split_state(tree, state, prealloc, end + 1); BUG_ON(err == -EEXIST); - prealloc->state |= bits; + set_state_bits(tree, prealloc, bits); merge_state(tree, prealloc); prealloc = NULL; goto out; @@ -1116,6 +1137,11 @@ u64 count_range_bits(struct extent_map_tree *tree, int found = 0; write_lock_irq(&tree->lock); + if (bits == EXTENT_DIRTY) { + *start = 0; + total_bytes = tree->dirty_bytes; + goto out; + } /* * this search will find all the extents that end after * our range starts. diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 13c562f7cb6f..2ffc0c76338c 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -40,6 +40,7 @@ struct extent_map_tree { struct rb_root map; struct rb_root state; struct address_space *mapping; + u64 dirty_bytes; rwlock_t lock; struct extent_map_ops *ops; spinlock_t lru_lock; -- cgit v1.2.3-70-g09d2 From 17636e03f42a1a42fed3834859de4702bd655fd1 Mon Sep 17 00:00:00 2001 From: Christian Hesse Date: Tue, 11 Dec 2007 09:25:06 -0500 Subject: Btrfs: section mismatch warnings --Boundary-00=_CcOWHFYK4T+JwSj Content-Type: text/plain; charset="iso-8859-1" Content-Transfer-Encoding: 7bit Content-Disposition: inline Hello everybody, compiling btrfs into the kernel results in section mismatch warnings. __exit functions are called where they are not allowed to. The attached patch fixes this for me. Not sure if it is correct though. 
Signed-off-by: Christian Hesse -- Regards, Chris --Boundary-00=_CcOWHFYK4T+JwSj Content-Type: text/x-diff; charset="iso-8859-1"; name="btrfs-section_mismatches.patch" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="btrfs-section_mismatches.patch" Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 2 +- fs/btrfs/extent_map.h | 2 +- fs/btrfs/transaction.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 06e437723dc3..8326a18413ff 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -69,7 +69,7 @@ free_map_cache: return -ENOMEM; } -void __exit extent_map_exit(void) +void extent_map_exit(void) { struct extent_state *state; diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 2ffc0c76338c..6e572d3e8924 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -112,7 +112,7 @@ void free_extent_map(struct extent_map *em); int extent_read_full_page(struct extent_map_tree *tree, struct page *page, get_extent_t *get_extent); int __init extent_map_init(void); -void __exit extent_map_exit(void); +void extent_map_exit(void); u64 count_range_bits(struct extent_map_tree *tree, u64 *start, u64 max_bytes, unsigned long bits); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 4af1c0dab7f8..0229e69bd2ff 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -666,7 +666,7 @@ void __init btrfs_init_transaction_sys(void) trans_wq = create_workqueue("btrfs"); } -void __exit btrfs_exit_transaction_sys(void) +void btrfs_exit_transaction_sys(void) { destroy_workqueue(trans_wq); } -- cgit v1.2.3-70-g09d2 From 6da6abae027e2dbc59bca5f4168b0760f25068c7 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 18 Dec 2007 16:15:09 -0500 Subject: Btrfs: Back port to 2.6.18-el kernels Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 13 ++++ fs/btrfs/disk-io.c | 5 +- fs/btrfs/extent_map.c | 168 
++++++++++++++++++++++++++++++++++++++++++++++--- fs/btrfs/file.c | 15 +++-- fs/btrfs/inode.c | 21 +++++-- fs/btrfs/super.c | 1 + fs/btrfs/sysfs.c | 23 ++++++- fs/btrfs/transaction.c | 8 +++ fs/btrfs/transaction.h | 5 ++ 9 files changed, 239 insertions(+), 20 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 32b24460ec82..2559f5d5d00a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -19,6 +19,7 @@ #ifndef __BTRFS__ #define __BTRFS__ +#include #include #include #include @@ -334,7 +335,11 @@ struct btrfs_fs_info { struct list_head trans_list; struct list_head hashers; struct list_head dead_roots; +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + struct work_struct trans_work; +#else struct delayed_work trans_work; +#endif struct kobject super_kobj; struct completion kobj_unregister; int do_barriers; @@ -916,6 +921,14 @@ static inline u32 btrfs_level_size(struct btrfs_root *root, int level) { ((unsigned long)(btrfs_leaf_data(leaf) + \ btrfs_item_offset_nr(leaf, slot))) +static inline struct dentry *fdentry(struct file *file) { +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + return file->f_dentry; +#else + return file->f_path.dentry; +#endif +} + /* extent-tree.c */ u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, struct btrfs_path *count_path, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7213012c27d5..ebb2db624fdd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -593,8 +593,11 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->do_barriers = 1; fs_info->closing = 0; fs_info->total_pinned = 0; - +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); +#else INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner); +#endif BTRFS_I(fs_info->btree_inode)->root = tree_root; memset(&BTRFS_I(fs_info->btree_inode)->location, 0, sizeof(struct btrfs_key)); diff --git 
a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 8326a18413ff..0d1e59a86e49 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1585,8 +1585,18 @@ extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, static int submit_one_bio(int rw, struct bio *bio) { + u64 maxsector; int ret = 0; + bio_get(bio); + + maxsector = bio->bi_bdev->bd_inode->i_size >> 9; + if (maxsector < bio->bi_sector) { + printk("sector too large max %Lu got %llu\n", maxsector, + (unsigned long long)bio->bi_sector); + WARN_ON(1); + } + submit_bio(rw, bio); if (bio_flagged(bio, BIO_EOPNOTSUPP)) ret = -EOPNOTSUPP; @@ -1678,8 +1688,12 @@ static int __extent_read_full_page(struct extent_map_tree *tree, while (cur <= end) { if (cur >= last_byte) { + char *userpage; iosize = PAGE_CACHE_SIZE - page_offset; - zero_user_page(page, page_offset, iosize, KM_USER0); + userpage = kmap_atomic(page, KM_USER0); + memset(userpage + page_offset, 0, iosize); + flush_dcache_page(page); + kunmap_atomic(userpage, KM_USER0); set_extent_uptodate(tree, cur, cur + iosize - 1, GFP_NOFS); unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); @@ -1707,7 +1721,12 @@ static int __extent_read_full_page(struct extent_map_tree *tree, /* we've found a hole, just zero and go on */ if (block_start == EXTENT_MAP_HOLE) { - zero_user_page(page, page_offset, iosize, KM_USER0); + char *userpage; + userpage = kmap_atomic(page, KM_USER0); + memset(userpage + page_offset, 0, iosize); + flush_dcache_page(page); + kunmap_atomic(userpage, KM_USER0); + set_extent_uptodate(tree, cur, cur + iosize - 1, GFP_NOFS); unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); @@ -1804,9 +1823,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, } if (page->index == end_index) { + char *userpage; + size_t offset = i_size & (PAGE_CACHE_SIZE - 1); - zero_user_page(page, offset, - PAGE_CACHE_SIZE - offset, KM_USER0); + + userpage = kmap_atomic(page, KM_USER0); + memset(userpage + offset, 0, 
PAGE_CACHE_SIZE - offset); + flush_dcache_page(page); + kunmap_atomic(userpage, KM_USER0); } set_page_extent_mapped(page); @@ -1921,6 +1945,129 @@ done: return 0; } +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + +/* Taken directly from 2.6.23 for 2.6.18 back port */ +typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, + void *data); + +/** + * write_cache_pages - walk the list of dirty pages of the given address space + * and write all of them. + * @mapping: address space structure to write + * @wbc: subtract the number of written pages from *@wbc->nr_to_write + * @writepage: function called for each page + * @data: data passed to writepage function + * + * If a page is already under I/O, write_cache_pages() skips it, even + * if it's dirty. This is desirable behaviour for memory-cleaning writeback, + * but it is INCORRECT for data-integrity system calls such as fsync(). fsync() + * and msync() need to guarantee that all the data which was dirty at the time + * the call was made get new I/O started against them. If wbc->sync_mode is + * WB_SYNC_ALL then we were called for data integrity and we must wait for + * existing IO to complete. 
+ */ +static int write_cache_pages(struct address_space *mapping, + struct writeback_control *wbc, writepage_t writepage, + void *data) +{ + struct backing_dev_info *bdi = mapping->backing_dev_info; + int ret = 0; + int done = 0; + struct pagevec pvec; + int nr_pages; + pgoff_t index; + pgoff_t end; /* Inclusive */ + int scanned = 0; + int range_whole = 0; + + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + return 0; + } + + pagevec_init(&pvec, 0); + if (wbc->range_cyclic) { + index = mapping->writeback_index; /* Start from prev offset */ + end = -1; + } else { + index = wbc->range_start >> PAGE_CACHE_SHIFT; + end = wbc->range_end >> PAGE_CACHE_SHIFT; + if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) + range_whole = 1; + scanned = 1; + } +retry: + while (!done && (index <= end) && + (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { + unsigned i; + + scanned = 1; + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + + /* + * At this point we hold neither mapping->tree_lock nor + * lock on the page itself: the page may be truncated or + * invalidated (changing page->mapping to NULL), or even + * swizzled back from swapper_space to tmpfs file + * mapping + */ + lock_page(page); + + if (unlikely(page->mapping != mapping)) { + unlock_page(page); + continue; + } + + if (!wbc->range_cyclic && page->index > end) { + done = 1; + unlock_page(page); + continue; + } + + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); + + if (PageWriteback(page) || + !clear_page_dirty_for_io(page)) { + unlock_page(page); + continue; + } + + ret = (*writepage)(page, wbc, data); + + if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) { + unlock_page(page); + ret = 0; + } + if (ret || (--(wbc->nr_to_write) <= 0)) + done = 1; + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + done = 1; + } + } + 
pagevec_release(&pvec); + cond_resched(); + } + if (!scanned && !done) { + /* + * We hit the last page and there is more work to be done: wrap + * back to the start of the file + */ + scanned = 1; + index = 0; + goto retry; + } + if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) + mapping->writeback_index = index; + return ret; +} +#endif + int extent_write_full_page(struct extent_map_tree *tree, struct page *page, get_extent_t *get_extent, struct writeback_control *wbc) @@ -1945,18 +2092,20 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, ret = __extent_writepage(page, wbc, &epd); write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd); - if (epd.bio) + if (epd.bio) { submit_one_bio(WRITE, epd.bio); + } return ret; } EXPORT_SYMBOL(extent_write_full_page); + int extent_writepages(struct extent_map_tree *tree, struct address_space *mapping, get_extent_t *get_extent, struct writeback_control *wbc) { - int ret; + int ret = 0; struct extent_page_data epd = { .bio = NULL, .tree = tree, @@ -1964,8 +2113,9 @@ int extent_writepages(struct extent_map_tree *tree, }; ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd); - if (epd.bio) + if (epd.bio) { submit_one_bio(WRITE, epd.bio); + } return ret; } EXPORT_SYMBOL(extent_writepages); @@ -2106,7 +2256,9 @@ int extent_prepare_write(struct extent_map_tree *tree, flush_dcache_page(page); kunmap_atomic(kaddr, KM_USER0); } - if (!isnew && !PageUptodate(page) && + if ((em->block_start != EXTENT_MAP_HOLE && + em->block_start != EXTENT_MAP_INLINE) && + !isnew && !PageUptodate(page) && (block_off_end > to || block_off_start < from) && !test_range_bit(tree, block_start, cur_end, EXTENT_UPTODATE, 1)) { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 62fcd79d8ab3..461b09663fed 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -231,7 +231,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, { int err = 0; int i; - struct inode *inode = 
file->f_path.dentry->d_inode; + struct inode *inode = fdentry(file)->d_inode; struct extent_map *em; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; u64 hint_byte; @@ -652,7 +652,7 @@ static int prepare_pages(struct btrfs_root *root, { int i; unsigned long index = pos >> PAGE_CACHE_SHIFT; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = fdentry(file)->d_inode; int err = 0; u64 start_pos; @@ -666,7 +666,11 @@ static int prepare_pages(struct btrfs_root *root, err = -ENOMEM; BUG_ON(1); } +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + ClearPageDirty(pages[i]); +#else cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); +#endif wait_on_page_writeback(pages[i]); set_page_extent_mapped(pages[i]); WARN_ON(!PageLocked(pages[i])); @@ -682,7 +686,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, ssize_t num_written = 0; ssize_t err = 0; int ret = 0; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct page **pages = NULL; int nrptrs; @@ -707,7 +711,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, goto out; if (count == 0) goto out; - err = remove_suid(file->f_path.dentry); + err = remove_suid(fdentry(file)); if (err) goto out; file_update_time(file); @@ -862,6 +866,9 @@ struct file_operations btrfs_file_operations = { .read = do_sync_read, .aio_read = generic_file_aio_read, .splice_read = generic_file_splice_read, +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + .sendfile = generic_file_sendfile, +#endif .write = btrfs_file_write, .mmap = btrfs_file_mmap, .open = generic_file_open, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 686dd03f34f2..6a7d9160df27 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -532,7 +532,11 @@ err: dir->i_size -= name_len * 2; dir->i_mtime = dir->i_ctime = CURRENT_TIME; btrfs_update_inode(trans, root, dir); +#if 
LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + dentry->d_inode->i_nlink--; +#else drop_nlink(dentry->d_inode); +#endif ret = btrfs_update_inode(trans, root, dentry->d_inode); dir->i_sb->s_dirt = 1; } @@ -1139,7 +1143,7 @@ static unsigned char btrfs_filetype_table[] = { static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = filp->f_dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_item *item; struct btrfs_dir_item *di; @@ -1554,7 +1558,11 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (inode->i_nlink == 0) return -ENOENT; +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + inode->i_nlink++; +#else inc_nlink(inode); +#endif mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); @@ -1825,6 +1833,9 @@ insert: if (ret == -EEXIST) { free_extent_map(em); em = NULL; + if (0 && failed_insert == 1) { + btrfs_drop_extent_cache(inode, start, end); + } failed_insert++; if (failed_insert > 5) { printk("failing to insert %Lu %Lu\n", start, end); @@ -1942,7 +1953,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) */ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) { - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct inode *inode = fdentry(vma->vm_file)->d_inode; unsigned long end; loff_t size; int ret = -EINVAL; @@ -2243,7 +2254,7 @@ static unsigned long force_ra(struct address_space *mapping, } int btrfs_defrag_file(struct file *file) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = fdentry(file)->d_inode; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct page *page; unsigned long last_index; @@ -2329,7 +2340,7 @@ static int btrfs_ioctl_snap_create(struct btrfs_root *root, void __user *arg) static int btrfs_ioctl_defrag(struct file *file) { - struct inode *inode = 
file->f_path.dentry->d_inode; + struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; switch (inode->i_mode & S_IFMT) { @@ -2350,7 +2361,7 @@ static int btrfs_ioctl_defrag(struct file *file) long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct btrfs_root *root = BTRFS_I(file->f_path.dentry->d_inode)->root; + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; switch (cmd) { case BTRFS_IOC_SNAP_CREATE: diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b7323914bf5a..7cab680418a1 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 1671c731800e..cd673ca9bb98 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -189,12 +189,29 @@ static struct kset btrfs_kset; int btrfs_sysfs_add_super(struct btrfs_fs_info *fs) { int error; + char *name; + char c; + int len = strlen(fs->sb->s_id) + 1; + int i; + + name = kmalloc(len, GFP_NOFS); + if (!name) { + error = -ENOMEM; + goto fail; + } + + for (i = 0; i < len; i++) { + c = fs->sb->s_id[i]; + if (c == '/' || c == '\\') + c = '!'; + name[i] = c; + } + name[len] = '\0'; fs->super_kobj.kset = &btrfs_kset; fs->super_kobj.ktype = &btrfs_super_ktype; - error = kobject_set_name(&fs->super_kobj, "%s", - fs->sb->s_id); + error = kobject_set_name(&fs->super_kobj, "%s", name); if (error) goto fail; @@ -202,9 +219,11 @@ int btrfs_sysfs_add_super(struct btrfs_fs_info *fs) if (error) goto fail; + kfree(name); return 0; fail: + kfree(name); printk(KERN_ERR "btrfs: sysfs creation for super failed\n"); return error; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 0229e69bd2ff..1ad611b9f61b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -614,12 +614,20 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) } return 0; } +#if 
LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) +void btrfs_transaction_cleaner(void *p) +#else void btrfs_transaction_cleaner(struct work_struct *work) +#endif { +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + struct btrfs_fs_info *fs_info = p; +#else struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info, trans_work.work); +#endif struct btrfs_root *root = fs_info->tree_root; struct btrfs_transaction *cur; struct btrfs_trans_handle *trans; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index ae39fcfc169a..eef840bca91e 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -73,7 +73,12 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_root *root); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) +void btrfs_transaction_cleaner(void *p); +#else void btrfs_transaction_cleaner(struct work_struct *work); +#endif + void btrfs_transaction_flush_work(struct btrfs_root *root); void btrfs_transaction_queue_work(struct btrfs_root *root, int delay); void btrfs_init_transaction_sys(void); -- cgit v1.2.3-70-g09d2 From 190662b2128dd648749e197f5563e9f6bbb5e05c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 18 Dec 2007 16:25:45 -0500 Subject: Btrfs: Fix delayed allocation to avoid missing delalloc extents find_lock_delalloc_range could exit out too early Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 9 +++++++-- fs/btrfs/inode.c | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 0d1e59a86e49..a0dff34dd437 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1070,6 +1070,7 @@ u64 find_lock_delalloc_range(struct extent_map_tree *tree, search_again: node = tree_search(&tree->state, cur_start); if (!node || IS_ERR(node)) { + *end = (u64)-1; goto out; } @@ -1079,6 +1080,8 @@ search_again: goto out; } if 
(!(state->state & EXTENT_DELALLOC)) { + if (!found) + *end = state->end; goto out; } if (!found) { @@ -1841,8 +1844,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start, &delalloc_end, 128 * 1024 * 1024); - if (nr_delalloc <= 0) - break; + if (nr_delalloc == 0) { + delalloc_start = delalloc_end + 1; + continue; + } tree->ops->fill_delalloc(inode, delalloc_start, delalloc_end); clear_extent_bit(tree, delalloc_start, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1e725a48467c..55b2e1426024 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -798,6 +798,7 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page, u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; u64 page_end = page_start + PAGE_CACHE_SIZE - 1; + WARN_ON(!PageLocked(page)); set_page_extent_mapped(page); lock_extent(em_tree, page_start, page_end, GFP_NOFS); -- cgit v1.2.3-70-g09d2 From 1832a6d5ee3b1af61001cadba9e10da9e91af4a4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 21 Dec 2007 16:27:21 -0500 Subject: Btrfs: Implement basic support for -ENOSPC This is intended to prevent accidentally filling the drive. A determined user can still make things oops. 
It includes some accounting of the current bytes under delayed allocation, but this will change as things get optimized Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 ++ fs/btrfs/disk-io.c | 5 +- fs/btrfs/extent_map.c | 20 +++++-- fs/btrfs/extent_map.h | 3 +- fs/btrfs/file.c | 28 +++++++-- fs/btrfs/inode.c | 161 +++++++++++++++++++++++++++++++++++++++++++++----- 6 files changed, 193 insertions(+), 28 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2559f5d5d00a..10129cc6656f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -346,6 +346,8 @@ struct btrfs_fs_info { int closing; u64 total_pinned; + spinlock_t delalloc_lock; + u64 delalloc_bytes; }; /* * in ram representation of the tree. extent_root is used for all allocations @@ -1115,6 +1117,8 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 isize); /* inode.c */ +int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, + int for_del); int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page); int btrfs_readpage(struct file *file, struct page *page); void btrfs_delete_inode(struct inode *inode); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ebb2db624fdd..eebb4fb65c61 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -223,7 +223,8 @@ static int btree_writepages(struct address_space *mapping, } else { thresh = 8 * 1024 * 1024; } - num_dirty = count_range_bits(tree, &start, thresh, EXTENT_DIRTY); + num_dirty = count_range_bits(tree, &start, (u64)-1, + thresh, EXTENT_DIRTY); if (num_dirty < thresh) { return 0; } @@ -559,6 +560,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) INIT_LIST_HEAD(&fs_info->dead_roots); INIT_LIST_HEAD(&fs_info->hashers); spin_lock_init(&fs_info->hash_lock); + spin_lock_init(&fs_info->delalloc_lock); memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj)); init_completion(&fs_info->kobj_unregister); @@ 
-570,6 +572,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->sb = sb; fs_info->mount_opt = 0; fs_info->max_extent = (u64)-1; + fs_info->delalloc_bytes = 0; fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_nlink = 1; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index a0dff34dd437..2b92f1070274 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1131,7 +1131,8 @@ out: } u64 count_range_bits(struct extent_map_tree *tree, - u64 *start, u64 max_bytes, unsigned long bits) + u64 *start, u64 search_end, u64 max_bytes, + unsigned long bits) { struct rb_node *node; struct extent_state *state; @@ -1139,9 +1140,14 @@ u64 count_range_bits(struct extent_map_tree *tree, u64 total_bytes = 0; int found = 0; + if (search_end <= cur_start) { + printk("search_end %Lu start %Lu\n", search_end, cur_start); + WARN_ON(1); + return 0; + } + write_lock_irq(&tree->lock); - if (bits == EXTENT_DIRTY) { - *start = 0; + if (cur_start == 0 && bits == EXTENT_DIRTY) { total_bytes = tree->dirty_bytes; goto out; } @@ -1156,8 +1162,11 @@ u64 count_range_bits(struct extent_map_tree *tree, while(1) { state = rb_entry(node, struct extent_state, rb_node); - if ((state->state & bits)) { - total_bytes += state->end - state->start + 1; + if (state->start > search_end) + break; + if (state->end >= cur_start && (state->state & bits)) { + total_bytes += min(search_end, state->end) + 1 - + max(cur_start, state->start); if (total_bytes >= max_bytes) break; if (!found) { @@ -1173,7 +1182,6 @@ out: write_unlock_irq(&tree->lock); return total_bytes; } - /* * helper function to lock both pages and extents in the tree. * pages must be locked first. 
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 6e572d3e8924..ea60f5447b5b 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -115,7 +115,8 @@ int __init extent_map_init(void); void extent_map_exit(void); u64 count_range_bits(struct extent_map_tree *tree, - u64 *start, u64 max_bytes, unsigned long bits); + u64 *start, u64 search_end, + u64 max_bytes, unsigned long bits); int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits, int filled); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 461b09663fed..71dc2d33b6c6 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -307,6 +307,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, inline_size > 32768 || inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { u64 last_end; + u64 existing_delalloc = 0; for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; @@ -316,8 +317,19 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, last_end = (u64)(pages[num_pages -1]->index) << PAGE_CACHE_SHIFT; last_end += PAGE_CACHE_SIZE - 1; + if (start_pos < isize) { + u64 delalloc_start = start_pos; + existing_delalloc = count_range_bits(em_tree, + &delalloc_start, + end_of_last_block, (u64)-1, + EXTENT_DELALLOC); + } set_extent_delalloc(em_tree, start_pos, end_of_last_block, GFP_NOFS); + spin_lock(&root->fs_info->delalloc_lock); + root->fs_info->delalloc_bytes += (end_of_last_block + 1 - + start_pos) - existing_delalloc; + spin_unlock(&root->fs_info->delalloc_lock); } else { u64 aligned_end; /* step one, delete the existing extents in this range */ @@ -708,12 +720,12 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, current->backing_dev_info = inode->i_mapping->backing_dev_info; err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); if (err) - goto out; + goto out_nolock; if (count == 0) - goto out; + goto out_nolock; err = remove_suid(fdentry(file)); if (err) - goto out; + goto out_nolock; 
file_update_time(file); pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); @@ -758,6 +770,13 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, WARN_ON(num_pages > nrptrs); memset(pages, 0, sizeof(pages)); + + mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_check_free_space(root, write_bytes, 0); + mutex_unlock(&root->fs_info->fs_mutex); + if (ret) + goto out; + ret = prepare_pages(root, file, pages, num_pages, pos, first_index, last_index, write_bytes); @@ -787,8 +806,9 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, btrfs_btree_balance_dirty(root, 1); cond_resched(); } - mutex_unlock(&inode->i_mutex); out: + mutex_unlock(&inode->i_mutex); +out_nolock: kfree(pages); if (pinned[0]) page_cache_release(pinned[0]); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1c61c85dc3d8..a9f5d6d417f0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -72,6 +72,26 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, }; +int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, + int for_del) +{ + u64 total = btrfs_super_total_bytes(&root->fs_info->super_copy); + u64 used = btrfs_super_bytes_used(&root->fs_info->super_copy); + u64 thresh; + int ret = 0; + + if (for_del) + thresh = (total * 90) / 100; + else + thresh = (total * 85) / 100; + + spin_lock(&root->fs_info->delalloc_lock); + if (used + root->fs_info->delalloc_bytes + num_required > thresh) + ret = -ENOSPC; + spin_unlock(&root->fs_info->delalloc_lock); + return ret; +} + static int cow_file_range(struct inode *inode, u64 start, u64 end) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -124,6 +144,7 @@ static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end) u64 extent_end; u64 bytenr; u64 cow_end; + u64 loops = 0; struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_buffer *leaf; int found_type; @@ -169,6 +190,9 @@ again: 
btrfs_file_extent_num_bytes(leaf, item); err = 0; + if (loops && start != extent_start) + goto not_found; + if (start < extent_start || start >= extent_end) goto not_found; @@ -191,6 +215,7 @@ loop: return 0; } btrfs_release_path(root, path); + loops++; goto again; not_found: @@ -202,6 +227,7 @@ not_found: static int run_delalloc_range(struct inode *inode, u64 start, u64 end) { struct btrfs_root *root = BTRFS_I(inode)->root; + u64 num_bytes; int ret; mutex_lock(&root->fs_info->fs_mutex); @@ -209,6 +235,17 @@ static int run_delalloc_range(struct inode *inode, u64 start, u64 end) ret = run_delalloc_nocow(inode, start, end); else ret = cow_file_range(inode, start, end); + + spin_lock(&root->fs_info->delalloc_lock); + num_bytes = end + 1 - start; + if (root->fs_info->delalloc_bytes < num_bytes) { + printk("delalloc accounting error total %llu sub %llu\n", + root->fs_info->delalloc_bytes, num_bytes); + } else { + root->fs_info->delalloc_bytes -= num_bytes; + } + spin_unlock(&root->fs_info->delalloc_lock); + mutex_unlock(&root->fs_info->fs_mutex); return ret; } @@ -547,10 +584,15 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) struct btrfs_root *root; struct btrfs_trans_handle *trans; int ret; - unsigned long nr; + unsigned long nr = 0; root = BTRFS_I(dir)->root; mutex_lock(&root->fs_info->fs_mutex); + + ret = btrfs_check_free_space(root, 1, 1); + if (ret) + goto fail; + trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); @@ -558,25 +600,29 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) nr = trans->blocks_used; btrfs_end_transaction(trans, root); +fail: mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); - return ret; } static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) { struct inode *inode = dentry->d_inode; - int err; + int err = 0; int ret; struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_trans_handle *trans; - unsigned long nr; + unsigned long 
nr = 0; if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) return -ENOTEMPTY; mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_check_free_space(root, 1, 1); + if (ret) + goto fail; + trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); @@ -588,6 +634,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) nr = trans->blocks_used; ret = btrfs_end_transaction(trans, root); +fail: mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); @@ -792,17 +839,29 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page, size_t zero_start) { char *kaddr; - int ret = 0; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct btrfs_root *root = BTRFS_I(inode)->root; u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; u64 page_end = page_start + PAGE_CACHE_SIZE - 1; + u64 existing_delalloc; + u64 delalloc_start; + int ret = 0; WARN_ON(!PageLocked(page)); set_page_extent_mapped(page); lock_extent(em_tree, page_start, page_end, GFP_NOFS); + delalloc_start = page_start; + existing_delalloc = count_range_bits(&BTRFS_I(inode)->extent_tree, + &delalloc_start, page_end, + PAGE_CACHE_SIZE, EXTENT_DELALLOC); set_extent_delalloc(&BTRFS_I(inode)->extent_tree, page_start, page_end, GFP_NOFS); + + spin_lock(&root->fs_info->delalloc_lock); + root->fs_info->delalloc_bytes += PAGE_CACHE_SIZE - existing_delalloc; + spin_unlock(&root->fs_info->delalloc_lock); + if (zero_start != PAGE_CACHE_SIZE) { kaddr = kmap(page); memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start); @@ -881,6 +940,12 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_size <= pos) goto out; + mutex_lock(&root->fs_info->fs_mutex); + err = btrfs_check_free_space(root, 1, 0); + mutex_unlock(&root->fs_info->fs_mutex); + if (err) + goto fail; + btrfs_truncate_page(inode->i_mapping, inode->i_size); lock_extent(em_tree, pos, block_end, GFP_NOFS); @@ -906,7 +971,7 @@ static int btrfs_setattr(struct dentry 
*dentry, struct iattr *attr) } out: err = inode_setattr(inode, attr); - +fail: return err; } void btrfs_delete_inode(struct inode *inode) @@ -1440,16 +1505,20 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(dir)->root; - struct inode *inode; + struct inode *inode = NULL; int err; int drop_inode = 0; u64 objectid; - unsigned long nr; + unsigned long nr = 0; if (!new_valid_dev(rdev)) return -EINVAL; mutex_lock(&root->fs_info->fs_mutex); + err = btrfs_check_free_space(root, 1, 0); + if (err) + goto fail; + trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); @@ -1480,6 +1549,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, out_unlock: nr = trans->blocks_used; btrfs_end_transaction(trans, root); +fail: mutex_unlock(&root->fs_info->fs_mutex); if (drop_inode) { @@ -1495,13 +1565,16 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(dir)->root; - struct inode *inode; + struct inode *inode = NULL; int err; int drop_inode = 0; - unsigned long nr; + unsigned long nr = 0; u64 objectid; mutex_lock(&root->fs_info->fs_mutex); + err = btrfs_check_free_space(root, 1, 0); + if (err) + goto fail; trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); @@ -1535,6 +1608,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, out_unlock: nr = trans->blocks_used; btrfs_end_transaction(trans, root); +fail: mutex_unlock(&root->fs_info->fs_mutex); if (drop_inode) { @@ -1551,7 +1625,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(dir)->root; struct inode *inode = old_dentry->d_inode; - unsigned long nr; + unsigned long nr = 0; int err; int drop_inode = 0; @@ -1564,6 +1638,9 @@ static int btrfs_link(struct dentry *old_dentry, struct 
inode *dir, inc_nlink(inode); #endif mutex_lock(&root->fs_info->fs_mutex); + err = btrfs_check_free_space(root, 1, 0); + if (err) + goto fail; trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); @@ -1582,6 +1659,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, nr = trans->blocks_used; btrfs_end_transaction(trans, root); +fail: mutex_unlock(&root->fs_info->fs_mutex); if (drop_inode) { @@ -1603,6 +1681,10 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) unsigned long nr = 1; mutex_lock(&root->fs_info->fs_mutex); + err = btrfs_check_free_space(root, 1, 0); + if (err) + goto out_unlock; + trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); @@ -1869,6 +1951,15 @@ static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock) static int btrfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { + struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; + int err; + + mutex_lock(&root->fs_info->fs_mutex); + err = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0); + mutex_lock(&root->fs_info->fs_mutex); + if (err) + return -ENOSPC; + return extent_prepare_write(&BTRFS_I(page->mapping->host)->extent_tree, page->mapping->host, page, from, to, btrfs_get_extent); @@ -1880,6 +1971,7 @@ int btrfs_readpage(struct file *file, struct page *page) tree = &BTRFS_I(page->mapping->host)->extent_tree; return extent_read_full_page(tree, page, btrfs_get_extent); } + static int btrfs_writepage(struct page *page, struct writeback_control *wbc) { struct extent_map_tree *tree; @@ -1954,11 +2046,20 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) { struct inode *inode = fdentry(vma->vm_file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; unsigned long end; loff_t size; - int ret = -EINVAL; + int ret; u64 page_start; + 
mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0); + mutex_lock(&root->fs_info->fs_mutex); + if (ret) + goto out; + + ret = -EINVAL; + down_read(&BTRFS_I(inode)->root->snap_sem); lock_page(page); wait_on_page_writeback(page); @@ -1982,6 +2083,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) out_unlock: up_read(&BTRFS_I(inode)->root->snap_sem); unlock_page(page); +out: return ret; } @@ -2046,6 +2148,10 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) unsigned long nr = 1; mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_check_free_space(root, 1, 0); + if (ret) + goto fail_commit; + trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); @@ -2162,7 +2268,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) int ret; int err; u64 objectid; - unsigned long nr; + unsigned long nr = 0; if (!root->ref_cows) return -EINVAL; @@ -2172,6 +2278,10 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) thaw_bdev(root->fs_info->sb->s_bdev, root->fs_info->sb); mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_check_free_space(root, 1, 0); + if (ret) + goto fail_unlock; + trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); @@ -2229,7 +2339,7 @@ fail: if (err && !ret) ret = err; - +fail_unlock: mutex_unlock(&root->fs_info->fs_mutex); up_write(&root->snap_sem); btrfs_btree_balance_dirty(root, nr); @@ -2255,6 +2365,7 @@ static unsigned long force_ra(struct address_space *mapping, int btrfs_defrag_file(struct file *file) { struct inode *inode = fdentry(file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct page *page; unsigned long last_index; @@ -2262,6 +2373,13 @@ int btrfs_defrag_file(struct file *file) { u64 page_start; u64 page_end; unsigned long i; + int ret; + + mutex_lock(&root->fs_info->fs_mutex); + ret = 
btrfs_check_free_space(root, inode->i_size, 0); + mutex_unlock(&root->fs_info->fs_mutex); + if (ret) + return -ENOSPC; mutex_lock(&inode->i_mutex); last_index = inode->i_size >> PAGE_CACHE_SHIFT; @@ -2522,6 +2640,10 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, } mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_check_free_space(root, 1, 0); + if (ret) + goto out_unlock; + trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, new_dir); @@ -2553,6 +2675,7 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, out_fail: btrfs_free_path(path); btrfs_end_transaction(trans, root); +out_unlock: mutex_unlock(&root->fs_info->fs_mutex); return ret; } @@ -2564,7 +2687,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_path *path; struct btrfs_key key; - struct inode *inode; + struct inode *inode = NULL; int err; int drop_inode = 0; u64 objectid; @@ -2573,12 +2696,17 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, unsigned long ptr; struct btrfs_file_extent_item *ei; struct extent_buffer *leaf; - unsigned long nr; + unsigned long nr = 0; name_len = strlen(symname) + 1; if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) return -ENAMETOOLONG; + mutex_lock(&root->fs_info->fs_mutex); + err = btrfs_check_free_space(root, 1, 0); + if (err) + goto out_fail; + trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); @@ -2645,6 +2773,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, out_unlock: nr = trans->blocks_used; btrfs_end_transaction(trans, root); +out_fail: mutex_unlock(&root->fs_info->fs_mutex); if (drop_inode) { inode_dec_link_count(inode); -- cgit v1.2.3-70-g09d2 From 5d4fb734b4cb89a3902d6de309af0f6f3e63fe73 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 21 Dec 2007 16:27:23 -0500 Subject: Btrfs: Fix an off by one in the extent_map prepare 
write code Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 4 ++-- fs/btrfs/inode.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 2b92f1070274..f53d8c976219 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -2270,7 +2270,7 @@ int extent_prepare_write(struct extent_map_tree *tree, kunmap_atomic(kaddr, KM_USER0); } if ((em->block_start != EXTENT_MAP_HOLE && - em->block_start != EXTENT_MAP_INLINE) && + em->block_start != EXTENT_MAP_INLINE) && !isnew && !PageUptodate(page) && (block_off_end > to || block_off_start < from) && !test_range_bit(tree, block_start, cur_end, @@ -2279,7 +2279,7 @@ int extent_prepare_write(struct extent_map_tree *tree, u64 extent_offset = block_start - em->start; size_t iosize; sector = (em->block_start + extent_offset) >> 9; - iosize = (cur_end - block_start + blocksize - 1) & + iosize = (cur_end - block_start + blocksize) & ~((u64)blocksize - 1); /* * we've already got the extent locked, but we diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a9f5d6d417f0..5f5b7b89b144 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1956,7 +1956,7 @@ static int btrfs_prepare_write(struct file *file, struct page *page, mutex_lock(&root->fs_info->fs_mutex); err = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0); - mutex_lock(&root->fs_info->fs_mutex); + mutex_unlock(&root->fs_info->fs_mutex); if (err) return -ENOSPC; -- cgit v1.2.3-70-g09d2 From bcd987feefe8da66bc59b4e6bd51761a9820588c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 9 Jan 2008 06:28:28 -0500 Subject: Btrfs: Remove extent_map debugging message Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index f53d8c976219..9d6aefa937c4 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -273,7 +273,6 @@ int 
add_extent_mapping(struct extent_map_tree *tree, rb = tree_insert(&tree->map, em->end, &em->rb_node); if (rb) { prev = rb_entry(rb, struct extent_map, rb_node); - printk("found extent map %Lu %Lu on insert of %Lu %Lu\n", prev->start, prev->end, em->start, em->end); ret = -EEXIST; goto out; } -- cgit v1.2.3-70-g09d2 From 55c69072d6bd5be170a85467f64a20963cddf490 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 9 Jan 2008 15:55:33 -0500 Subject: Btrfs: Fix extent_buffer usage when nodesize != leafsize Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 26 +++++++++++++++++++++++--- fs/btrfs/extent-tree.c | 13 ++++++++++--- fs/btrfs/extent_map.c | 30 +++++++++++++++++++++--------- 3 files changed, 54 insertions(+), 15 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a481b970608c..0338f8fd382d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -197,9 +197,23 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) if (found_start != start) { printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n", start, found_start, len); + WARN_ON(1); + goto err; + } + if (eb->first_page != page) { + printk("bad first page %lu %lu\n", eb->first_page->index, + page->index); + WARN_ON(1); + goto err; + } + if (!PageUptodate(page)) { + printk("csum not up to date page %lu\n", page->index); + WARN_ON(1); + goto err; } found_level = btrfs_header_level(eb); csum_tree_block(root, eb, 0); +err: free_extent_buffer(eb); out: return 0; @@ -368,7 +382,10 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf) { struct inode *btree_inode = root->fs_info->btree_inode; - clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); + if (btrfs_header_generation(buf) == + root->fs_info->running_transaction->transid) + clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, + buf); return 0; } @@ -897,8 +914,11 @@ void 
btrfs_mark_buffer_dirty(struct extent_buffer *buf) void btrfs_throttle(struct btrfs_root *root) { - if (root->fs_info->throttles) - congestion_wait(WRITE, HZ/10); + struct backing_dev_info *bdi; + + bdi = root->fs_info->sb->s_bdev->bd_inode->i_mapping->backing_dev_info; + if (root->fs_info->throttles && bdi_write_congested(bdi)) + congestion_wait(WRITE, HZ/20); } void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 99a8b0f0d318..2c569b4d59d4 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1212,6 +1212,7 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, u64 header_transid = btrfs_header_generation(buf); if (header_transid == transid) { + clean_tree_block(NULL, root, buf); free_extent_buffer(buf); return 1; } @@ -1249,7 +1250,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root key.objectid = bytenr; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_bytes; - path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -1648,8 +1648,6 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, search_start, search_end, hint_byte, ins, trans->alloc_exclude_start, trans->alloc_exclude_nr, data); -if (ret) -printk("find free extent returns %d\n", ret); BUG_ON(ret); if (ret) return ret; @@ -1764,7 +1762,16 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 0, 0, 0); return ERR_PTR(-ENOMEM); } + btrfs_set_header_generation(buf, trans->transid); + clean_tree_block(trans, root, buf); + wait_on_tree_block_writeback(root, buf); btrfs_set_buffer_uptodate(buf); + + if (PageDirty(buf->first_page)) { + printk("page %lu dirty\n", buf->first_page->index); + WARN_ON(1); + } + set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree, diff --git 
a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 9d6aefa937c4..f3a384ed700c 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1663,6 +1663,13 @@ void set_page_extent_mapped(struct page *page) } } +void set_page_extent_head(struct page *page, unsigned long len) +{ + WARN_ON(page->private && page->private == EXTENT_PAGE_PRIVATE && + PageDirty(page)); + set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); +} + /* * basic readpage implementation. Locked extent state structs are inserted * into the tree that are removed when the IO is done (by the end_io @@ -2490,8 +2497,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, mark_page_accessed(page0); set_page_extent_mapped(page0); WARN_ON(!PageUptodate(page0)); - set_page_private(page0, EXTENT_PAGE_PRIVATE_FIRST_PAGE | - len << 2); + set_page_extent_head(page0, len); } else { i = 0; } @@ -2505,8 +2511,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, mark_page_accessed(p); if (i == 0) { eb->first_page = p; - set_page_private(p, EXTENT_PAGE_PRIVATE_FIRST_PAGE | - len << 2); + set_page_extent_head(p, len); } else { set_page_private(p, EXTENT_PAGE_PRIVATE); } @@ -2569,8 +2574,7 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, if (i == 0) { eb->first_page = p; - set_page_private(p, EXTENT_PAGE_PRIVATE_FIRST_PAGE | - len << 2); + set_page_extent_head(p, len); } else { set_page_private(p, EXTENT_PAGE_PRIVATE); } @@ -2643,6 +2647,11 @@ int clear_extent_buffer_dirty(struct extent_map_tree *tree, for (i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); lock_page(page); + if (i == 0) + set_page_extent_head(page, eb->len); + else + set_page_private(page, EXTENT_PAGE_PRIVATE); + /* * if we're on the last page or the first page and the * block isn't aligned on a page boundary, do extra checks @@ -2697,9 +2706,12 @@ int set_extent_buffer_dirty(struct extent_map_tree *tree, */ if (i == 0) { lock_page(page); - 
set_page_private(page, - EXTENT_PAGE_PRIVATE_FIRST_PAGE | - eb->len << 2); + set_page_extent_head(page, eb->len); + } else if (PagePrivate(page) && + page->private != EXTENT_PAGE_PRIVATE) { + lock_page(page); + set_page_extent_mapped(page); + unlock_page(page); } __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); if (i == 0) -- cgit v1.2.3-70-g09d2 From b3a0d8d28c607cb2531a68742afc2b967b1f6083 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 14 Jan 2008 13:22:53 -0500 Subject: Btrfs: Remove verbose WARN_ON Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index f3a384ed700c..d71aed341abd 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1665,8 +1665,6 @@ void set_page_extent_mapped(struct page *page) void set_page_extent_head(struct page *page, unsigned long len) { - WARN_ON(page->private && page->private == EXTENT_PAGE_PRIVATE && - PageDirty(page)); set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); } -- cgit v1.2.3-70-g09d2 From f0c5da1446cc500856a5e31c9a0e2a7bdd30e663 Mon Sep 17 00:00:00 2001 From: Yan Date: Tue, 22 Jan 2008 12:46:56 -0500 Subject: Btrfs: Fix for test_range_bit test_range_bit doesn't properly handle the case: there's a hole at the end of the range and there's no other extent_state after the range. 
Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index d71aed341abd..485cf0719b3c 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1336,6 +1336,11 @@ int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, if (start > end) break; node = rb_next(node); + if (!node) { + if (filled) + bitset = 0; + break; + } } read_unlock_irq(&tree->lock); return bitset; -- cgit v1.2.3-70-g09d2 From 5f56406aabdf5444d040c5955effc665b1d0dbaf Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 22 Jan 2008 16:47:59 -0500 Subject: Btrfs: Fix hole insertion corner cases There were a few places that could cause duplicate extent insertion; this adjusts the code that creates holes to avoid it. lookup_extent_mapping is changed to correctly return all of the extents in a range, even when there are none matching at the start of the range. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/extent_map.c | 45 +++++++++++++++++++++++------ fs/btrfs/file.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++- fs/btrfs/inode.c | 15 ++++++++-- 4 files changed, 126 insertions(+), 13 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7a588ba2b747..b9f2975b55c9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1170,6 +1170,7 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, struct inode *inode); /* file.c */ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end); +int btrfs_check_file(struct btrfs_root *root, struct inode *inode); extern struct file_operations btrfs_file_operations; int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 485cf0719b3c..010a287fbd71 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@
-204,10 +204,12 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset, } static struct rb_node *__tree_search(struct rb_root *root, u64 offset, - struct rb_node **prev_ret) + struct rb_node **prev_ret, + struct rb_node **next_ret) { struct rb_node * n = root->rb_node; struct rb_node *prev = NULL; + struct rb_node *orig_prev = NULL; struct tree_entry *entry; struct tree_entry *prev_entry = NULL; @@ -223,13 +225,25 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, else return n; } - if (!prev_ret) - return NULL; - while(prev && offset > prev_entry->end) { - prev = rb_next(prev); + + if (prev_ret) { + orig_prev = prev; + while(prev && offset > prev_entry->end) { + prev = rb_next(prev); + prev_entry = rb_entry(prev, struct tree_entry, rb_node); + } + *prev_ret = prev; + prev = orig_prev; + } + + if (next_ret) { prev_entry = rb_entry(prev, struct tree_entry, rb_node); + while(prev && offset < prev_entry->start) { + prev = rb_prev(prev); + prev_entry = rb_entry(prev, struct tree_entry, rb_node); + } + *next_ret = prev; } - *prev_ret = prev; return NULL; } @@ -237,7 +251,7 @@ static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) { struct rb_node *prev; struct rb_node *ret; - ret = __tree_search(root, offset, &prev); + ret = __tree_search(root, offset, &prev, NULL); if (!ret) return prev; return ret; @@ -248,7 +262,7 @@ static int tree_delete(struct rb_root *root, u64 offset) struct rb_node *node; struct tree_entry *entry; - node = __tree_search(root, offset, NULL); + node = __tree_search(root, offset, NULL, NULL); if (!node) return -ENOENT; entry = rb_entry(node, struct tree_entry, rb_node); @@ -314,9 +328,21 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, { struct extent_map *em; struct rb_node *rb_node; + struct rb_node *prev = NULL; + struct rb_node *next = NULL; read_lock_irq(&tree->lock); - rb_node = tree_search(&tree->map, start); + rb_node = __tree_search(&tree->map, start, 
&prev, &next); + if (!rb_node && prev) { + em = rb_entry(prev, struct extent_map, rb_node); + if (em->start <= end && em->end >= start) + goto found; + } + if (!rb_node && next) { + em = rb_entry(next, struct extent_map, rb_node); + if (em->start <= end && em->end >= start) + goto found; + } if (!rb_node) { em = NULL; goto out; @@ -330,6 +356,7 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, em = NULL; goto out; } +found: atomic_inc(&em->refs); out: read_unlock_irq(&tree->lock); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 897242e87fa7..1cd8c908811e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -278,7 +278,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, u64 hole_size; u64 mask = root->sectorsize - 1; last_pos_in_file = (isize + mask) & ~mask; - hole_size = (start_pos - last_pos_in_file + mask) & ~mask; + hole_size = (end_pos - last_pos_in_file + mask) & ~mask; if (last_pos_in_file < start_pos) { err = btrfs_drop_extents(trans, root, inode, @@ -293,6 +293,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, inode->i_ino, last_pos_in_file, 0, 0, hole_size); + btrfs_check_file(root, inode); } if (err) goto failed; @@ -378,6 +379,80 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) return 0; } +int btrfs_check_file(struct btrfs_root *root, struct inode *inode) +{ + return 0; +#if 0 + struct btrfs_path *path; + struct btrfs_key found_key; + struct extent_buffer *leaf; + struct btrfs_file_extent_item *extent; + u64 last_offset = 0; + int nritems; + int slot; + int found_type; + int ret; + int err = 0; + u64 extent_end = 0; + + path = btrfs_alloc_path(); + ret = btrfs_lookup_file_extent(NULL, root, path, inode->i_ino, + last_offset, 0); + while(1) { + nritems = btrfs_header_nritems(path->nodes[0]); + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret) + goto out; + nritems = btrfs_header_nritems(path->nodes[0]); + } 
+ slot = path->slots[0]; + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, slot); + if (found_key.objectid != inode->i_ino) + break; + if (found_key.type != BTRFS_EXTENT_DATA_KEY) + goto out; + + if (found_key.offset != last_offset) { + WARN_ON(1); + btrfs_print_leaf(root, leaf); + printk("inode %lu found offset %Lu expected %Lu\n", + inode->i_ino, found_key.offset, last_offset); + err = 1; + goto out; + } + extent = btrfs_item_ptr(leaf, slot, + struct btrfs_file_extent_item); + found_type = btrfs_file_extent_type(leaf, extent); + if (found_type == BTRFS_FILE_EXTENT_REG) { + extent_end = found_key.offset + + btrfs_file_extent_num_bytes(leaf, extent); + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + struct btrfs_item *item; + item = btrfs_item_nr(leaf, slot); + extent_end = found_key.offset + + btrfs_file_extent_inline_len(leaf, item); + extent_end = (extent_end + root->sectorsize - 1) & + ~((u64)root->sectorsize -1 ); + } + last_offset = extent_end; + path->slots[0]++; + } + if (last_offset < inode->i_size) { + WARN_ON(1); + btrfs_print_leaf(root, leaf); + printk("inode %lu found offset %Lu size %Lu\n", inode->i_ino, + last_offset, inode->i_size); + err = 1; + + } +out: + btrfs_free_path(path); + return err; +#endif +} + /* * this is very complex, but the basic idea is to drop all extents * in the range start - end. 
hint_block is filled in with a block number @@ -436,6 +511,7 @@ next_slot: slot = path->slots[0]; ret = 0; btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.offset >= end || key.objectid != inode->i_ino) { goto out; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 51fc06dc4a5d..67005480e139 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -133,6 +133,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) ret = btrfs_insert_file_extent(trans, root, inode->i_ino, start, ins.objectid, ins.offset, ins.offset); + btrfs_check_file(root, inode); num_bytes -= cur_alloc_size; alloc_hint = ins.objectid + ins.offset; start += cur_alloc_size; @@ -965,12 +966,18 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) u64 mask = root->sectorsize - 1; u64 pos = (inode->i_size + mask) & ~mask; u64 block_end = attr->ia_size | mask; + u64 hole_start; u64 hole_size; u64 alloc_hint = 0; if (attr->ia_size <= pos) goto out; + if (pos != inode->i_size) + hole_start = pos + root->sectorsize; + else + hole_start = pos; + mutex_lock(&root->fs_info->fs_mutex); err = btrfs_check_free_space(root, 1, 0); mutex_unlock(&root->fs_info->fs_mutex); @@ -980,19 +987,21 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) btrfs_truncate_page(inode->i_mapping, inode->i_size); lock_extent(em_tree, pos, block_end, GFP_NOFS); - hole_size = (attr->ia_size - pos + mask) & ~mask; + hole_size = block_end - hole_start; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); err = btrfs_drop_extents(trans, root, inode, - pos, pos + hole_size, pos, + pos, block_end, pos, &alloc_hint); if (alloc_hint != EXTENT_MAP_INLINE) { err = btrfs_insert_file_extent(trans, root, inode->i_ino, - pos, 0, 0, hole_size); + hole_start, 0, 0, + hole_size); + btrfs_check_file(root, inode); } btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); -- cgit v1.2.3-70-g09d2 From 
d1310b2e0cd98eb1348553e69b73827b436dca7b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 24 Jan 2008 16:13:08 -0500 Subject: Btrfs: Split the extent_map code into two parts There is now extent_map for mapping offsets in the file to disk and extent_io for state tracking, IO submission and extent_buffers. The new extent_map code shifts from [start,end] pairs to [start,len], and pushes the locking out into the caller. This allows a few performance optimizations and is easier to use. A number of extent_map usage bugs were fixed, mostly with failing to remove extent_map entries when changing the file. Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 3 +- fs/btrfs/btrfs_inode.h | 2 + fs/btrfs/ctree.h | 15 +- fs/btrfs/disk-io.c | 123 +- fs/btrfs/extent-tree.c | 43 +- fs/btrfs/extent_io.c | 3089 +++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/extent_io.h | 193 +++ fs/btrfs/extent_map.c | 3119 ++---------------------------------------------- fs/btrfs/extent_map.h | 214 +--- fs/btrfs/file.c | 29 +- fs/btrfs/inode.c | 175 +-- fs/btrfs/super.c | 10 +- fs/btrfs/transaction.c | 8 +- fs/btrfs/transaction.h | 2 +- 14 files changed, 3612 insertions(+), 3413 deletions(-) create mode 100644 fs/btrfs/extent_io.c create mode 100644 fs/btrfs/extent_io.h (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 72cc3e16741f..9c61609f3714 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -5,7 +5,8 @@ obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ - extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o + extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ + extent_io.o ifeq ($(CONFIG_FS_POSIX_ACL),y) btrfs-y += acl.o diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 9fc99883c70a..9a0647dc5a0b 100644 --- a/fs/btrfs/btrfs_inode.h +++
b/fs/btrfs/btrfs_inode.h @@ -20,6 +20,7 @@ #define __BTRFS_I__ #include "extent_map.h" +#include "extent_io.h" /* in memory btrfs inode */ struct btrfs_inode { @@ -27,6 +28,7 @@ struct btrfs_inode { struct btrfs_block_group_cache *block_group; struct btrfs_key location; struct extent_map_tree extent_tree; + struct extent_io_tree io_tree; struct inode vfs_inode; u64 ordered_trans; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b9f2975b55c9..6c65473e0fe3 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -27,6 +27,7 @@ #include #include #include "bit-radix.h" +#include "extent_io.h" #include "extent_map.h" struct btrfs_trans_handle; @@ -314,11 +315,11 @@ struct btrfs_fs_info { struct btrfs_root *tree_root; struct radix_tree_root fs_roots_radix; - struct extent_map_tree free_space_cache; - struct extent_map_tree block_group_cache; - struct extent_map_tree pinned_extents; - struct extent_map_tree pending_del; - struct extent_map_tree extent_ins; + struct extent_io_tree free_space_cache; + struct extent_io_tree block_group_cache; + struct extent_io_tree pinned_extents; + struct extent_io_tree pending_del; + struct extent_io_tree extent_ins; u64 generation; u64 last_trans_committed; @@ -956,7 +957,7 @@ u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, u64 first_extent); int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy); +int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, u64 bytenr); @@ -1001,7 +1002,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root u64 owner_objectid, u64 owner_offset, int pin); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_map_tree *unpin); + struct extent_io_tree *unpin); int btrfs_inc_extent_ref(struct btrfs_trans_handle 
*trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5d1f9bca2712..4c4ebea0b2a9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -43,14 +43,14 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) } #endif -static struct extent_map_ops btree_extent_map_ops; +static struct extent_io_ops btree_extent_io_ops; struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { struct inode *btree_inode = root->fs_info->btree_inode; struct extent_buffer *eb; - eb = find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, bytenr, blocksize, GFP_NOFS); return eb; } @@ -61,13 +61,13 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, struct inode *btree_inode = root->fs_info->btree_inode; struct extent_buffer *eb; - eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree, bytenr, blocksize, NULL, GFP_NOFS); return eb; } struct extent_map *btree_get_extent(struct inode *inode, struct page *page, - size_t page_offset, u64 start, u64 end, + size_t page_offset, u64 start, u64 len, int create) { struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; @@ -75,7 +75,9 @@ struct extent_map *btree_get_extent(struct inode *inode, struct page *page, int ret; again: - em = lookup_extent_mapping(em_tree, start, end); + spin_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, start, len); + spin_unlock(&em_tree->lock); if (em) { goto out; } @@ -85,11 +87,14 @@ again: goto out; } em->start = 0; - em->end = (i_size_read(inode) & ~((u64)PAGE_CACHE_SIZE -1)) - 1; + em->len = i_size_read(inode); em->block_start = 0; - em->block_end = em->end; em->bdev = inode->i_sb->s_bdev; + + spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); + spin_unlock(&em_tree->lock); + if (ret == -EEXIST) { 
free_extent_map(em); em = NULL; @@ -175,13 +180,13 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, int csum_dirty_buffer(struct btrfs_root *root, struct page *page) { - struct extent_map_tree *tree; + struct extent_io_tree *tree; u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 found_start; int found_level; unsigned long len; struct extent_buffer *eb; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; if (page->private == EXTENT_PAGE_PRIVATE) goto out; @@ -230,16 +235,16 @@ static int btree_writepage_io_hook(struct page *page, u64 start, u64 end) static int btree_writepage(struct page *page, struct writeback_control *wbc) { - struct extent_map_tree *tree; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; return extent_write_full_page(tree, page, btree_get_extent, wbc); } static int btree_writepages(struct address_space *mapping, struct writeback_control *wbc) { - struct extent_map_tree *tree; - tree = &BTRFS_I(mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(mapping->host)->io_tree; if (wbc->sync_mode == WB_SYNC_NONE) { u64 num_dirty; u64 start = 0; @@ -264,18 +269,20 @@ static int btree_writepages(struct address_space *mapping, int btree_readpage(struct file *file, struct page *page) { - struct extent_map_tree *tree; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; return extent_read_full_page(tree, page, btree_get_extent); } static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags) { - struct extent_map_tree *tree; + struct extent_io_tree *tree; + struct extent_map_tree *map; int ret; - tree = &BTRFS_I(page->mapping->host)->extent_tree; - ret = try_release_extent_mapping(tree, page); + tree = &BTRFS_I(page->mapping->host)->io_tree; + map = 
&BTRFS_I(page->mapping->host)->extent_tree; + ret = try_release_extent_mapping(map, tree, page); if (ret == 1) { ClearPagePrivate(page); set_page_private(page, 0); @@ -286,8 +293,8 @@ static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags) static void btree_invalidatepage(struct page *page, unsigned long offset) { - struct extent_map_tree *tree; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; extent_invalidatepage(tree, page, offset); btree_releasepage(page, GFP_NOFS); } @@ -331,7 +338,7 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return 0; - read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, + read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 0); free_extent_buffer(buf); return ret; @@ -342,40 +349,39 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, { struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; - struct extent_map_tree *extent_tree; + struct extent_io_tree *io_tree; u64 end; int ret; - extent_tree = &BTRFS_I(btree_inode)->extent_tree; + io_tree = &BTRFS_I(btree_inode)->io_tree; buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return NULL; - read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, - buf, 0, 1); + read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 1); if (buf->flags & EXTENT_CSUM) return buf; end = buf->start + PAGE_CACHE_SIZE - 1; - if (test_range_bit(extent_tree, buf->start, end, EXTENT_CSUM, 1)) { + if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) { buf->flags |= EXTENT_CSUM; return buf; } - lock_extent(extent_tree, buf->start, end, GFP_NOFS); + lock_extent(io_tree, buf->start, end, GFP_NOFS); - if (test_range_bit(extent_tree, buf->start, end, EXTENT_CSUM, 1)) { + if (test_range_bit(io_tree, 
buf->start, end, EXTENT_CSUM, 1)) { buf->flags |= EXTENT_CSUM; goto out_unlock; } ret = csum_tree_block(root, buf, 1); - set_extent_bits(extent_tree, buf->start, end, EXTENT_CSUM, GFP_NOFS); + set_extent_bits(io_tree, buf->start, end, EXTENT_CSUM, GFP_NOFS); buf->flags |= EXTENT_CSUM; out_unlock: - unlock_extent(extent_tree, buf->start, end, GFP_NOFS); + unlock_extent(io_tree, buf->start, end, GFP_NOFS); return buf; } @@ -385,7 +391,7 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *btree_inode = root->fs_info->btree_inode; if (btrfs_header_generation(buf) == root->fs_info->running_transaction->transid) - clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, + clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); return 0; } @@ -394,7 +400,7 @@ int wait_on_tree_block_writeback(struct btrfs_root *root, struct extent_buffer *buf) { struct inode *btree_inode = root->fs_info->btree_inode; - wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->extent_tree, + wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->io_tree, buf); return 0; } @@ -659,20 +665,23 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_nlink = 1; fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; - extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree, + extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, fs_info->btree_inode->i_mapping, GFP_NOFS); - BTRFS_I(fs_info->btree_inode)->extent_tree.ops = &btree_extent_map_ops; + extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree, + GFP_NOFS); + + BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; - extent_map_tree_init(&fs_info->free_space_cache, + extent_io_tree_init(&fs_info->free_space_cache, fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_map_tree_init(&fs_info->block_group_cache, + extent_io_tree_init(&fs_info->block_group_cache, 
fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_map_tree_init(&fs_info->pinned_extents, + extent_io_tree_init(&fs_info->pinned_extents, fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_map_tree_init(&fs_info->pending_del, + extent_io_tree_init(&fs_info->pending_del, fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_map_tree_init(&fs_info->extent_ins, + extent_io_tree_init(&fs_info->extent_ins, fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; fs_info->closing = 0; @@ -787,7 +796,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root if (!btrfs_test_opt(root, NOBARRIER)) blkdev_issue_flush(sb->s_bdev, NULL); - set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, super); + set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, super); ret = sync_page_range_nolock(btree_inode, btree_inode->i_mapping, super->start, super->len); if (!btrfs_test_opt(root, NOBARRIER)) @@ -864,12 +873,12 @@ int close_ctree(struct btrfs_root *root) filemap_write_and_wait(fs_info->btree_inode->i_mapping); - extent_map_tree_empty_lru(&fs_info->free_space_cache); - extent_map_tree_empty_lru(&fs_info->block_group_cache); - extent_map_tree_empty_lru(&fs_info->pinned_extents); - extent_map_tree_empty_lru(&fs_info->pending_del); - extent_map_tree_empty_lru(&fs_info->extent_ins); - extent_map_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->extent_tree); + extent_io_tree_empty_lru(&fs_info->free_space_cache); + extent_io_tree_empty_lru(&fs_info->block_group_cache); + extent_io_tree_empty_lru(&fs_info->pinned_extents); + extent_io_tree_empty_lru(&fs_info->pending_del); + extent_io_tree_empty_lru(&fs_info->extent_ins); + extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); @@ -892,13 +901,13 @@ int close_ctree(struct btrfs_root *root) int btrfs_buffer_uptodate(struct extent_buffer *buf) { struct inode *btree_inode = buf->first_page->mapping->host; - return 
extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); + return extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf); } int btrfs_set_buffer_uptodate(struct extent_buffer *buf) { struct inode *btree_inode = buf->first_page->mapping->host; - return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, + return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf); } @@ -914,7 +923,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) transid, root->fs_info->generation); WARN_ON(1); } - set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); + set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); } void btrfs_throttle(struct btrfs_root *root) @@ -941,7 +950,7 @@ void btrfs_set_buffer_defrag(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, + set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS); } @@ -949,7 +958,7 @@ void btrfs_set_buffer_defrag_done(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, + set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, GFP_NOFS); } @@ -958,7 +967,7 @@ int btrfs_buffer_defrag(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, + return test_range_bit(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0); } @@ -966,7 +975,7 @@ int btrfs_buffer_defrag_done(struct extent_buffer *buf) { struct btrfs_root *root = 
BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, + return test_range_bit(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, 0); } @@ -975,7 +984,7 @@ int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, + return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, GFP_NOFS); } @@ -984,7 +993,7 @@ int btrfs_clear_buffer_defrag(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, + return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS); } @@ -993,10 +1002,10 @@ int btrfs_read_buffer(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, + return read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 1); } -static struct extent_map_ops btree_extent_map_ops = { +static struct extent_io_ops btree_extent_io_ops = { .writepage_io_hook = btree_writepage_io_hook, }; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b69a46691a96..1cf125ab7822 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -63,7 +63,7 @@ static int cache_block_group(struct btrfs_root *root, int ret; struct btrfs_key key; struct extent_buffer *leaf; - struct extent_map_tree *free_space_cache; + struct extent_io_tree *free_space_cache; int slot; u64 last = 0; 
u64 hole_size; @@ -158,7 +158,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, u64 bytenr) { - struct extent_map_tree *block_group_cache; + struct extent_io_tree *block_group_cache; struct btrfs_block_group_cache *block_group = NULL; u64 ptr; u64 start; @@ -281,7 +281,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int data, int owner) { struct btrfs_block_group_cache *cache; - struct extent_map_tree *block_group_cache; + struct extent_io_tree *block_group_cache; struct btrfs_block_group_cache *found_group = NULL; struct btrfs_fs_info *info = root->fs_info; u64 used; @@ -951,7 +951,7 @@ fail: int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - struct extent_map_tree *block_group_cache; + struct extent_io_tree *block_group_cache; struct btrfs_block_group_cache *cache; int ret; int err = 0; @@ -1107,12 +1107,12 @@ static int update_pinned_extents(struct btrfs_root *root, return 0; } -int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy) +int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) { u64 last = 0; u64 start; u64 end; - struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents; + struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; int ret; while(1) { @@ -1128,12 +1128,12 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy) int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_map_tree *unpin) + struct extent_io_tree *unpin) { u64 start; u64 end; int ret; - struct extent_map_tree *free_space_cache; + struct extent_io_tree *free_space_cache; free_space_cache = &root->fs_info->free_space_cache; while(1) { @@ -1329,8 +1329,8 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct int err = 0; u64 start; u64 end; - struct extent_map_tree *pending_del; - struct 
extent_map_tree *pinned_extents; + struct extent_io_tree *pending_del; + struct extent_io_tree *pinned_extents; pending_del = &extent_root->fs_info->pending_del; pinned_extents = &extent_root->fs_info->pinned_extents; @@ -1802,7 +1802,7 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); - set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree, + set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_CSUM, GFP_NOFS); buf->flags |= EXTENT_CSUM; @@ -2166,7 +2166,7 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, unsigned long i; struct page *page; struct btrfs_root *root = BTRFS_I(inode)->root; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct file_ra_state *ra; ra = kzalloc(sizeof(*ra), GFP_NOFS); @@ -2195,15 +2195,14 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, page_start = (u64)page->index << PAGE_CACHE_SHIFT; page_end = page_start + PAGE_CACHE_SIZE - 1; - lock_extent(em_tree, page_start, page_end, GFP_NOFS); + lock_extent(io_tree, page_start, page_end, GFP_NOFS); delalloc_start = page_start; - existing_delalloc = - count_range_bits(&BTRFS_I(inode)->extent_tree, - &delalloc_start, page_end, - PAGE_CACHE_SIZE, EXTENT_DELALLOC); + existing_delalloc = count_range_bits(io_tree, + &delalloc_start, page_end, + PAGE_CACHE_SIZE, EXTENT_DELALLOC); - set_extent_delalloc(em_tree, page_start, + set_extent_delalloc(io_tree, page_start, page_end, GFP_NOFS); spin_lock(&root->fs_info->delalloc_lock); @@ -2211,7 +2210,7 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, existing_delalloc; spin_unlock(&root->fs_info->delalloc_lock); - unlock_extent(em_tree, page_start, page_end, GFP_NOFS); + unlock_extent(io_tree, 
page_start, page_end, GFP_NOFS); set_page_dirty(page); unlock_page(page); page_cache_release(page); @@ -2379,7 +2378,7 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size) u64 cur_byte; u64 total_found; struct btrfs_fs_info *info = root->fs_info; - struct extent_map_tree *block_group_cache; + struct extent_io_tree *block_group_cache; struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; @@ -2561,7 +2560,7 @@ int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *cache; struct btrfs_block_group_item *item; struct btrfs_fs_info *info = root->fs_info; - struct extent_map_tree *block_group_cache; + struct extent_io_tree *block_group_cache; struct btrfs_key key; struct extent_buffer *leaf; int ret; @@ -2645,7 +2644,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) int bit; struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; - struct extent_map_tree *block_group_cache; + struct extent_io_tree *block_group_cache; struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c new file mode 100644 index 000000000000..15cc158a0498 --- /dev/null +++ b/fs/btrfs/extent_io.c @@ -0,0 +1,3089 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "extent_io.h" +#include "extent_map.h" + +/* temporary define until extent_map moves out of btrfs */ +struct kmem_cache *btrfs_cache_create(const char *name, size_t size, + unsigned long extra_flags, + void (*ctor)(void *, struct kmem_cache *, + unsigned long)); + +static struct kmem_cache *extent_state_cache; +static struct kmem_cache *extent_buffer_cache; + +static LIST_HEAD(buffers); +static LIST_HEAD(states); + +static spinlock_t state_lock = SPIN_LOCK_UNLOCKED; +#define BUFFER_LRU_MAX 64 + +struct tree_entry { + u64 start; + u64 end; + int in_tree; + 
struct rb_node rb_node; +}; + +struct extent_page_data { + struct bio *bio; + struct extent_io_tree *tree; + get_extent_t *get_extent; +}; + +int __init extent_io_init(void) +{ + extent_state_cache = btrfs_cache_create("extent_state", + sizeof(struct extent_state), 0, + NULL); + if (!extent_state_cache) + return -ENOMEM; + + extent_buffer_cache = btrfs_cache_create("extent_buffers", + sizeof(struct extent_buffer), 0, + NULL); + if (!extent_buffer_cache) + goto free_state_cache; + return 0; + +free_state_cache: + kmem_cache_destroy(extent_state_cache); + return -ENOMEM; +} + +void extent_io_exit(void) +{ + struct extent_state *state; + + while (!list_empty(&states)) { + state = list_entry(states.next, struct extent_state, list); + printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs)); + list_del(&state->list); + kmem_cache_free(extent_state_cache, state); + + } + + if (extent_state_cache) + kmem_cache_destroy(extent_state_cache); + if (extent_buffer_cache) + kmem_cache_destroy(extent_buffer_cache); +} + +void extent_io_tree_init(struct extent_io_tree *tree, + struct address_space *mapping, gfp_t mask) +{ + tree->state.rb_node = NULL; + tree->ops = NULL; + tree->dirty_bytes = 0; + rwlock_init(&tree->lock); + spin_lock_init(&tree->lru_lock); + tree->mapping = mapping; + INIT_LIST_HEAD(&tree->buffer_lru); + tree->lru_size = 0; +} +EXPORT_SYMBOL(extent_io_tree_init); + +void extent_io_tree_empty_lru(struct extent_io_tree *tree) +{ + struct extent_buffer *eb; + while(!list_empty(&tree->buffer_lru)) { + eb = list_entry(tree->buffer_lru.next, struct extent_buffer, + lru); + list_del_init(&eb->lru); + free_extent_buffer(eb); + } +} +EXPORT_SYMBOL(extent_io_tree_empty_lru); + +struct extent_state *alloc_extent_state(gfp_t mask) +{ + struct extent_state *state; + unsigned long flags; + + state = kmem_cache_alloc(extent_state_cache, mask); + if (!state || IS_ERR(state)) + return 
state; + state->state = 0; + state->in_tree = 0; + state->private = 0; + + spin_lock_irqsave(&state_lock, flags); + list_add(&state->list, &states); + spin_unlock_irqrestore(&state_lock, flags); + + atomic_set(&state->refs, 1); + init_waitqueue_head(&state->wq); + return state; +} +EXPORT_SYMBOL(alloc_extent_state); + +void free_extent_state(struct extent_state *state) +{ + unsigned long flags; + if (!state) + return; + if (atomic_dec_and_test(&state->refs)) { + WARN_ON(state->in_tree); + spin_lock_irqsave(&state_lock, flags); + list_del(&state->list); + spin_unlock_irqrestore(&state_lock, flags); + kmem_cache_free(extent_state_cache, state); + } +} +EXPORT_SYMBOL(free_extent_state); + +static struct rb_node *tree_insert(struct rb_root *root, u64 offset, + struct rb_node *node) +{ + struct rb_node ** p = &root->rb_node; + struct rb_node * parent = NULL; + struct tree_entry *entry; + + while(*p) { + parent = *p; + entry = rb_entry(parent, struct tree_entry, rb_node); + + if (offset < entry->start) + p = &(*p)->rb_left; + else if (offset > entry->end) + p = &(*p)->rb_right; + else + return parent; + } + + entry = rb_entry(node, struct tree_entry, rb_node); + entry->in_tree = 1; + rb_link_node(node, parent, p); + rb_insert_color(node, root); + return NULL; +} + +static struct rb_node *__tree_search(struct rb_root *root, u64 offset, + struct rb_node **prev_ret, + struct rb_node **next_ret) +{ + struct rb_node * n = root->rb_node; + struct rb_node *prev = NULL; + struct rb_node *orig_prev = NULL; + struct tree_entry *entry; + struct tree_entry *prev_entry = NULL; + + while(n) { + entry = rb_entry(n, struct tree_entry, rb_node); + prev = n; + prev_entry = entry; + + if (offset < entry->start) + n = n->rb_left; + else if (offset > entry->end) + n = n->rb_right; + else + return n; + } + + if (prev_ret) { + orig_prev = prev; + while(prev && offset > prev_entry->end) { + prev = rb_next(prev); + prev_entry = rb_entry(prev, struct tree_entry, rb_node); + } + *prev_ret = prev; 
+ prev = orig_prev; + } + + if (next_ret) { + prev_entry = rb_entry(prev, struct tree_entry, rb_node); + while(prev && offset < prev_entry->start) { + prev = rb_prev(prev); + prev_entry = rb_entry(prev, struct tree_entry, rb_node); + } + *next_ret = prev; + } + return NULL; +} + +static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) +{ + struct rb_node *prev; + struct rb_node *ret; + ret = __tree_search(root, offset, &prev, NULL); + if (!ret) + return prev; + return ret; +} + +/* + * utility function to look for merge candidates inside a given range. + * Any extents with matching state are merged together into a single + * extent in the tree. Extents with EXTENT_IO in their state field + * are not merged because the end_io handlers need to be able to do + * operations on them without sleeping (or doing allocations/splits). + * + * This should be called with the tree lock held. + */ +static int merge_state(struct extent_io_tree *tree, + struct extent_state *state) +{ + struct extent_state *other; + struct rb_node *other_node; + + if (state->state & EXTENT_IOBITS) + return 0; + + other_node = rb_prev(&state->rb_node); + if (other_node) { + other = rb_entry(other_node, struct extent_state, rb_node); + if (other->end == state->start - 1 && + other->state == state->state) { + state->start = other->start; + other->in_tree = 0; + rb_erase(&other->rb_node, &tree->state); + free_extent_state(other); + } + } + other_node = rb_next(&state->rb_node); + if (other_node) { + other = rb_entry(other_node, struct extent_state, rb_node); + if (other->start == state->end + 1 && + other->state == state->state) { + other->start = state->start; + state->in_tree = 0; + rb_erase(&state->rb_node, &tree->state); + free_extent_state(state); + } + } + return 0; +} + +/* + * insert an extent_state struct into the tree. 'bits' are set on the + * struct before it is inserted. 
+ * + * This may return -EEXIST if the extent is already there, in which case the + * state struct is freed. + * + * The tree lock is not taken internally. This is a utility function and + * probably isn't what you want to call (see set/clear_extent_bit). + */ +static int insert_state(struct extent_io_tree *tree, + struct extent_state *state, u64 start, u64 end, + int bits) +{ + struct rb_node *node; + + if (end < start) { + printk("end < start %Lu %Lu\n", end, start); + WARN_ON(1); + } + if (bits & EXTENT_DIRTY) + tree->dirty_bytes += end - start + 1; + state->state |= bits; + state->start = start; + state->end = end; + node = tree_insert(&tree->state, end, &state->rb_node); + if (node) { + struct extent_state *found; + found = rb_entry(node, struct extent_state, rb_node); + printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end); + free_extent_state(state); + return -EEXIST; + } + merge_state(tree, state); + return 0; +} + +/* + * split a given extent state struct in two, inserting the preallocated + * struct 'prealloc' as the newly created second half. 'split' indicates an + * offset inside 'orig' where it should be split. + * + * Before calling, + * the tree has 'orig' at [orig->start, orig->end]. After calling, there + * are two extent state structs in the tree: + * prealloc: [orig->start, split - 1] + * orig: [ split, orig->end ] + * + * The tree locks are not taken by this function. They need to be held + * by the caller. 
+ */ +static int split_state(struct extent_io_tree *tree, struct extent_state *orig, + struct extent_state *prealloc, u64 split) +{ + struct rb_node *node; + prealloc->start = orig->start; + prealloc->end = split - 1; + prealloc->state = orig->state; + orig->start = split; + + node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node); + if (node) { + struct extent_state *found; + found = rb_entry(node, struct extent_state, rb_node); + printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end); + free_extent_state(prealloc); + return -EEXIST; + } + return 0; +} + +/* + * utility function to clear some bits in an extent state struct. + * it will optionally wake up any one waiting on this state (wake == 1), or + * forcibly remove the state from the tree (delete == 1). + * + * If no bits are set on the state struct after clearing things, the + * struct is freed and removed from the tree + */ +static int clear_state_bit(struct extent_io_tree *tree, + struct extent_state *state, int bits, int wake, + int delete) +{ + int ret = state->state & bits; + + if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { + u64 range = state->end - state->start + 1; + WARN_ON(range > tree->dirty_bytes); + tree->dirty_bytes -= range; + } + state->state &= ~bits; + if (wake) + wake_up(&state->wq); + if (delete || state->state == 0) { + if (state->in_tree) { + rb_erase(&state->rb_node, &tree->state); + state->in_tree = 0; + free_extent_state(state); + } else { + WARN_ON(1); + } + } else { + merge_state(tree, state); + } + return ret; +} + +/* + * clear some bits on a range in the tree. This may require splitting + * or inserting elements in the tree, so the gfp mask is used to + * indicate which allocations or sleeping are allowed. + * + * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove + * the given range from the tree regardless of state (ie for truncate). + * + * the range [start, end] is inclusive. 
+ * + * This takes the tree lock, and returns < 0 on error, > 0 if any of the + * bits were already set, or zero if none of the bits were already set. + */ +int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, + int bits, int wake, int delete, gfp_t mask) +{ + struct extent_state *state; + struct extent_state *prealloc = NULL; + struct rb_node *node; + unsigned long flags; + int err; + int set = 0; + +again: + if (!prealloc && (mask & __GFP_WAIT)) { + prealloc = alloc_extent_state(mask); + if (!prealloc) + return -ENOMEM; + } + + write_lock_irqsave(&tree->lock, flags); + /* + * this search will find the extents that end after + * our range starts + */ + node = tree_search(&tree->state, start); + if (!node) + goto out; + state = rb_entry(node, struct extent_state, rb_node); + if (state->start > end) + goto out; + WARN_ON(state->end < start); + + /* + * | ---- desired range ---- | + * | state | or + * | ------------- state -------------- | + * + * We need to split the extent we found, and may flip + * bits on second half. + * + * If the extent we found extends past our range, we + * just split and search again. It'll get split again + * the next time though. + * + * If the extent we found is inside our range, we clear + * the desired bit on it. 
+ */ + + if (state->start < start) { + err = split_state(tree, state, prealloc, start); + BUG_ON(err == -EEXIST); + prealloc = NULL; + if (err) + goto out; + if (state->end <= end) { + start = state->end + 1; + set |= clear_state_bit(tree, state, bits, + wake, delete); + } else { + start = state->start; + } + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | + * We need to split the extent, and clear the bit + * on the first half + */ + if (state->start <= end && state->end > end) { + err = split_state(tree, state, prealloc, end + 1); + BUG_ON(err == -EEXIST); + + if (wake) + wake_up(&state->wq); + set |= clear_state_bit(tree, prealloc, bits, + wake, delete); + prealloc = NULL; + goto out; + } + + start = state->end + 1; + set |= clear_state_bit(tree, state, bits, wake, delete); + goto search_again; + +out: + write_unlock_irqrestore(&tree->lock, flags); + if (prealloc) + free_extent_state(prealloc); + + return set; + +search_again: + if (start > end) + goto out; + write_unlock_irqrestore(&tree->lock, flags); + if (mask & __GFP_WAIT) + cond_resched(); + goto again; +} +EXPORT_SYMBOL(clear_extent_bit); + +static int wait_on_state(struct extent_io_tree *tree, + struct extent_state *state) +{ + DEFINE_WAIT(wait); + prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE); + read_unlock_irq(&tree->lock); + schedule(); + read_lock_irq(&tree->lock); + finish_wait(&state->wq, &wait); + return 0; +} + +/* + * waits for one or more bits to clear on a range in the state tree. + * The range [start, end] is inclusive. 
+ * The tree lock is taken by this function + */ +int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits) +{ + struct extent_state *state; + struct rb_node *node; + + read_lock_irq(&tree->lock); +again: + while (1) { + /* + * this search will find all the extents that end after + * our range starts + */ + node = tree_search(&tree->state, start); + if (!node) + break; + + state = rb_entry(node, struct extent_state, rb_node); + + if (state->start > end) + goto out; + + if (state->state & bits) { + start = state->start; + atomic_inc(&state->refs); + wait_on_state(tree, state); + free_extent_state(state); + goto again; + } + start = state->end + 1; + + if (start > end) + break; + + if (need_resched()) { + read_unlock_irq(&tree->lock); + cond_resched(); + read_lock_irq(&tree->lock); + } + } +out: + read_unlock_irq(&tree->lock); + return 0; +} +EXPORT_SYMBOL(wait_extent_bit); + +static void set_state_bits(struct extent_io_tree *tree, + struct extent_state *state, + int bits) +{ + if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { + u64 range = state->end - state->start + 1; + tree->dirty_bytes += range; + } + state->state |= bits; +} + +/* + * set some bits on a range in the tree. This may require allocations + * or sleeping, so the gfp mask is used to indicate what is allowed. + * + * If 'exclusive' == 1, this will fail with -EEXIST if some part of the + * range already has the desired bits set. The start of the existing + * range is returned in failed_start in this case. + * + * [start, end] is inclusive + * This takes the tree lock. 
+ */ +int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, + int exclusive, u64 *failed_start, gfp_t mask) +{ + struct extent_state *state; + struct extent_state *prealloc = NULL; + struct rb_node *node; + unsigned long flags; + int err = 0; + int set; + u64 last_start; + u64 last_end; +again: + if (!prealloc && (mask & __GFP_WAIT)) { + prealloc = alloc_extent_state(mask); + if (!prealloc) + return -ENOMEM; + } + + write_lock_irqsave(&tree->lock, flags); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(&tree->state, start); + if (!node) { + err = insert_state(tree, prealloc, start, end, bits); + prealloc = NULL; + BUG_ON(err == -EEXIST); + goto out; + } + + state = rb_entry(node, struct extent_state, rb_node); + last_start = state->start; + last_end = state->end; + + /* + * | ---- desired range ---- | + * | state | + * + * Just lock what we found and keep going + */ + if (state->start == start && state->end <= end) { + set = state->state & bits; + if (set && exclusive) { + *failed_start = state->start; + err = -EEXIST; + goto out; + } + set_state_bits(tree, state, bits); + start = state->end + 1; + merge_state(tree, state); + goto search_again; + } + + /* + * | ---- desired range ---- | + * | state | + * or + * | ------------- state -------------- | + * + * We need to split the extent we found, and may flip bits on + * second half. + * + * If the extent we found extends past our + * range, we just split and search again. It'll get split + * again the next time though. + * + * If the extent we found is inside our range, we set the + * desired bit on it. 
+ */ + if (state->start < start) { + set = state->state & bits; + if (exclusive && set) { + *failed_start = start; + err = -EEXIST; + goto out; + } + err = split_state(tree, state, prealloc, start); + BUG_ON(err == -EEXIST); + prealloc = NULL; + if (err) + goto out; + if (state->end <= end) { + set_state_bits(tree, state, bits); + start = state->end + 1; + merge_state(tree, state); + } else { + start = state->start; + } + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | or | state | + * + * There's a hole, we need to insert something in it and + * ignore the extent we found. + */ + if (state->start > start) { + u64 this_end; + if (end < last_start) + this_end = end; + else + this_end = last_start -1; + err = insert_state(tree, prealloc, start, this_end, + bits); + prealloc = NULL; + BUG_ON(err == -EEXIST); + if (err) + goto out; + start = this_end + 1; + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | + * We need to split the extent, and set the bit + * on the first half + */ + if (state->start <= end && state->end > end) { + set = state->state & bits; + if (exclusive && set) { + *failed_start = start; + err = -EEXIST; + goto out; + } + err = split_state(tree, state, prealloc, end + 1); + BUG_ON(err == -EEXIST); + + set_state_bits(tree, prealloc, bits); + merge_state(tree, prealloc); + prealloc = NULL; + goto out; + } + + goto search_again; + +out: + write_unlock_irqrestore(&tree->lock, flags); + if (prealloc) + free_extent_state(prealloc); + + return err; + +search_again: + if (start > end) + goto out; + write_unlock_irqrestore(&tree->lock, flags); + if (mask & __GFP_WAIT) + cond_resched(); + goto again; +} +EXPORT_SYMBOL(set_extent_bit); + +/* wrappers around set/clear extent bit */ +int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_dirty); + +int set_extent_bits(struct 
extent_io_tree *tree, u64 start, u64 end, + int bits, gfp_t mask) +{ + return set_extent_bit(tree, start, end, bits, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_bits); + +int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, + int bits, gfp_t mask) +{ + return clear_extent_bit(tree, start, end, bits, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_bits); + +int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, + EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_delalloc); + +int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, + EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_dirty); + +int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_new); + +int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_new); + +int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_uptodate); + +int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_uptodate); + +int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_WRITEBACK, + 0, NULL, mask); +} +EXPORT_SYMBOL(set_extent_writeback); + +int clear_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, 
EXTENT_WRITEBACK, 1, 0, mask); +} +EXPORT_SYMBOL(clear_extent_writeback); + +int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) +{ + return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK); +} +EXPORT_SYMBOL(wait_on_extent_writeback); + +/* + * locks a range in ascending order, waiting for any locked regions + * it hits on the way. [start,end] are inclusive, and this will sleep. + */ +int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) +{ + int err; + u64 failed_start; + while (1) { + err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, + &failed_start, mask); + if (err == -EEXIST && (mask & __GFP_WAIT)) { + wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); + start = failed_start; + } else { + break; + } + WARN_ON(start > end); + } + return err; +} +EXPORT_SYMBOL(lock_extent); + +int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask); +} +EXPORT_SYMBOL(unlock_extent); + +/* + * helper function to set pages and extents in the tree dirty + */ +int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(tree->mapping, index); + BUG_ON(!page); + __set_page_dirty_nobuffers(page); + page_cache_release(page); + index++; + } + set_extent_dirty(tree, start, end, GFP_NOFS); + return 0; +} +EXPORT_SYMBOL(set_range_dirty); + +/* + * helper function to set both pages and extents in the tree writeback + */ +int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(tree->mapping, index); + BUG_ON(!page); + set_page_writeback(page); 
+ page_cache_release(page); + index++; + } + set_extent_writeback(tree, start, end, GFP_NOFS); + return 0; +} +EXPORT_SYMBOL(set_range_writeback); + +int find_first_extent_bit(struct extent_io_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, int bits) +{ + struct rb_node *node; + struct extent_state *state; + int ret = 1; + + read_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(&tree->state, start); + if (!node || IS_ERR(node)) { + goto out; + } + + while(1) { + state = rb_entry(node, struct extent_state, rb_node); + if (state->end >= start && (state->state & bits)) { + *start_ret = state->start; + *end_ret = state->end; + ret = 0; + break; + } + node = rb_next(node); + if (!node) + break; + } +out: + read_unlock_irq(&tree->lock); + return ret; +} +EXPORT_SYMBOL(find_first_extent_bit); + +u64 find_lock_delalloc_range(struct extent_io_tree *tree, + u64 *start, u64 *end, u64 max_bytes) +{ + struct rb_node *node; + struct extent_state *state; + u64 cur_start = *start; + u64 found = 0; + u64 total_bytes = 0; + + write_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. 
+ */ +search_again: + node = tree_search(&tree->state, cur_start); + if (!node || IS_ERR(node)) { + *end = (u64)-1; + goto out; + } + + while(1) { + state = rb_entry(node, struct extent_state, rb_node); + if (found && state->start != cur_start) { + goto out; + } + if (!(state->state & EXTENT_DELALLOC)) { + if (!found) + *end = state->end; + goto out; + } + if (!found) { + struct extent_state *prev_state; + struct rb_node *prev_node = node; + while(1) { + prev_node = rb_prev(prev_node); + if (!prev_node) + break; + prev_state = rb_entry(prev_node, + struct extent_state, + rb_node); + if (!(prev_state->state & EXTENT_DELALLOC)) + break; + state = prev_state; + node = prev_node; + } + } + if (state->state & EXTENT_LOCKED) { + DEFINE_WAIT(wait); + atomic_inc(&state->refs); + prepare_to_wait(&state->wq, &wait, + TASK_UNINTERRUPTIBLE); + write_unlock_irq(&tree->lock); + schedule(); + write_lock_irq(&tree->lock); + finish_wait(&state->wq, &wait); + free_extent_state(state); + goto search_again; + } + state->state |= EXTENT_LOCKED; + if (!found) + *start = state->start; + found++; + *end = state->end; + cur_start = state->end + 1; + node = rb_next(node); + if (!node) + break; + total_bytes += state->end - state->start + 1; + if (total_bytes >= max_bytes) + break; + } +out: + write_unlock_irq(&tree->lock); + return found; +} + +u64 count_range_bits(struct extent_io_tree *tree, + u64 *start, u64 search_end, u64 max_bytes, + unsigned long bits) +{ + struct rb_node *node; + struct extent_state *state; + u64 cur_start = *start; + u64 total_bytes = 0; + int found = 0; + + if (search_end <= cur_start) { + printk("search_end %Lu start %Lu\n", search_end, cur_start); + WARN_ON(1); + return 0; + } + + write_lock_irq(&tree->lock); + if (cur_start == 0 && bits == EXTENT_DIRTY) { + total_bytes = tree->dirty_bytes; + goto out; + } + /* + * this search will find all the extents that end after + * our range starts. 
+ */ + node = tree_search(&tree->state, cur_start); + if (!node || IS_ERR(node)) { + goto out; + } + + while(1) { + state = rb_entry(node, struct extent_state, rb_node); + if (state->start > search_end) + break; + if (state->end >= cur_start && (state->state & bits)) { + total_bytes += min(search_end, state->end) + 1 - + max(cur_start, state->start); + if (total_bytes >= max_bytes) + break; + if (!found) { + *start = state->start; + found = 1; + } + } + node = rb_next(node); + if (!node) + break; + } +out: + write_unlock_irq(&tree->lock); + return total_bytes; +} +/* + * helper function to lock both pages and extents in the tree. + * pages must be locked first. + */ +int lock_range(struct extent_io_tree *tree, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + int err; + + while (index <= end_index) { + page = grab_cache_page(tree->mapping, index); + if (!page) { + err = -ENOMEM; + goto failed; + } + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto failed; + } + index++; + } + lock_extent(tree, start, end, GFP_NOFS); + return 0; + +failed: + /* + * we failed above in getting the page at 'index', so we undo here + * up to but not including the page at 'index' + */ + end_index = index; + index = start >> PAGE_CACHE_SHIFT; + while (index < end_index) { + page = find_get_page(tree->mapping, index); + unlock_page(page); + page_cache_release(page); + index++; + } + return err; +} +EXPORT_SYMBOL(lock_range); + +/* + * helper function to unlock both pages and extents in the tree. 
+ */ +int unlock_range(struct extent_io_tree *tree, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(tree->mapping, index); + unlock_page(page); + page_cache_release(page); + index++; + } + unlock_extent(tree, start, end, GFP_NOFS); + return 0; +} +EXPORT_SYMBOL(unlock_range); + +int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) +{ + struct rb_node *node; + struct extent_state *state; + int ret = 0; + + write_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(&tree->state, start); + if (!node || IS_ERR(node)) { + ret = -ENOENT; + goto out; + } + state = rb_entry(node, struct extent_state, rb_node); + if (state->start != start) { + ret = -ENOENT; + goto out; + } + state->private = private; +out: + write_unlock_irq(&tree->lock); + return ret; +} + +int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) +{ + struct rb_node *node; + struct extent_state *state; + int ret = 0; + + read_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(&tree->state, start); + if (!node || IS_ERR(node)) { + ret = -ENOENT; + goto out; + } + state = rb_entry(node, struct extent_state, rb_node); + if (state->start != start) { + ret = -ENOENT; + goto out; + } + *private = state->private; +out: + read_unlock_irq(&tree->lock); + return ret; +} + +/* + * searches a range in the state tree for a given mask. + * If 'filled' == 1, this returns 1 only if ever extent in the tree + * has the bits set. Otherwise, 1 is returned if any bit in the + * range is found set. 
+ */
+int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
+		   int bits, int filled)
+{
+	struct extent_state *state = NULL;
+	struct rb_node *node;
+	int bitset = 0;
+	unsigned long flags;
+	/* walk the state records in order, beginning with the one tree_search() returns for 'start' */
+	read_lock_irqsave(&tree->lock, flags);
+	node = tree_search(&tree->state, start);
+	while (node && start <= end) {
+		state = rb_entry(node, struct extent_state, rb_node);
+		/* a gap in front of this record means the range is not fully covered */
+		if (filled && state->start > start) {
+			bitset = 0;
+			break;
+		}
+		/* this record begins past the range, nothing left to test */
+		if (state->start > end)
+			break;
+
+		if (state->state & bits) {
+			bitset = 1;
+			if (!filled)
+				break;
+		} else if (filled) {
+			bitset = 0;
+			break;
+		}
+		start = state->end + 1;	/* continue right behind this record */
+		if (start > end)
+			break;
+		node = rb_next(node);
+		if (!node) {
+			if (filled)	/* ran out of records before reaching 'end' */
+				bitset = 0;
+			break;
+		}
+	}
+	read_unlock_irqrestore(&tree->lock, flags);
+	return bitset;
+}
+EXPORT_SYMBOL(test_range_bit);
+
+/*
+ * helper function to set a given page up to date if all the
+ * extents in the tree for that page are up to date
+ *
+ * The byte range tested is the whole page: page->index << PAGE_CACHE_SHIFT
+ * up to PAGE_CACHE_SIZE - 1 bytes further.  Always returns 0.
+ */
+static int check_page_uptodate(struct extent_io_tree *tree,
+			       struct page *page)
+{
+	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+	u64 end = start + PAGE_CACHE_SIZE - 1;
+	if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
+		SetPageUptodate(page);
+	return 0;
+}
+
+/*
+ * helper function to unlock a page if all the extents in the tree
+ * for that page are unlocked
+ *
+ * The page stays locked while any record in its byte range still has
+ * EXTENT_LOCKED set.  Always returns 0.
+ */
+static int check_page_locked(struct extent_io_tree *tree,
+			     struct page *page)
+{
+	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+	u64 end = start + PAGE_CACHE_SIZE - 1;
+	if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
+		unlock_page(page);
+	return 0;
+}
+
+/*
+ * helper function to end page writeback if all the extents
+ * in the tree for that page are done with writeback
+ *
+ * Writeback is left running while any record in the page's byte range
+ * still has EXTENT_WRITEBACK set.  Always returns 0.
+ */
+static int check_page_writeback(struct extent_io_tree *tree,
+			     struct page *page)
+{
+	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+	u64 end = start + PAGE_CACHE_SIZE - 1;
+	if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
+		end_page_writeback(page);
+	return 0;
+}
+
+/* lots and lots of room for performance fixes in the end_bio funcs */
+
+/*
+ * after a writepage IO is done, we need to:
+ * clear the uptodate bits on error
+ * clear the writeback bits in the extent tree for this IO
+ * end_page_writeback if the page has no more pending IO
+ *
+ * Scheduling is not allowed, so the extent state tree is expected
+ * to have one and only one object corresponding to this IO.
+ *
+ * The segments of the bio are processed from the last one back to the
+ * first.  bio->bi_private carries the extent_io_tree.  The optional
+ * writepage_end_io_hook is called for every segment once its writeback
+ * bits have been cleared.
+ *
+ * The signature depends on the kernel version: after 2.6.23 the handler
+ * returns nothing, on older kernels it returns 1 while bio->bi_size is
+ * still non-zero and 0 once the segments have been processed.
+ */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
+static void end_bio_extent_writepage(struct bio *bio, int err)
+#else
+static int end_bio_extent_writepage(struct bio *bio,
+				   unsigned int bytes_done, int err)
+#endif
+{
+	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+	struct extent_io_tree *tree = bio->bi_private;
+	u64 start;
+	u64 end;
+	int whole_page;
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
+	if (bio->bi_size)
+		return 1;
+#endif
+
+	do {
+		struct page *page = bvec->bv_page;
+		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
+			 bvec->bv_offset;
+		end = start + bvec->bv_len - 1;
+		/* a segment spanning the entire page needs no tree lookup below */
+		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
+			whole_page = 1;
+		else
+			whole_page = 0;
+		/* step to the previous segment now and prefetch its page flags */
+		if (--bvec >= bio->bi_io_vec)
+			prefetchw(&bvec->bv_page->flags);
+
+		if (!uptodate) {
+			clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
+			ClearPageUptodate(page);
+			SetPageError(page);
+		}
+		clear_extent_writeback(tree, start, end, GFP_ATOMIC);
+
+		if (whole_page)
+			end_page_writeback(page);
+		else
+			check_page_writeback(tree, page);
+		if (tree->ops && tree->ops->writepage_end_io_hook)
+			tree->ops->writepage_end_io_hook(page, start, end);
+	} while (bvec >= bio->bi_io_vec);
+
+	bio_put(bio);
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
+	return 0;
+#endif
+}
+
+/*
+ * after a readpage IO is done, we need to:
+ * clear the uptodate bits on error
+ * set the uptodate bits if things worked
+ * set the page up to date if all extents
in the tree are uptodate + * clear the lock bit in the extent tree + * unlock the page if there are no other extents locked for it + * + * Scheduling is not allowed, so the extent state tree is expected + * to have one and only one object corresponding to this IO. + */ +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) +static void end_bio_extent_readpage(struct bio *bio, int err) +#else +static int end_bio_extent_readpage(struct bio *bio, + unsigned int bytes_done, int err) +#endif +{ + int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct extent_io_tree *tree = bio->bi_private; + u64 start; + u64 end; + int whole_page; + int ret; + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + if (bio->bi_size) + return 1; +#endif + + do { + struct page *page = bvec->bv_page; + start = ((u64)page->index << PAGE_CACHE_SHIFT) + + bvec->bv_offset; + end = start + bvec->bv_len - 1; + + if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) + whole_page = 1; + else + whole_page = 0; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + + if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { + ret = tree->ops->readpage_end_io_hook(page, start, end); + if (ret) + uptodate = 0; + } + if (uptodate) { + set_extent_uptodate(tree, start, end, GFP_ATOMIC); + if (whole_page) + SetPageUptodate(page); + else + check_page_uptodate(tree, page); + } else { + ClearPageUptodate(page); + SetPageError(page); + } + + unlock_extent(tree, start, end, GFP_ATOMIC); + + if (whole_page) + unlock_page(page); + else + check_page_locked(tree, page); + } while (bvec >= bio->bi_io_vec); + + bio_put(bio); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + return 0; +#endif +} + +/* + * IO done from prepare_write is pretty simple, we just unlock + * the structs in the extent tree when done, and set the uptodate bits + * as appropriate. 
+ */ +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) +static void end_bio_extent_preparewrite(struct bio *bio, int err) +#else +static int end_bio_extent_preparewrite(struct bio *bio, + unsigned int bytes_done, int err) +#endif +{ + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct extent_io_tree *tree = bio->bi_private; + u64 start; + u64 end; + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + if (bio->bi_size) + return 1; +#endif + + do { + struct page *page = bvec->bv_page; + start = ((u64)page->index << PAGE_CACHE_SHIFT) + + bvec->bv_offset; + end = start + bvec->bv_len - 1; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + + if (uptodate) { + set_extent_uptodate(tree, start, end, GFP_ATOMIC); + } else { + ClearPageUptodate(page); + SetPageError(page); + } + + unlock_extent(tree, start, end, GFP_ATOMIC); + + } while (bvec >= bio->bi_io_vec); + + bio_put(bio); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + return 0; +#endif +} + +static struct bio * +extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, + gfp_t gfp_flags) +{ + struct bio *bio; + + bio = bio_alloc(gfp_flags, nr_vecs); + + if (bio == NULL && (current->flags & PF_MEMALLOC)) { + while (!bio && (nr_vecs /= 2)) + bio = bio_alloc(gfp_flags, nr_vecs); + } + + if (bio) { + bio->bi_bdev = bdev; + bio->bi_sector = first_sector; + } + return bio; +} + +static int submit_one_bio(int rw, struct bio *bio) +{ + u64 maxsector; + int ret = 0; + + bio_get(bio); + + maxsector = bio->bi_bdev->bd_inode->i_size >> 9; + if (maxsector < bio->bi_sector) { + printk("sector too large max %Lu got %llu\n", maxsector, + (unsigned long long)bio->bi_sector); + WARN_ON(1); + } + + submit_bio(rw, bio); + if (bio_flagged(bio, BIO_EOPNOTSUPP)) + ret = -EOPNOTSUPP; + bio_put(bio); + return ret; +} + +static int submit_extent_page(int rw, struct extent_io_tree *tree, + struct page *page, sector_t sector, 
+ size_t size, unsigned long offset, + struct block_device *bdev, + struct bio **bio_ret, + unsigned long max_pages, + bio_end_io_t end_io_func) +{ + int ret = 0; + struct bio *bio; + int nr; + + if (bio_ret && *bio_ret) { + bio = *bio_ret; + if (bio->bi_sector + (bio->bi_size >> 9) != sector || + bio_add_page(bio, page, size, offset) < size) { + ret = submit_one_bio(rw, bio); + bio = NULL; + } else { + return 0; + } + } + nr = min_t(int, max_pages, bio_get_nr_vecs(bdev)); + bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); + if (!bio) { + printk("failed to allocate bio nr %d\n", nr); + } + bio_add_page(bio, page, size, offset); + bio->bi_end_io = end_io_func; + bio->bi_private = tree; + if (bio_ret) { + *bio_ret = bio; + } else { + ret = submit_one_bio(rw, bio); + } + + return ret; +} + +void set_page_extent_mapped(struct page *page) +{ + if (!PagePrivate(page)) { + SetPagePrivate(page); + WARN_ON(!page->mapping->a_ops->invalidatepage); + set_page_private(page, EXTENT_PAGE_PRIVATE); + page_cache_get(page); + } +} + +void set_page_extent_head(struct page *page, unsigned long len) +{ + set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); +} + +/* + * basic readpage implementation. 
Locked extent state structs are inserted
+ * into the tree that are removed when the IO is done (by the end_io
+ * handlers)
+ */
+static int __extent_read_full_page(struct extent_io_tree *tree,
+				   struct page *page,
+				   get_extent_t *get_extent,
+				   struct bio **bio)
+{
+	struct inode *inode = page->mapping->host;
+	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+	u64 page_end = start + PAGE_CACHE_SIZE - 1;
+	u64 end;
+	u64 cur = start;
+	u64 extent_offset;
+	u64 last_byte = i_size_read(inode);
+	u64 block_start;
+	u64 cur_end;
+	sector_t sector;
+	struct extent_map *em;
+	struct block_device *bdev;
+	int ret;
+	int nr = 0;	/* number of pieces for which IO submission was attempted */
+	size_t page_offset = 0;
+	size_t iosize;
+	size_t blocksize = inode->i_sb->s_blocksize;
+	/*
+	 * The whole page range is locked up front and then walked one
+	 * extent at a time; 'cur' is the file offset of the piece being
+	 * handled and 'page_offset' its offset inside the page.  Pieces
+	 * that need no IO are unlocked right here, the others are unlocked
+	 * by end_bio_extent_readpage.  Always returns 0; failures are
+	 * recorded with SetPageError.
+	 */
+	set_page_extent_mapped(page);
+
+	end = page_end;
+	lock_extent(tree, start, end, GFP_NOFS);
+
+	while (cur <= end) {
+		if (cur >= last_byte) {	/* at or past i_size: zero the rest of the page */
+			char *userpage;
+			iosize = PAGE_CACHE_SIZE - page_offset;
+			userpage = kmap_atomic(page, KM_USER0);
+			memset(userpage + page_offset, 0, iosize);
+			flush_dcache_page(page);
+			kunmap_atomic(userpage, KM_USER0);
+			set_extent_uptodate(tree, cur, cur + iosize - 1,
+					    GFP_NOFS);
+			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+			break;
+		}
+		em = get_extent(inode, page, page_offset, cur,
+				end - cur + 1, 0);
+		if (IS_ERR(em) || !em) {	/* no mapping: flag the page, release what is left */
+			SetPageError(page);
+			unlock_extent(tree, cur, end, GFP_NOFS);
+			break;
+		}
+
+		extent_offset = cur - em->start;
+		BUG_ON(extent_map_end(em) <= cur);
+		BUG_ON(end < cur);
+		/* this piece ends with the extent or the page, whichever comes first */
+		iosize = min(extent_map_end(em) - cur, end - cur + 1);
+		cur_end = min(extent_map_end(em) - 1, end);
+		iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);	/* round up to a whole block */
+		sector = (em->block_start + extent_offset) >> 9;
+		bdev = em->bdev;
+		block_start = em->block_start;
+		free_extent_map(em);
+		em = NULL;
+
+		/* we've found a hole, just zero and go on */
+		if (block_start == EXTENT_MAP_HOLE) {
+			char *userpage;
+			userpage = kmap_atomic(page, KM_USER0);
+			memset(userpage + page_offset, 0, iosize);
+			flush_dcache_page(page);
+			kunmap_atomic(userpage, KM_USER0);
+
+			set_extent_uptodate(tree, cur, cur + iosize - 1,
+					    GFP_NOFS);
+			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+			cur = cur + iosize;
+			page_offset += iosize;
+			continue;
+		}
+		/* the get_extent function already copied into the page */
+		if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
+			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+			cur = cur + iosize;
+			page_offset += iosize;
+			continue;
+		}
+		/* real IO is needed; the optional hook may veto it with a non-zero return */
+		ret = 0;
+		if (tree->ops && tree->ops->readpage_io_hook) {
+			ret = tree->ops->readpage_io_hook(page, cur,
+							  cur + iosize - 1);
+		}
+		if (!ret) {
+			/* NOTE(review): this 'nr' shadows the counter declared at the top */
+			unsigned long nr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
+			nr -= page->index;
+			ret = submit_extent_page(READ, tree, page,
+					 sector, iosize, page_offset,
+					 bdev, bio, nr,
+					 end_bio_extent_readpage);
+		}
+		if (ret)
+			SetPageError(page);
+		cur = cur + iosize;
+		page_offset += iosize;
+		nr++;
+	}
+	if (!nr) {	/* nothing reached the IO step, so no end_io handler will unlock the page */
+		if (!PageError(page))
+			SetPageUptodate(page);
+		unlock_page(page);
+	}
+	return 0;
+}
+/*
+ * read one page: run __extent_read_full_page with a private bio pointer
+ * and submit whatever bio it left pending.  Returns the value of
+ * __extent_read_full_page.
+ */
+int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
+			    get_extent_t *get_extent)
+{
+	struct bio *bio = NULL;
+	int ret;
+
+	ret = __extent_read_full_page(tree, page, get_extent, &bio);
+	if (bio)
+		submit_one_bio(READ, bio);
+	return ret;
+}
+EXPORT_SYMBOL(extent_read_full_page);
+
+/*
+ * the writepage semantics are similar to regular writepage. extent
+ * records are inserted to lock ranges in the tree, and as dirty areas
+ * are found, they are marked writeback.
Then the lock bits are removed
+ * and the end_io handler clears the writeback ranges
+ */
+static int __extent_writepage(struct page *page, struct writeback_control *wbc,
+			      void *data)
+{
+	struct inode *inode = page->mapping->host;
+	struct extent_page_data *epd = data;	/* carries the tree, get_extent and the bio being built */
+	struct extent_io_tree *tree = epd->tree;
+	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+	u64 delalloc_start;
+	u64 page_end = start + PAGE_CACHE_SIZE - 1;
+	u64 end;
+	u64 cur = start;
+	u64 extent_offset;
+	u64 last_byte = i_size_read(inode);
+	u64 block_start;
+	u64 iosize;
+	sector_t sector;
+	struct extent_map *em;
+	struct block_device *bdev;
+	int ret;
+	int nr = 0;	/* number of pieces for which IO submission was attempted */
+	size_t page_offset = 0;
+	size_t blocksize;
+	loff_t i_size = i_size_read(inode);
+	unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
+	u64 nr_delalloc;
+	u64 delalloc_end;
+	/* a page that starts beyond i_size has nothing to write: drop its dirty bits and unlock it */
+	WARN_ON(!PageLocked(page));
+	if (page->index > end_index) {
+		clear_extent_dirty(tree, start, page_end, GFP_NOFS);
+		unlock_page(page);
+		return 0;
+	}
+	/* on the page that contains i_size, zero everything from i_size to the end of the page */
+	if (page->index == end_index) {
+		char *userpage;
+
+		size_t offset = i_size & (PAGE_CACHE_SIZE - 1);
+
+		userpage = kmap_atomic(page, KM_USER0);
+		memset(userpage + offset, 0, PAGE_CACHE_SIZE - offset);
+		flush_dcache_page(page);
+		kunmap_atomic(userpage, KM_USER0);
+	}
+
+	set_page_extent_mapped(page);
+	/*
+	 * Pass every delalloc range found from the start of this page on to
+	 * fill_delalloc and clear its LOCKED and DELALLOC bits, until the
+	 * search has moved past the end of the page.
+	 */
+	delalloc_start = start;
+	delalloc_end = 0;
+	while(delalloc_end < page_end) {
+		nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start,
+						       &delalloc_end,
+						       128 * 1024 * 1024);
+		if (nr_delalloc == 0) {
+			delalloc_start = delalloc_end + 1;
+			continue;
+		}
+		tree->ops->fill_delalloc(inode, delalloc_start,
+					 delalloc_end);	/* NOTE(review): tree->ops is not NULL-checked here, unlike the hooks further down */
+		clear_extent_bit(tree, delalloc_start,
+				 delalloc_end,
+				 EXTENT_LOCKED | EXTENT_DELALLOC,
+				 1, 0, GFP_NOFS);
+		delalloc_start = delalloc_end + 1;
+	}
+	lock_extent(tree, start, page_end, GFP_NOFS);
+
+	end = page_end;
+	if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
+		printk("found delalloc bits after lock_extent\n");
+	}
+
+	if (last_byte <= start) {
+		clear_extent_dirty(tree, start, page_end, GFP_NOFS);
+		goto done;
+	}
+
+	set_extent_uptodate(tree, start, page_end, GFP_NOFS);
+	blocksize = inode->i_sb->s_blocksize;
+	/* walk the page one extent at a time; 'cur' is a file offset, 'page_offset' the offset in the page */
+	while (cur <= end) {
+		if (cur >= last_byte) {
+			clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
+			break;
+		}
+		em = epd->get_extent(inode, page, page_offset, cur,
+				     end - cur + 1, 1);
+		if (IS_ERR(em) || !em) {
+			SetPageError(page);
+			break;
+		}
+
+		extent_offset = cur - em->start;
+		BUG_ON(extent_map_end(em) <= cur);
+		BUG_ON(end < cur);
+		iosize = min(extent_map_end(em) - cur, end - cur + 1);
+		iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);	/* round up to a whole block */
+		sector = (em->block_start + extent_offset) >> 9;
+		bdev = em->bdev;
+		block_start = em->block_start;
+		free_extent_map(em);
+		em = NULL;
+		/* holes and inline extents get no IO from here, only their dirty bits cleared */
+		if (block_start == EXTENT_MAP_HOLE ||
+		    block_start == EXTENT_MAP_INLINE) {
+			clear_extent_dirty(tree, cur,
+					   cur + iosize - 1, GFP_NOFS);
+			cur = cur + iosize;
+			page_offset += iosize;
+			continue;
+		}
+
+		/* leave this out until we have a page_mkwrite call */
+		if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
+				   EXTENT_DIRTY, 0)) {
+			cur = cur + iosize;
+			page_offset += iosize;
+			continue;
+		}
+		clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
+		if (tree->ops && tree->ops->writepage_io_hook) {
+			ret = tree->ops->writepage_io_hook(page, cur,
+						cur + iosize - 1);
+		} else {
+			ret = 0;
+		}
+		if (ret)
+			SetPageError(page);
+		else {
+			unsigned long max_nr = end_index + 1;
+			set_range_writeback(tree, cur, cur + iosize - 1);
+			if (!PageWriteback(page)) {
+				printk("warning page %lu not writeback, "
+				       "cur %llu end %llu\n", page->index,
+				       (unsigned long long)cur,
+				       (unsigned long long)end);
+			}
+			/* the bio is kept in epd->bio so later pages can be merged into it */
+			ret = submit_extent_page(WRITE, tree, page, sector,
+						 iosize, page_offset, bdev,
+						 &epd->bio, max_nr,
+						 end_bio_extent_writepage);
+			if (ret)
+				SetPageError(page);
+		}
+		cur = cur + iosize;
+		page_offset += iosize;
+		nr++;
+	}
+done:
+	if (nr == 0) {
+		/* make sure the mapping tag for page dirty gets cleared */
+		set_page_writeback(page);
+		end_page_writeback(page);
+	}
+	unlock_extent(tree, start, page_end, GFP_NOFS);
+	unlock_page(page);
+	return 0;	/* failures are only recorded with SetPageError */
+}
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+
+/* Taken directly from 2.6.23 for 2.6.18 back port */
+typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
+				void *data);
+
+/**
+ * write_cache_pages - walk the list of dirty pages of the given address space
+ * and write all of them.
+ * @mapping: address space structure to write
+ * @wbc: subtract the number of written pages from *@wbc->nr_to_write
+ * @writepage: function called for each page
+ * @data: data passed to writepage function
+ *
+ * If a page is already under I/O, write_cache_pages() skips it, even
+ * if it's dirty. This is desirable behaviour for memory-cleaning writeback,
+ * but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
+ * and msync() need to guarantee that all the data which was dirty at the time
+ * the call was made get new I/O started against them. If wbc->sync_mode is
+ * WB_SYNC_ALL then we were called for data integrity and we must wait for
+ * existing IO to complete.
+ */
+static int write_cache_pages(struct address_space *mapping,
+		      struct writeback_control *wbc, writepage_t writepage,
+		      void *data)
+{
+	struct backing_dev_info *bdi = mapping->backing_dev_info;
+	int ret = 0;
+	int done = 0;
+	struct pagevec pvec;
+	int nr_pages;
+	pgoff_t index;
+	pgoff_t end;		/* Inclusive */
+	int scanned = 0;
+	int range_whole = 0;
+
+	if (wbc->nonblocking && bdi_write_congested(bdi)) {
+		wbc->encountered_congestion = 1;
+		return 0;
+	}
+
+	pagevec_init(&pvec, 0);
+	if (wbc->range_cyclic) {
+		index = mapping->writeback_index; /* Start from prev offset */
+		end = -1;
+	} else {
+		index = wbc->range_start >> PAGE_CACHE_SHIFT;
+		end = wbc->range_end >> PAGE_CACHE_SHIFT;
+		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+			range_whole = 1;
+		scanned = 1;
+	}
+retry:
+	while (!done && (index <= end) &&
+	       (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+					      PAGECACHE_TAG_DIRTY,
+					      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+		unsigned i;
+
+		scanned = 1;
+		for (i = 0; i < nr_pages; i++) {
+			struct page *page = pvec.pages[i];
+
+			/*
+			 * At this point we hold neither mapping->tree_lock nor
+			 * lock on the page itself: the page may be truncated or
+			 * invalidated (changing page->mapping to NULL), or even
+			 * swizzled back from swapper_space to tmpfs file
+			 * mapping
+			 */
+			lock_page(page);
+
+			if (unlikely(page->mapping != mapping)) {
+				unlock_page(page);
+				continue;
+			}
+
+			if (!wbc->range_cyclic && page->index > end) {
+				done = 1;
+				unlock_page(page);
+				continue;
+			}
+
+			if (wbc->sync_mode != WB_SYNC_NONE)
+				wait_on_page_writeback(page);
+
+			if (PageWriteback(page) ||
+			    !clear_page_dirty_for_io(page)) {
+				unlock_page(page);
+				continue;
+			}
+
+			ret = (*writepage)(page, wbc, data);
+
+			if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
+				unlock_page(page);
+				ret = 0;
+			}
+			if (ret || (--(wbc->nr_to_write) <= 0))
+				done = 1;
+			if (wbc->nonblocking && bdi_write_congested(bdi)) {
+				wbc->encountered_congestion = 1;
+				done = 1;
+			}
+		}
+		pagevec_release(&pvec);
+		cond_resched();
+	}
+	if (!scanned && !done) {
+		/*
+		 * We hit the last page and there is more work to be done: wrap
+		 * back to the start of the file
+		 */
+		scanned = 1;
+		index = 0;
+		goto retry;
+	}
+	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+		mapping->writeback_index = index;
+	return ret;
+}
+#endif
+
+/*
+ * writepage entry point: write 'page' with __extent_writepage and then
+ * try to push out up to 64 more dirty pages that follow it in the file
+ * (WB_SYNC_NONE, starting one page past 'page'), so they can share the
+ * bio being built.  Whatever bio is still pending at the end is
+ * submitted.
+ *
+ * Returns the result of writing 'page' itself; the result of the extra
+ * write_cache_pages pass is not reported.
+ */
+int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
+			  get_extent_t *get_extent,
+			  struct writeback_control *wbc)
+{
+	int ret;
+	struct address_space *mapping = page->mapping;
+	struct extent_page_data epd = {
+		.bio = NULL,
+		.tree = tree,
+		.get_extent = get_extent,
+	};
+	struct writeback_control wbc_writepages = {
+		.bdi		= wbc->bdi,
+		.sync_mode	= WB_SYNC_NONE,
+		.older_than_this = NULL,
+		.nr_to_write	= 64,
+		.range_start	= page_offset(page) + PAGE_CACHE_SIZE,
+		.range_end	= (loff_t)-1,
+	};
+
+
+	ret = __extent_writepage(page, wbc, &epd);
+
+	write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd);
+	if (epd.bio) {
+		submit_one_bio(WRITE, epd.bio);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(extent_write_full_page);
+
+
+/*
+ * writepages entry point: run __extent_writepage over the dirty pages
+ * selected by 'wbc' and submit the bio left pending afterwards.
+ * Returns the result of write_cache_pages.
+ */
+int extent_writepages(struct extent_io_tree *tree,
+		      struct address_space *mapping,
+		      get_extent_t *get_extent,
+		      struct writeback_control *wbc)
+{
+	int ret = 0;
+	struct extent_page_data epd = {
+		.bio = NULL,
+		.tree = tree,
+		.get_extent = get_extent,
+	};
+
+	ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd);
+	if (epd.bio) {
+		submit_one_bio(WRITE, epd.bio);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(extent_writepages);
+
+/*
+ * readpages entry point: take the pages off the 'pages' list from its
+ * tail, add each one to the page cache and the LRU and start reading it
+ * with __extent_read_full_page, all sharing a single bio that is
+ * submitted at the end.  Pages that cannot be added to the page cache are
+ * just released.  Always returns 0.
+ */
+int extent_readpages(struct extent_io_tree *tree,
+		     struct address_space *mapping,
+		     struct list_head *pages, unsigned nr_pages,
+		     get_extent_t get_extent)
+{
+	struct bio *bio = NULL;
+	unsigned page_idx;
+	struct pagevec pvec;
+
+	pagevec_init(&pvec, 0);
+	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
+		struct page *page = list_entry(pages->prev, struct page, lru);
+
+		prefetchw(&page->flags);
+		list_del(&page->lru);
+		/*
+		 * what we want to do here is call add_to_page_cache_lru,
+		 * but that isn't exported, so we reproduce it here
+		 */
+		if (!add_to_page_cache(page, mapping,
+					page->index, GFP_KERNEL)) {
+
+			/* open coding of lru_cache_add, also not exported */
+			page_cache_get(page);
+			if (!pagevec_add(&pvec, page))
+				__pagevec_lru_add(&pvec);
+			__extent_read_full_page(tree, page, get_extent, &bio);
+		}
+		page_cache_release(page);
+	}
+	if (pagevec_count(&pvec))
+		__pagevec_lru_add(&pvec);
+	BUG_ON(!list_empty(pages));
+	if (bio)
+		submit_one_bio(READ, bio);
+	return 0;
+}
+EXPORT_SYMBOL(extent_readpages);
+
+/*
+ * basic invalidatepage code, this waits on any locked or writeback
+ * ranges corresponding to the page, and then deletes any extent state
+ * records from the tree
+ *
+ * 'offset' is rounded up to the next block boundary first; nothing is
+ * done when that leaves no whole block inside the page.  Always returns 0.
+ */
+int extent_invalidatepage(struct extent_io_tree *tree,
+			  struct page *page, unsigned long offset)
+{
+	u64 start = ((u64)page->index << PAGE_CACHE_SHIFT);
+	u64 end = start + PAGE_CACHE_SIZE - 1;
+	size_t blocksize = page->mapping->host->i_sb->s_blocksize;
+
+	start += (offset + blocksize -1) & ~(blocksize - 1);
+	if (start > end)
+		return 0;
+
+	lock_extent(tree, start, end, GFP_NOFS);
+	wait_on_extent_writeback(tree, start, end);
+	clear_extent_bit(tree, start, end,
+			 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
+			 1, 1, GFP_NOFS);
+	return 0;
+}
+EXPORT_SYMBOL(extent_invalidatepage);
+
+/*
+ * simple commit_write call, set_range_dirty is used to mark both
+ * the pages and the extent records as dirty
+ *
+ * i_size is extended (and the inode marked dirty) when the write ends
+ * beyond the current i_size.  Always returns 0.
+ */
+int extent_commit_write(struct extent_io_tree *tree,
+			struct inode *inode, struct page *page,
+			unsigned from, unsigned to)
+{
+	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+
+	set_page_extent_mapped(page);
+	set_page_dirty(page);
+
+	if (pos > inode->i_size) {
+		i_size_write(inode, pos);
+		mark_inode_dirty(inode);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(extent_commit_write);
+
+/*
+ * prepare_write: get the blocks of 'page' touched by the byte range
+ * [from, to) ready for a write.
+ *
+ * The page range is locked and the blocks are visited one extent at a
+ * time.  In a block that was newly allocated (EXTENT_NEW) the parts
+ * outside [from, to) are zeroed.  A block that already exists on disk, is
+ * only partially overwritten and is not up to date is read in, with
+ * end_bio_extent_preparewrite unlocking it when the read finishes.  Every
+ * other block is marked up to date and unlocked immediately.  Submitted
+ * reads are waited for before returning.
+ *
+ * Returns 0 on success.  When get_extent fails, its error (or -EIO for a
+ * NULL map) is returned and the blocks that were not processed are
+ * unlocked, since no IO will ever unlock them.
+ */
+int extent_prepare_write(struct extent_io_tree *tree,
+			 struct inode *inode, struct page *page,
+			 unsigned from, unsigned to, get_extent_t *get_extent)
+{
+	u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
+	u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
+	u64 block_start;
+	u64 orig_block_start;
+	u64 block_end;
+	u64 cur_end;
+	struct extent_map *em;
+	unsigned blocksize = 1 << inode->i_blkbits;
+	size_t page_offset = 0;
+	size_t block_off_start;
+	size_t block_off_end;
+	int err = 0;
+	int iocount = 0;
+	int ret = 0;
+	int isnew;
+
+	set_page_extent_mapped(page);
+
+	block_start = (page_start + from) & ~((u64)blocksize - 1);
+	block_end = (page_start + to - 1) | (blocksize - 1);
+	orig_block_start = block_start;
+
+	lock_extent(tree, page_start, page_end, GFP_NOFS);
+	while(block_start <= block_end) {
+		em = get_extent(inode, page, page_offset, block_start,
+				block_end - block_start + 1, 1);
+		if (IS_ERR(em) || !em) {
+			/* report the failure instead of returning success */
+			err = em ? PTR_ERR(em) : -EIO;
+			unlock_extent(tree, block_start, block_end, GFP_NOFS);
+			break;
+		}
+		cur_end = min(block_end, extent_map_end(em) - 1);
+		block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
+		block_off_end = block_off_start + blocksize;
+		isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);
+
+		if (!PageUptodate(page) && isnew &&
+		    (block_off_end > to || block_off_start < from)) {
+			void *kaddr;
+
+			kaddr = kmap_atomic(page, KM_USER0);
+			if (block_off_end > to)
+				memset(kaddr + to, 0, block_off_end - to);
+			if (block_off_start < from)
+				memset(kaddr + block_off_start, 0,
+				       from - block_off_start);
+			flush_dcache_page(page);
+			kunmap_atomic(kaddr, KM_USER0);
+		}
+		if ((em->block_start != EXTENT_MAP_HOLE &&
+		     em->block_start != EXTENT_MAP_INLINE) &&
+		    !isnew && !PageUptodate(page) &&
+		    (block_off_end > to || block_off_start < from) &&
+		    !test_range_bit(tree, block_start, cur_end,
+				    EXTENT_UPTODATE, 1)) {
+			u64 sector;
+			u64 extent_offset = block_start - em->start;
+			size_t iosize;
+			sector = (em->block_start + extent_offset) >> 9;
+			iosize = (cur_end - block_start + blocksize) &
+				~((u64)blocksize - 1);
+			/*
+			 * we've already got the extent locked, but we
+			 * need to split the state such that our end_bio
+			 * handler can clear the lock.
+			 */
+			set_extent_bit(tree, block_start,
+				       block_start + iosize - 1,
+				       EXTENT_LOCKED, 0, NULL, GFP_NOFS);
+			/* NOTE(review): the submit result is stored but never acted on */
+			ret = submit_extent_page(READ, tree, page,
+					 sector, iosize, page_offset, em->bdev,
+					 NULL, 1,
+					 end_bio_extent_preparewrite);
+			iocount++;
+			block_start = block_start + iosize;
+		} else {
+			set_extent_uptodate(tree, block_start, cur_end,
+					    GFP_NOFS);
+			unlock_extent(tree, block_start, cur_end, GFP_NOFS);
+			block_start = cur_end + 1;
+		}
+		page_offset = block_start & (PAGE_CACHE_SIZE - 1);
+		free_extent_map(em);
+	}
+	if (iocount) {
+		wait_extent_bit(tree, orig_block_start,
+				block_end, EXTENT_LOCKED);
+	}
+	check_page_uptodate(tree, page);
+	/* FIXME, zero out newly allocated blocks on error */
+	return err;
+}
+EXPORT_SYMBOL(extent_prepare_write);
+
+/*
+ * a helper for releasepage.  As long as there are no locked extents
+ * in the range corresponding to the page, both state records and extent
+ * map records are removed
+ *
+ * Returns 1 when the state records of the page were released and 0 when
+ * a locked extent in the page range prevented it.
+ */
+int try_release_extent_mapping(struct extent_map_tree *map,
+			       struct extent_io_tree *tree, struct page *page)
+{
+	struct extent_map *em;
+	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+	u64 end = start + PAGE_CACHE_SIZE - 1;
+	u64 orig_start = start;
+	int ret = 1;
+
+	while (start <= end) {
+		spin_lock(&map->lock);
+		em = lookup_extent_mapping(map, start, end);
+		if (!em || IS_ERR(em)) {
+			spin_unlock(&map->lock);
+			break;
+		}
+		if (!test_range_bit(tree, em->start, extent_map_end(em) - 1,
+				    EXTENT_LOCKED, 0)) {
+			remove_extent_mapping(map, em);
+			/* once for the rb tree */
+			free_extent_map(em);
+		}
+		start = extent_map_end(em);
+		spin_unlock(&map->lock);
+
+		/* once for us */
+		free_extent_map(em);
+	}
+	if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0))
+		ret = 0;
+	else
+		clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE,
+				 1, 1, GFP_NOFS);
+	return ret;
+}
+EXPORT_SYMBOL(try_release_extent_mapping);
+
+/*
+ * bmap helper: translate file block 'iblock' into a block number on disk
+ * by looking up the extent that covers it.
+ *
+ * Returns 0 when the lookup fails or the block lies in a hole or an
+ * inline extent.  'iblock' is widened to 64 bits before it is shifted so
+ * the byte offset cannot be truncated when sector_t is a 32 bit type.
+ */
+sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
+		get_extent_t *get_extent)
+{
+	struct inode *inode = mapping->host;
+	u64 start = (u64)iblock << inode->i_blkbits;
+	sector_t sector = 0;
+	struct extent_map *em;
+
+	em = get_extent(inode, NULL, 0, start, (1 << inode->i_blkbits), 0);
+	if (!em || IS_ERR(em))
+		return 0;
+
+	if (em->block_start == EXTENT_MAP_INLINE ||
+	    em->block_start == EXTENT_MAP_HOLE)
+		goto out;
+
+	sector = (em->block_start + start - em->start) >> inode->i_blkbits;
+	printk("bmap finds %Lu %Lu block %Lu\n", em->start, em->len, em->block_start);
+out:
+	free_extent_map(em);
+	return sector;
+}
+
+/*
+ * put 'eb' at the head of the buffer LRU of the tree.
+ *
+ * A buffer that is not on the list yet gets an extra reference for the
+ * list; once the list has grown to BUFFER_LRU_MAX entries the buffer at
+ * the tail is taken off and its list reference dropped.  A buffer that is
+ * already on the list is only moved to the head.  Every caller in this
+ * file holds tree->lru_lock around the call.  Always returns 0.
+ */
+static int add_lru(struct extent_io_tree *tree, struct extent_buffer *eb)
+{
+	if (list_empty(&eb->lru)) {
+		extent_buffer_get(eb);
+		list_add(&eb->lru, &tree->buffer_lru);
+		tree->lru_size++;
+		if (tree->lru_size >= BUFFER_LRU_MAX) {
+			struct extent_buffer *rm;
+			rm = list_entry(tree->buffer_lru.prev,
+					struct extent_buffer, lru);
+			tree->lru_size--;
+			list_del_init(&rm->lru);
+			free_extent_buffer(rm);
+		}
+	} else
+		list_move(&eb->lru, &tree->buffer_lru);
+	return 0;
+}
+
+/*
+ * look for a buffer with exactly this start and length on the LRU.
+ * Returns it with an extra reference taken, or NULL when there is none.
+ * The only caller in this file holds tree->lru_lock around the call.
+ */
+static struct extent_buffer *find_lru(struct extent_io_tree *tree,
+				      u64 start, unsigned long len)
+{
+	struct list_head *lru = &tree->buffer_lru;
+	struct list_head *cur = lru->next;
+	struct extent_buffer *eb;
+
+	if (list_empty(lru))
+		return NULL;
+
+	do {
+		eb = list_entry(cur, struct extent_buffer, lru);
+		if (eb->start == start && eb->len == len) {
+			extent_buffer_get(eb);
+			return eb;
+		}
+		cur = cur->next;
+	} while (cur != lru);
+	return NULL;
+}
+
+/* number of pages touched by the byte range [start, start + len) */
+static inline unsigned long num_extent_pages(u64 start, u64 len)
+{
+	return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
+		(start >> PAGE_CACHE_SHIFT);
+}
+
+/*
+ * return page number 'i' of an extent buffer.  Page 0 is cached in
+ * eb->first_page; the other pages are looked up in the page cache radix
+ * tree of the mapping that first_page belongs to.  No page reference is
+ * taken by the lookup.
+ */
+static inline struct page *extent_buffer_page(struct extent_buffer *eb,
+					      unsigned long i)
+{
+	struct page *p;
+	struct address_space *mapping;
+
+	if (i == 0)
+		return eb->first_page;
+	i += eb->start >> PAGE_CACHE_SHIFT;
+	mapping = eb->first_page->mapping;
+	read_lock_irq(&mapping->tree_lock);
+	p = radix_tree_lookup(&mapping->page_tree, i);
+	read_unlock_irq(&mapping->tree_lock);
+	return p;
+}
+
+/*
+ * get the extent_buffer structure for [start, start + len): either an
+ * existing one found on the LRU (returned with an extra reference) or a
+ * newly allocated, zeroed one with a single reference that is not on the
+ * LRU yet.
+ *
+ * Returns NULL when the allocation fails.
+ */
+static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
+						   u64 start,
+						   unsigned long len,
+						   gfp_t mask)
+{
+	struct extent_buffer *eb = NULL;
+
+	spin_lock(&tree->lru_lock);
+	eb = find_lru(tree, start, len);
+	spin_unlock(&tree->lru_lock);
+	if (eb) {
+		return eb;
+	}
+
+	eb = kmem_cache_zalloc(extent_buffer_cache, mask);
+	if (!eb)
+		return NULL;
+	INIT_LIST_HEAD(&eb->lru);
+	eb->start = start;
+	eb->len = len;
+	atomic_set(&eb->refs, 1);
+
+	return eb;
+}
+
+/* give the memory of an extent_buffer structure back to its slab cache */
+static void __free_extent_buffer(struct extent_buffer *eb)
+{
+	kmem_cache_free(extent_buffer_cache, eb);
+}
+
+/*
+ * get an extent buffer for [start, start + len), creating the page cache
+ * pages behind it as needed.
+ *
+ * 'page0', when given, is used as the first page of the buffer instead of
+ * being looked up.  A buffer found on the LRU that is already filled is
+ * returned as is.  Otherwise every page is found or created, tagged as
+ * extent mapped, and the buffer is flagged up to date when all of its
+ * pages are.  The buffer is put on the LRU before it is returned.
+ *
+ * Returns NULL when the buffer or one of its pages cannot be allocated.
+ */
+struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
+					  u64 start, unsigned long len,
+					  struct page *page0,
+					  gfp_t mask)
+{
+	unsigned long num_pages = num_extent_pages(start, len);
+	unsigned long i;
+	unsigned long index = start >> PAGE_CACHE_SHIFT;
+	struct extent_buffer *eb;
+	struct page *p;
+	struct address_space *mapping = tree->mapping;
+	int uptodate = 1;
+
+	eb = __alloc_extent_buffer(tree, start, len, mask);
+	if (!eb || IS_ERR(eb))
+		return NULL;
+
+	if (eb->flags & EXTENT_BUFFER_FILLED)
+		goto lru_add;
+
+	if (page0) {
+		eb->first_page = page0;
+		i = 1;
+		index++;
+		page_cache_get(page0);
+		mark_page_accessed(page0);
+		set_page_extent_mapped(page0);
+		WARN_ON(!PageUptodate(page0));
+		set_page_extent_head(page0, len);
+	} else {
+		i = 0;
+	}
+	for (; i < num_pages; i++, index++) {
+		p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
+		if (!p) {
+			WARN_ON(1);
+			goto fail;
+		}
+		set_page_extent_mapped(p);
+		mark_page_accessed(p);
+		if (i == 0) {
+			eb->first_page = p;
+			set_page_extent_head(p, len);
+		} else {
+			set_page_private(p, EXTENT_PAGE_PRIVATE);
+		}
+		if (!PageUptodate(p))
+			uptodate = 0;
+		unlock_page(p);
+	}
+	if (uptodate)
+		eb->flags |= EXTENT_UPTODATE;
+	eb->flags |= EXTENT_BUFFER_FILLED;
+
+lru_add:
+	spin_lock(&tree->lru_lock);
+	add_lru(tree, eb);
+	spin_unlock(&tree->lru_lock);
+	return eb;
+
+fail:
+	spin_lock(&tree->lru_lock);
+	list_del_init(&eb->lru);
+	spin_unlock(&tree->lru_lock);
+	if (!atomic_dec_and_test(&eb->refs))
+		return NULL;
+	/* drop the references on the pages collected so far, page 0 last */
+	for (index = 1; index < i; index++) {
+		page_cache_release(extent_buffer_page(eb, index));
+	}
+	if (i > 0)
+		page_cache_release(extent_buffer_page(eb, 0));
+	__free_extent_buffer(eb);
+	return NULL;
+}
+EXPORT_SYMBOL(alloc_extent_buffer);
+
+/*
+ * like alloc_extent_buffer, but only succeeds when every page of the
+ * buffer is already present in the page cache; no page is created.
+ *
+ * Returns NULL when the buffer cannot be allocated or a page is missing.
+ */
+struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
+					 u64 start, unsigned long len,
+					  gfp_t mask)
+{
+	unsigned long num_pages = num_extent_pages(start, len);
+	unsigned long i;
+	unsigned long index = start >> PAGE_CACHE_SHIFT;
+	struct extent_buffer *eb;
+	struct page *p;
+	struct address_space *mapping = tree->mapping;
+	int uptodate = 1;
+
+	eb = __alloc_extent_buffer(tree, start, len, mask);
+	if (!eb || IS_ERR(eb))
+		return NULL;
+
+	if (eb->flags & EXTENT_BUFFER_FILLED)
+		goto lru_add;
+
+	for (i = 0; i < num_pages; i++, index++) {
+		p = find_lock_page(mapping, index);
+		if (!p) {
+			goto fail;
+		}
+		set_page_extent_mapped(p);
+		mark_page_accessed(p);
+
+		if (i == 0) {
+			eb->first_page = p;
+			set_page_extent_head(p, len);
+		} else {
+			set_page_private(p, EXTENT_PAGE_PRIVATE);
+		}
+
+		if (!PageUptodate(p))
+			uptodate = 0;
+		unlock_page(p);
+	}
+	if (uptodate)
+		eb->flags |= EXTENT_UPTODATE;
+	eb->flags |= EXTENT_BUFFER_FILLED;
+
+lru_add:
+	spin_lock(&tree->lru_lock);
+	add_lru(tree, eb);
+	spin_unlock(&tree->lru_lock);
+	return eb;
+fail:
+	spin_lock(&tree->lru_lock);
+	list_del_init(&eb->lru);
+	spin_unlock(&tree->lru_lock);
+	if (!atomic_dec_and_test(&eb->refs))
+		return NULL;
+	/* drop the references on the pages collected so far, page 0 last */
+	for (index = 1; index < i; index++) {
+		page_cache_release(extent_buffer_page(eb, index));
+	}
+	if (i > 0)
+		page_cache_release(extent_buffer_page(eb, 0));
+	__free_extent_buffer(eb);
+	return NULL;
+}
+EXPORT_SYMBOL(find_extent_buffer);
+
+/*
+ * drop one reference on an extent buffer; NULL is accepted and ignored.
+ * When the last reference goes away the page references held by the
+ * buffer are released (page 0 last, because the other pages are looked up
+ * through it) and the structure is freed.
+ */
+void free_extent_buffer(struct extent_buffer *eb)
+{
+	unsigned long i;
+	unsigned long num_pages;
+
+	if (!eb)
+		return;
+
+	if (!atomic_dec_and_test(&eb->refs))
+		return;
+
+	WARN_ON(!list_empty(&eb->lru));
+	num_pages = num_extent_pages(eb->start, eb->len);
+
+	for (i = 1; i < num_pages; i++) {
+		page_cache_release(extent_buffer_page(eb, i));
+	}
+	page_cache_release(extent_buffer_page(eb, 0));
+	__free_extent_buffer(eb);
+}
+EXPORT_SYMBOL(free_extent_buffer);
+
+/*
+ * clear the dirty state of an extent buffer: first the EXTENT_DIRTY bits
+ * of its byte range, then the dirty flag and the radix tree dirty tag of
+ * each of its pages.  Always returns 0.
+ */
+int clear_extent_buffer_dirty(struct extent_io_tree *tree,
+			      struct extent_buffer *eb)
+{
+	unsigned long i;
+	unsigned long num_pages;
+	struct page *page;
+
+	u64 start = eb->start;
+	u64 end = start + eb->len - 1;
+
+	clear_extent_dirty(tree, start, end, GFP_NOFS);
+	num_pages = num_extent_pages(eb->start, eb->len);
+
+	for (i = 0; i < num_pages; i++) {
+		page = extent_buffer_page(eb, i);
+		lock_page(page);
+		if (i == 0)
+			set_page_extent_head(page, eb->len);
+		else
+			set_page_private(page, EXTENT_PAGE_PRIVATE);
+
+		/*
+		 * if we're on the last page or the first page and the
+		 * block isn't aligned on a page boundary, do extra checks
+		 * to make sure we don't clean page that is partially dirty
+		 */
+		if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
+		    ((i == num_pages - 1) &&
+		     ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
+			start = (u64)page->index << PAGE_CACHE_SHIFT;
+			end = start + PAGE_CACHE_SIZE - 1;
+			if (test_range_bit(tree, start, end,
+					   EXTENT_DIRTY, 0)) {
+				unlock_page(page);
+				continue;
+			}
+		}
+		clear_page_dirty_for_io(page);
+		write_lock_irq(&page->mapping->tree_lock);
+		if (!PageDirty(page)) {
+			radix_tree_tag_clear(&page->mapping->page_tree,
+						page_index(page),
+						PAGECACHE_TAG_DIRTY);
+		}
+		write_unlock_irq(&page->mapping->tree_lock);
+		unlock_page(page);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(clear_extent_buffer_dirty);
+
+/*
+ * wait for writeback to finish on the byte range covered by an extent
+ * buffer.  Returns the result of wait_on_extent_writeback.
+ */
+int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
+				    struct extent_buffer *eb)
+{
+	return wait_on_extent_writeback(tree, eb->start,
+					eb->start + eb->len - 1);
+}
+EXPORT_SYMBOL(wait_on_extent_buffer_writeback);
+
+int set_extent_buffer_dirty(struct extent_io_tree *tree, + struct extent_buffer *eb) +{ + unsigned long i; + unsigned long num_pages; + + num_pages = num_extent_pages(eb->start, eb->len); + for (i = 0; i < num_pages; i++) { + struct page *page = extent_buffer_page(eb, i); + /* writepage may need to do something special for the + * first page, we have to make sure page->private is + * properly set. releasepage may drop page->private + * on us if the page isn't already dirty. + */ + if (i == 0) { + lock_page(page); + set_page_extent_head(page, eb->len); + } else if (PagePrivate(page) && + page->private != EXTENT_PAGE_PRIVATE) { + lock_page(page); + set_page_extent_mapped(page); + unlock_page(page); + } + __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); + if (i == 0) + unlock_page(page); + } + return set_extent_dirty(tree, eb->start, + eb->start + eb->len - 1, GFP_NOFS); +} +EXPORT_SYMBOL(set_extent_buffer_dirty); + +int set_extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb) +{ + unsigned long i; + struct page *page; + unsigned long num_pages; + + num_pages = num_extent_pages(eb->start, eb->len); + + set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, + GFP_NOFS); + for (i = 0; i < num_pages; i++) { + page = extent_buffer_page(eb, i); + if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || + ((i == num_pages - 1) && + ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { + check_page_uptodate(tree, page); + continue; + } + SetPageUptodate(page); + } + return 0; +} +EXPORT_SYMBOL(set_extent_buffer_uptodate); + +int extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb) +{ + if (eb->flags & EXTENT_UPTODATE) + return 1; + return test_range_bit(tree, eb->start, eb->start + eb->len - 1, + EXTENT_UPTODATE, 1); +} +EXPORT_SYMBOL(extent_buffer_uptodate); + +int read_extent_buffer_pages(struct extent_io_tree *tree, + struct extent_buffer *eb, + u64 start, + int wait) +{ + unsigned long i; + unsigned long 
start_i; + struct page *page; + int err; + int ret = 0; + unsigned long num_pages; + + if (eb->flags & EXTENT_UPTODATE) + return 0; + + if (0 && test_range_bit(tree, eb->start, eb->start + eb->len - 1, + EXTENT_UPTODATE, 1)) { + return 0; + } + + if (start) { + WARN_ON(start < eb->start); + start_i = (start >> PAGE_CACHE_SHIFT) - + (eb->start >> PAGE_CACHE_SHIFT); + } else { + start_i = 0; + } + + num_pages = num_extent_pages(eb->start, eb->len); + for (i = start_i; i < num_pages; i++) { + page = extent_buffer_page(eb, i); + if (PageUptodate(page)) { + continue; + } + if (!wait) { + if (TestSetPageLocked(page)) { + continue; + } + } else { + lock_page(page); + } + if (!PageUptodate(page)) { + err = page->mapping->a_ops->readpage(NULL, page); + if (err) { + ret = err; + } + } else { + unlock_page(page); + } + } + + if (ret || !wait) { + return ret; + } + + for (i = start_i; i < num_pages; i++) { + page = extent_buffer_page(eb, i); + wait_on_page_locked(page); + if (!PageUptodate(page)) { + ret = -EIO; + } + } + if (!ret) + eb->flags |= EXTENT_UPTODATE; + return ret; +} +EXPORT_SYMBOL(read_extent_buffer_pages); + +void read_extent_buffer(struct extent_buffer *eb, void *dstv, + unsigned long start, + unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char *dst = (char *)dstv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + unsigned long num_pages = num_extent_pages(eb->start, eb->len); + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + + while(len > 0) { + page = extent_buffer_page(eb, i); + if (!PageUptodate(page)) { + printk("page %lu not up to date i %lu, total %lu, len %lu\n", page->index, i, num_pages, eb->len); + WARN_ON(1); + } + WARN_ON(!PageUptodate(page)); + + cur = min(len, (PAGE_CACHE_SIZE - offset)); + kaddr = kmap_atomic(page, 
KM_USER1); + memcpy(dst, kaddr + offset, cur); + kunmap_atomic(kaddr, KM_USER1); + + dst += cur; + len -= cur; + offset = 0; + i++; + } +} +EXPORT_SYMBOL(read_extent_buffer); + +int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, + unsigned long min_len, char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km) +{ + size_t offset = start & (PAGE_CACHE_SIZE - 1); + char *kaddr; + struct page *p; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + unsigned long end_i = (start_offset + start + min_len - 1) >> + PAGE_CACHE_SHIFT; + + if (i != end_i) + return -EINVAL; + + if (i == 0) { + offset = start_offset; + *map_start = 0; + } else { + offset = 0; + *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset; + } + if (start + min_len > eb->len) { +printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len); + WARN_ON(1); + } + + p = extent_buffer_page(eb, i); + WARN_ON(!PageUptodate(p)); + kaddr = kmap_atomic(p, km); + *token = kaddr; + *map = kaddr + offset; + *map_len = PAGE_CACHE_SIZE - offset; + return 0; +} +EXPORT_SYMBOL(map_private_extent_buffer); + +int map_extent_buffer(struct extent_buffer *eb, unsigned long start, + unsigned long min_len, + char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km) +{ + int err; + int save = 0; + if (eb->map_token) { + unmap_extent_buffer(eb, eb->map_token, km); + eb->map_token = NULL; + save = 1; + } + err = map_private_extent_buffer(eb, start, min_len, token, map, + map_start, map_len, km); + if (!err && save) { + eb->map_token = *token; + eb->kaddr = *map; + eb->map_start = *map_start; + eb->map_len = *map_len; + } + return err; +} +EXPORT_SYMBOL(map_extent_buffer); + +void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) +{ + kunmap_atomic(token, km); +} +EXPORT_SYMBOL(unmap_extent_buffer); + +int 
memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, + unsigned long start, + unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char *ptr = (char *)ptrv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + int ret = 0; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + + while(len > 0) { + page = extent_buffer_page(eb, i); + WARN_ON(!PageUptodate(page)); + + cur = min(len, (PAGE_CACHE_SIZE - offset)); + + kaddr = kmap_atomic(page, KM_USER0); + ret = memcmp(ptr, kaddr + offset, cur); + kunmap_atomic(kaddr, KM_USER0); + if (ret) + break; + + ptr += cur; + len -= cur; + offset = 0; + i++; + } + return ret; +} +EXPORT_SYMBOL(memcmp_extent_buffer); + +void write_extent_buffer(struct extent_buffer *eb, const void *srcv, + unsigned long start, unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char *src = (char *)srcv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + + while(len > 0) { + page = extent_buffer_page(eb, i); + WARN_ON(!PageUptodate(page)); + + cur = min(len, PAGE_CACHE_SIZE - offset); + kaddr = kmap_atomic(page, KM_USER1); + memcpy(kaddr + offset, src, cur); + kunmap_atomic(kaddr, KM_USER1); + + src += cur; + len -= cur; + offset = 0; + i++; + } +} +EXPORT_SYMBOL(write_extent_buffer); + +void memset_extent_buffer(struct extent_buffer *eb, char c, + unsigned long start, unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) 
>> PAGE_CACHE_SHIFT; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + + while(len > 0) { + page = extent_buffer_page(eb, i); + WARN_ON(!PageUptodate(page)); + + cur = min(len, PAGE_CACHE_SIZE - offset); + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr + offset, c, cur); + kunmap_atomic(kaddr, KM_USER0); + + len -= cur; + offset = 0; + i++; + } +} +EXPORT_SYMBOL(memset_extent_buffer); + +void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, + unsigned long dst_offset, unsigned long src_offset, + unsigned long len) +{ + u64 dst_len = dst->len; + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; + + WARN_ON(src->len != dst_len); + + offset = (start_offset + dst_offset) & + ((unsigned long)PAGE_CACHE_SIZE - 1); + + while(len > 0) { + page = extent_buffer_page(dst, i); + WARN_ON(!PageUptodate(page)); + + cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); + + kaddr = kmap_atomic(page, KM_USER0); + read_extent_buffer(src, kaddr + offset, src_offset, cur); + kunmap_atomic(kaddr, KM_USER0); + + src_offset += cur; + len -= cur; + offset = 0; + i++; + } +} +EXPORT_SYMBOL(copy_extent_buffer); + +static void move_pages(struct page *dst_page, struct page *src_page, + unsigned long dst_off, unsigned long src_off, + unsigned long len) +{ + char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); + if (dst_page == src_page) { + memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); + } else { + char *src_kaddr = kmap_atomic(src_page, KM_USER1); + char *p = dst_kaddr + dst_off + len; + char *s = src_kaddr + src_off + len; + + while (len--) + *--p = *--s; + + kunmap_atomic(src_kaddr, KM_USER1); + } + kunmap_atomic(dst_kaddr, KM_USER0); +} + +static void copy_pages(struct page *dst_page, struct page 
*src_page, + unsigned long dst_off, unsigned long src_off, + unsigned long len) +{ + char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); + char *src_kaddr; + + if (dst_page != src_page) + src_kaddr = kmap_atomic(src_page, KM_USER1); + else + src_kaddr = dst_kaddr; + + memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); + kunmap_atomic(dst_kaddr, KM_USER0); + if (dst_page != src_page) + kunmap_atomic(src_kaddr, KM_USER1); +} + +void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len) +{ + size_t cur; + size_t dst_off_in_page; + size_t src_off_in_page; + size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long dst_i; + unsigned long src_i; + + if (src_offset + len > dst->len) { + printk("memmove bogus src_offset %lu move len %lu len %lu\n", + src_offset, len, dst->len); + BUG_ON(1); + } + if (dst_offset + len > dst->len) { + printk("memmove bogus dst_offset %lu move len %lu len %lu\n", + dst_offset, len, dst->len); + BUG_ON(1); + } + + while(len > 0) { + dst_off_in_page = (start_offset + dst_offset) & + ((unsigned long)PAGE_CACHE_SIZE - 1); + src_off_in_page = (start_offset + src_offset) & + ((unsigned long)PAGE_CACHE_SIZE - 1); + + dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; + src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT; + + cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - + src_off_in_page)); + cur = min_t(unsigned long, cur, + (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page)); + + copy_pages(extent_buffer_page(dst, dst_i), + extent_buffer_page(dst, src_i), + dst_off_in_page, src_off_in_page, cur); + + src_offset += cur; + dst_offset += cur; + len -= cur; + } +} +EXPORT_SYMBOL(memcpy_extent_buffer); + +void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len) +{ + size_t cur; + size_t dst_off_in_page; + size_t src_off_in_page; + unsigned long dst_end = dst_offset + len - 1; + 
unsigned long src_end = src_offset + len - 1; + size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long dst_i; + unsigned long src_i; + + if (src_offset + len > dst->len) { + printk("memmove bogus src_offset %lu move len %lu len %lu\n", + src_offset, len, dst->len); + BUG_ON(1); + } + if (dst_offset + len > dst->len) { + printk("memmove bogus dst_offset %lu move len %lu len %lu\n", + dst_offset, len, dst->len); + BUG_ON(1); + } + if (dst_offset < src_offset) { + memcpy_extent_buffer(dst, dst_offset, src_offset, len); + return; + } + while(len > 0) { + dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT; + src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; + + dst_off_in_page = (start_offset + dst_end) & + ((unsigned long)PAGE_CACHE_SIZE - 1); + src_off_in_page = (start_offset + src_end) & + ((unsigned long)PAGE_CACHE_SIZE - 1); + + cur = min_t(unsigned long, len, src_off_in_page + 1); + cur = min(cur, dst_off_in_page + 1); + move_pages(extent_buffer_page(dst, dst_i), + extent_buffer_page(dst, src_i), + dst_off_in_page - cur + 1, + src_off_in_page - cur + 1, cur); + + dst_end -= cur; + src_end -= cur; + len -= cur; + } +} +EXPORT_SYMBOL(memmove_extent_buffer); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h new file mode 100644 index 000000000000..06be1fe84b29 --- /dev/null +++ b/fs/btrfs/extent_io.h @@ -0,0 +1,193 @@ +#ifndef __EXTENTIO__ +#define __EXTENTIO__ + +#include + +/* bits for the extent state */ +#define EXTENT_DIRTY 1 +#define EXTENT_WRITEBACK (1 << 1) +#define EXTENT_UPTODATE (1 << 2) +#define EXTENT_LOCKED (1 << 3) +#define EXTENT_NEW (1 << 4) +#define EXTENT_DELALLOC (1 << 5) +#define EXTENT_DEFRAG (1 << 6) +#define EXTENT_DEFRAG_DONE (1 << 7) +#define EXTENT_BUFFER_FILLED (1 << 8) +#define EXTENT_CSUM (1 << 9) +#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) + +/* + * page->private values. Every page that is controlled by the extent + * map has page->private set to one. 
+ */ +#define EXTENT_PAGE_PRIVATE 1 +#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3 + +struct extent_io_ops { + int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); + int (*writepage_io_hook)(struct page *page, u64 start, u64 end); + int (*readpage_io_hook)(struct page *page, u64 start, u64 end); + int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end); + void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end); +}; + +struct extent_io_tree { + struct rb_root state; + struct address_space *mapping; + u64 dirty_bytes; + rwlock_t lock; + struct extent_io_ops *ops; + spinlock_t lru_lock; + struct list_head buffer_lru; + int lru_size; +}; + +struct extent_state { + u64 start; + u64 end; /* inclusive */ + int in_tree; + struct rb_node rb_node; + wait_queue_head_t wq; + atomic_t refs; + unsigned long state; + + /* for use by the FS */ + u64 private; + + struct list_head list; +}; + +struct extent_buffer { + u64 start; + unsigned long len; + char *map_token; + char *kaddr; + unsigned long map_start; + unsigned long map_len; + struct page *first_page; + struct list_head lru; + atomic_t refs; + int flags; +}; + +struct extent_map_tree; + +typedef struct extent_map *(get_extent_t)(struct inode *inode, + struct page *page, + size_t page_offset, + u64 start, u64 len, + int create); + +void extent_io_tree_init(struct extent_io_tree *tree, + struct address_space *mapping, gfp_t mask); +void extent_io_tree_empty_lru(struct extent_io_tree *tree); +int try_release_extent_mapping(struct extent_map_tree *map, + struct extent_io_tree *tree, struct page *page); +int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); +int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); +int extent_read_full_page(struct extent_io_tree *tree, struct page *page, + get_extent_t *get_extent); +int __init extent_io_init(void); +void extent_io_exit(void); + +u64 count_range_bits(struct extent_io_tree *tree, + u64 *start, u64 
search_end, + u64 max_bytes, unsigned long bits); + +int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, + int bits, int filled); +int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, + int bits, gfp_t mask); +int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, + int bits, gfp_t mask); +int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); +int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); +int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); +int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); +int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); +int find_first_extent_bit(struct extent_io_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, int bits); +int extent_invalidatepage(struct extent_io_tree *tree, + struct page *page, unsigned long offset); +int extent_write_full_page(struct extent_io_tree *tree, struct page *page, + get_extent_t *get_extent, + struct writeback_control *wbc); +int extent_writepages(struct extent_io_tree *tree, + struct address_space *mapping, + get_extent_t *get_extent, + struct writeback_control *wbc); +int extent_readpages(struct extent_io_tree *tree, + struct address_space *mapping, + struct list_head *pages, unsigned nr_pages, + get_extent_t get_extent); +int extent_prepare_write(struct extent_io_tree *tree, + struct inode *inode, struct page *page, + unsigned from, unsigned to, get_extent_t *get_extent); +int extent_commit_write(struct extent_io_tree *tree, + struct inode *inode, struct page *page, + unsigned from, unsigned to); +sector_t extent_bmap(struct address_space *mapping, sector_t iblock, + get_extent_t *get_extent); +int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end); +int set_state_private(struct extent_io_tree *tree, u64 start, u64 private); +int get_state_private(struct extent_io_tree 
*tree, u64 start, u64 *private); +void set_page_extent_mapped(struct page *page); + +struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, + u64 start, unsigned long len, + struct page *page0, + gfp_t mask); +struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, + u64 start, unsigned long len, + gfp_t mask); +void free_extent_buffer(struct extent_buffer *eb); +int read_extent_buffer_pages(struct extent_io_tree *tree, + struct extent_buffer *eb, u64 start, int wait); + +static inline void extent_buffer_get(struct extent_buffer *eb) +{ + atomic_inc(&eb->refs); +} + +int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, + unsigned long start, + unsigned long len); +void read_extent_buffer(struct extent_buffer *eb, void *dst, + unsigned long start, + unsigned long len); +void write_extent_buffer(struct extent_buffer *eb, const void *src, + unsigned long start, unsigned long len); +void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, + unsigned long dst_offset, unsigned long src_offset, + unsigned long len); +void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len); +void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len); +void memset_extent_buffer(struct extent_buffer *eb, char c, + unsigned long start, unsigned long len); +int wait_on_extent_buffer_writeback(struct extent_io_tree *tree, + struct extent_buffer *eb); +int clear_extent_buffer_dirty(struct extent_io_tree *tree, + struct extent_buffer *eb); +int set_extent_buffer_dirty(struct extent_io_tree *tree, + struct extent_buffer *eb); +int set_extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb); +int extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb); +int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, + unsigned long min_len, 
char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km); +int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, + unsigned long min_len, char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km); +void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); +#endif diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 010a287fbd71..268ad8facf6e 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1,17 +1,10 @@ -#include -#include -#include -#include +#include #include -#include -#include +#include #include #include -#include -#include #include -#include -#include +#include #include "extent_map.h" /* temporary define until extent_map moves out of btrfs */ @@ -21,27 +14,6 @@ struct kmem_cache *btrfs_cache_create(const char *name, size_t size, unsigned long)); static struct kmem_cache *extent_map_cache; -static struct kmem_cache *extent_state_cache; -static struct kmem_cache *extent_buffer_cache; - -static LIST_HEAD(buffers); -static LIST_HEAD(states); - -static spinlock_t state_lock = SPIN_LOCK_UNLOCKED; -#define BUFFER_LRU_MAX 64 - -struct tree_entry { - u64 start; - u64 end; - int in_tree; - struct rb_node rb_node; -}; - -struct extent_page_data { - struct bio *bio; - struct extent_map_tree *tree; - get_extent_t *get_extent; -}; int __init extent_map_init(void) { @@ -50,72 +22,23 @@ int __init extent_map_init(void) NULL); if (!extent_map_cache) return -ENOMEM; - extent_state_cache = btrfs_cache_create("extent_state", - sizeof(struct extent_state), 0, - NULL); - if (!extent_state_cache) - goto free_map_cache; - extent_buffer_cache = btrfs_cache_create("extent_buffers", - sizeof(struct extent_buffer), 0, - NULL); - if (!extent_buffer_cache) - goto free_state_cache; return 0; - -free_state_cache: - kmem_cache_destroy(extent_state_cache); -free_map_cache: - kmem_cache_destroy(extent_map_cache); - return -ENOMEM; } void extent_map_exit(void) { - 
struct extent_state *state; - - while (!list_empty(&states)) { - state = list_entry(states.next, struct extent_state, list); - printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs)); - list_del(&state->list); - kmem_cache_free(extent_state_cache, state); - - } - if (extent_map_cache) kmem_cache_destroy(extent_map_cache); - if (extent_state_cache) - kmem_cache_destroy(extent_state_cache); - if (extent_buffer_cache) - kmem_cache_destroy(extent_buffer_cache); } -void extent_map_tree_init(struct extent_map_tree *tree, - struct address_space *mapping, gfp_t mask) +void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) { tree->map.rb_node = NULL; - tree->state.rb_node = NULL; - tree->ops = NULL; - tree->dirty_bytes = 0; - rwlock_init(&tree->lock); - spin_lock_init(&tree->lru_lock); - tree->mapping = mapping; - INIT_LIST_HEAD(&tree->buffer_lru); - tree->lru_size = 0; + tree->last = NULL; + spin_lock_init(&tree->lock); } EXPORT_SYMBOL(extent_map_tree_init); -void extent_map_tree_empty_lru(struct extent_map_tree *tree) -{ - struct extent_buffer *eb; - while(!list_empty(&tree->buffer_lru)) { - eb = list_entry(tree->buffer_lru.next, struct extent_buffer, - lru); - list_del_init(&eb->lru); - free_extent_buffer(eb); - } -} -EXPORT_SYMBOL(extent_map_tree_empty_lru); - struct extent_map *alloc_extent_map(gfp_t mask) { struct extent_map *em; @@ -123,6 +46,7 @@ struct extent_map *alloc_extent_map(gfp_t mask) if (!em || IS_ERR(em)) return em; em->in_tree = 0; + em->flags = 0; atomic_set(&em->refs, 1); return em; } @@ -132,6 +56,7 @@ void free_extent_map(struct extent_map *em) { if (!em) return; + WARN_ON(atomic_read(&em->refs) == 0); if (atomic_dec_and_test(&em->refs)) { WARN_ON(em->in_tree); kmem_cache_free(extent_map_cache, em); @@ -139,64 +64,28 @@ void free_extent_map(struct extent_map *em) } EXPORT_SYMBOL(free_extent_map); - -struct extent_state 
*alloc_extent_state(gfp_t mask) -{ - struct extent_state *state; - unsigned long flags; - - state = kmem_cache_alloc(extent_state_cache, mask); - if (!state || IS_ERR(state)) - return state; - state->state = 0; - state->in_tree = 0; - state->private = 0; - - spin_lock_irqsave(&state_lock, flags); - list_add(&state->list, &states); - spin_unlock_irqrestore(&state_lock, flags); - - atomic_set(&state->refs, 1); - init_waitqueue_head(&state->wq); - return state; -} -EXPORT_SYMBOL(alloc_extent_state); - -void free_extent_state(struct extent_state *state) -{ - unsigned long flags; - if (!state) - return; - if (atomic_dec_and_test(&state->refs)) { - WARN_ON(state->in_tree); - spin_lock_irqsave(&state_lock, flags); - list_del(&state->list); - spin_unlock_irqrestore(&state_lock, flags); - kmem_cache_free(extent_state_cache, state); - } -} -EXPORT_SYMBOL(free_extent_state); - static struct rb_node *tree_insert(struct rb_root *root, u64 offset, struct rb_node *node) { struct rb_node ** p = &root->rb_node; struct rb_node * parent = NULL; - struct tree_entry *entry; + struct extent_map *entry; while(*p) { parent = *p; - entry = rb_entry(parent, struct tree_entry, rb_node); + entry = rb_entry(parent, struct extent_map, rb_node); + + WARN_ON(!entry->in_tree); if (offset < entry->start) p = &(*p)->rb_left; - else if (offset > entry->end) + else if (offset >= extent_map_end(entry)) p = &(*p)->rb_right; else return parent; } - entry = rb_entry(node, struct tree_entry, rb_node); + entry = rb_entry(node, struct extent_map, rb_node); entry->in_tree = 1; rb_link_node(node, parent, p); rb_insert_color(node, root); @@ -210,17 +99,19 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, struct rb_node * n = root->rb_node; struct rb_node *prev = NULL; struct rb_node *orig_prev = NULL; - struct tree_entry *entry; - struct tree_entry *prev_entry = NULL; + struct extent_map *entry; + struct extent_map *prev_entry = NULL; while(n) { - entry = rb_entry(n, struct tree_entry, 
rb_node); + entry = rb_entry(n, struct extent_map, rb_node); prev = n; prev_entry = entry; + WARN_ON(!entry->in_tree); + if (offset < entry->start) n = n->rb_left; - else if (offset > entry->end) + else if (offset >= extent_map_end(entry)) n = n->rb_right; else return n; @@ -228,19 +119,19 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, if (prev_ret) { orig_prev = prev; - while(prev && offset > prev_entry->end) { + while(prev && offset >= extent_map_end(prev_entry)) { prev = rb_next(prev); - prev_entry = rb_entry(prev, struct tree_entry, rb_node); + prev_entry = rb_entry(prev, struct extent_map, rb_node); } *prev_ret = prev; prev = orig_prev; } if (next_ret) { - prev_entry = rb_entry(prev, struct tree_entry, rb_node); + prev_entry = rb_entry(prev, struct extent_map, rb_node); while(prev && offset < prev_entry->start) { prev = rb_prev(prev); - prev_entry = rb_entry(prev, struct tree_entry, rb_node); + prev_entry = rb_entry(prev, struct extent_map, rb_node); } *next_ret = prev; } @@ -257,22 +148,26 @@ static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) return ret; } -static int tree_delete(struct rb_root *root, u64 offset) +static int mergable_maps(struct extent_map *prev, struct extent_map *next) { - struct rb_node *node; - struct tree_entry *entry; - - node = __tree_search(root, offset, NULL, NULL); - if (!node) - return -ENOENT; - entry = rb_entry(node, struct tree_entry, rb_node); - entry->in_tree = 0; - rb_erase(node, root); + if (extent_map_end(prev) == next->start && + prev->flags == next->flags && + prev->bdev == next->bdev && + ((next->block_start == EXTENT_MAP_HOLE && + prev->block_start == EXTENT_MAP_HOLE) || + (next->block_start == EXTENT_MAP_INLINE && + prev->block_start == EXTENT_MAP_INLINE) || + (next->block_start == EXTENT_MAP_DELALLOC && + prev->block_start == EXTENT_MAP_DELALLOC) || + (next->block_start < EXTENT_MAP_LAST_BYTE - 1 && + next->block_start == extent_map_block_end(prev)))) { + return 1; + 
} return 0; } /* - * add_extent_mapping tries a simple backward merge with existing + * add_extent_mapping tries a simple forward/backward merge with existing * mappings. The extent_map struct passed in will be inserted into * the tree directly (no copies made, just a reference taken). */ @@ -280,13 +175,12 @@ int add_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) { int ret = 0; - struct extent_map *prev = NULL; + struct extent_map *merge = NULL; struct rb_node *rb; - write_lock_irq(&tree->lock); - rb = tree_insert(&tree->map, em->end, &em->rb_node); + rb = tree_insert(&tree->map, em->start, &em->rb_node); if (rb) { - prev = rb_entry(rb, struct extent_map, rb_node); + merge = rb_entry(rb, struct extent_map, rb_node); ret = -EEXIST; goto out; } @@ -294,53 +188,60 @@ int add_extent_mapping(struct extent_map_tree *tree, if (em->start != 0) { rb = rb_prev(&em->rb_node); if (rb) - prev = rb_entry(rb, struct extent_map, rb_node); - if (prev && prev->end + 1 == em->start && - ((em->block_start == EXTENT_MAP_HOLE && - prev->block_start == EXTENT_MAP_HOLE) || - (em->block_start == EXTENT_MAP_INLINE && - prev->block_start == EXTENT_MAP_INLINE) || - (em->block_start == EXTENT_MAP_DELALLOC && - prev->block_start == EXTENT_MAP_DELALLOC) || - (em->block_start < EXTENT_MAP_DELALLOC - 1 && - em->block_start == prev->block_end + 1))) { - em->start = prev->start; - em->block_start = prev->block_start; - rb_erase(&prev->rb_node, &tree->map); - prev->in_tree = 0; - free_extent_map(prev); + merge = rb_entry(rb, struct extent_map, rb_node); + if (rb && mergable_maps(merge, em)) { + em->start = merge->start; + em->len += merge->len; + em->block_start = merge->block_start; + merge->in_tree = 0; + rb_erase(&merge->rb_node, &tree->map); + free_extent_map(merge); } } + rb = rb_next(&em->rb_node); + if (rb) + merge = rb_entry(rb, struct extent_map, rb_node); + if (rb && mergable_maps(em, merge)) { + em->len += merge->len; + rb_erase(&merge->rb_node, &tree->map); + 
merge->in_tree = 0; + free_extent_map(merge); + } + tree->last = em; out: - write_unlock_irq(&tree->lock); return ret; } EXPORT_SYMBOL(add_extent_mapping); +static u64 range_end(u64 start, u64 len) +{ + if (start + len < start) + return (u64)-1; + return start + len; +} + /* * lookup_extent_mapping returns the first extent_map struct in the - * tree that intersects the [start, end] (inclusive) range. There may + * tree that intersects the [start, len] range. There may * be additional objects in the tree that intersect, so check the object * returned carefully to make sure you don't need additional lookups. */ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, - u64 start, u64 end) + u64 start, u64 len) { struct extent_map *em; struct rb_node *rb_node; - struct rb_node *prev = NULL; - struct rb_node *next = NULL; + struct rb_node *prev = NULL; struct rb_node *next = NULL; u64 end = range_end(start, len); em = tree->last; if (em && end > em->start && start < extent_map_end(em)) goto found; - read_lock_irq(&tree->lock); rb_node = __tree_search(&tree->map, start, &prev, &next); if (!rb_node && prev) { em = rb_entry(prev, struct extent_map, rb_node); - if (em->start <= end && em->end >= start) + if (end > em->start && start < extent_map_end(em)) goto found; } if (!rb_node && next) { em = rb_entry(next, struct extent_map, rb_node); - if (em->start <= end && em->end >= start) + if (end > em->start && start < extent_map_end(em)) goto found; } if (!rb_node) { @@ -352,14 +253,16 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, goto out; } em = rb_entry(rb_node, struct extent_map, rb_node); - if (em->end < start || em->start > end) { - em = NULL; - goto out; - } + if (end > em->start && start < extent_map_end(em)) + goto found; + + em = NULL; + goto out; + found: atomic_inc(&em->refs); + tree->last = em; out: - read_unlock_irq(&tree->lock); return em; } EXPORT_SYMBOL(lookup_extent_mapping); @@ -370,2866 +273,12 @@ 
EXPORT_SYMBOL(lookup_extent_mapping); */ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) { - int ret; + int ret = 0; - write_lock_irq(&tree->lock); - ret = tree_delete(&tree->map, em->end); - write_unlock_irq(&tree->lock); + rb_erase(&em->rb_node, &tree->map); + em->in_tree = 0; + if (tree->last == em) + tree->last = NULL; return ret; } EXPORT_SYMBOL(remove_extent_mapping); - -/* - * utility function to look for merge candidates inside a given range. - * Any extents with matching state are merged together into a single - * extent in the tree. Extents with EXTENT_IO in their state field - * are not merged because the end_io handlers need to be able to do - * operations on them without sleeping (or doing allocations/splits). - * - * This should be called with the tree lock held. - */ -static int merge_state(struct extent_map_tree *tree, - struct extent_state *state) -{ - struct extent_state *other; - struct rb_node *other_node; - - if (state->state & EXTENT_IOBITS) - return 0; - - other_node = rb_prev(&state->rb_node); - if (other_node) { - other = rb_entry(other_node, struct extent_state, rb_node); - if (other->end == state->start - 1 && - other->state == state->state) { - state->start = other->start; - other->in_tree = 0; - rb_erase(&other->rb_node, &tree->state); - free_extent_state(other); - } - } - other_node = rb_next(&state->rb_node); - if (other_node) { - other = rb_entry(other_node, struct extent_state, rb_node); - if (other->start == state->end + 1 && - other->state == state->state) { - other->start = state->start; - state->in_tree = 0; - rb_erase(&state->rb_node, &tree->state); - free_extent_state(state); - } - } - return 0; -} - -/* - * insert an extent_state struct into the tree. 'bits' are set on the - * struct before it is inserted. - * - * This may return -EEXIST if the extent is already there, in which case the - * state struct is freed. - * - * The tree lock is not taken internally. 
This is a utility function and - * probably isn't what you want to call (see set/clear_extent_bit). - */ -static int insert_state(struct extent_map_tree *tree, - struct extent_state *state, u64 start, u64 end, - int bits) -{ - struct rb_node *node; - - if (end < start) { - printk("end < start %Lu %Lu\n", end, start); - WARN_ON(1); - } - if (bits & EXTENT_DIRTY) - tree->dirty_bytes += end - start + 1; - state->state |= bits; - state->start = start; - state->end = end; - node = tree_insert(&tree->state, end, &state->rb_node); - if (node) { - struct extent_state *found; - found = rb_entry(node, struct extent_state, rb_node); - printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end); - free_extent_state(state); - return -EEXIST; - } - merge_state(tree, state); - return 0; -} - -/* - * split a given extent state struct in two, inserting the preallocated - * struct 'prealloc' as the newly created second half. 'split' indicates an - * offset inside 'orig' where it should be split. - * - * Before calling, - * the tree has 'orig' at [orig->start, orig->end]. After calling, there - * are two extent state structs in the tree: - * prealloc: [orig->start, split - 1] - * orig: [ split, orig->end ] - * - * The tree locks are not taken by this function. They need to be held - * by the caller. 
- */ -static int split_state(struct extent_map_tree *tree, struct extent_state *orig, - struct extent_state *prealloc, u64 split) -{ - struct rb_node *node; - prealloc->start = orig->start; - prealloc->end = split - 1; - prealloc->state = orig->state; - orig->start = split; - - node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node); - if (node) { - struct extent_state *found; - found = rb_entry(node, struct extent_state, rb_node); - printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end); - free_extent_state(prealloc); - return -EEXIST; - } - return 0; -} - -/* - * utility function to clear some bits in an extent state struct. - * it will optionally wake up any one waiting on this state (wake == 1), or - * forcibly remove the state from the tree (delete == 1). - * - * If no bits are set on the state struct after clearing things, the - * struct is freed and removed from the tree - */ -static int clear_state_bit(struct extent_map_tree *tree, - struct extent_state *state, int bits, int wake, - int delete) -{ - int ret = state->state & bits; - - if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { - u64 range = state->end - state->start + 1; - WARN_ON(range > tree->dirty_bytes); - tree->dirty_bytes -= range; - } - state->state &= ~bits; - if (wake) - wake_up(&state->wq); - if (delete || state->state == 0) { - if (state->in_tree) { - rb_erase(&state->rb_node, &tree->state); - state->in_tree = 0; - free_extent_state(state); - } else { - WARN_ON(1); - } - } else { - merge_state(tree, state); - } - return ret; -} - -/* - * clear some bits on a range in the tree. This may require splitting - * or inserting elements in the tree, so the gfp mask is used to - * indicate which allocations or sleeping are allowed. - * - * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove - * the given range from the tree regardless of state (ie for truncate). - * - * the range [start, end] is inclusive. 
- * - * This takes the tree lock, and returns < 0 on error, > 0 if any of the - * bits were already set, or zero if none of the bits were already set. - */ -int clear_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, - int bits, int wake, int delete, gfp_t mask) -{ - struct extent_state *state; - struct extent_state *prealloc = NULL; - struct rb_node *node; - unsigned long flags; - int err; - int set = 0; - -again: - if (!prealloc && (mask & __GFP_WAIT)) { - prealloc = alloc_extent_state(mask); - if (!prealloc) - return -ENOMEM; - } - - write_lock_irqsave(&tree->lock, flags); - /* - * this search will find the extents that end after - * our range starts - */ - node = tree_search(&tree->state, start); - if (!node) - goto out; - state = rb_entry(node, struct extent_state, rb_node); - if (state->start > end) - goto out; - WARN_ON(state->end < start); - - /* - * | ---- desired range ---- | - * | state | or - * | ------------- state -------------- | - * - * We need to split the extent we found, and may flip - * bits on second half. - * - * If the extent we found extends past our range, we - * just split and search again. It'll get split again - * the next time though. - * - * If the extent we found is inside our range, we clear - * the desired bit on it. 
- */ - - if (state->start < start) { - err = split_state(tree, state, prealloc, start); - BUG_ON(err == -EEXIST); - prealloc = NULL; - if (err) - goto out; - if (state->end <= end) { - start = state->end + 1; - set |= clear_state_bit(tree, state, bits, - wake, delete); - } else { - start = state->start; - } - goto search_again; - } - /* - * | ---- desired range ---- | - * | state | - * We need to split the extent, and clear the bit - * on the first half - */ - if (state->start <= end && state->end > end) { - err = split_state(tree, state, prealloc, end + 1); - BUG_ON(err == -EEXIST); - - if (wake) - wake_up(&state->wq); - set |= clear_state_bit(tree, prealloc, bits, - wake, delete); - prealloc = NULL; - goto out; - } - - start = state->end + 1; - set |= clear_state_bit(tree, state, bits, wake, delete); - goto search_again; - -out: - write_unlock_irqrestore(&tree->lock, flags); - if (prealloc) - free_extent_state(prealloc); - - return set; - -search_again: - if (start > end) - goto out; - write_unlock_irqrestore(&tree->lock, flags); - if (mask & __GFP_WAIT) - cond_resched(); - goto again; -} -EXPORT_SYMBOL(clear_extent_bit); - -static int wait_on_state(struct extent_map_tree *tree, - struct extent_state *state) -{ - DEFINE_WAIT(wait); - prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE); - read_unlock_irq(&tree->lock); - schedule(); - read_lock_irq(&tree->lock); - finish_wait(&state->wq, &wait); - return 0; -} - -/* - * waits for one or more bits to clear on a range in the state tree. - * The range [start, end] is inclusive. 
- * The tree lock is taken by this function - */ -int wait_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits) -{ - struct extent_state *state; - struct rb_node *node; - - read_lock_irq(&tree->lock); -again: - while (1) { - /* - * this search will find all the extents that end after - * our range starts - */ - node = tree_search(&tree->state, start); - if (!node) - break; - - state = rb_entry(node, struct extent_state, rb_node); - - if (state->start > end) - goto out; - - if (state->state & bits) { - start = state->start; - atomic_inc(&state->refs); - wait_on_state(tree, state); - free_extent_state(state); - goto again; - } - start = state->end + 1; - - if (start > end) - break; - - if (need_resched()) { - read_unlock_irq(&tree->lock); - cond_resched(); - read_lock_irq(&tree->lock); - } - } -out: - read_unlock_irq(&tree->lock); - return 0; -} -EXPORT_SYMBOL(wait_extent_bit); - -static void set_state_bits(struct extent_map_tree *tree, - struct extent_state *state, - int bits) -{ - if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { - u64 range = state->end - state->start + 1; - tree->dirty_bytes += range; - } - state->state |= bits; -} - -/* - * set some bits on a range in the tree. This may require allocations - * or sleeping, so the gfp mask is used to indicate what is allowed. - * - * If 'exclusive' == 1, this will fail with -EEXIST if some part of the - * range already has the desired bits set. The start of the existing - * range is returned in failed_start in this case. - * - * [start, end] is inclusive - * This takes the tree lock. 
- */ -int set_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits, - int exclusive, u64 *failed_start, gfp_t mask) -{ - struct extent_state *state; - struct extent_state *prealloc = NULL; - struct rb_node *node; - unsigned long flags; - int err = 0; - int set; - u64 last_start; - u64 last_end; -again: - if (!prealloc && (mask & __GFP_WAIT)) { - prealloc = alloc_extent_state(mask); - if (!prealloc) - return -ENOMEM; - } - - write_lock_irqsave(&tree->lock, flags); - /* - * this search will find all the extents that end after - * our range starts. - */ - node = tree_search(&tree->state, start); - if (!node) { - err = insert_state(tree, prealloc, start, end, bits); - prealloc = NULL; - BUG_ON(err == -EEXIST); - goto out; - } - - state = rb_entry(node, struct extent_state, rb_node); - last_start = state->start; - last_end = state->end; - - /* - * | ---- desired range ---- | - * | state | - * - * Just lock what we found and keep going - */ - if (state->start == start && state->end <= end) { - set = state->state & bits; - if (set && exclusive) { - *failed_start = state->start; - err = -EEXIST; - goto out; - } - set_state_bits(tree, state, bits); - start = state->end + 1; - merge_state(tree, state); - goto search_again; - } - - /* - * | ---- desired range ---- | - * | state | - * or - * | ------------- state -------------- | - * - * We need to split the extent we found, and may flip bits on - * second half. - * - * If the extent we found extends past our - * range, we just split and search again. It'll get split - * again the next time though. - * - * If the extent we found is inside our range, we set the - * desired bit on it. 
- */ - if (state->start < start) { - set = state->state & bits; - if (exclusive && set) { - *failed_start = start; - err = -EEXIST; - goto out; - } - err = split_state(tree, state, prealloc, start); - BUG_ON(err == -EEXIST); - prealloc = NULL; - if (err) - goto out; - if (state->end <= end) { - set_state_bits(tree, state, bits); - start = state->end + 1; - merge_state(tree, state); - } else { - start = state->start; - } - goto search_again; - } - /* - * | ---- desired range ---- | - * | state | or | state | - * - * There's a hole, we need to insert something in it and - * ignore the extent we found. - */ - if (state->start > start) { - u64 this_end; - if (end < last_start) - this_end = end; - else - this_end = last_start -1; - err = insert_state(tree, prealloc, start, this_end, - bits); - prealloc = NULL; - BUG_ON(err == -EEXIST); - if (err) - goto out; - start = this_end + 1; - goto search_again; - } - /* - * | ---- desired range ---- | - * | state | - * We need to split the extent, and set the bit - * on the first half - */ - if (state->start <= end && state->end > end) { - set = state->state & bits; - if (exclusive && set) { - *failed_start = start; - err = -EEXIST; - goto out; - } - err = split_state(tree, state, prealloc, end + 1); - BUG_ON(err == -EEXIST); - - set_state_bits(tree, prealloc, bits); - merge_state(tree, prealloc); - prealloc = NULL; - goto out; - } - - goto search_again; - -out: - write_unlock_irqrestore(&tree->lock, flags); - if (prealloc) - free_extent_state(prealloc); - - return err; - -search_again: - if (start > end) - goto out; - write_unlock_irqrestore(&tree->lock, flags); - if (mask & __GFP_WAIT) - cond_resched(); - goto again; -} -EXPORT_SYMBOL(set_extent_bit); - -/* wrappers around set/clear extent bit */ -int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, - mask); -} -EXPORT_SYMBOL(set_extent_dirty); - -int set_extent_bits(struct 
extent_map_tree *tree, u64 start, u64 end, - int bits, gfp_t mask) -{ - return set_extent_bit(tree, start, end, bits, 0, NULL, - mask); -} -EXPORT_SYMBOL(set_extent_bits); - -int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, - int bits, gfp_t mask) -{ - return clear_extent_bit(tree, start, end, bits, 0, 0, mask); -} -EXPORT_SYMBOL(clear_extent_bits); - -int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, - EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL, - mask); -} -EXPORT_SYMBOL(set_extent_delalloc); - -int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return clear_extent_bit(tree, start, end, - EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask); -} -EXPORT_SYMBOL(clear_extent_dirty); - -int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, - mask); -} -EXPORT_SYMBOL(set_extent_new); - -int clear_extent_new(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask); -} -EXPORT_SYMBOL(clear_extent_new); - -int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, - mask); -} -EXPORT_SYMBOL(set_extent_uptodate); - -int clear_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); -} -EXPORT_SYMBOL(clear_extent_uptodate); - -int set_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, EXTENT_WRITEBACK, - 0, NULL, mask); -} -EXPORT_SYMBOL(set_extent_writeback); - -int clear_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return clear_extent_bit(tree, start, end, 
EXTENT_WRITEBACK, 1, 0, mask); -} -EXPORT_SYMBOL(clear_extent_writeback); - -int wait_on_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end) -{ - return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK); -} -EXPORT_SYMBOL(wait_on_extent_writeback); - -/* - * locks a range in ascending order, waiting for any locked regions - * it hits on the way. [start,end] are inclusive, and this will sleep. - */ -int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask) -{ - int err; - u64 failed_start; - while (1) { - err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, - &failed_start, mask); - if (err == -EEXIST && (mask & __GFP_WAIT)) { - wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); - start = failed_start; - } else { - break; - } - WARN_ON(start > end); - } - return err; -} -EXPORT_SYMBOL(lock_extent); - -int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask); -} -EXPORT_SYMBOL(unlock_extent); - -/* - * helper function to set pages and extents in the tree dirty - */ -int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end) -{ - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; - struct page *page; - - while (index <= end_index) { - page = find_get_page(tree->mapping, index); - BUG_ON(!page); - __set_page_dirty_nobuffers(page); - page_cache_release(page); - index++; - } - set_extent_dirty(tree, start, end, GFP_NOFS); - return 0; -} -EXPORT_SYMBOL(set_range_dirty); - -/* - * helper function to set both pages and extents in the tree writeback - */ -int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end) -{ - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; - struct page *page; - - while (index <= end_index) { - page = find_get_page(tree->mapping, index); - BUG_ON(!page); - 
set_page_writeback(page); - page_cache_release(page); - index++; - } - set_extent_writeback(tree, start, end, GFP_NOFS); - return 0; -} -EXPORT_SYMBOL(set_range_writeback); - -int find_first_extent_bit(struct extent_map_tree *tree, u64 start, - u64 *start_ret, u64 *end_ret, int bits) -{ - struct rb_node *node; - struct extent_state *state; - int ret = 1; - - read_lock_irq(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. - */ - node = tree_search(&tree->state, start); - if (!node || IS_ERR(node)) { - goto out; - } - - while(1) { - state = rb_entry(node, struct extent_state, rb_node); - if (state->end >= start && (state->state & bits)) { - *start_ret = state->start; - *end_ret = state->end; - ret = 0; - break; - } - node = rb_next(node); - if (!node) - break; - } -out: - read_unlock_irq(&tree->lock); - return ret; -} -EXPORT_SYMBOL(find_first_extent_bit); - -u64 find_lock_delalloc_range(struct extent_map_tree *tree, - u64 *start, u64 *end, u64 max_bytes) -{ - struct rb_node *node; - struct extent_state *state; - u64 cur_start = *start; - u64 found = 0; - u64 total_bytes = 0; - - write_lock_irq(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. 
- */ -search_again: - node = tree_search(&tree->state, cur_start); - if (!node || IS_ERR(node)) { - *end = (u64)-1; - goto out; - } - - while(1) { - state = rb_entry(node, struct extent_state, rb_node); - if (found && state->start != cur_start) { - goto out; - } - if (!(state->state & EXTENT_DELALLOC)) { - if (!found) - *end = state->end; - goto out; - } - if (!found) { - struct extent_state *prev_state; - struct rb_node *prev_node = node; - while(1) { - prev_node = rb_prev(prev_node); - if (!prev_node) - break; - prev_state = rb_entry(prev_node, - struct extent_state, - rb_node); - if (!(prev_state->state & EXTENT_DELALLOC)) - break; - state = prev_state; - node = prev_node; - } - } - if (state->state & EXTENT_LOCKED) { - DEFINE_WAIT(wait); - atomic_inc(&state->refs); - prepare_to_wait(&state->wq, &wait, - TASK_UNINTERRUPTIBLE); - write_unlock_irq(&tree->lock); - schedule(); - write_lock_irq(&tree->lock); - finish_wait(&state->wq, &wait); - free_extent_state(state); - goto search_again; - } - state->state |= EXTENT_LOCKED; - if (!found) - *start = state->start; - found++; - *end = state->end; - cur_start = state->end + 1; - node = rb_next(node); - if (!node) - break; - total_bytes += state->end - state->start + 1; - if (total_bytes >= max_bytes) - break; - } -out: - write_unlock_irq(&tree->lock); - return found; -} - -u64 count_range_bits(struct extent_map_tree *tree, - u64 *start, u64 search_end, u64 max_bytes, - unsigned long bits) -{ - struct rb_node *node; - struct extent_state *state; - u64 cur_start = *start; - u64 total_bytes = 0; - int found = 0; - - if (search_end <= cur_start) { - printk("search_end %Lu start %Lu\n", search_end, cur_start); - WARN_ON(1); - return 0; - } - - write_lock_irq(&tree->lock); - if (cur_start == 0 && bits == EXTENT_DIRTY) { - total_bytes = tree->dirty_bytes; - goto out; - } - /* - * this search will find all the extents that end after - * our range starts. 
- */ - node = tree_search(&tree->state, cur_start); - if (!node || IS_ERR(node)) { - goto out; - } - - while(1) { - state = rb_entry(node, struct extent_state, rb_node); - if (state->start > search_end) - break; - if (state->end >= cur_start && (state->state & bits)) { - total_bytes += min(search_end, state->end) + 1 - - max(cur_start, state->start); - if (total_bytes >= max_bytes) - break; - if (!found) { - *start = state->start; - found = 1; - } - } - node = rb_next(node); - if (!node) - break; - } -out: - write_unlock_irq(&tree->lock); - return total_bytes; -} -/* - * helper function to lock both pages and extents in the tree. - * pages must be locked first. - */ -int lock_range(struct extent_map_tree *tree, u64 start, u64 end) -{ - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; - struct page *page; - int err; - - while (index <= end_index) { - page = grab_cache_page(tree->mapping, index); - if (!page) { - err = -ENOMEM; - goto failed; - } - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto failed; - } - index++; - } - lock_extent(tree, start, end, GFP_NOFS); - return 0; - -failed: - /* - * we failed above in getting the page at 'index', so we undo here - * up to but not including the page at 'index' - */ - end_index = index; - index = start >> PAGE_CACHE_SHIFT; - while (index < end_index) { - page = find_get_page(tree->mapping, index); - unlock_page(page); - page_cache_release(page); - index++; - } - return err; -} -EXPORT_SYMBOL(lock_range); - -/* - * helper function to unlock both pages and extents in the tree. 
- */ -int unlock_range(struct extent_map_tree *tree, u64 start, u64 end) -{ - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; - struct page *page; - - while (index <= end_index) { - page = find_get_page(tree->mapping, index); - unlock_page(page); - page_cache_release(page); - index++; - } - unlock_extent(tree, start, end, GFP_NOFS); - return 0; -} -EXPORT_SYMBOL(unlock_range); - -int set_state_private(struct extent_map_tree *tree, u64 start, u64 private) -{ - struct rb_node *node; - struct extent_state *state; - int ret = 0; - - write_lock_irq(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. - */ - node = tree_search(&tree->state, start); - if (!node || IS_ERR(node)) { - ret = -ENOENT; - goto out; - } - state = rb_entry(node, struct extent_state, rb_node); - if (state->start != start) { - ret = -ENOENT; - goto out; - } - state->private = private; -out: - write_unlock_irq(&tree->lock); - return ret; -} - -int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private) -{ - struct rb_node *node; - struct extent_state *state; - int ret = 0; - - read_lock_irq(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. - */ - node = tree_search(&tree->state, start); - if (!node || IS_ERR(node)) { - ret = -ENOENT; - goto out; - } - state = rb_entry(node, struct extent_state, rb_node); - if (state->start != start) { - ret = -ENOENT; - goto out; - } - *private = state->private; -out: - read_unlock_irq(&tree->lock); - return ret; -} - -/* - * searches a range in the state tree for a given mask. - * If 'filled' == 1, this returns 1 only if ever extent in the tree - * has the bits set. Otherwise, 1 is returned if any bit in the - * range is found set. 
- */ -int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, - int bits, int filled) -{ - struct extent_state *state = NULL; - struct rb_node *node; - int bitset = 0; - - read_lock_irq(&tree->lock); - node = tree_search(&tree->state, start); - while (node && start <= end) { - state = rb_entry(node, struct extent_state, rb_node); - - if (filled && state->start > start) { - bitset = 0; - break; - } - - if (state->start > end) - break; - - if (state->state & bits) { - bitset = 1; - if (!filled) - break; - } else if (filled) { - bitset = 0; - break; - } - start = state->end + 1; - if (start > end) - break; - node = rb_next(node); - if (!node) { - if (filled) - bitset = 0; - break; - } - } - read_unlock_irq(&tree->lock); - return bitset; -} -EXPORT_SYMBOL(test_range_bit); - -/* - * helper function to set a given page up to date if all the - * extents in the tree for that page are up to date - */ -static int check_page_uptodate(struct extent_map_tree *tree, - struct page *page) -{ - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 end = start + PAGE_CACHE_SIZE - 1; - if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1)) - SetPageUptodate(page); - return 0; -} - -/* - * helper function to unlock a page if all the extents in the tree - * for that page are unlocked - */ -static int check_page_locked(struct extent_map_tree *tree, - struct page *page) -{ - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 end = start + PAGE_CACHE_SIZE - 1; - if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0)) - unlock_page(page); - return 0; -} - -/* - * helper function to end page writeback if all the extents - * in the tree for that page are done with writeback - */ -static int check_page_writeback(struct extent_map_tree *tree, - struct page *page) -{ - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 end = start + PAGE_CACHE_SIZE - 1; - if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0)) - end_page_writeback(page); - return 0; -} - -/* 
lots and lots of room for performance fixes in the end_bio funcs */ - -/* - * after a writepage IO is done, we need to: - * clear the uptodate bits on error - * clear the writeback bits in the extent tree for this IO - * end_page_writeback if the page has no more pending IO - * - * Scheduling is not allowed, so the extent state tree is expected - * to have one and only one object corresponding to this IO. - */ -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) -static void end_bio_extent_writepage(struct bio *bio, int err) -#else -static int end_bio_extent_writepage(struct bio *bio, - unsigned int bytes_done, int err) -#endif -{ - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct extent_map_tree *tree = bio->bi_private; - u64 start; - u64 end; - int whole_page; - -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - if (bio->bi_size) - return 1; -#endif - - do { - struct page *page = bvec->bv_page; - start = ((u64)page->index << PAGE_CACHE_SHIFT) + - bvec->bv_offset; - end = start + bvec->bv_len - 1; - - if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) - whole_page = 1; - else - whole_page = 0; - - if (--bvec >= bio->bi_io_vec) - prefetchw(&bvec->bv_page->flags); - - if (!uptodate) { - clear_extent_uptodate(tree, start, end, GFP_ATOMIC); - ClearPageUptodate(page); - SetPageError(page); - } - clear_extent_writeback(tree, start, end, GFP_ATOMIC); - - if (whole_page) - end_page_writeback(page); - else - check_page_writeback(tree, page); - if (tree->ops && tree->ops->writepage_end_io_hook) - tree->ops->writepage_end_io_hook(page, start, end); - } while (bvec >= bio->bi_io_vec); - - bio_put(bio); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - return 0; -#endif -} - -/* - * after a readpage IO is done, we need to: - * clear the uptodate bits on error - * set the uptodate bits if things worked - * set the page up to date if all extents in the tree are uptodate - * clear the lock 
bit in the extent tree - * unlock the page if there are no other extents locked for it - * - * Scheduling is not allowed, so the extent state tree is expected - * to have one and only one object corresponding to this IO. - */ -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) -static void end_bio_extent_readpage(struct bio *bio, int err) -#else -static int end_bio_extent_readpage(struct bio *bio, - unsigned int bytes_done, int err) -#endif -{ - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct extent_map_tree *tree = bio->bi_private; - u64 start; - u64 end; - int whole_page; - int ret; - -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - if (bio->bi_size) - return 1; -#endif - - do { - struct page *page = bvec->bv_page; - start = ((u64)page->index << PAGE_CACHE_SHIFT) + - bvec->bv_offset; - end = start + bvec->bv_len - 1; - - if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) - whole_page = 1; - else - whole_page = 0; - - if (--bvec >= bio->bi_io_vec) - prefetchw(&bvec->bv_page->flags); - - if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { - ret = tree->ops->readpage_end_io_hook(page, start, end); - if (ret) - uptodate = 0; - } - if (uptodate) { - set_extent_uptodate(tree, start, end, GFP_ATOMIC); - if (whole_page) - SetPageUptodate(page); - else - check_page_uptodate(tree, page); - } else { - ClearPageUptodate(page); - SetPageError(page); - } - - unlock_extent(tree, start, end, GFP_ATOMIC); - - if (whole_page) - unlock_page(page); - else - check_page_locked(tree, page); - } while (bvec >= bio->bi_io_vec); - - bio_put(bio); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - return 0; -#endif -} - -/* - * IO done from prepare_write is pretty simple, we just unlock - * the structs in the extent tree when done, and set the uptodate bits - * as appropriate. 
- */ -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) -static void end_bio_extent_preparewrite(struct bio *bio, int err) -#else -static int end_bio_extent_preparewrite(struct bio *bio, - unsigned int bytes_done, int err) -#endif -{ - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct extent_map_tree *tree = bio->bi_private; - u64 start; - u64 end; - -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - if (bio->bi_size) - return 1; -#endif - - do { - struct page *page = bvec->bv_page; - start = ((u64)page->index << PAGE_CACHE_SHIFT) + - bvec->bv_offset; - end = start + bvec->bv_len - 1; - - if (--bvec >= bio->bi_io_vec) - prefetchw(&bvec->bv_page->flags); - - if (uptodate) { - set_extent_uptodate(tree, start, end, GFP_ATOMIC); - } else { - ClearPageUptodate(page); - SetPageError(page); - } - - unlock_extent(tree, start, end, GFP_ATOMIC); - - } while (bvec >= bio->bi_io_vec); - - bio_put(bio); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - return 0; -#endif -} - -static struct bio * -extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, - gfp_t gfp_flags) -{ - struct bio *bio; - - bio = bio_alloc(gfp_flags, nr_vecs); - - if (bio == NULL && (current->flags & PF_MEMALLOC)) { - while (!bio && (nr_vecs /= 2)) - bio = bio_alloc(gfp_flags, nr_vecs); - } - - if (bio) { - bio->bi_bdev = bdev; - bio->bi_sector = first_sector; - } - return bio; -} - -static int submit_one_bio(int rw, struct bio *bio) -{ - u64 maxsector; - int ret = 0; - - bio_get(bio); - - maxsector = bio->bi_bdev->bd_inode->i_size >> 9; - if (maxsector < bio->bi_sector) { - printk("sector too large max %Lu got %llu\n", maxsector, - (unsigned long long)bio->bi_sector); - WARN_ON(1); - } - - submit_bio(rw, bio); - if (bio_flagged(bio, BIO_EOPNOTSUPP)) - ret = -EOPNOTSUPP; - bio_put(bio); - return ret; -} - -static int submit_extent_page(int rw, struct extent_map_tree *tree, - struct page *page, sector_t 
sector, - size_t size, unsigned long offset, - struct block_device *bdev, - struct bio **bio_ret, - unsigned long max_pages, - bio_end_io_t end_io_func) -{ - int ret = 0; - struct bio *bio; - int nr; - - if (bio_ret && *bio_ret) { - bio = *bio_ret; - if (bio->bi_sector + (bio->bi_size >> 9) != sector || - bio_add_page(bio, page, size, offset) < size) { - ret = submit_one_bio(rw, bio); - bio = NULL; - } else { - return 0; - } - } - nr = min_t(int, max_pages, bio_get_nr_vecs(bdev)); - bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); - if (!bio) { - printk("failed to allocate bio nr %d\n", nr); - } - bio_add_page(bio, page, size, offset); - bio->bi_end_io = end_io_func; - bio->bi_private = tree; - if (bio_ret) { - *bio_ret = bio; - } else { - ret = submit_one_bio(rw, bio); - } - - return ret; -} - -void set_page_extent_mapped(struct page *page) -{ - if (!PagePrivate(page)) { - SetPagePrivate(page); - WARN_ON(!page->mapping->a_ops->invalidatepage); - set_page_private(page, EXTENT_PAGE_PRIVATE); - page_cache_get(page); - } -} - -void set_page_extent_head(struct page *page, unsigned long len) -{ - set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); -} - -/* - * basic readpage implementation. 
Locked extent state structs are inserted - * into the tree that are removed when the IO is done (by the end_io - * handlers) - */ -static int __extent_read_full_page(struct extent_map_tree *tree, - struct page *page, - get_extent_t *get_extent, - struct bio **bio) -{ - struct inode *inode = page->mapping->host; - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 page_end = start + PAGE_CACHE_SIZE - 1; - u64 end; - u64 cur = start; - u64 extent_offset; - u64 last_byte = i_size_read(inode); - u64 block_start; - u64 cur_end; - sector_t sector; - struct extent_map *em; - struct block_device *bdev; - int ret; - int nr = 0; - size_t page_offset = 0; - size_t iosize; - size_t blocksize = inode->i_sb->s_blocksize; - - set_page_extent_mapped(page); - - end = page_end; - lock_extent(tree, start, end, GFP_NOFS); - - while (cur <= end) { - if (cur >= last_byte) { - char *userpage; - iosize = PAGE_CACHE_SIZE - page_offset; - userpage = kmap_atomic(page, KM_USER0); - memset(userpage + page_offset, 0, iosize); - flush_dcache_page(page); - kunmap_atomic(userpage, KM_USER0); - set_extent_uptodate(tree, cur, cur + iosize - 1, - GFP_NOFS); - unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); - break; - } - em = get_extent(inode, page, page_offset, cur, end, 0); - if (IS_ERR(em) || !em) { - SetPageError(page); - unlock_extent(tree, cur, end, GFP_NOFS); - break; - } - - extent_offset = cur - em->start; - BUG_ON(em->end < cur); - BUG_ON(end < cur); - - iosize = min(em->end - cur, end - cur) + 1; - cur_end = min(em->end, end); - iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); - sector = (em->block_start + extent_offset) >> 9; - bdev = em->bdev; - block_start = em->block_start; - free_extent_map(em); - em = NULL; - - /* we've found a hole, just zero and go on */ - if (block_start == EXTENT_MAP_HOLE) { - char *userpage; - userpage = kmap_atomic(page, KM_USER0); - memset(userpage + page_offset, 0, iosize); - flush_dcache_page(page); - kunmap_atomic(userpage, KM_USER0); - 
- set_extent_uptodate(tree, cur, cur + iosize - 1, - GFP_NOFS); - unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); - cur = cur + iosize; - page_offset += iosize; - continue; - } - /* the get_extent function already copied into the page */ - if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) { - unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); - cur = cur + iosize; - page_offset += iosize; - continue; - } - - ret = 0; - if (tree->ops && tree->ops->readpage_io_hook) { - ret = tree->ops->readpage_io_hook(page, cur, - cur + iosize - 1); - } - if (!ret) { - unsigned long nr = (last_byte >> PAGE_CACHE_SHIFT) + 1; - nr -= page->index; - ret = submit_extent_page(READ, tree, page, - sector, iosize, page_offset, - bdev, bio, nr, - end_bio_extent_readpage); - } - if (ret) - SetPageError(page); - cur = cur + iosize; - page_offset += iosize; - nr++; - } - if (!nr) { - if (!PageError(page)) - SetPageUptodate(page); - unlock_page(page); - } - return 0; -} - -int extent_read_full_page(struct extent_map_tree *tree, struct page *page, - get_extent_t *get_extent) -{ - struct bio *bio = NULL; - int ret; - - ret = __extent_read_full_page(tree, page, get_extent, &bio); - if (bio) - submit_one_bio(READ, bio); - return ret; -} -EXPORT_SYMBOL(extent_read_full_page); - -/* - * the writepage semantics are similar to regular writepage. extent - * records are inserted to lock ranges in the tree, and as dirty areas - * are found, they are marked writeback. 
Then the lock bits are removed - * and the end_io handler clears the writeback ranges - */ -static int __extent_writepage(struct page *page, struct writeback_control *wbc, - void *data) -{ - struct inode *inode = page->mapping->host; - struct extent_page_data *epd = data; - struct extent_map_tree *tree = epd->tree; - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 delalloc_start; - u64 page_end = start + PAGE_CACHE_SIZE - 1; - u64 end; - u64 cur = start; - u64 extent_offset; - u64 last_byte = i_size_read(inode); - u64 block_start; - u64 iosize; - sector_t sector; - struct extent_map *em; - struct block_device *bdev; - int ret; - int nr = 0; - size_t page_offset = 0; - size_t blocksize; - loff_t i_size = i_size_read(inode); - unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; - u64 nr_delalloc; - u64 delalloc_end; - - WARN_ON(!PageLocked(page)); - if (page->index > end_index) { - clear_extent_dirty(tree, start, page_end, GFP_NOFS); - unlock_page(page); - return 0; - } - - if (page->index == end_index) { - char *userpage; - - size_t offset = i_size & (PAGE_CACHE_SIZE - 1); - - userpage = kmap_atomic(page, KM_USER0); - memset(userpage + offset, 0, PAGE_CACHE_SIZE - offset); - flush_dcache_page(page); - kunmap_atomic(userpage, KM_USER0); - } - - set_page_extent_mapped(page); - - delalloc_start = start; - delalloc_end = 0; - while(delalloc_end < page_end) { - nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start, - &delalloc_end, - 128 * 1024 * 1024); - if (nr_delalloc == 0) { - delalloc_start = delalloc_end + 1; - continue; - } - tree->ops->fill_delalloc(inode, delalloc_start, - delalloc_end); - clear_extent_bit(tree, delalloc_start, - delalloc_end, - EXTENT_LOCKED | EXTENT_DELALLOC, - 1, 0, GFP_NOFS); - delalloc_start = delalloc_end + 1; - } - lock_extent(tree, start, page_end, GFP_NOFS); - - end = page_end; - if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { - printk("found delalloc bits after lock_extent\n"); - } - - if (last_byte <= 
start) { - clear_extent_dirty(tree, start, page_end, GFP_NOFS); - goto done; - } - - set_extent_uptodate(tree, start, page_end, GFP_NOFS); - blocksize = inode->i_sb->s_blocksize; - - while (cur <= end) { - if (cur >= last_byte) { - clear_extent_dirty(tree, cur, page_end, GFP_NOFS); - break; - } - em = epd->get_extent(inode, page, page_offset, cur, end, 1); - if (IS_ERR(em) || !em) { - SetPageError(page); - break; - } - - extent_offset = cur - em->start; - BUG_ON(em->end < cur); - BUG_ON(end < cur); - iosize = min(em->end - cur, end - cur) + 1; - iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); - sector = (em->block_start + extent_offset) >> 9; - bdev = em->bdev; - block_start = em->block_start; - free_extent_map(em); - em = NULL; - - if (block_start == EXTENT_MAP_HOLE || - block_start == EXTENT_MAP_INLINE) { - clear_extent_dirty(tree, cur, - cur + iosize - 1, GFP_NOFS); - cur = cur + iosize; - page_offset += iosize; - continue; - } - - /* leave this out until we have a page_mkwrite call */ - if (0 && !test_range_bit(tree, cur, cur + iosize - 1, - EXTENT_DIRTY, 0)) { - cur = cur + iosize; - page_offset += iosize; - continue; - } - clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); - if (tree->ops && tree->ops->writepage_io_hook) { - ret = tree->ops->writepage_io_hook(page, cur, - cur + iosize - 1); - } else { - ret = 0; - } - if (ret) - SetPageError(page); - else { - unsigned long max_nr = end_index + 1; - set_range_writeback(tree, cur, cur + iosize - 1); - if (!PageWriteback(page)) { - printk("warning page %lu not writeback, " - "cur %llu end %llu\n", page->index, - (unsigned long long)cur, - (unsigned long long)end); - } - - ret = submit_extent_page(WRITE, tree, page, sector, - iosize, page_offset, bdev, - &epd->bio, max_nr, - end_bio_extent_writepage); - if (ret) - SetPageError(page); - } - cur = cur + iosize; - page_offset += iosize; - nr++; - } -done: - if (nr == 0) { - /* make sure the mapping tag for page dirty gets cleared */ - 
set_page_writeback(page); - end_page_writeback(page); - } - unlock_extent(tree, start, page_end, GFP_NOFS); - unlock_page(page); - return 0; -} - -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - -/* Taken directly from 2.6.23 for 2.6.18 back port */ -typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, - void *data); - -/** - * write_cache_pages - walk the list of dirty pages of the given address space - * and write all of them. - * @mapping: address space structure to write - * @wbc: subtract the number of written pages from *@wbc->nr_to_write - * @writepage: function called for each page - * @data: data passed to writepage function - * - * If a page is already under I/O, write_cache_pages() skips it, even - * if it's dirty. This is desirable behaviour for memory-cleaning writeback, - * but it is INCORRECT for data-integrity system calls such as fsync(). fsync() - * and msync() need to guarantee that all the data which was dirty at the time - * the call was made get new I/O started against them. If wbc->sync_mode is - * WB_SYNC_ALL then we were called for data integrity and we must wait for - * existing IO to complete. 
- */ -static int write_cache_pages(struct address_space *mapping, - struct writeback_control *wbc, writepage_t writepage, - void *data) -{ - struct backing_dev_info *bdi = mapping->backing_dev_info; - int ret = 0; - int done = 0; - struct pagevec pvec; - int nr_pages; - pgoff_t index; - pgoff_t end; /* Inclusive */ - int scanned = 0; - int range_whole = 0; - - if (wbc->nonblocking && bdi_write_congested(bdi)) { - wbc->encountered_congestion = 1; - return 0; - } - - pagevec_init(&pvec, 0); - if (wbc->range_cyclic) { - index = mapping->writeback_index; /* Start from prev offset */ - end = -1; - } else { - index = wbc->range_start >> PAGE_CACHE_SHIFT; - end = wbc->range_end >> PAGE_CACHE_SHIFT; - if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) - range_whole = 1; - scanned = 1; - } -retry: - while (!done && (index <= end) && - (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_DIRTY, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { - unsigned i; - - scanned = 1; - for (i = 0; i < nr_pages; i++) { - struct page *page = pvec.pages[i]; - - /* - * At this point we hold neither mapping->tree_lock nor - * lock on the page itself: the page may be truncated or - * invalidated (changing page->mapping to NULL), or even - * swizzled back from swapper_space to tmpfs file - * mapping - */ - lock_page(page); - - if (unlikely(page->mapping != mapping)) { - unlock_page(page); - continue; - } - - if (!wbc->range_cyclic && page->index > end) { - done = 1; - unlock_page(page); - continue; - } - - if (wbc->sync_mode != WB_SYNC_NONE) - wait_on_page_writeback(page); - - if (PageWriteback(page) || - !clear_page_dirty_for_io(page)) { - unlock_page(page); - continue; - } - - ret = (*writepage)(page, wbc, data); - - if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) { - unlock_page(page); - ret = 0; - } - if (ret || (--(wbc->nr_to_write) <= 0)) - done = 1; - if (wbc->nonblocking && bdi_write_congested(bdi)) { - wbc->encountered_congestion = 1; - done = 1; - } - } - 
pagevec_release(&pvec); - cond_resched(); - } - if (!scanned && !done) { - /* - * We hit the last page and there is more work to be done: wrap - * back to the start of the file - */ - scanned = 1; - index = 0; - goto retry; - } - if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) - mapping->writeback_index = index; - return ret; -} -#endif - -int extent_write_full_page(struct extent_map_tree *tree, struct page *page, - get_extent_t *get_extent, - struct writeback_control *wbc) -{ - int ret; - struct address_space *mapping = page->mapping; - struct extent_page_data epd = { - .bio = NULL, - .tree = tree, - .get_extent = get_extent, - }; - struct writeback_control wbc_writepages = { - .bdi = wbc->bdi, - .sync_mode = WB_SYNC_NONE, - .older_than_this = NULL, - .nr_to_write = 64, - .range_start = page_offset(page) + PAGE_CACHE_SIZE, - .range_end = (loff_t)-1, - }; - - - ret = __extent_writepage(page, wbc, &epd); - - write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd); - if (epd.bio) { - submit_one_bio(WRITE, epd.bio); - } - return ret; -} -EXPORT_SYMBOL(extent_write_full_page); - - -int extent_writepages(struct extent_map_tree *tree, - struct address_space *mapping, - get_extent_t *get_extent, - struct writeback_control *wbc) -{ - int ret = 0; - struct extent_page_data epd = { - .bio = NULL, - .tree = tree, - .get_extent = get_extent, - }; - - ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd); - if (epd.bio) { - submit_one_bio(WRITE, epd.bio); - } - return ret; -} -EXPORT_SYMBOL(extent_writepages); - -int extent_readpages(struct extent_map_tree *tree, - struct address_space *mapping, - struct list_head *pages, unsigned nr_pages, - get_extent_t get_extent) -{ - struct bio *bio = NULL; - unsigned page_idx; - struct pagevec pvec; - - pagevec_init(&pvec, 0); - for (page_idx = 0; page_idx < nr_pages; page_idx++) { - struct page *page = list_entry(pages->prev, struct page, lru); - - prefetchw(&page->flags); - list_del(&page->lru); - 
/* - * what we want to do here is call add_to_page_cache_lru, - * but that isn't exported, so we reproduce it here - */ - if (!add_to_page_cache(page, mapping, - page->index, GFP_KERNEL)) { - - /* open coding of lru_cache_add, also not exported */ - page_cache_get(page); - if (!pagevec_add(&pvec, page)) - __pagevec_lru_add(&pvec); - __extent_read_full_page(tree, page, get_extent, &bio); - } - page_cache_release(page); - } - if (pagevec_count(&pvec)) - __pagevec_lru_add(&pvec); - BUG_ON(!list_empty(pages)); - if (bio) - submit_one_bio(READ, bio); - return 0; -} -EXPORT_SYMBOL(extent_readpages); - -/* - * basic invalidatepage code, this waits on any locked or writeback - * ranges corresponding to the page, and then deletes any extent state - * records from the tree - */ -int extent_invalidatepage(struct extent_map_tree *tree, - struct page *page, unsigned long offset) -{ - u64 start = ((u64)page->index << PAGE_CACHE_SHIFT); - u64 end = start + PAGE_CACHE_SIZE - 1; - size_t blocksize = page->mapping->host->i_sb->s_blocksize; - - start += (offset + blocksize -1) & ~(blocksize - 1); - if (start > end) - return 0; - - lock_extent(tree, start, end, GFP_NOFS); - wait_on_extent_writeback(tree, start, end); - clear_extent_bit(tree, start, end, - EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, - 1, 1, GFP_NOFS); - return 0; -} -EXPORT_SYMBOL(extent_invalidatepage); - -/* - * simple commit_write call, set_range_dirty is used to mark both - * the pages and the extent records as dirty - */ -int extent_commit_write(struct extent_map_tree *tree, - struct inode *inode, struct page *page, - unsigned from, unsigned to) -{ - loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; - - set_page_extent_mapped(page); - set_page_dirty(page); - - if (pos > inode->i_size) { - i_size_write(inode, pos); - mark_inode_dirty(inode); - } - return 0; -} -EXPORT_SYMBOL(extent_commit_write); - -int extent_prepare_write(struct extent_map_tree *tree, - struct inode *inode, struct page *page, - 
unsigned from, unsigned to, get_extent_t *get_extent) -{ - u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 page_end = page_start + PAGE_CACHE_SIZE - 1; - u64 block_start; - u64 orig_block_start; - u64 block_end; - u64 cur_end; - struct extent_map *em; - unsigned blocksize = 1 << inode->i_blkbits; - size_t page_offset = 0; - size_t block_off_start; - size_t block_off_end; - int err = 0; - int iocount = 0; - int ret = 0; - int isnew; - - set_page_extent_mapped(page); - - block_start = (page_start + from) & ~((u64)blocksize - 1); - block_end = (page_start + to - 1) | (blocksize - 1); - orig_block_start = block_start; - - lock_extent(tree, page_start, page_end, GFP_NOFS); - while(block_start <= block_end) { - em = get_extent(inode, page, page_offset, block_start, - block_end, 1); - if (IS_ERR(em) || !em) { - goto err; - } - cur_end = min(block_end, em->end); - block_off_start = block_start & (PAGE_CACHE_SIZE - 1); - block_off_end = block_off_start + blocksize; - isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS); - - if (!PageUptodate(page) && isnew && - (block_off_end > to || block_off_start < from)) { - void *kaddr; - - kaddr = kmap_atomic(page, KM_USER0); - if (block_off_end > to) - memset(kaddr + to, 0, block_off_end - to); - if (block_off_start < from) - memset(kaddr + block_off_start, 0, - from - block_off_start); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); - } - if ((em->block_start != EXTENT_MAP_HOLE && - em->block_start != EXTENT_MAP_INLINE) && - !isnew && !PageUptodate(page) && - (block_off_end > to || block_off_start < from) && - !test_range_bit(tree, block_start, cur_end, - EXTENT_UPTODATE, 1)) { - u64 sector; - u64 extent_offset = block_start - em->start; - size_t iosize; - sector = (em->block_start + extent_offset) >> 9; - iosize = (cur_end - block_start + blocksize) & - ~((u64)blocksize - 1); - /* - * we've already got the extent locked, but we - * need to split the state such that our end_bio - * handler can 
clear the lock. - */ - set_extent_bit(tree, block_start, - block_start + iosize - 1, - EXTENT_LOCKED, 0, NULL, GFP_NOFS); - ret = submit_extent_page(READ, tree, page, - sector, iosize, page_offset, em->bdev, - NULL, 1, - end_bio_extent_preparewrite); - iocount++; - block_start = block_start + iosize; - } else { - set_extent_uptodate(tree, block_start, cur_end, - GFP_NOFS); - unlock_extent(tree, block_start, cur_end, GFP_NOFS); - block_start = cur_end + 1; - } - page_offset = block_start & (PAGE_CACHE_SIZE - 1); - free_extent_map(em); - } - if (iocount) { - wait_extent_bit(tree, orig_block_start, - block_end, EXTENT_LOCKED); - } - check_page_uptodate(tree, page); -err: - /* FIXME, zero out newly allocated blocks on error */ - return err; -} -EXPORT_SYMBOL(extent_prepare_write); - -/* - * a helper for releasepage. As long as there are no locked extents - * in the range corresponding to the page, both state records and extent - * map records are removed - */ -int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page) -{ - struct extent_map *em; - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 end = start + PAGE_CACHE_SIZE - 1; - u64 orig_start = start; - int ret = 1; - - while (start <= end) { - em = lookup_extent_mapping(tree, start, end); - if (!em || IS_ERR(em)) - break; - if (!test_range_bit(tree, em->start, em->end, - EXTENT_LOCKED, 0)) { - remove_extent_mapping(tree, em); - /* once for the rb tree */ - free_extent_map(em); - } - start = em->end + 1; - /* once for us */ - free_extent_map(em); - } - if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0)) - ret = 0; - else - clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE, - 1, 1, GFP_NOFS); - return ret; -} -EXPORT_SYMBOL(try_release_extent_mapping); - -sector_t extent_bmap(struct address_space *mapping, sector_t iblock, - get_extent_t *get_extent) -{ - struct inode *inode = mapping->host; - u64 start = iblock << inode->i_blkbits; - u64 end = start + (1 << 
inode->i_blkbits) - 1; - sector_t sector = 0; - struct extent_map *em; - - em = get_extent(inode, NULL, 0, start, end, 0); - if (!em || IS_ERR(em)) - return 0; - - if (em->block_start == EXTENT_MAP_INLINE || - em->block_start == EXTENT_MAP_HOLE) - goto out; - - sector = (em->block_start + start - em->start) >> inode->i_blkbits; -out: - free_extent_map(em); - return sector; -} - -static int add_lru(struct extent_map_tree *tree, struct extent_buffer *eb) -{ - if (list_empty(&eb->lru)) { - extent_buffer_get(eb); - list_add(&eb->lru, &tree->buffer_lru); - tree->lru_size++; - if (tree->lru_size >= BUFFER_LRU_MAX) { - struct extent_buffer *rm; - rm = list_entry(tree->buffer_lru.prev, - struct extent_buffer, lru); - tree->lru_size--; - list_del_init(&rm->lru); - free_extent_buffer(rm); - } - } else - list_move(&eb->lru, &tree->buffer_lru); - return 0; -} -static struct extent_buffer *find_lru(struct extent_map_tree *tree, - u64 start, unsigned long len) -{ - struct list_head *lru = &tree->buffer_lru; - struct list_head *cur = lru->next; - struct extent_buffer *eb; - - if (list_empty(lru)) - return NULL; - - do { - eb = list_entry(cur, struct extent_buffer, lru); - if (eb->start == start && eb->len == len) { - extent_buffer_get(eb); - return eb; - } - cur = cur->next; - } while (cur != lru); - return NULL; -} - -static inline unsigned long num_extent_pages(u64 start, u64 len) -{ - return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - - (start >> PAGE_CACHE_SHIFT); -} - -static inline struct page *extent_buffer_page(struct extent_buffer *eb, - unsigned long i) -{ - struct page *p; - struct address_space *mapping; - - if (i == 0) - return eb->first_page; - i += eb->start >> PAGE_CACHE_SHIFT; - mapping = eb->first_page->mapping; - read_lock_irq(&mapping->tree_lock); - p = radix_tree_lookup(&mapping->page_tree, i); - read_unlock_irq(&mapping->tree_lock); - return p; -} - -static struct extent_buffer *__alloc_extent_buffer(struct extent_map_tree *tree, - u64 start, 
- unsigned long len, - gfp_t mask) -{ - struct extent_buffer *eb = NULL; - - spin_lock(&tree->lru_lock); - eb = find_lru(tree, start, len); - spin_unlock(&tree->lru_lock); - if (eb) { - return eb; - } - - eb = kmem_cache_zalloc(extent_buffer_cache, mask); - INIT_LIST_HEAD(&eb->lru); - eb->start = start; - eb->len = len; - atomic_set(&eb->refs, 1); - - return eb; -} - -static void __free_extent_buffer(struct extent_buffer *eb) -{ - kmem_cache_free(extent_buffer_cache, eb); -} - -struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, - u64 start, unsigned long len, - struct page *page0, - gfp_t mask) -{ - unsigned long num_pages = num_extent_pages(start, len); - unsigned long i; - unsigned long index = start >> PAGE_CACHE_SHIFT; - struct extent_buffer *eb; - struct page *p; - struct address_space *mapping = tree->mapping; - int uptodate = 1; - - eb = __alloc_extent_buffer(tree, start, len, mask); - if (!eb || IS_ERR(eb)) - return NULL; - - if (eb->flags & EXTENT_BUFFER_FILLED) - goto lru_add; - - if (page0) { - eb->first_page = page0; - i = 1; - index++; - page_cache_get(page0); - mark_page_accessed(page0); - set_page_extent_mapped(page0); - WARN_ON(!PageUptodate(page0)); - set_page_extent_head(page0, len); - } else { - i = 0; - } - for (; i < num_pages; i++, index++) { - p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); - if (!p) { - WARN_ON(1); - goto fail; - } - set_page_extent_mapped(p); - mark_page_accessed(p); - if (i == 0) { - eb->first_page = p; - set_page_extent_head(p, len); - } else { - set_page_private(p, EXTENT_PAGE_PRIVATE); - } - if (!PageUptodate(p)) - uptodate = 0; - unlock_page(p); - } - if (uptodate) - eb->flags |= EXTENT_UPTODATE; - eb->flags |= EXTENT_BUFFER_FILLED; - -lru_add: - spin_lock(&tree->lru_lock); - add_lru(tree, eb); - spin_unlock(&tree->lru_lock); - return eb; - -fail: - spin_lock(&tree->lru_lock); - list_del_init(&eb->lru); - spin_unlock(&tree->lru_lock); - if (!atomic_dec_and_test(&eb->refs)) - return 
NULL; - for (index = 1; index < i; index++) { - page_cache_release(extent_buffer_page(eb, index)); - } - if (i > 0) - page_cache_release(extent_buffer_page(eb, 0)); - __free_extent_buffer(eb); - return NULL; -} -EXPORT_SYMBOL(alloc_extent_buffer); - -struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, - u64 start, unsigned long len, - gfp_t mask) -{ - unsigned long num_pages = num_extent_pages(start, len); - unsigned long i; - unsigned long index = start >> PAGE_CACHE_SHIFT; - struct extent_buffer *eb; - struct page *p; - struct address_space *mapping = tree->mapping; - int uptodate = 1; - - eb = __alloc_extent_buffer(tree, start, len, mask); - if (!eb || IS_ERR(eb)) - return NULL; - - if (eb->flags & EXTENT_BUFFER_FILLED) - goto lru_add; - - for (i = 0; i < num_pages; i++, index++) { - p = find_lock_page(mapping, index); - if (!p) { - goto fail; - } - set_page_extent_mapped(p); - mark_page_accessed(p); - - if (i == 0) { - eb->first_page = p; - set_page_extent_head(p, len); - } else { - set_page_private(p, EXTENT_PAGE_PRIVATE); - } - - if (!PageUptodate(p)) - uptodate = 0; - unlock_page(p); - } - if (uptodate) - eb->flags |= EXTENT_UPTODATE; - eb->flags |= EXTENT_BUFFER_FILLED; - -lru_add: - spin_lock(&tree->lru_lock); - add_lru(tree, eb); - spin_unlock(&tree->lru_lock); - return eb; -fail: - spin_lock(&tree->lru_lock); - list_del_init(&eb->lru); - spin_unlock(&tree->lru_lock); - if (!atomic_dec_and_test(&eb->refs)) - return NULL; - for (index = 1; index < i; index++) { - page_cache_release(extent_buffer_page(eb, index)); - } - if (i > 0) - page_cache_release(extent_buffer_page(eb, 0)); - __free_extent_buffer(eb); - return NULL; -} -EXPORT_SYMBOL(find_extent_buffer); - -void free_extent_buffer(struct extent_buffer *eb) -{ - unsigned long i; - unsigned long num_pages; - - if (!eb) - return; - - if (!atomic_dec_and_test(&eb->refs)) - return; - - WARN_ON(!list_empty(&eb->lru)); - num_pages = num_extent_pages(eb->start, eb->len); - - for (i = 1; i < 
num_pages; i++) { - page_cache_release(extent_buffer_page(eb, i)); - } - page_cache_release(extent_buffer_page(eb, 0)); - __free_extent_buffer(eb); -} -EXPORT_SYMBOL(free_extent_buffer); - -int clear_extent_buffer_dirty(struct extent_map_tree *tree, - struct extent_buffer *eb) -{ - int set; - unsigned long i; - unsigned long num_pages; - struct page *page; - - u64 start = eb->start; - u64 end = start + eb->len - 1; - - set = clear_extent_dirty(tree, start, end, GFP_NOFS); - num_pages = num_extent_pages(eb->start, eb->len); - - for (i = 0; i < num_pages; i++) { - page = extent_buffer_page(eb, i); - lock_page(page); - if (i == 0) - set_page_extent_head(page, eb->len); - else - set_page_private(page, EXTENT_PAGE_PRIVATE); - - /* - * if we're on the last page or the first page and the - * block isn't aligned on a page boundary, do extra checks - * to make sure we don't clean page that is partially dirty - */ - if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || - ((i == num_pages - 1) && - ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { - start = (u64)page->index << PAGE_CACHE_SHIFT; - end = start + PAGE_CACHE_SIZE - 1; - if (test_range_bit(tree, start, end, - EXTENT_DIRTY, 0)) { - unlock_page(page); - continue; - } - } - clear_page_dirty_for_io(page); - write_lock_irq(&page->mapping->tree_lock); - if (!PageDirty(page)) { - radix_tree_tag_clear(&page->mapping->page_tree, - page_index(page), - PAGECACHE_TAG_DIRTY); - } - write_unlock_irq(&page->mapping->tree_lock); - unlock_page(page); - } - return 0; -} -EXPORT_SYMBOL(clear_extent_buffer_dirty); - -int wait_on_extent_buffer_writeback(struct extent_map_tree *tree, - struct extent_buffer *eb) -{ - return wait_on_extent_writeback(tree, eb->start, - eb->start + eb->len - 1); -} -EXPORT_SYMBOL(wait_on_extent_buffer_writeback); - -int set_extent_buffer_dirty(struct extent_map_tree *tree, - struct extent_buffer *eb) -{ - unsigned long i; - unsigned long num_pages; - - num_pages = num_extent_pages(eb->start, eb->len); - 
for (i = 0; i < num_pages; i++) { - struct page *page = extent_buffer_page(eb, i); - /* writepage may need to do something special for the - * first page, we have to make sure page->private is - * properly set. releasepage may drop page->private - * on us if the page isn't already dirty. - */ - if (i == 0) { - lock_page(page); - set_page_extent_head(page, eb->len); - } else if (PagePrivate(page) && - page->private != EXTENT_PAGE_PRIVATE) { - lock_page(page); - set_page_extent_mapped(page); - unlock_page(page); - } - __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); - if (i == 0) - unlock_page(page); - } - return set_extent_dirty(tree, eb->start, - eb->start + eb->len - 1, GFP_NOFS); -} -EXPORT_SYMBOL(set_extent_buffer_dirty); - -int set_extent_buffer_uptodate(struct extent_map_tree *tree, - struct extent_buffer *eb) -{ - unsigned long i; - struct page *page; - unsigned long num_pages; - - num_pages = num_extent_pages(eb->start, eb->len); - - set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, - GFP_NOFS); - for (i = 0; i < num_pages; i++) { - page = extent_buffer_page(eb, i); - if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || - ((i == num_pages - 1) && - ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { - check_page_uptodate(tree, page); - continue; - } - SetPageUptodate(page); - } - return 0; -} -EXPORT_SYMBOL(set_extent_buffer_uptodate); - -int extent_buffer_uptodate(struct extent_map_tree *tree, - struct extent_buffer *eb) -{ - if (eb->flags & EXTENT_UPTODATE) - return 1; - return test_range_bit(tree, eb->start, eb->start + eb->len - 1, - EXTENT_UPTODATE, 1); -} -EXPORT_SYMBOL(extent_buffer_uptodate); - -int read_extent_buffer_pages(struct extent_map_tree *tree, - struct extent_buffer *eb, - u64 start, - int wait) -{ - unsigned long i; - unsigned long start_i; - struct page *page; - int err; - int ret = 0; - unsigned long num_pages; - - if (eb->flags & EXTENT_UPTODATE) - return 0; - - if (0 && test_range_bit(tree, eb->start, eb->start + 
eb->len - 1, - EXTENT_UPTODATE, 1)) { - return 0; - } - - if (start) { - WARN_ON(start < eb->start); - start_i = (start >> PAGE_CACHE_SHIFT) - - (eb->start >> PAGE_CACHE_SHIFT); - } else { - start_i = 0; - } - - num_pages = num_extent_pages(eb->start, eb->len); - for (i = start_i; i < num_pages; i++) { - page = extent_buffer_page(eb, i); - if (PageUptodate(page)) { - continue; - } - if (!wait) { - if (TestSetPageLocked(page)) { - continue; - } - } else { - lock_page(page); - } - if (!PageUptodate(page)) { - err = page->mapping->a_ops->readpage(NULL, page); - if (err) { - ret = err; - } - } else { - unlock_page(page); - } - } - - if (ret || !wait) { - return ret; - } - - for (i = start_i; i < num_pages; i++) { - page = extent_buffer_page(eb, i); - wait_on_page_locked(page); - if (!PageUptodate(page)) { - ret = -EIO; - } - } - if (!ret) - eb->flags |= EXTENT_UPTODATE; - return ret; -} -EXPORT_SYMBOL(read_extent_buffer_pages); - -void read_extent_buffer(struct extent_buffer *eb, void *dstv, - unsigned long start, - unsigned long len) -{ - size_t cur; - size_t offset; - struct page *page; - char *kaddr; - char *dst = (char *)dstv; - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; - unsigned long num_pages = num_extent_pages(eb->start, eb->len); - - WARN_ON(start > eb->len); - WARN_ON(start + len > eb->start + eb->len); - - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); - - while(len > 0) { - page = extent_buffer_page(eb, i); - if (!PageUptodate(page)) { - printk("page %lu not up to date i %lu, total %lu, len %lu\n", page->index, i, num_pages, eb->len); - WARN_ON(1); - } - WARN_ON(!PageUptodate(page)); - - cur = min(len, (PAGE_CACHE_SIZE - offset)); - kaddr = kmap_atomic(page, KM_USER1); - memcpy(dst, kaddr + offset, cur); - kunmap_atomic(kaddr, KM_USER1); - - dst += cur; - len -= cur; - offset = 0; - i++; - } -} -EXPORT_SYMBOL(read_extent_buffer); - -int 
map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, - unsigned long min_len, char **token, char **map, - unsigned long *map_start, - unsigned long *map_len, int km) -{ - size_t offset = start & (PAGE_CACHE_SIZE - 1); - char *kaddr; - struct page *p; - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; - unsigned long end_i = (start_offset + start + min_len - 1) >> - PAGE_CACHE_SHIFT; - - if (i != end_i) - return -EINVAL; - - if (i == 0) { - offset = start_offset; - *map_start = 0; - } else { - offset = 0; - *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset; - } - if (start + min_len > eb->len) { -printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len); - WARN_ON(1); - } - - p = extent_buffer_page(eb, i); - WARN_ON(!PageUptodate(p)); - kaddr = kmap_atomic(p, km); - *token = kaddr; - *map = kaddr + offset; - *map_len = PAGE_CACHE_SIZE - offset; - return 0; -} -EXPORT_SYMBOL(map_private_extent_buffer); - -int map_extent_buffer(struct extent_buffer *eb, unsigned long start, - unsigned long min_len, - char **token, char **map, - unsigned long *map_start, - unsigned long *map_len, int km) -{ - int err; - int save = 0; - if (eb->map_token) { - unmap_extent_buffer(eb, eb->map_token, km); - eb->map_token = NULL; - save = 1; - } - err = map_private_extent_buffer(eb, start, min_len, token, map, - map_start, map_len, km); - if (!err && save) { - eb->map_token = *token; - eb->kaddr = *map; - eb->map_start = *map_start; - eb->map_len = *map_len; - } - return err; -} -EXPORT_SYMBOL(map_extent_buffer); - -void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) -{ - kunmap_atomic(token, km); -} -EXPORT_SYMBOL(unmap_extent_buffer); - -int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, - unsigned long start, - unsigned long len) -{ - size_t cur; - size_t offset; - struct page *page; - char *kaddr; - char *ptr 
= (char *)ptrv; - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; - int ret = 0; - - WARN_ON(start > eb->len); - WARN_ON(start + len > eb->start + eb->len); - - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); - - while(len > 0) { - page = extent_buffer_page(eb, i); - WARN_ON(!PageUptodate(page)); - - cur = min(len, (PAGE_CACHE_SIZE - offset)); - - kaddr = kmap_atomic(page, KM_USER0); - ret = memcmp(ptr, kaddr + offset, cur); - kunmap_atomic(kaddr, KM_USER0); - if (ret) - break; - - ptr += cur; - len -= cur; - offset = 0; - i++; - } - return ret; -} -EXPORT_SYMBOL(memcmp_extent_buffer); - -void write_extent_buffer(struct extent_buffer *eb, const void *srcv, - unsigned long start, unsigned long len) -{ - size_t cur; - size_t offset; - struct page *page; - char *kaddr; - char *src = (char *)srcv; - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; - - WARN_ON(start > eb->len); - WARN_ON(start + len > eb->start + eb->len); - - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); - - while(len > 0) { - page = extent_buffer_page(eb, i); - WARN_ON(!PageUptodate(page)); - - cur = min(len, PAGE_CACHE_SIZE - offset); - kaddr = kmap_atomic(page, KM_USER1); - memcpy(kaddr + offset, src, cur); - kunmap_atomic(kaddr, KM_USER1); - - src += cur; - len -= cur; - offset = 0; - i++; - } -} -EXPORT_SYMBOL(write_extent_buffer); - -void memset_extent_buffer(struct extent_buffer *eb, char c, - unsigned long start, unsigned long len) -{ - size_t cur; - size_t offset; - struct page *page; - char *kaddr; - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; - - WARN_ON(start > eb->len); - WARN_ON(start + len > eb->start + eb->len); - - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); - - while(len > 0) 
{ - page = extent_buffer_page(eb, i); - WARN_ON(!PageUptodate(page)); - - cur = min(len, PAGE_CACHE_SIZE - offset); - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, c, cur); - kunmap_atomic(kaddr, KM_USER0); - - len -= cur; - offset = 0; - i++; - } -} -EXPORT_SYMBOL(memset_extent_buffer); - -void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, - unsigned long dst_offset, unsigned long src_offset, - unsigned long len) -{ - u64 dst_len = dst->len; - size_t cur; - size_t offset; - struct page *page; - char *kaddr; - size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; - - WARN_ON(src->len != dst_len); - - offset = (start_offset + dst_offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); - - while(len > 0) { - page = extent_buffer_page(dst, i); - WARN_ON(!PageUptodate(page)); - - cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); - - kaddr = kmap_atomic(page, KM_USER0); - read_extent_buffer(src, kaddr + offset, src_offset, cur); - kunmap_atomic(kaddr, KM_USER0); - - src_offset += cur; - len -= cur; - offset = 0; - i++; - } -} -EXPORT_SYMBOL(copy_extent_buffer); - -static void move_pages(struct page *dst_page, struct page *src_page, - unsigned long dst_off, unsigned long src_off, - unsigned long len) -{ - char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); - if (dst_page == src_page) { - memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); - } else { - char *src_kaddr = kmap_atomic(src_page, KM_USER1); - char *p = dst_kaddr + dst_off + len; - char *s = src_kaddr + src_off + len; - - while (len--) - *--p = *--s; - - kunmap_atomic(src_kaddr, KM_USER1); - } - kunmap_atomic(dst_kaddr, KM_USER0); -} - -static void copy_pages(struct page *dst_page, struct page *src_page, - unsigned long dst_off, unsigned long src_off, - unsigned long len) -{ - char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); - char *src_kaddr; - - if (dst_page != src_page) - src_kaddr 
= kmap_atomic(src_page, KM_USER1); - else - src_kaddr = dst_kaddr; - - memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); - kunmap_atomic(dst_kaddr, KM_USER0); - if (dst_page != src_page) - kunmap_atomic(src_kaddr, KM_USER1); -} - -void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, - unsigned long src_offset, unsigned long len) -{ - size_t cur; - size_t dst_off_in_page; - size_t src_off_in_page; - size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long dst_i; - unsigned long src_i; - - if (src_offset + len > dst->len) { - printk("memmove bogus src_offset %lu move len %lu len %lu\n", - src_offset, len, dst->len); - BUG_ON(1); - } - if (dst_offset + len > dst->len) { - printk("memmove bogus dst_offset %lu move len %lu len %lu\n", - dst_offset, len, dst->len); - BUG_ON(1); - } - - while(len > 0) { - dst_off_in_page = (start_offset + dst_offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); - src_off_in_page = (start_offset + src_offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); - - dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; - src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT; - - cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - - src_off_in_page)); - cur = min_t(unsigned long, cur, - (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page)); - - copy_pages(extent_buffer_page(dst, dst_i), - extent_buffer_page(dst, src_i), - dst_off_in_page, src_off_in_page, cur); - - src_offset += cur; - dst_offset += cur; - len -= cur; - } -} -EXPORT_SYMBOL(memcpy_extent_buffer); - -void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, - unsigned long src_offset, unsigned long len) -{ - size_t cur; - size_t dst_off_in_page; - size_t src_off_in_page; - unsigned long dst_end = dst_offset + len - 1; - unsigned long src_end = src_offset + len - 1; - size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long dst_i; - unsigned long src_i; - - if (src_offset + len > dst->len) { - 
printk("memmove bogus src_offset %lu move len %lu len %lu\n", - src_offset, len, dst->len); - BUG_ON(1); - } - if (dst_offset + len > dst->len) { - printk("memmove bogus dst_offset %lu move len %lu len %lu\n", - dst_offset, len, dst->len); - BUG_ON(1); - } - if (dst_offset < src_offset) { - memcpy_extent_buffer(dst, dst_offset, src_offset, len); - return; - } - while(len > 0) { - dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT; - src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; - - dst_off_in_page = (start_offset + dst_end) & - ((unsigned long)PAGE_CACHE_SIZE - 1); - src_off_in_page = (start_offset + src_end) & - ((unsigned long)PAGE_CACHE_SIZE - 1); - - cur = min_t(unsigned long, len, src_off_in_page + 1); - cur = min(cur, dst_off_in_page + 1); - move_pages(extent_buffer_page(dst, dst_i), - extent_buffer_page(dst, src_i), - dst_off_in_page - cur + 1, - src_off_in_page - cur + 1, cur); - - dst_end -= cur; - src_end -= cur; - len -= cur; - } -} -EXPORT_SYMBOL(memmove_extent_buffer); diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index ea60f5447b5b..56314217cfc0 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -3,215 +3,53 @@ #include +#define EXTENT_MAP_LAST_BYTE (u64)-4 #define EXTENT_MAP_HOLE (u64)-3 #define EXTENT_MAP_INLINE (u64)-2 #define EXTENT_MAP_DELALLOC (u64)-1 -/* bits for the extent state */ -#define EXTENT_DIRTY 1 -#define EXTENT_WRITEBACK (1 << 1) -#define EXTENT_UPTODATE (1 << 2) -#define EXTENT_LOCKED (1 << 3) -#define EXTENT_NEW (1 << 4) -#define EXTENT_DELALLOC (1 << 5) -#define EXTENT_DEFRAG (1 << 6) -#define EXTENT_DEFRAG_DONE (1 << 7) -#define EXTENT_BUFFER_FILLED (1 << 8) -#define EXTENT_CSUM (1 << 9) -#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) - -/* - * page->private values. Every page that is controlled by the extent - * map has page->private set to one. 
- */ -#define EXTENT_PAGE_PRIVATE 1 -#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3 - - -struct extent_map_ops { - int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); - int (*writepage_io_hook)(struct page *page, u64 start, u64 end); - int (*readpage_io_hook)(struct page *page, u64 start, u64 end); - int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end); - void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end); -}; - -struct extent_map_tree { - struct rb_root map; - struct rb_root state; - struct address_space *mapping; - u64 dirty_bytes; - rwlock_t lock; - struct extent_map_ops *ops; - spinlock_t lru_lock; - struct list_head buffer_lru; - int lru_size; -}; - -/* note, this must start with the same fields as fs/extent_map.c:tree_entry */ struct extent_map { - u64 start; - u64 end; /* inclusive */ - int in_tree; struct rb_node rb_node; - /* block_start and block_end are in bytes */ + + /* all of these are in bytes */ + u64 start; + u64 len; u64 block_start; - u64 block_end; /* inclusive */ + unsigned long flags; struct block_device *bdev; atomic_t refs; -}; - -/* note, this must start with the same fields as fs/extent_map.c:tree_entry */ -struct extent_state { - u64 start; - u64 end; /* inclusive */ int in_tree; - struct rb_node rb_node; - wait_queue_head_t wq; - atomic_t refs; - unsigned long state; - - /* for use by the FS */ - u64 private; - - struct list_head list; }; -struct extent_buffer { - u64 start; - unsigned long len; - char *map_token; - char *kaddr; - unsigned long map_start; - unsigned long map_len; - struct page *first_page; - struct list_head lru; - atomic_t refs; - int flags; +struct extent_map_tree { + struct rb_root map; + struct extent_map *last; + spinlock_t lock; }; -typedef struct extent_map *(get_extent_t)(struct inode *inode, - struct page *page, - size_t page_offset, - u64 start, u64 end, - int create); +static inline u64 extent_map_end(struct extent_map *em) +{ + if (em->start + em->len < em->start) + return 
(u64)-1; + return em->start + em->len; +} + +static inline u64 extent_map_block_end(struct extent_map *em) +{ + if (em->block_start + em->len < em->block_start) + return (u64)-1; + return em->block_start + em->len; +} -void extent_map_tree_init(struct extent_map_tree *tree, - struct address_space *mapping, gfp_t mask); -void extent_map_tree_empty_lru(struct extent_map_tree *tree); +void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask); struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, - u64 start, u64 end); + u64 start, u64 len); int add_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); -int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page); -int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); -int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); + struct extent_map *alloc_extent_map(gfp_t mask); void free_extent_map(struct extent_map *em); -int extent_read_full_page(struct extent_map_tree *tree, struct page *page, - get_extent_t *get_extent); int __init extent_map_init(void); void extent_map_exit(void); - -u64 count_range_bits(struct extent_map_tree *tree, - u64 *start, u64 search_end, - u64 max_bytes, unsigned long bits); - -int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, - int bits, int filled); -int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, - int bits, gfp_t mask); -int set_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, - int bits, gfp_t mask); -int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask); -int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask); -int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask); -int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask); 
-int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask); -int find_first_extent_bit(struct extent_map_tree *tree, u64 start, - u64 *start_ret, u64 *end_ret, int bits); -int extent_invalidatepage(struct extent_map_tree *tree, - struct page *page, unsigned long offset); -int extent_write_full_page(struct extent_map_tree *tree, struct page *page, - get_extent_t *get_extent, - struct writeback_control *wbc); -int extent_writepages(struct extent_map_tree *tree, - struct address_space *mapping, - get_extent_t *get_extent, - struct writeback_control *wbc); -int extent_readpages(struct extent_map_tree *tree, - struct address_space *mapping, - struct list_head *pages, unsigned nr_pages, - get_extent_t get_extent); -int extent_prepare_write(struct extent_map_tree *tree, - struct inode *inode, struct page *page, - unsigned from, unsigned to, get_extent_t *get_extent); -int extent_commit_write(struct extent_map_tree *tree, - struct inode *inode, struct page *page, - unsigned from, unsigned to); -sector_t extent_bmap(struct address_space *mapping, sector_t iblock, - get_extent_t *get_extent); -int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end); -int set_state_private(struct extent_map_tree *tree, u64 start, u64 private); -int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private); -void set_page_extent_mapped(struct page *page); - -struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, - u64 start, unsigned long len, - struct page *page0, - gfp_t mask); -struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, - u64 start, unsigned long len, - gfp_t mask); -void free_extent_buffer(struct extent_buffer *eb); -int read_extent_buffer_pages(struct extent_map_tree *tree, - struct extent_buffer *eb, u64 start, int wait); - -static inline void extent_buffer_get(struct extent_buffer *eb) -{ - atomic_inc(&eb->refs); -} - -int memcmp_extent_buffer(struct extent_buffer *eb, const void 
*ptrv, - unsigned long start, - unsigned long len); -void read_extent_buffer(struct extent_buffer *eb, void *dst, - unsigned long start, - unsigned long len); -void write_extent_buffer(struct extent_buffer *eb, const void *src, - unsigned long start, unsigned long len); -void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, - unsigned long dst_offset, unsigned long src_offset, - unsigned long len); -void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, - unsigned long src_offset, unsigned long len); -void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, - unsigned long src_offset, unsigned long len); -void memset_extent_buffer(struct extent_buffer *eb, char c, - unsigned long start, unsigned long len); -int wait_on_extent_buffer_writeback(struct extent_map_tree *tree, - struct extent_buffer *eb); -int clear_extent_buffer_dirty(struct extent_map_tree *tree, - struct extent_buffer *eb); -int set_extent_buffer_dirty(struct extent_map_tree *tree, - struct extent_buffer *eb); -int set_extent_buffer_uptodate(struct extent_map_tree *tree, - struct extent_buffer *eb); -int extent_buffer_uptodate(struct extent_map_tree *tree, - struct extent_buffer *eb); -int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, - unsigned long min_len, char **token, char **map, - unsigned long *map_start, - unsigned long *map_len, int km); -int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, - unsigned long min_len, char **token, char **map, - unsigned long *map_start, - unsigned long *map_len, int km); -void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); #endif diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1cd8c908811e..c5bb00f92396 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -233,8 +233,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, int err = 0; int i; struct inode *inode = fdentry(file)->d_inode; - 
struct extent_map *em; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; u64 hint_byte; u64 num_bytes; u64 start_pos; @@ -242,11 +241,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, u64 end_pos = pos + write_bytes; u64 inline_size; loff_t isize = i_size_read(inode); - em = alloc_extent_map(GFP_NOFS); - if (!em) - return -ENOMEM; - - em->bdev = inode->i_sb->s_bdev; start_pos = pos & ~((u64)root->sectorsize - 1); num_bytes = (write_bytes + pos - start_pos + @@ -254,7 +248,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, end_of_last_block = start_pos + num_bytes - 1; - lock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); + lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); if (!trans) { @@ -268,7 +262,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, if ((end_of_last_block & 4095) == 0) { printk("strange end of last %Lu %zu %Lu\n", start_pos, write_bytes, end_of_last_block); } - set_extent_uptodate(em_tree, start_pos, end_of_last_block, GFP_NOFS); + set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS); /* FIXME...EIEIO, ENOSPC and more */ @@ -293,6 +287,8 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, inode->i_ino, last_pos_in_file, 0, 0, hole_size); + btrfs_drop_extent_cache(inode, last_pos_in_file, + last_pos_in_file + hole_size -1); btrfs_check_file(root, inode); } if (err) @@ -320,12 +316,12 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, last_end += PAGE_CACHE_SIZE - 1; if (start_pos < isize) { u64 delalloc_start = start_pos; - existing_delalloc = count_range_bits(em_tree, + existing_delalloc = count_range_bits(io_tree, &delalloc_start, end_of_last_block, (u64)-1, EXTENT_DELALLOC); } - 
set_extent_delalloc(em_tree, start_pos, end_of_last_block, + set_extent_delalloc(io_tree, start_pos, end_of_last_block, GFP_NOFS); spin_lock(&root->fs_info->delalloc_lock); root->fs_info->delalloc_bytes += (end_of_last_block + 1 - @@ -346,6 +342,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, inline_size -= start_pos; err = insert_inline_extent(trans, root, inode, start_pos, inline_size, pages, 0, num_pages); + btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1); BUG_ON(err); } if (end_pos > isize) { @@ -356,8 +353,7 @@ failed: err = btrfs_end_transaction(trans, root); out_unlock: mutex_unlock(&root->fs_info->fs_mutex); - unlock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); - free_extent_map(em); + unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); return err; } @@ -367,10 +363,15 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; while(1) { + spin_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, end); - if (!em) + if (!em) { + spin_unlock(&em_tree->lock); break; + } remove_extent_mapping(em_tree, em); + spin_unlock(&em_tree->lock); + /* once for us */ free_extent_map(em); /* once for the tree*/ diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 67005480e139..16d3aef45d18 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -53,7 +53,7 @@ static struct inode_operations btrfs_file_inode_operations; static struct address_space_operations btrfs_aops; static struct address_space_operations btrfs_symlink_aops; static struct file_operations btrfs_dir_file_operations; -static struct extent_map_ops btrfs_extent_map_ops; +static struct extent_io_ops btrfs_extent_io_ops; static struct kmem_cache *btrfs_inode_cachep; struct kmem_cache *btrfs_trans_handle_cachep; @@ -104,6 +104,8 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) u64 num_bytes; u64 cur_alloc_size; u64 blocksize = 
root->sectorsize; + u64 orig_start = start; + u64 orig_num_bytes; struct btrfs_key ins; int ret; @@ -115,6 +117,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) num_bytes = max(blocksize, num_bytes); ret = btrfs_drop_extents(trans, root, inode, start, start + num_bytes, start, &alloc_hint); + orig_num_bytes = num_bytes; if (alloc_hint == EXTENT_MAP_INLINE) goto out; @@ -138,6 +141,8 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) alloc_hint = ins.objectid + ins.offset; start += cur_alloc_size; } + btrfs_drop_extent_cache(inode, orig_start, + orig_start + orig_num_bytes - 1); btrfs_add_ordered_inode(inode); out: btrfs_end_transaction(trans, root); @@ -297,7 +302,7 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) int ret = 0; struct inode *inode = page->mapping->host; struct btrfs_root *root = BTRFS_I(inode)->root; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_csum_item *item; struct btrfs_path *path = NULL; u32 csum; @@ -317,7 +322,7 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) } read_extent_buffer(path->nodes[0], &csum, (unsigned long)item, BTRFS_CRC32_SIZE); - set_state_private(em_tree, start, csum); + set_state_private(io_tree, start, csum); out: if (path) btrfs_free_path(path); @@ -329,17 +334,19 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) { size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT); struct inode *inode = page->mapping->host; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; char *kaddr; u64 private; int ret; struct btrfs_root *root = BTRFS_I(inode)->root; u32 csum = ~(u32)0; unsigned long flags; + if (btrfs_test_opt(root, NODATASUM) || btrfs_test_flag(inode, NODATASUM)) return 0; - ret = get_state_private(em_tree, start, &private); + + ret = 
get_state_private(io_tree, start, &private); local_irq_save(flags); kaddr = kmap_atomic(page, KM_IRQ0); if (ret) { @@ -428,7 +435,7 @@ void btrfs_read_locked_inode(struct inode *inode) switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_mapping->a_ops = &btrfs_aops; - BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops; + BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; break; @@ -873,7 +880,7 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page, size_t zero_start) { char *kaddr; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_root *root = BTRFS_I(inode)->root; u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; u64 page_end = page_start + PAGE_CACHE_SIZE - 1; @@ -884,12 +891,12 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page, WARN_ON(!PageLocked(page)); set_page_extent_mapped(page); - lock_extent(em_tree, page_start, page_end, GFP_NOFS); + lock_extent(io_tree, page_start, page_end, GFP_NOFS); delalloc_start = page_start; - existing_delalloc = count_range_bits(&BTRFS_I(inode)->extent_tree, + existing_delalloc = count_range_bits(&BTRFS_I(inode)->io_tree, &delalloc_start, page_end, PAGE_CACHE_SIZE, EXTENT_DELALLOC); - set_extent_delalloc(&BTRFS_I(inode)->extent_tree, page_start, + set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); spin_lock(&root->fs_info->delalloc_lock); @@ -903,7 +910,7 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page, kunmap(page); } set_page_dirty(page); - unlock_extent(em_tree, page_start, page_end, GFP_NOFS); + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); return ret; } @@ -961,7 +968,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) { struct btrfs_trans_handle *trans; 
struct btrfs_root *root = BTRFS_I(inode)->root; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; u64 mask = root->sectorsize - 1; u64 pos = (inode->i_size + mask) & ~mask; @@ -986,7 +993,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) btrfs_truncate_page(inode->i_mapping, inode->i_size); - lock_extent(em_tree, pos, block_end, GFP_NOFS); + lock_extent(io_tree, pos, block_end, GFP_NOFS); hole_size = block_end - hole_start; mutex_lock(&root->fs_info->fs_mutex); @@ -1001,11 +1008,13 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) inode->i_ino, hole_start, 0, 0, hole_size); + btrfs_drop_extent_cache(inode, hole_start, + hole_size - 1); btrfs_check_file(root, inode); } btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); - unlock_extent(em_tree, pos, block_end, GFP_NOFS); + unlock_extent(io_tree, pos, block_end, GFP_NOFS); if (err) return err; } @@ -1189,7 +1198,8 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) struct btrfs_iget_args *args = p; inode->i_ino = args->ino; BTRFS_I(inode)->root = args->root; - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); + extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); return 0; } @@ -1485,7 +1495,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, if (!inode) return ERR_PTR(-ENOMEM); - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); + extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); BTRFS_I(inode)->root = root; @@ -1672,9 +1683,10 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_mapping->a_ops = &btrfs_aops; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; - 
extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); + extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); - BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops; + BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); @@ -1816,7 +1828,7 @@ out_unlock: } struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, - size_t page_offset, u64 start, u64 end, + size_t page_offset, u64 start, u64 len, int create) { int ret; @@ -1826,7 +1838,6 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, u64 extent_end = 0; u64 objectid = inode->i_ino; u32 found_type; - int failed_insert = 0; struct btrfs_path *path; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_file_extent_item *item; @@ -1834,6 +1845,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, struct btrfs_key found_key; struct extent_map *em = NULL; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_trans_handle *trans = NULL; path = btrfs_alloc_path(); @@ -1841,24 +1853,26 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, mutex_lock(&root->fs_info->fs_mutex); again: - em = lookup_extent_mapping(em_tree, start, end); + spin_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, start, len); + spin_unlock(&em_tree->lock); + if (em) { if (em->start > start) { - printk("get_extent start %Lu em start %Lu\n", - start, em->start); + printk("get_extent lookup [%Lu %Lu] em [%Lu %Lu]\n", + start, len, em->start, em->len); WARN_ON(1); } goto out; } + em = alloc_extent_map(GFP_NOFS); if (!em) { - em = alloc_extent_map(GFP_NOFS); - if (!em) { - err = -ENOMEM; - goto out; - } - em->start = EXTENT_MAP_HOLE; - em->end = EXTENT_MAP_HOLE; + err = -ENOMEM; + goto out; } + + 
em->start = EXTENT_MAP_HOLE; + em->len = (u64)-1; em->bdev = inode->i_sb->s_bdev; ret = btrfs_lookup_file_extent(trans, root, path, objectid, start, trans != NULL); @@ -1893,28 +1907,25 @@ again: if (start < extent_start || start >= extent_end) { em->start = start; if (start < extent_start) { - if (end < extent_start) + if (start + len <= extent_start) goto not_found; - em->end = extent_end - 1; + em->len = extent_end - extent_start; } else { - em->end = end; + em->len = len; } goto not_found_em; } bytenr = btrfs_file_extent_disk_bytenr(leaf, item); if (bytenr == 0) { em->start = extent_start; - em->end = extent_end - 1; + em->len = extent_end - extent_start; em->block_start = EXTENT_MAP_HOLE; - em->block_end = EXTENT_MAP_HOLE; goto insert; } bytenr += btrfs_file_extent_offset(leaf, item); em->block_start = bytenr; - em->block_end = em->block_start + - btrfs_file_extent_num_bytes(leaf, item) - 1; em->start = extent_start; - em->end = extent_end - 1; + em->len = extent_end - extent_start; goto insert; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { unsigned long ptr; @@ -1925,25 +1936,24 @@ again: size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf, path->slots[0])); - extent_end = (extent_start + size - 1) | - ((u64)root->sectorsize - 1); + extent_end = (extent_start + size + root->sectorsize - 1) & + ~((u64)root->sectorsize - 1); if (start < extent_start || start >= extent_end) { em->start = start; if (start < extent_start) { - if (end < extent_start) + if (start + len <= extent_start) goto not_found; - em->end = extent_end; + em->len = extent_end - extent_start; } else { - em->end = end; + em->len = len; } goto not_found_em; } em->block_start = EXTENT_MAP_INLINE; - em->block_end = EXTENT_MAP_INLINE; if (!page) { em->start = extent_start; - em->end = extent_start + size - 1; + em->len = size; goto out; } @@ -1952,8 +1962,7 @@ again: copy_size = min_t(u64, PAGE_CACHE_SIZE - page_offset, size - extent_offset); em->start = extent_start + extent_offset; 
- em->end = (em->start + copy_size -1) | - ((u64)root->sectorsize -1); + em->len = copy_size; map = kmap(page); ptr = btrfs_file_extent_inline_start(item) + extent_offset; if (create == 0 && !PageUptodate(page)) { @@ -1974,7 +1983,8 @@ again: btrfs_mark_buffer_dirty(leaf); } kunmap(page); - set_extent_uptodate(em_tree, em->start, em->end, GFP_NOFS); + set_extent_uptodate(io_tree, em->start, + extent_map_end(em) - 1, GFP_NOFS); goto insert; } else { printk("unkknown found_type %d\n", found_type); @@ -1982,33 +1992,29 @@ again: } not_found: em->start = start; - em->end = end; + em->len = len; not_found_em: em->block_start = EXTENT_MAP_HOLE; - em->block_end = EXTENT_MAP_HOLE; insert: btrfs_release_path(root, path); - if (em->start > start || em->end < start) { - printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->end, start, end); + if (em->start > start || extent_map_end(em) <= start) { + printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->len, start, len); err = -EIO; goto out; } + + err = 0; + spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); if (ret == -EEXIST) { free_extent_map(em); - em = NULL; - if (0 && failed_insert == 1) { - btrfs_drop_extent_cache(inode, start, end); - } - failed_insert++; - if (failed_insert > 5) { - printk("failing to insert %Lu %Lu\n", start, end); + em = lookup_extent_mapping(em_tree, start, len); + if (!em) { err = -EIO; - goto out; + printk("failing to insert %Lu %Lu\n", start, len); } - goto again; } - err = 0; + spin_unlock(&em_tree->lock); out: btrfs_free_path(path); if (trans) { @@ -2032,14 +2038,14 @@ static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock) int btrfs_readpage(struct file *file, struct page *page) { - struct extent_map_tree *tree; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; return extent_read_full_page(tree, page, btrfs_get_extent); } static int 
btrfs_writepage(struct page *page, struct writeback_control *wbc) { - struct extent_map_tree *tree; + struct extent_io_tree *tree; if (current->flags & PF_MEMALLOC) { @@ -2047,15 +2053,15 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc) unlock_page(page); return 0; } - tree = &BTRFS_I(page->mapping->host)->extent_tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; return extent_write_full_page(tree, page, btrfs_get_extent, wbc); } static int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { - struct extent_map_tree *tree; - tree = &BTRFS_I(mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(mapping->host)->io_tree; return extent_writepages(tree, mapping, btrfs_get_extent, wbc); } @@ -2063,19 +2069,21 @@ static int btrfs_readpages(struct file *file, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { - struct extent_map_tree *tree; - tree = &BTRFS_I(mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(mapping->host)->io_tree; return extent_readpages(tree, mapping, pages, nr_pages, btrfs_get_extent); } static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags) { - struct extent_map_tree *tree; + struct extent_io_tree *tree; + struct extent_map_tree *map; int ret; - tree = &BTRFS_I(page->mapping->host)->extent_tree; - ret = try_release_extent_mapping(tree, page); + tree = &BTRFS_I(page->mapping->host)->io_tree; + map = &BTRFS_I(page->mapping->host)->extent_tree; + ret = try_release_extent_mapping(map, tree, page); if (ret == 1) { ClearPagePrivate(page); set_page_private(page, 0); @@ -2086,9 +2094,9 @@ static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags) static void btrfs_invalidatepage(struct page *page, unsigned long offset) { - struct extent_map_tree *tree; + struct extent_io_tree *tree; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; 
extent_invalidatepage(tree, page, offset); btrfs_releasepage(page, GFP_NOFS); } @@ -2374,7 +2382,7 @@ unsigned long btrfs_force_ra(struct address_space *mapping, int btrfs_defrag_file(struct file *file) { struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct page *page; unsigned long last_index; unsigned long ra_index = 0; @@ -2414,13 +2422,13 @@ int btrfs_defrag_file(struct file *file) { page_start = (u64)page->index << PAGE_CACHE_SHIFT; page_end = page_start + PAGE_CACHE_SIZE - 1; - lock_extent(em_tree, page_start, page_end, GFP_NOFS); + lock_extent(io_tree, page_start, page_end, GFP_NOFS); delalloc_start = page_start; existing_delalloc = - count_range_bits(&BTRFS_I(inode)->extent_tree, + count_range_bits(&BTRFS_I(inode)->io_tree, &delalloc_start, page_end, PAGE_CACHE_SIZE, EXTENT_DELALLOC); - set_extent_delalloc(em_tree, page_start, + set_extent_delalloc(io_tree, page_start, page_end, GFP_NOFS); spin_lock(&root->fs_info->delalloc_lock); @@ -2428,7 +2436,7 @@ int btrfs_defrag_file(struct file *file) { existing_delalloc; spin_unlock(&root->fs_info->delalloc_lock); - unlock_extent(em_tree, page_start, page_end, GFP_NOFS); + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); set_page_dirty(page); unlock_page(page); page_cache_release(page); @@ -2842,9 +2850,10 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_mapping->a_ops = &btrfs_aops; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); + extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); - BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops; + BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } dir->i_sb->s_dirt = 1; 
btrfs_update_inode_block_group(trans, inode); @@ -2934,7 +2943,7 @@ static struct file_operations btrfs_dir_file_operations = { #endif }; -static struct extent_map_ops btrfs_extent_map_ops = { +static struct extent_io_ops btrfs_extent_io_ops = { .fill_delalloc = run_delalloc_range, .writepage_io_hook = btrfs_writepage_io_hook, .readpage_io_hook = btrfs_readpage_io_hook, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8b52c69fda2e..f8a1016600b1 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -468,10 +468,15 @@ static int __init init_btrfs_fs(void) err = btrfs_init_cachep(); if (err) goto free_transaction_sys; - err = extent_map_init(); + + err = extent_io_init(); if (err) goto free_cachep; + err = extent_map_init(); + if (err) + goto free_extent_io; + err = register_filesystem(&btrfs_fs_type); if (err) goto free_extent_map; @@ -479,6 +484,8 @@ static int __init init_btrfs_fs(void) free_extent_map: extent_map_exit(); +free_extent_io: + extent_io_exit(); free_cachep: btrfs_destroy_cachep(); free_transaction_sys: @@ -492,6 +499,7 @@ static void __exit exit_btrfs_fs(void) btrfs_exit_transaction_sys(); btrfs_destroy_cachep(); extent_map_exit(); + extent_io_exit(); unregister_filesystem(&btrfs_fs_type); btrfs_exit_sysfs(); } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 163c01a24498..b4a1bc62a784 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -70,7 +70,7 @@ static int join_transaction(struct btrfs_root *root) INIT_LIST_HEAD(&cur_trans->pending_snapshots); list_add_tail(&cur_trans->list, &root->fs_info->trans_list); btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree); - extent_map_tree_init(&cur_trans->dirty_pages, + extent_io_tree_init(&cur_trans->dirty_pages, root->fs_info->btree_inode->i_mapping, GFP_NOFS); } else { @@ -153,7 +153,7 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, int ret; int err; int werr = 0; - struct extent_map_tree *dirty_pages; + struct extent_io_tree 
*dirty_pages; struct page *page; struct inode *btree_inode = root->fs_info->btree_inode; u64 start; @@ -610,7 +610,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_transaction *cur_trans; struct btrfs_transaction *prev_trans = NULL; struct list_head dirty_fs_roots; - struct extent_map_tree *pinned_copy; + struct extent_io_tree *pinned_copy; DEFINE_WAIT(wait); int ret; @@ -639,7 +639,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, if (!pinned_copy) return -ENOMEM; - extent_map_tree_init(pinned_copy, + extent_io_tree_init(pinned_copy, root->fs_info->btree_inode->i_mapping, GFP_NOFS); trans->transaction->in_commit = 1; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index fd52e9b23922..c3172ddb3321 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -29,7 +29,7 @@ struct btrfs_transaction { int use_count; int commit_done; struct list_head list; - struct extent_map_tree dirty_pages; + struct extent_io_tree dirty_pages; unsigned long start_time; struct btrfs_ordered_inode_tree ordered_inode_tree; wait_queue_head_t writer_wait; -- cgit v1.2.3-70-g09d2 From 306929f364b993581c91596230807fa1c022268a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 10 Jun 2008 10:21:04 -0400 Subject: btrfs: fix strange indentation in lookup_extent_mapping Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 268ad8facf6e..ba46f7911d99 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -231,7 +231,13 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, { struct extent_map *em; struct rb_node *rb_node; - struct rb_node *prev = NULL; struct rb_node *next = NULL; u64 end = range_end(start, len); em = tree->last; if (em && end > em->start && start < extent_map_end(em)) goto found; + struct rb_node *prev = NULL; + 
struct rb_node *next = NULL; + u64 end = range_end(start, len); + + em = tree->last; + if (em && end > em->start && start < extent_map_end(em)) + goto found; rb_node = __tree_search(&tree->map, start, &prev, &next); if (!rb_node && prev) { -- cgit v1.2.3-70-g09d2 From 9d2423c5c3fbb0f110ac0b6cdc5a8e4d64729483 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Jun 2008 21:52:17 -0400 Subject: Btrfs: kerneldoc comments for extent_map.c Add kerneldoc comments for all exported functions. Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 61 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 12 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index ba46f7911d99..f5a04eb9a2ac 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -31,6 +31,14 @@ void extent_map_exit(void) kmem_cache_destroy(extent_map_cache); } +/** + * extent_map_tree_init - initialize extent map tree + * @tree: tree to initialize + * @mask: flags for memory allocations during tree operations + * + * Initialize the extent tree @tree. Should be called for each new inode + * or other user of the extent_map interface. + */ void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) { tree->map.rb_node = NULL; @@ -39,6 +47,14 @@ void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) } EXPORT_SYMBOL(extent_map_tree_init); +/** + * alloc_extent_map - allocate new extent map structure + * @mask: memory allocation flags + * + * Allocate a new extent_map structure. 
The new structure is + * returned with a reference count of one and needs to be + * freed using free_extent_map() + */ struct extent_map *alloc_extent_map(gfp_t mask) { struct extent_map *em; @@ -52,6 +68,13 @@ struct extent_map *alloc_extent_map(gfp_t mask) } EXPORT_SYMBOL(alloc_extent_map); +/** + * free_extent_map - drop reference count of an extent_map + * @em: extent map being released + * + * Drops the reference count on @em by one and frees the structure + * if the reference count hits zero. + */ void free_extent_map(struct extent_map *em) { if (!em) @@ -166,10 +189,15 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next) return 0; } -/* - * add_extent_mapping tries a simple forward/backward merge with existing - * mappings. The extent_map struct passed in will be inserted into - * the tree directly (no copies made, just a reference taken). +/** + * add_extent_mapping - add new extent map to the extent tree + * @tree: tree to insert new map in + * @em: map to insert + * + * Insert @em into @tree or perform a simple forward/backward merge with + * existing mappings. The extent_map struct passed in will be inserted + * into the tree directly, with an additional reference taken, or a + * reference dropped if the merge attempt was successful. */ int add_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) @@ -220,11 +248,16 @@ static u64 range_end(u64 start, u64 len) return start + len; } -/* - * lookup_extent_mapping returns the first extent_map struct in the - * tree that intersects the [start, len] range. There may - * be additional objects in the tree that intersect, so check the object - * returned carefully to make sure you don't need additional lookups. +/** + * lookup_extent_mapping - lookup extent_map + * @tree: tree to lookup in + * @start: byte offset to start the search + * @len: length of the lookup range + * + * Find and return the first extent_map struct in @tree that intersects the + * [start, len] range.
There may be additional objects in the tree that + * intersect, so check the object returned carefully to make sure that no + * additional lookups are needed. */ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, u64 start, u64 len) @@ -273,9 +306,13 @@ out: } EXPORT_SYMBOL(lookup_extent_mapping); -/* - * removes an extent_map struct from the tree. No reference counts are - * dropped, and no checks are done to see if the range is in use +/** + * remove_extent_mapping - removes an extent_map from the extent tree + * @tree: extent tree to remove from + * @em: extent map being removed + * + * Removes @em from @tree. No reference counts are dropped, and no checks + * are done to see if the range is in use */ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) { -- cgit v1.2.3-70-g09d2 From e6dcd2dc9c489108648e2ed543315dd134d50a9a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Jul 2008 12:53:50 -0400 Subject: Btrfs: New data=ordered implementation The old data=ordered code would force commit to wait until all the data extents from the transaction were fully on disk. This introduced large latencies into the commit and stalled new writers in the transaction for a long time. The new code changes the way data allocations and extents work: * When delayed allocation is filled, data extents are reserved, and the extent bit EXTENT_ORDERED is set on the entire range of the extent. A struct btrfs_ordered_extent is allocated and inserted into a per-inode rbtree to track the pending extents. * As each page is written EXTENT_ORDERED is cleared on the bytes corresponding to that page. * When all of the bytes corresponding to a single struct btrfs_ordered_extent are written, the previously reserved extent is inserted into the FS btree and into the extent allocation trees. The checksums for the file data are also updated.
Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 4 +- fs/btrfs/ctree.h | 19 +- fs/btrfs/disk-io.c | 13 +- fs/btrfs/extent-tree.c | 132 +++++++++----- fs/btrfs/extent_io.c | 52 +++++- fs/btrfs/extent_io.h | 14 +- fs/btrfs/extent_map.c | 5 +- fs/btrfs/file-item.c | 62 ++++--- fs/btrfs/file.c | 67 ++++--- fs/btrfs/inode.c | 447 ++++++++++++++++++++++++++++++----------------- fs/btrfs/ordered-data.c | 455 ++++++++++++++++++++++++++++++------------------ fs/btrfs/ordered-data.h | 71 ++++++-- fs/btrfs/transaction.c | 67 +------ fs/btrfs/transaction.h | 4 - 14 files changed, 910 insertions(+), 502 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 40b4e0c9cd09..8d03687510e0 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -21,6 +21,7 @@ #include "extent_map.h" #include "extent_io.h" +#include "ordered-data.h" /* in memory btrfs inode */ struct btrfs_inode { @@ -32,9 +33,8 @@ struct btrfs_inode { struct extent_io_tree io_failure_tree; struct mutex csum_mutex; struct inode vfs_inode; - atomic_t ordered_writeback; + struct btrfs_ordered_inode_tree ordered_tree; - u64 ordered_trans; /* * transid of the trans_handle that last modified this inode */ diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f3783dbd9b60..ceebc052ddcb 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "bit-radix.h" #include "extent_io.h" @@ -37,6 +38,7 @@ extern struct kmem_cache *btrfs_trans_handle_cachep; extern struct kmem_cache *btrfs_transaction_cachep; extern struct kmem_cache *btrfs_bit_radix_cachep; extern struct kmem_cache *btrfs_path_cachep; +struct btrfs_ordered_sum; #define BTRFS_MAGIC "_B5RfS_M" @@ -510,6 +512,7 @@ struct btrfs_fs_info { u64 max_inline; u64 alloc_start; struct btrfs_transaction *running_transaction; + wait_queue_head_t transaction_throttle; struct btrfs_super_block super_copy; struct btrfs_super_block 
super_for_commit; struct block_device *__bdev; @@ -541,6 +544,7 @@ struct btrfs_fs_info { */ struct btrfs_workers workers; struct btrfs_workers endio_workers; + struct btrfs_workers endio_write_workers; struct btrfs_workers submit_workers; struct task_struct *transaction_kthread; struct task_struct *cleaner_kthread; @@ -1384,6 +1388,17 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, u64 owner, u64 owner_offset, u64 empty_size, u64 hint_byte, u64 search_end, struct btrfs_key *ins, u64 data); +int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset, + struct btrfs_key *ins); +int btrfs_reserve_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 num_bytes, u64 min_alloc_size, + u64 empty_size, u64 hint_byte, + u64 search_end, struct btrfs_key *ins, + u64 data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root @@ -1556,9 +1571,9 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, u64 bytenr, int mod); int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - struct bio *bio, char *sums); + struct btrfs_ordered_sum *sums); int btrfs_csum_one_bio(struct btrfs_root *root, - struct bio *bio, char **sums_ret); + struct bio *bio, struct btrfs_ordered_sum **sums_ret); struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b01b3f4f92a9..4a5ebafb935a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -407,7 +407,11 @@ static int end_workqueue_bio(struct bio *bio, end_io_wq->error = err; end_io_wq->work.func = end_workqueue_fn; end_io_wq->work.flags = 0; - btrfs_queue_worker(&fs_info->endio_workers, 
&end_io_wq->work); + if (bio->bi_rw & (1 << BIO_RW)) + btrfs_queue_worker(&fs_info->endio_write_workers, + &end_io_wq->work); + else + btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work); #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) return 0; @@ -1286,6 +1290,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->transaction_kthread_mutex); mutex_init(&fs_info->cleaner_mutex); mutex_init(&fs_info->volume_mutex); + init_waitqueue_head(&fs_info->transaction_throttle); #if 0 ret = add_hasher(fs_info, "crc32c"); @@ -1325,9 +1330,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size); btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size); btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size); + btrfs_init_workers(&fs_info->endio_write_workers, + fs_info->thread_pool_size); btrfs_start_workers(&fs_info->workers, 1); btrfs_start_workers(&fs_info->submit_workers, 1); btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); + btrfs_start_workers(&fs_info->endio_write_workers, + fs_info->thread_pool_size); err = -EINVAL; if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) { @@ -1447,6 +1456,7 @@ fail_sb_buffer: extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); + btrfs_stop_workers(&fs_info->endio_write_workers); btrfs_stop_workers(&fs_info->submit_workers); fail_iput: iput(fs_info->btree_inode); @@ -1702,6 +1712,7 @@ int close_ctree(struct btrfs_root *root) btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); + btrfs_stop_workers(&fs_info->endio_write_workers); btrfs_stop_workers(&fs_info->submit_workers); iput(fs_info->btree_inode); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8ebfa6be0790..343d1101c31c 100644 --- a/fs/btrfs/extent-tree.c +++ 
b/fs/btrfs/extent-tree.c @@ -1895,36 +1895,17 @@ error: return ret; } -/* - * finds a free extent and does all the dirty work required for allocation - * returns the key for the extent through ins, and a tree buffer for - * the first block of the extent through buf. - * - * returns 0 if everything worked, non-zero otherwise. - */ -int btrfs_alloc_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 num_bytes, u64 min_alloc_size, - u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset, - u64 empty_size, u64 hint_byte, - u64 search_end, struct btrfs_key *ins, u64 data) +static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 num_bytes, u64 min_alloc_size, + u64 empty_size, u64 hint_byte, + u64 search_end, struct btrfs_key *ins, + u64 data) { int ret; - int pending_ret; - u64 super_used; - u64 root_used; u64 search_start = 0; u64 alloc_profile; - u32 sizes[2]; struct btrfs_fs_info *info = root->fs_info; - struct btrfs_root *extent_root = info->extent_root; - struct btrfs_extent_item *extent_item; - struct btrfs_extent_ref *ref; - struct btrfs_path *path; - struct btrfs_key keys[2]; - - maybe_lock_mutex(root); if (data) { alloc_profile = info->avail_data_alloc_bits & @@ -1974,11 +1955,48 @@ again: } if (ret) { printk("allocation failed flags %Lu\n", data); - } - if (ret) { BUG(); - goto out; } + clear_extent_dirty(&root->fs_info->free_space_cache, + ins->objectid, ins->objectid + ins->offset - 1, + GFP_NOFS); + return 0; +} + +int btrfs_reserve_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 num_bytes, u64 min_alloc_size, + u64 empty_size, u64 hint_byte, + u64 search_end, struct btrfs_key *ins, + u64 data) +{ + int ret; + maybe_lock_mutex(root); + ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, + empty_size, hint_byte, search_end, ins, + data); + maybe_unlock_mutex(root); + return ret; +} + +static int __btrfs_alloc_reserved_extent(struct 
btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset, + struct btrfs_key *ins) +{ + int ret; + int pending_ret; + u64 super_used; + u64 root_used; + u64 num_bytes = ins->offset; + u32 sizes[2]; + struct btrfs_fs_info *info = root->fs_info; + struct btrfs_root *extent_root = info->extent_root; + struct btrfs_extent_item *extent_item; + struct btrfs_extent_ref *ref; + struct btrfs_path *path; + struct btrfs_key keys[2]; /* block accounting for super block */ spin_lock_irq(&info->delalloc_lock); @@ -1990,10 +2008,6 @@ again: root_used = btrfs_root_used(&root->root_item); btrfs_set_root_used(&root->root_item, root_used + num_bytes); - clear_extent_dirty(&root->fs_info->free_space_cache, - ins->objectid, ins->objectid + ins->offset - 1, - GFP_NOFS); - if (root == extent_root) { set_extent_bits(&root->fs_info->extent_ins, ins->objectid, ins->objectid + ins->offset - 1, @@ -2001,10 +2015,6 @@ again: goto update_block; } - WARN_ON(trans->alloc_exclude_nr); - trans->alloc_exclude_start = ins->objectid; - trans->alloc_exclude_nr = ins->offset; - memcpy(&keys[0], ins, sizeof(*ins)); keys[1].offset = hash_extent_ref(root_objectid, ref_generation, owner, owner_offset); @@ -2054,6 +2064,51 @@ update_block: BUG(); } out: + return ret; +} + +int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset, + struct btrfs_key *ins) +{ + int ret; + maybe_lock_mutex(root); + ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid, + ref_generation, owner, + owner_offset, ins); + maybe_unlock_mutex(root); + return ret; +} +/* + * finds a free extent and does all the dirty work required for allocation + * returns the key for the extent through ins, and a tree buffer for + * the first block of the extent through buf. + * + * returns 0 if everything worked, non-zero otherwise. 
+ */ +int btrfs_alloc_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 num_bytes, u64 min_alloc_size, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset, + u64 empty_size, u64 hint_byte, + u64 search_end, struct btrfs_key *ins, u64 data) +{ + int ret; + + maybe_lock_mutex(root); + + ret = __btrfs_reserve_extent(trans, root, num_bytes, + min_alloc_size, empty_size, hint_byte, + search_end, ins, data); + BUG_ON(ret); + ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid, + ref_generation, owner, + owner_offset, ins); + BUG_ON(ret); + maybe_unlock_mutex(root); return ret; } @@ -2288,8 +2343,8 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->alloc_mutex); /* we've dropped the lock, double check */ - ret = drop_snap_lookup_refcount(root, bytenr, - blocksize, &refs); + ret = lookup_extent_ref(NULL, root, bytenr, blocksize, + &refs); BUG_ON(ret); if (refs != 1) { parent = path->nodes[*level]; @@ -2584,7 +2639,6 @@ out_unlock: kfree(ra); trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); if (trans) { - btrfs_add_ordered_inode(inode); btrfs_end_transaction(trans, BTRFS_I(inode)->root); mark_inode_dirty(inode); } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 40a5f53cb040..3f82a6e9ca4f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -793,6 +793,13 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, } EXPORT_SYMBOL(set_extent_dirty); +int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask); +} +EXPORT_SYMBOL(set_extent_ordered); + int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits, gfp_t mask) { @@ -812,8 +819,8 @@ int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { return set_extent_bit(tree, start, end, - EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL, 
- mask); + EXTENT_DELALLOC | EXTENT_DIRTY, + 0, NULL, mask); } EXPORT_SYMBOL(set_extent_delalloc); @@ -825,6 +832,13 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, } EXPORT_SYMBOL(clear_extent_dirty); +int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask); +} +EXPORT_SYMBOL(clear_extent_ordered); + int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { @@ -1395,10 +1409,9 @@ static int end_bio_extent_writepage(struct bio *bio, if (--bvec >= bio->bi_io_vec) prefetchw(&bvec->bv_page->flags); - if (tree->ops && tree->ops->writepage_end_io_hook) { ret = tree->ops->writepage_end_io_hook(page, start, - end, state); + end, state, uptodate); if (ret) uptodate = 0; } @@ -1868,9 +1881,14 @@ static int __extent_read_full_page(struct extent_io_tree *tree, unlock_extent(tree, cur, end, GFP_NOFS); break; } - extent_offset = cur - em->start; + if (extent_map_end(em) <= cur) { +printk("bad mapping em [%Lu %Lu] cur %Lu\n", em->start, extent_map_end(em), cur); + } BUG_ON(extent_map_end(em) <= cur); + if (end < cur) { +printk("2bad mapping end %Lu cur %Lu\n", end, cur); + } BUG_ON(end < cur); iosize = min(extent_map_end(em) - cur, end - cur + 1); @@ -1976,6 +1994,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, u64 last_byte = i_size_read(inode); u64 block_start; u64 iosize; + u64 unlock_start; sector_t sector; struct extent_map *em; struct block_device *bdev; @@ -1988,7 +2007,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, u64 nr_delalloc; u64 delalloc_end; - WARN_ON(!PageLocked(page)); page_offset = i_size & (PAGE_CACHE_SIZE - 1); if (page->index > end_index || @@ -2030,6 +2048,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, delalloc_start = delalloc_end + 1; } lock_extent(tree, start, page_end, GFP_NOFS); + 
unlock_start = start; end = page_end; if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { @@ -2038,6 +2057,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, if (last_byte <= start) { clear_extent_dirty(tree, start, page_end, GFP_NOFS); + unlock_extent(tree, start, page_end, GFP_NOFS); + if (tree->ops && tree->ops->writepage_end_io_hook) + tree->ops->writepage_end_io_hook(page, start, + page_end, NULL, 1); + unlock_start = page_end + 1; goto done; } @@ -2047,6 +2071,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, while (cur <= end) { if (cur >= last_byte) { clear_extent_dirty(tree, cur, page_end, GFP_NOFS); + unlock_extent(tree, unlock_start, page_end, GFP_NOFS); + if (tree->ops && tree->ops->writepage_end_io_hook) + tree->ops->writepage_end_io_hook(page, cur, + page_end, NULL, 1); + unlock_start = page_end + 1; break; } em = epd->get_extent(inode, page, page_offset, cur, @@ -2071,8 +2100,16 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, block_start == EXTENT_MAP_INLINE) { clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); + + unlock_extent(tree, unlock_start, cur + iosize -1, + GFP_NOFS); + if (tree->ops && tree->ops->writepage_end_io_hook) + tree->ops->writepage_end_io_hook(page, cur, + cur + iosize - 1, + NULL, 1); cur = cur + iosize; page_offset += iosize; + unlock_start = cur; continue; } @@ -2119,7 +2156,8 @@ done: set_page_writeback(page); end_page_writeback(page); } - unlock_extent(tree, start, page_end, GFP_NOFS); + if (unlock_start <= page_end) + unlock_extent(tree, unlock_start, page_end, GFP_NOFS); unlock_page(page); return 0; } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index f1960dafaa19..2268a7995896 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -13,6 +13,8 @@ #define EXTENT_DEFRAG (1 << 6) #define EXTENT_DEFRAG_DONE (1 << 7) #define EXTENT_BUFFER_FILLED (1 << 8) +#define EXTENT_ORDERED (1 
<< 9) +#define EXTENT_ORDERED_METADATA (1 << 10) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) /* @@ -42,7 +44,7 @@ struct extent_io_ops { int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state); int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, - struct extent_state *state); + struct extent_state *state, int uptodate); int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, unsigned long old, unsigned long bits); int (*clear_bit_hook)(struct inode *inode, u64 start, u64 end, @@ -131,6 +133,8 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, int filled); int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits, gfp_t mask); +int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, + int bits, int wake, int delete, gfp_t mask); int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits, gfp_t mask); int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, @@ -141,8 +145,14 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); +int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); +int clear_extent_ordered_metadata(struct extent_io_tree *tree, u64 start, + u64 end, gfp_t mask); int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); +int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); int find_first_extent_bit(struct extent_io_tree *tree, u64 start, u64 *start_ret, u64 *end_ret, int bits); struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, @@ -209,6 +219,8 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, unsigned long start, unsigned long len); int wait_on_extent_buffer_writeback(struct extent_io_tree *tree, struct extent_buffer *eb); +int 
wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end); +int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits); int clear_extent_buffer_dirty(struct extent_io_tree *tree, struct extent_buffer *eb); int set_extent_buffer_dirty(struct extent_io_tree *tree, diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index f5a04eb9a2ac..81123277c2b8 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -206,10 +206,11 @@ int add_extent_mapping(struct extent_map_tree *tree, struct extent_map *merge = NULL; struct rb_node *rb; + BUG_ON(spin_trylock(&tree->lock)); rb = tree_insert(&tree->map, em->start, &em->rb_node); if (rb) { - merge = rb_entry(rb, struct extent_map, rb_node); ret = -EEXIST; + free_extent_map(merge); goto out; } atomic_inc(&em->refs); @@ -268,6 +269,7 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, struct rb_node *next = NULL; u64 end = range_end(start, len); + BUG_ON(spin_trylock(&tree->lock)); em = tree->last; if (em && end > em->start && start < extent_map_end(em)) goto found; @@ -318,6 +320,7 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) { int ret = 0; + BUG_ON(spin_trylock(&tree->lock)); rb_erase(&em->rb_node, &tree->map); em->in_tree = 0; if (tree->last == em) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index f537eb43c2c6..345caf8ff516 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -135,26 +135,37 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, } int btrfs_csum_one_bio(struct btrfs_root *root, - struct bio *bio, char **sums_ret) + struct bio *bio, struct btrfs_ordered_sum **sums_ret) { - u32 *sums; + struct btrfs_ordered_sum *sums; + struct btrfs_sector_sum *sector_sum; char *data; struct bio_vec *bvec = bio->bi_io_vec; int bio_index = 0; - sums = kmalloc(bio->bi_vcnt * BTRFS_CRC32_SIZE, GFP_NOFS); + WARN_ON(bio->bi_vcnt <= 0); + sums = kzalloc(btrfs_ordered_sum_size(root, 
bio->bi_size), GFP_NOFS); if (!sums) return -ENOMEM; - *sums_ret = (char *)sums; + *sums_ret = sums; + sector_sum = &sums->sums; + sums->file_offset = page_offset(bvec->bv_page); + sums->len = bio->bi_size; + INIT_LIST_HEAD(&sums->list); while(bio_index < bio->bi_vcnt) { data = kmap_atomic(bvec->bv_page, KM_USER0); - *sums = ~(u32)0; - *sums = btrfs_csum_data(root, data + bvec->bv_offset, - *sums, bvec->bv_len); + sector_sum->sum = ~(u32)0; + sector_sum->sum = btrfs_csum_data(root, + data + bvec->bv_offset, + sector_sum->sum, + bvec->bv_len); kunmap_atomic(data, KM_USER0); - btrfs_csum_final(*sums, (char *)sums); - sums++; + btrfs_csum_final(sector_sum->sum, + (char *)&sector_sum->sum); + sector_sum->offset = page_offset(bvec->bv_page) + + bvec->bv_offset; + sector_sum++; bio_index++; bvec++; } @@ -163,7 +174,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - struct bio *bio, char *sums) + struct btrfs_ordered_sum *sums) { u64 objectid = inode->i_ino; u64 offset; @@ -171,17 +182,16 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_key file_key; struct btrfs_key found_key; u64 next_offset; + u64 total_bytes = 0; int found_next; struct btrfs_path *path; struct btrfs_csum_item *item; struct btrfs_csum_item *item_end; struct extent_buffer *leaf = NULL; u64 csum_offset; - u32 *sums32 = (u32 *)sums; + struct btrfs_sector_sum *sector_sum; u32 nritems; u32 ins_size; - int bio_index = 0; - struct bio_vec *bvec = bio->bi_io_vec; char *eb_map; char *eb_token; unsigned long map_len; @@ -189,10 +199,11 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); + sector_sum = &sums->sums; again: next_offset = (u64)-1; found_next = 0; - offset = page_offset(bvec->bv_page) + bvec->bv_offset; + offset = sector_sum->offset; file_key.objectid = objectid; file_key.offset = offset;
btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); @@ -303,7 +314,7 @@ found: item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + btrfs_item_size_nr(leaf, path->slots[0])); eb_token = NULL; -next_bvec: +next_sector: if (!eb_token || (unsigned long)item + BTRFS_CRC32_SIZE >= map_start + map_len) { @@ -321,21 +332,20 @@ next_bvec: } if (eb_token) { memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)), - sums32, BTRFS_CRC32_SIZE); + &sector_sum->sum, BTRFS_CRC32_SIZE); } else { - write_extent_buffer(leaf, sums32, (unsigned long)item, - BTRFS_CRC32_SIZE); + write_extent_buffer(leaf, &sector_sum->sum, + (unsigned long)item, BTRFS_CRC32_SIZE); } - bio_index++; - bvec++; - sums32++; - if (bio_index < bio->bi_vcnt) { + total_bytes += root->sectorsize; + sector_sum++; + if (total_bytes < sums->len) { item = (struct btrfs_csum_item *)((char *)item + BTRFS_CRC32_SIZE); if (item < item_end && offset + PAGE_CACHE_SIZE == - page_offset(bvec->bv_page)) { - offset = page_offset(bvec->bv_page); - goto next_bvec; + sector_sum->offset) { + offset = sector_sum->offset; + goto next_sector; } } if (eb_token) { @@ -343,7 +353,7 @@ next_bvec: eb_token = NULL; } btrfs_mark_buffer_dirty(path->nodes[0]); - if (bio_index < bio->bi_vcnt) { + if (total_bytes < sums->len) { btrfs_release_path(root, path); goto again; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8037792f8789..12e765f7e0d4 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -34,7 +34,6 @@ #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" -#include "ordered-data.h" #include "ioctl.h" #include "print-tree.h" #include "compat.h" @@ -273,7 +272,9 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, u64 mask = root->sectorsize - 1; last_pos_in_file = (isize + mask) & ~mask; hole_size = (start_pos - last_pos_in_file + mask) & ~mask; - if (last_pos_in_file < start_pos) { + if (hole_size > 0) { + btrfs_wait_ordered_range(inode, last_pos_in_file, + 
last_pos_in_file + hole_size); err = btrfs_drop_extents(trans, root, inode, last_pos_in_file, last_pos_in_file + hole_size, @@ -303,19 +304,17 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, inline_size > root->fs_info->max_inline || (inline_size & (root->sectorsize -1)) == 0 || inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { - u64 last_end; - + /* check for reserved extents on each page, we don't want + * to reset the delalloc bit on things that already have + * extents reserved. + */ + set_extent_delalloc(io_tree, start_pos, + end_of_last_block, GFP_NOFS); for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; SetPageUptodate(p); set_page_dirty(p); } - last_end = (u64)(pages[num_pages -1]->index) << - PAGE_CACHE_SHIFT; - last_end += PAGE_CACHE_SIZE - 1; - set_extent_delalloc(io_tree, start_pos, end_of_last_block, - GFP_NOFS); - btrfs_add_ordered_inode(inode); } else { u64 aligned_end; /* step one, delete the existing extents in this range */ @@ -350,10 +349,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) struct extent_map *split = NULL; struct extent_map *split2 = NULL; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_map *tmp; u64 len = end - start + 1; + u64 next_start; int ret; int testend = 1; + WARN_ON(end < start); if (end == (u64)-1) { len = (u64)-1; testend = 0; @@ -370,6 +372,8 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) spin_unlock(&em_tree->lock); break; } + tmp = rb_entry(&em->rb_node, struct extent_map, rb_node); + next_start = tmp->start; remove_extent_mapping(em_tree, em); if (em->block_start < EXTENT_MAP_LAST_BYTE && @@ -778,37 +782,58 @@ static int prepare_pages(struct btrfs_root *root, struct file *file, struct inode *inode = fdentry(file)->d_inode; int err = 0; u64 start_pos; + u64 last_pos; start_pos = pos & ~((u64)root->sectorsize - 1); + last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; memset(pages, 0, 
num_pages * sizeof(struct page *)); - +again: for (i = 0; i < num_pages; i++) { pages[i] = grab_cache_page(inode->i_mapping, index + i); if (!pages[i]) { err = -ENOMEM; BUG_ON(1); } -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - ClearPageDirty(pages[i]); -#else - cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); -#endif wait_on_page_writeback(pages[i]); - set_page_extent_mapped(pages[i]); - WARN_ON(!PageLocked(pages[i])); } if (start_pos < inode->i_size) { - u64 last_pos; - last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; + struct btrfs_ordered_extent *ordered; lock_extent(&BTRFS_I(inode)->io_tree, start_pos, last_pos - 1, GFP_NOFS); + ordered = btrfs_lookup_first_ordered_extent(inode, last_pos -1); + if (ordered && + ordered->file_offset + ordered->len > start_pos && + ordered->file_offset < last_pos) { + btrfs_put_ordered_extent(ordered); + unlock_extent(&BTRFS_I(inode)->io_tree, + start_pos, last_pos - 1, GFP_NOFS); + for (i = 0; i < num_pages; i++) { + unlock_page(pages[i]); + page_cache_release(pages[i]); + } + btrfs_wait_ordered_range(inode, start_pos, + last_pos - start_pos); + goto again; + } + if (ordered) + btrfs_put_ordered_extent(ordered); + clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos, last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC, GFP_NOFS); unlock_extent(&BTRFS_I(inode)->io_tree, start_pos, last_pos - 1, GFP_NOFS); } + for (i = 0; i < num_pages; i++) { +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + ClearPageDirty(pages[i]); +#else + cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); +#endif + set_page_extent_mapped(pages[i]); + WARN_ON(!PageLocked(pages[i])); + } return 0; } @@ -969,13 +994,11 @@ out_nolock: (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); } current->backing_dev_info = NULL; - btrfs_ordered_throttle(root, inode); return num_written ? 
num_written : err; } int btrfs_release_file(struct inode * inode, struct file * filp) { - btrfs_del_ordered_inode(inode, 0); if (filp->private_data) btrfs_ioctl_trans_end(filp); return 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d39433dfb2c7..c5a62f0b9595 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -43,6 +43,7 @@ #include "ioctl.h" #include "print-tree.h" #include "volumes.h" +#include "ordered-data.h" struct btrfs_iget_args { u64 ino; @@ -109,10 +110,11 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) u64 num_bytes; u64 cur_alloc_size; u64 blocksize = root->sectorsize; - u64 orig_start = start; u64 orig_num_bytes; struct btrfs_key ins; - int ret; + struct extent_map *em; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + int ret = 0; trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); @@ -120,33 +122,44 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) num_bytes = (end - start + blocksize) & ~(blocksize - 1); num_bytes = max(blocksize, num_bytes); - ret = btrfs_drop_extents(trans, root, inode, - start, start + num_bytes, start, &alloc_hint); orig_num_bytes = num_bytes; if (alloc_hint == EXTENT_MAP_INLINE) goto out; BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy)); + btrfs_drop_extent_cache(inode, start, start + num_bytes - 1); while(num_bytes > 0) { cur_alloc_size = min(num_bytes, root->fs_info->max_extent); - ret = btrfs_alloc_extent(trans, root, cur_alloc_size, - root->sectorsize, - root->root_key.objectid, - trans->transid, - inode->i_ino, start, 0, - alloc_hint, (u64)-1, &ins, 1); + ret = btrfs_reserve_extent(trans, root, cur_alloc_size, + root->sectorsize, 0, 0, + (u64)-1, &ins, 1); if (ret) { WARN_ON(1); goto out; } + em = alloc_extent_map(GFP_NOFS); + em->start = start; + em->len = ins.offset; + em->block_start = ins.objectid; + em->bdev = root->fs_info->fs_devices->latest_bdev; + while(1) { + spin_lock(&em_tree->lock); + ret = 
add_extent_mapping(em_tree, em); + spin_unlock(&em_tree->lock); + if (ret != -EEXIST) { + free_extent_map(em); + break; + } + btrfs_drop_extent_cache(inode, start, + start + ins.offset - 1); + } + cur_alloc_size = ins.offset; - ret = btrfs_insert_file_extent(trans, root, inode->i_ino, - start, ins.objectid, ins.offset, - ins.offset, 0); - inode->i_blocks += ins.offset >> 9; - btrfs_check_file(root, inode); + ret = btrfs_add_ordered_extent(inode, start, ins.objectid, + ins.offset); + BUG_ON(ret); if (num_bytes < cur_alloc_size) { printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes, cur_alloc_size); @@ -156,10 +169,6 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) alloc_hint = ins.objectid + ins.offset; start += cur_alloc_size; } - btrfs_drop_extent_cache(inode, orig_start, - orig_start + orig_num_bytes - 1); - btrfs_add_ordered_inode(inode); - btrfs_update_inode(trans, root, inode); out: btrfs_end_transaction(trans, root); return ret; @@ -341,25 +350,15 @@ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, int mirror_num) { struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; int ret = 0; - char *sums = NULL; + struct btrfs_ordered_sum *sums; ret = btrfs_csum_one_bio(root, bio, &sums); BUG_ON(ret); - trans = btrfs_start_transaction(root, 1); - - btrfs_set_trans_block_group(trans, inode); - mutex_lock(&BTRFS_I(inode)->csum_mutex); - btrfs_csum_file_blocks(trans, root, inode, bio, sums); - mutex_unlock(&BTRFS_I(inode)->csum_mutex); - - ret = btrfs_end_transaction(trans, root); + ret = btrfs_add_ordered_sum(inode, sums); BUG_ON(ret); - kfree(sums); - return btrfs_map_bio(root, rw, bio, mirror_num, 1); } @@ -369,14 +368,10 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, struct btrfs_root *root = BTRFS_I(inode)->root; int ret = 0; - if (!(rw & (1 << BIO_RW))) { - ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); - BUG_ON(ret); - goto mapit; - } + ret = 
btrfs_bio_wq_end_io(root->fs_info, bio, 0); + BUG_ON(ret); - if (btrfs_test_opt(root, NODATASUM) || - btrfs_test_flag(inode, NODATASUM)) { + if (!(rw & (1 << BIO_RW))) { goto mapit; } @@ -387,6 +382,96 @@ mapit: return btrfs_map_bio(root, rw, bio, mirror_num, 0); } +static int add_pending_csums(struct btrfs_trans_handle *trans, + struct inode *inode, u64 file_offset, + struct list_head *list) +{ + struct list_head *cur; + struct btrfs_ordered_sum *sum; + + btrfs_set_trans_block_group(trans, inode); + while(!list_empty(list)) { + cur = list->next; + sum = list_entry(cur, struct btrfs_ordered_sum, list); + mutex_lock(&BTRFS_I(inode)->csum_mutex); + btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root, + inode, sum); + mutex_unlock(&BTRFS_I(inode)->csum_mutex); + list_del(&sum->list); + kfree(sum); + } + return 0; +} + +int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, + struct extent_state *state, int uptodate) +{ + struct inode *inode = page->mapping->host; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + struct btrfs_ordered_extent *ordered_extent; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + u64 alloc_hint = 0; + struct list_head list; + struct btrfs_key ins; + int ret; + + ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1); + if (!ret) { + return 0; + } + + trans = btrfs_start_transaction(root, 1); + + ordered_extent = btrfs_lookup_ordered_extent(inode, start); + BUG_ON(!ordered_extent); + + lock_extent(io_tree, ordered_extent->file_offset, + ordered_extent->file_offset + ordered_extent->len - 1, + GFP_NOFS); + + INIT_LIST_HEAD(&list); + + ins.objectid = ordered_extent->start; + ins.offset = ordered_extent->len; + ins.type = BTRFS_EXTENT_ITEM_KEY; + ret = btrfs_alloc_reserved_extent(trans, root, root->root_key.objectid, + trans->transid, inode->i_ino, + ordered_extent->file_offset, &ins); + BUG_ON(ret); + ret = btrfs_drop_extents(trans, root, inode, + 
ordered_extent->file_offset, + ordered_extent->file_offset + + ordered_extent->len, + ordered_extent->file_offset, &alloc_hint); + BUG_ON(ret); + ret = btrfs_insert_file_extent(trans, root, inode->i_ino, + ordered_extent->file_offset, + ordered_extent->start, + ordered_extent->len, + ordered_extent->len, 0); + BUG_ON(ret); + btrfs_drop_extent_cache(inode, ordered_extent->file_offset, + ordered_extent->file_offset + + ordered_extent->len - 1); + inode->i_blocks += ordered_extent->len >> 9; + unlock_extent(io_tree, ordered_extent->file_offset, + ordered_extent->file_offset + ordered_extent->len - 1, + GFP_NOFS); + add_pending_csums(trans, inode, ordered_extent->file_offset, + &ordered_extent->list); + + btrfs_remove_ordered_extent(inode, ordered_extent); + /* once for us */ + btrfs_put_ordered_extent(ordered_extent); + /* once for the tree */ + btrfs_put_ordered_extent(ordered_extent); + + btrfs_update_inode(trans, root, inode); + btrfs_end_transaction(trans, root); + return 0; +} + int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) { int ret = 0; @@ -409,7 +494,8 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) if (ret == -ENOENT || ret == -EFBIG) ret = 0; csum = 0; - printk("no csum found for inode %lu start %Lu\n", inode->i_ino, start); + printk("no csum found for inode %lu start %Lu\n", inode->i_ino, + start); goto out; } read_extent_buffer(path->nodes[0], &csum, (unsigned long)item, @@ -833,7 +919,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) { struct btrfs_root *root; struct btrfs_trans_handle *trans; - struct inode *inode = dentry->d_inode; int ret; unsigned long nr = 0; @@ -849,14 +934,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) ret = btrfs_unlink_trans(trans, root, dir, dentry); nr = trans->blocks_used; - if (inode->i_nlink == 0) { - /* if the inode isn't linked anywhere, - * we don't need to worry about - * data=ordered - */ - btrfs_del_ordered_inode(inode, 1); - } - 
btrfs_end_transaction_throttle(trans, root); fail: btrfs_btree_balance_dirty(root, nr); @@ -931,6 +1008,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, int extent_type = -1; u64 mask = root->sectorsize - 1; + btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); btrfs_drop_extent_cache(inode, inode->i_size & (~mask), (u64)-1); path = btrfs_alloc_path(); path->reada = -1; @@ -1117,34 +1195,6 @@ error: return ret; } -static int btrfs_cow_one_page(struct inode *inode, struct page *page, - size_t zero_start) -{ - char *kaddr; - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; - u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 page_end = page_start + PAGE_CACHE_SIZE - 1; - int ret = 0; - - WARN_ON(!PageLocked(page)); - set_page_extent_mapped(page); - - lock_extent(io_tree, page_start, page_end, GFP_NOFS); - set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, - page_end, GFP_NOFS); - - if (zero_start != PAGE_CACHE_SIZE) { - kaddr = kmap(page); - memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start); - flush_dcache_page(page); - kunmap(page); - } - set_page_dirty(page); - unlock_extent(io_tree, page_start, page_end, GFP_NOFS); - - return ret; -} - /* * taken from block_truncate_page, but does cow as it zeros out * any bytes left in the last page in the file. 
@@ -1153,12 +1203,16 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) { struct inode *inode = mapping->host; struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct btrfs_ordered_extent *ordered; + char *kaddr; u32 blocksize = root->sectorsize; pgoff_t index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); struct page *page; int ret = 0; u64 page_start; + u64 page_end; if ((offset & (blocksize - 1)) == 0) goto out; @@ -1168,6 +1222,10 @@ again: page = grab_cache_page(mapping, index); if (!page) goto out; + + page_start = page_offset(page); + page_end = page_start + PAGE_CACHE_SIZE - 1; + if (!PageUptodate(page)) { ret = btrfs_readpage(NULL, page); lock_page(page); @@ -1181,10 +1239,32 @@ again: goto out; } } - - page_start = (u64)page->index << PAGE_CACHE_SHIFT; wait_on_page_writeback(page); - ret = btrfs_cow_one_page(inode, page, offset); + + lock_extent(io_tree, page_start, page_end, GFP_NOFS); + set_page_extent_mapped(page); + + ordered = btrfs_lookup_ordered_extent(inode, page_start); + if (ordered) { + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + unlock_page(page); + page_cache_release(page); + btrfs_wait_ordered_extent(inode, ordered); + btrfs_put_ordered_extent(ordered); + goto again; + } + + set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, + page_end, GFP_NOFS); + ret = 0; + if (offset != PAGE_CACHE_SIZE) { + kaddr = kmap(page); + memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); + flush_dcache_page(page); + kunmap(page); + } + set_page_dirty(page); + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); unlock_page(page); page_cache_release(page); @@ -1222,8 +1302,9 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) btrfs_truncate_page(inode->i_mapping, inode->i_size); - lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); hole_size = block_end - hole_start; + 
btrfs_wait_ordered_range(inode, hole_start, hole_size); + lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); @@ -1258,6 +1339,7 @@ void btrfs_delete_inode(struct inode *inode) unsigned long nr; int ret; + btrfs_wait_ordered_range(inode, 0, (u64)-1); truncate_inode_pages(&inode->i_data, 0); if (is_bad_inode(inode)) { goto no_delete; @@ -1403,7 +1485,6 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); mutex_init(&BTRFS_I(inode)->csum_mutex); - atomic_set(&BTRFS_I(inode)->ordered_writeback, 0); return 0; } @@ -1705,7 +1786,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); mutex_init(&BTRFS_I(inode)->csum_mutex); - atomic_set(&BTRFS_I(inode)->ordered_writeback, 0); BTRFS_I(inode)->delalloc_bytes = 0; BTRFS_I(inode)->root = root; @@ -1930,7 +2010,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_mapping, GFP_NOFS); mutex_init(&BTRFS_I(inode)->csum_mutex); BTRFS_I(inode)->delalloc_bytes = 0; - atomic_set(&BTRFS_I(inode)->ordered_writeback, 0); BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } dir->i_sb->s_dirt = 1; @@ -2066,64 +2145,18 @@ out_unlock: static int merge_extent_mapping(struct extent_map_tree *em_tree, struct extent_map *existing, - struct extent_map *em) + struct extent_map *em, + u64 map_start, u64 map_len) { u64 start_diff; - u64 new_end; - int ret = 0; - int real_blocks = existing->block_start < EXTENT_MAP_LAST_BYTE; - - if (real_blocks && em->block_start >= EXTENT_MAP_LAST_BYTE) - goto invalid; - - if (!real_blocks && em->block_start != existing->block_start) - goto invalid; - - new_end = max(existing->start + existing->len, em->start + em->len); - - if (existing->start >= em->start) { - if (em->start + em->len < 
existing->start) - goto invalid; - start_diff = existing->start - em->start; - if (real_blocks && em->block_start + start_diff != - existing->block_start) - goto invalid; - - em->len = new_end - em->start; - - remove_extent_mapping(em_tree, existing); - /* free for the tree */ - free_extent_map(existing); - ret = add_extent_mapping(em_tree, em); - - } else if (em->start > existing->start) { - - if (existing->start + existing->len < em->start) - goto invalid; - - start_diff = em->start - existing->start; - if (real_blocks && existing->block_start + start_diff != - em->block_start) - goto invalid; - - remove_extent_mapping(em_tree, existing); - em->block_start = existing->block_start; - em->start = existing->start; - em->len = new_end - existing->start; - free_extent_map(existing); - - ret = add_extent_mapping(em_tree, em); - } else { - goto invalid; - } - return ret; - -invalid: - printk("invalid extent map merge [%Lu %Lu %Lu] [%Lu %Lu %Lu]\n", - existing->start, existing->len, existing->block_start, - em->start, em->len, em->block_start); - return -EIO; + BUG_ON(map_start < em->start || map_start >= extent_map_end(em)); + start_diff = map_start - em->start; + em->start = map_start; + em->len = map_len; + if (em->block_start < EXTENT_MAP_LAST_BYTE) + em->block_start += start_diff; + return add_extent_mapping(em_tree, em); } struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, @@ -2170,10 +2203,9 @@ again: err = -ENOMEM; goto out; } - + em->bdev = root->fs_info->fs_devices->latest_bdev; em->start = EXTENT_MAP_HOLE; em->len = (u64)-1; - em->bdev = root->fs_info->fs_devices->latest_bdev; ret = btrfs_lookup_file_extent(trans, root, path, objectid, start, trans != NULL); if (ret < 0) { @@ -2314,6 +2346,9 @@ insert: */ if (ret == -EEXIST) { struct extent_map *existing; + + ret = 0; + existing = lookup_extent_mapping(em_tree, start, len); if (existing && (existing->start > start || existing->start + existing->len <= start)) { @@ -2325,7 +2360,8 @@ 
insert: em->len); if (existing) { err = merge_extent_mapping(em_tree, existing, - em); + em, start, + root->sectorsize); free_extent_map(existing); if (err) { free_extent_map(em); @@ -2341,6 +2377,7 @@ insert: } else { free_extent_map(em); em = existing; + err = 0; } } spin_unlock(&em_tree->lock); @@ -2348,8 +2385,9 @@ out: btrfs_free_path(path); if (trans) { ret = btrfs_end_transaction(trans, root); - if (!err) + if (!err) { err = ret; + } } if (err) { free_extent_map(em); @@ -2474,8 +2512,7 @@ btrfs_readpages(struct file *file, struct address_space *mapping, return extent_readpages(tree, mapping, pages, nr_pages, btrfs_get_extent); } - -static int btrfs_releasepage(struct page *page, gfp_t gfp_flags) +static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags) { struct extent_io_tree *tree; struct extent_map_tree *map; @@ -2493,15 +2530,54 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags) return ret; } +static int btrfs_releasepage(struct page *page, gfp_t gfp_flags) +{ + struct btrfs_ordered_extent *ordered; + + ordered = btrfs_lookup_ordered_extent(page->mapping->host, + page_offset(page)); + if (ordered) { + btrfs_put_ordered_extent(ordered); + return 0; + } + return __btrfs_releasepage(page, gfp_flags); +} + static void btrfs_invalidatepage(struct page *page, unsigned long offset) { struct extent_io_tree *tree; + struct btrfs_ordered_extent *ordered; + u64 page_start = page_offset(page); + u64 page_end = page_start + PAGE_CACHE_SIZE - 1; + wait_on_page_writeback(page); tree = &BTRFS_I(page->mapping->host)->io_tree; - extent_invalidatepage(tree, page, offset); - btrfs_releasepage(page, GFP_NOFS); + if (offset) { + btrfs_releasepage(page, GFP_NOFS); + return; + } + + lock_extent(tree, page_start, page_end, GFP_NOFS); + ordered = btrfs_lookup_ordered_extent(page->mapping->host, + page_offset(page)); + if (ordered) { + clear_extent_bit(tree, page_start, page_end, + EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_LOCKED, 1, 0, GFP_NOFS); + 
btrfs_writepage_end_io_hook(page, page_start, + page_end, NULL, 1); + btrfs_put_ordered_extent(ordered); + lock_extent(tree, page_start, page_end, GFP_NOFS); + } + clear_extent_bit(tree, page_start, page_end, + EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_ORDERED, + 1, 1, GFP_NOFS); + __btrfs_releasepage(page, GFP_NOFS); + if (PagePrivate(page)) { - invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE); + invalidate_extent_lru(tree, page_offset(page), + PAGE_CACHE_SIZE); ClearPagePrivate(page); set_page_private(page, 0); page_cache_release(page); @@ -2527,35 +2603,63 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) { struct inode *inode = fdentry(vma->vm_file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; - unsigned long end; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct btrfs_ordered_extent *ordered; + char *kaddr; + unsigned long zero_start; loff_t size; int ret; u64 page_start; + u64 page_end; ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0); if (ret) goto out; ret = -EINVAL; - +again: lock_page(page); - wait_on_page_writeback(page); size = i_size_read(inode); - page_start = (u64)page->index << PAGE_CACHE_SHIFT; + page_start = page_offset(page); + page_end = page_start + PAGE_CACHE_SIZE - 1; if ((page->mapping != inode->i_mapping) || - (page_start > size)) { + (page_start >= size)) { /* page got truncated out from underneath us */ goto out_unlock; } + wait_on_page_writeback(page); + + lock_extent(io_tree, page_start, page_end, GFP_NOFS); + set_page_extent_mapped(page); + + ordered = btrfs_lookup_ordered_extent(inode, page_start); + if (ordered) { + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + unlock_page(page); + btrfs_wait_ordered_extent(inode, ordered); + btrfs_put_ordered_extent(ordered); + goto again; + } + + set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, + page_end, GFP_NOFS); + ret = 0; /* page is wholly or partially inside EOF */ if (page_start 
+ PAGE_CACHE_SIZE > size) - end = size & ~PAGE_CACHE_MASK; + zero_start = size & ~PAGE_CACHE_MASK; else - end = PAGE_CACHE_SIZE; + zero_start = PAGE_CACHE_SIZE; - ret = btrfs_cow_one_page(inode, page, end); + if (zero_start != PAGE_CACHE_SIZE) { + kaddr = kmap(page); + memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start); + flush_dcache_page(page); + kunmap(page); + } + set_page_dirty(page); + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); out_unlock: unlock_page(page); @@ -2662,15 +2766,28 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) if (!ei) return NULL; ei->last_trans = 0; - ei->ordered_trans = 0; + btrfs_ordered_inode_tree_init(&ei->ordered_tree); return &ei->vfs_inode; } void btrfs_destroy_inode(struct inode *inode) { + struct btrfs_ordered_extent *ordered; WARN_ON(!list_empty(&inode->i_dentry)); WARN_ON(inode->i_data.nrpages); + while(1) { + ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); + if (!ordered) + break; + else { + printk("found ordered extent %Lu %Lu\n", + ordered->file_offset, ordered->len); + btrfs_remove_ordered_extent(inode, ordered); + btrfs_put_ordered_extent(ordered); + btrfs_put_ordered_extent(ordered); + } + } btrfs_drop_extent_cache(inode, 0, (u64)-1); kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); } @@ -2869,7 +2986,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_mapping, GFP_NOFS); mutex_init(&BTRFS_I(inode)->csum_mutex); BTRFS_I(inode)->delalloc_bytes = 0; - atomic_set(&BTRFS_I(inode)->ordered_writeback, 0); BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } dir->i_sb->s_dirt = 1; @@ -2921,6 +3037,20 @@ out_fail: return err; } +static int btrfs_set_page_dirty(struct page *page) +{ + struct inode *inode = page->mapping->host; + u64 page_start = page_offset(page); + u64 page_end = page_start + PAGE_CACHE_SIZE - 1; + + if (!test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, + EXTENT_DELALLOC, 0)) { +printk("inode %lu page %Lu not delalloc\n", 
inode->i_ino, page_offset(page)); +WARN_ON(1); + } + return __set_page_dirty_nobuffers(page); +} + static int btrfs_permission(struct inode *inode, int mask, struct nameidata *nd) { @@ -2967,6 +3097,7 @@ static struct extent_io_ops btrfs_extent_io_ops = { .merge_bio_hook = btrfs_merge_bio_hook, .readpage_io_hook = btrfs_readpage_io_hook, .readpage_end_io_hook = btrfs_readpage_end_io_hook, + .writepage_end_io_hook = btrfs_writepage_end_io_hook, .readpage_io_failed_hook = btrfs_io_failed_hook, .set_bit_hook = btrfs_set_bit_hook, .clear_bit_hook = btrfs_clear_bit_hook, @@ -2982,7 +3113,7 @@ static struct address_space_operations btrfs_aops = { .direct_IO = btrfs_direct_IO, .invalidatepage = btrfs_invalidatepage, .releasepage = btrfs_releasepage, - .set_page_dirty = __set_page_dirty_nobuffers, + .set_page_dirty = btrfs_set_page_dirty, }; static struct address_space_operations btrfs_symlink_aops = { diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 254da8225664..6513270f054c 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -22,48 +22,30 @@ #include "ctree.h" #include "transaction.h" #include "btrfs_inode.h" +#include "extent_io.h" -struct tree_entry { - u64 root_objectid; - u64 objectid; - struct inode *inode; - struct rb_node rb_node; -}; -/* - * returns > 0 if entry passed (root, objectid) is > entry, - * < 0 if (root, objectid) < entry and zero if they are equal - */ -static int comp_entry(struct tree_entry *entry, u64 root_objectid, - u64 objectid) +static u64 entry_end(struct btrfs_ordered_extent *entry) { - if (root_objectid < entry->root_objectid) - return -1; - if (root_objectid > entry->root_objectid) - return 1; - if (objectid < entry->objectid) - return -1; - if (objectid > entry->objectid) - return 1; - return 0; + if (entry->file_offset + entry->len < entry->file_offset) + return (u64)-1; + return entry->file_offset + entry->len; } -static struct rb_node *tree_insert(struct rb_root *root, u64 root_objectid, - u64 
objectid, struct rb_node *node) +static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset, + struct rb_node *node) { struct rb_node ** p = &root->rb_node; struct rb_node * parent = NULL; - struct tree_entry *entry; - int comp; + struct btrfs_ordered_extent *entry; while(*p) { parent = *p; - entry = rb_entry(parent, struct tree_entry, rb_node); + entry = rb_entry(parent, struct btrfs_ordered_extent, rb_node); - comp = comp_entry(entry, root_objectid, objectid); - if (comp < 0) + if (file_offset < entry->file_offset) p = &(*p)->rb_left; - else if (comp > 0) + else if (file_offset >= entry_end(entry)) p = &(*p)->rb_right; else return parent; @@ -74,24 +56,23 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 root_objectid, return NULL; } -static struct rb_node *__tree_search(struct rb_root *root, u64 root_objectid, - u64 objectid, struct rb_node **prev_ret) +static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset, + struct rb_node **prev_ret) { struct rb_node * n = root->rb_node; struct rb_node *prev = NULL; - struct tree_entry *entry; - struct tree_entry *prev_entry = NULL; - int comp; + struct rb_node *test; + struct btrfs_ordered_extent *entry; + struct btrfs_ordered_extent *prev_entry = NULL; while(n) { - entry = rb_entry(n, struct tree_entry, rb_node); + entry = rb_entry(n, struct btrfs_ordered_extent, rb_node); prev = n; prev_entry = entry; - comp = comp_entry(entry, root_objectid, objectid); - if (comp < 0) + if (file_offset < entry->file_offset) n = n->rb_left; - else if (comp > 0) + else if (file_offset >= entry_end(entry)) n = n->rb_right; else return n; @@ -99,195 +80,329 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 root_objectid, if (!prev_ret) return NULL; - while(prev && comp_entry(prev_entry, root_objectid, objectid) >= 0) { - prev = rb_next(prev); - prev_entry = rb_entry(prev, struct tree_entry, rb_node); + while(prev && file_offset >= entry_end(prev_entry)) { + test = rb_next(prev); 
+ if (!test) + break; + prev_entry = rb_entry(test, struct btrfs_ordered_extent, + rb_node); + if (file_offset < entry_end(prev_entry)) + break; + + prev = test; + } + if (prev) + prev_entry = rb_entry(prev, struct btrfs_ordered_extent, + rb_node); + while(prev && file_offset < entry_end(prev_entry)) { + test = rb_prev(prev); + if (!test) + break; + prev_entry = rb_entry(test, struct btrfs_ordered_extent, + rb_node); + prev = test; } *prev_ret = prev; return NULL; } -static inline struct rb_node *tree_search(struct rb_root *root, - u64 root_objectid, u64 objectid) +static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset) +{ + if (file_offset < entry->file_offset || + entry->file_offset + entry->len <= file_offset) + return 0; + return 1; +} + +static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, + u64 file_offset) { + struct rb_root *root = &tree->tree; struct rb_node *prev; struct rb_node *ret; - ret = __tree_search(root, root_objectid, objectid, &prev); + struct btrfs_ordered_extent *entry; + + if (tree->last) { + entry = rb_entry(tree->last, struct btrfs_ordered_extent, + rb_node); + if (offset_in_entry(entry, file_offset)) + return tree->last; + } + ret = __tree_search(root, file_offset, &prev); if (!ret) - return prev; + ret = prev; + if (ret) + tree->last = ret; return ret; } -int btrfs_add_ordered_inode(struct inode *inode) +int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, + u64 start, u64 len) { - struct btrfs_root *root = BTRFS_I(inode)->root; - u64 root_objectid = root->root_key.objectid; - u64 transid = root->fs_info->running_transaction->transid; - struct tree_entry *entry; - struct rb_node *node; struct btrfs_ordered_inode_tree *tree; + struct rb_node *node; + struct btrfs_ordered_extent *entry; - if (transid <= BTRFS_I(inode)->ordered_trans) - return 0; - - tree = &root->fs_info->running_transaction->ordered_inode_tree; - - read_lock(&tree->lock); - node = __tree_search(&tree->tree, 
root_objectid, inode->i_ino, NULL); - read_unlock(&tree->lock); - if (node) { - return 0; - } - - entry = kmalloc(sizeof(*entry), GFP_NOFS); + tree = &BTRFS_I(inode)->ordered_tree; + entry = kzalloc(sizeof(*entry), GFP_NOFS); if (!entry) return -ENOMEM; - write_lock(&tree->lock); - entry->objectid = inode->i_ino; - entry->root_objectid = root_objectid; + mutex_lock(&tree->mutex); + entry->file_offset = file_offset; + entry->start = start; + entry->len = len; entry->inode = inode; + /* one ref for the tree */ + atomic_set(&entry->refs, 1); + init_waitqueue_head(&entry->wait); + INIT_LIST_HEAD(&entry->list); - node = tree_insert(&tree->tree, root_objectid, - inode->i_ino, &entry->rb_node); - - BTRFS_I(inode)->ordered_trans = transid; - if (!node) - igrab(inode); - - write_unlock(&tree->lock); + node = tree_insert(&tree->tree, file_offset, + &entry->rb_node); + if (node) { + entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); + atomic_inc(&entry->refs); + } + set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset, + entry_end(entry) - 1, GFP_NOFS); - if (node) - kfree(entry); + set_bit(BTRFS_ORDERED_START, &entry->flags); + mutex_unlock(&tree->mutex); + BUG_ON(node); return 0; } -int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, - u64 *root_objectid, u64 *objectid, - struct inode **inode) +int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum) { - struct tree_entry *entry; + struct btrfs_ordered_inode_tree *tree; struct rb_node *node; + struct btrfs_ordered_extent *entry; - write_lock(&tree->lock); - node = tree_search(&tree->tree, *root_objectid, *objectid); + tree = &BTRFS_I(inode)->ordered_tree; + mutex_lock(&tree->mutex); + node = tree_search(tree, sum->file_offset); if (!node) { - write_unlock(&tree->lock); - return 0; +search_fail: +printk("add ordered sum failed to find a node for inode %lu offset %Lu\n", inode->i_ino, sum->file_offset); + node = rb_first(&tree->tree); + while(node) { + entry = 
rb_entry(node, struct btrfs_ordered_extent, rb_node); + printk("entry %Lu %Lu %Lu\n", entry->file_offset, entry->file_offset + entry->len, entry->start); + node = rb_next(node); + } + BUG(); } - entry = rb_entry(node, struct tree_entry, rb_node); + BUG_ON(!node); - while(comp_entry(entry, *root_objectid, *objectid) >= 0) { - node = rb_next(node); - if (!node) - break; - entry = rb_entry(node, struct tree_entry, rb_node); - } - if (!node) { - write_unlock(&tree->lock); - return 0; + entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); + if (!offset_in_entry(entry, sum->file_offset)) { + goto search_fail; } - *root_objectid = entry->root_objectid; - *inode = entry->inode; - atomic_inc(&entry->inode->i_count); - *objectid = entry->objectid; - write_unlock(&tree->lock); - return 1; + list_add_tail(&sum->list, &entry->list); + mutex_unlock(&tree->mutex); + return 0; } -int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, - u64 *root_objectid, u64 *objectid, - struct inode **inode) +int btrfs_dec_test_ordered_pending(struct inode *inode, + u64 file_offset, u64 io_size) { - struct tree_entry *entry; + struct btrfs_ordered_inode_tree *tree; struct rb_node *node; - - write_lock(&tree->lock); - node = tree_search(&tree->tree, *root_objectid, *objectid); + struct btrfs_ordered_extent *entry; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + int ret; + + tree = &BTRFS_I(inode)->ordered_tree; + mutex_lock(&tree->mutex); + clear_extent_ordered(io_tree, file_offset, file_offset + io_size - 1, + GFP_NOFS); + node = tree_search(tree, file_offset); if (!node) { - write_unlock(&tree->lock); - return 0; + ret = 1; + goto out; } - entry = rb_entry(node, struct tree_entry, rb_node); - while(comp_entry(entry, *root_objectid, *objectid) >= 0) { - node = rb_next(node); - if (!node) - break; - entry = rb_entry(node, struct tree_entry, rb_node); + entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); + if (!offset_in_entry(entry, 
file_offset)) { + ret = 1; + goto out; } - if (!node) { - write_unlock(&tree->lock); - return 0; + + ret = test_range_bit(io_tree, entry->file_offset, + entry->file_offset + entry->len - 1, + EXTENT_ORDERED, 0); + if (!test_bit(BTRFS_ORDERED_START, &entry->flags)) { +printk("inode %lu not ready yet for extent %Lu %Lu\n", inode->i_ino, entry->file_offset, entry_end(entry)); } + if (ret == 0) + ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); +out: + mutex_unlock(&tree->mutex); + return ret == 0; +} - *root_objectid = entry->root_objectid; - *objectid = entry->objectid; - *inode = entry->inode; - atomic_inc(&entry->inode->i_count); - rb_erase(node, &tree->tree); - write_unlock(&tree->lock); - kfree(entry); - return 1; +int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) +{ + if (atomic_dec_and_test(&entry->refs)) + kfree(entry); + return 0; } -static void __btrfs_del_ordered_inode(struct btrfs_ordered_inode_tree *tree, - struct inode *inode, - u64 root_objectid, u64 objectid) +int btrfs_remove_ordered_extent(struct inode *inode, + struct btrfs_ordered_extent *entry) { - struct tree_entry *entry; + struct btrfs_ordered_inode_tree *tree; struct rb_node *node; - struct rb_node *prev; - write_lock(&tree->lock); - node = __tree_search(&tree->tree, root_objectid, objectid, &prev); - if (!node) { - write_unlock(&tree->lock); - return; - } + tree = &BTRFS_I(inode)->ordered_tree; + mutex_lock(&tree->mutex); + node = &entry->rb_node; rb_erase(node, &tree->tree); - BTRFS_I(inode)->ordered_trans = 0; - write_unlock(&tree->lock); - atomic_dec(&inode->i_count); - entry = rb_entry(node, struct tree_entry, rb_node); - kfree(entry); - return; + tree->last = NULL; + set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); + mutex_unlock(&tree->mutex); + wake_up(&entry->wait); + return 0; } -void btrfs_del_ordered_inode(struct inode *inode, int force) +void btrfs_wait_ordered_extent(struct inode *inode, + struct btrfs_ordered_extent *entry) { - struct btrfs_root *root 
= BTRFS_I(inode)->root; - u64 root_objectid = root->root_key.objectid; + u64 start = entry->file_offset; + u64 end = start + entry->len - 1; +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) + do_sync_file_range(file, start, end, SYNC_FILE_RANGE_WRITE); +#else + do_sync_mapping_range(inode->i_mapping, start, end, + SYNC_FILE_RANGE_WRITE); +#endif + wait_event(entry->wait, + test_bit(BTRFS_ORDERED_COMPLETE, &entry->flags)); +} - if (!BTRFS_I(inode)->ordered_trans) { - return; - } +static void btrfs_start_ordered_extent(struct inode *inode, + struct btrfs_ordered_extent *entry, int wait) +{ + u64 start = entry->file_offset; + u64 end = start + entry->len - 1; - if (!force && (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) || - mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))) - return; +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) + do_sync_file_range(file, start, end, SYNC_FILE_RANGE_WRITE); +#else + do_sync_mapping_range(inode->i_mapping, start, end, + SYNC_FILE_RANGE_WRITE); +#endif + if (wait) + wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, + &entry->flags)); +} - spin_lock(&root->fs_info->new_trans_lock); - if (root->fs_info->running_transaction) { - struct btrfs_ordered_inode_tree *tree; - tree = &root->fs_info->running_transaction->ordered_inode_tree; - __btrfs_del_ordered_inode(tree, inode, root_objectid, - inode->i_ino); +void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) +{ + u64 end; + struct btrfs_ordered_extent *ordered; + int found; + int should_wait = 0; + +again: + if (start + len < start) + end = (u64)-1; + else + end = start + len - 1; + found = 0; + while(1) { + ordered = btrfs_lookup_first_ordered_extent(inode, end); + if (!ordered) { + break; + } + if (ordered->file_offset >= start + len) { + btrfs_put_ordered_extent(ordered); + break; + } + if (ordered->file_offset + ordered->len < start) { + btrfs_put_ordered_extent(ordered); + break; + } + btrfs_start_ordered_extent(inode, ordered, 
should_wait); + found++; + end = ordered->file_offset; + btrfs_put_ordered_extent(ordered); + if (end == 0) + break; + end--; + } + if (should_wait && found) { + should_wait = 0; + goto again; } - spin_unlock(&root->fs_info->new_trans_lock); } -int btrfs_ordered_throttle(struct btrfs_root *root, struct inode *inode) +int btrfs_add_ordered_pending(struct inode *inode, + struct btrfs_ordered_extent *ordered, + u64 start, u64 len) { - struct btrfs_transaction *cur = root->fs_info->running_transaction; - while(cur == root->fs_info->running_transaction && - atomic_read(&BTRFS_I(inode)->ordered_writeback)) { -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) - congestion_wait(WRITE, HZ/20); -#else - blk_congestion_wait(WRITE, HZ/20); -#endif - } + WARN_ON(1); return 0; +#if 0 + int ret; + struct btrfs_ordered_inode_tree *tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + + tree = &BTRFS_I(inode)->ordered_tree; + mutex_lock(&tree->mutex); + if (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)) { + ret = -EAGAIN; + goto out; + } + set_extent_ordered(io_tree, start, start + len - 1, GFP_NOFS); + ret = 0; +out: + mutex_unlock(&tree->mutex); + return ret; +#endif +} + +struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, + u64 file_offset) +{ + struct btrfs_ordered_inode_tree *tree; + struct rb_node *node; + struct btrfs_ordered_extent *entry = NULL; + + tree = &BTRFS_I(inode)->ordered_tree; + mutex_lock(&tree->mutex); + node = tree_search(tree, file_offset); + if (!node) + goto out; + + entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); + if (!offset_in_entry(entry, file_offset)) + entry = NULL; + if (entry) + atomic_inc(&entry->refs); +out: + mutex_unlock(&tree->mutex); + return entry; +} + +struct btrfs_ordered_extent * +btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset) +{ + struct btrfs_ordered_inode_tree *tree; + struct rb_node *node; + struct btrfs_ordered_extent *entry = NULL; + + tree = 
&BTRFS_I(inode)->ordered_tree; + mutex_lock(&tree->mutex); + node = tree_search(tree, file_offset); + if (!node) + goto out; + + entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); + atomic_inc(&entry->refs); +out: + mutex_unlock(&tree->mutex); + return entry; } diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 4fa78736423e..33292c5fe90c 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -20,24 +20,73 @@ #define __BTRFS_ORDERED_DATA__ struct btrfs_ordered_inode_tree { - rwlock_t lock; + struct mutex mutex; struct rb_root tree; + struct rb_node *last; }; +struct btrfs_sector_sum { + u64 offset; + u32 sum; +}; + +struct btrfs_ordered_sum { + u64 file_offset; + u64 len; + struct list_head list; + struct btrfs_sector_sum sums; +}; + +/* bits for the flags field */ +#define BTRFS_ORDERED_IO_DONE 0 /* set when all the pages are written */ +#define BTRFS_ORDERED_COMPLETE 1 /* set when removed from the tree */ +#define BTRFS_ORDERED_START 2 /* set when tree setup */ + +struct btrfs_ordered_extent { + u64 file_offset; + u64 start; + u64 len; + unsigned long flags; + atomic_t refs; + struct list_head list; + struct inode *inode; + wait_queue_head_t wait; + struct rb_node rb_node; +}; + + +static inline int btrfs_ordered_sum_size(struct btrfs_root *root, u64 bytes) +{ + unsigned long num_sectors = (bytes + root->sectorsize - 1) / + root->sectorsize; + return sizeof(struct btrfs_ordered_sum) + + num_sectors * sizeof(struct btrfs_sector_sum); +} + static inline void btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t) { - rwlock_init(&t->lock); + mutex_init(&t->mutex); t->tree.rb_node = NULL; + t->last = NULL; } -int btrfs_add_ordered_inode(struct inode *inode); -int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, - u64 *root_objectid, u64 *objectid, - struct inode **inode); -int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, - u64 *root_objectid, u64 *objectid, - 
struct inode **inode); -void btrfs_del_ordered_inode(struct inode *inode, int force); -int btrfs_ordered_throttle(struct btrfs_root *root, struct inode *inode); +int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry); +int btrfs_remove_ordered_extent(struct inode *inode, + struct btrfs_ordered_extent *entry); +int btrfs_dec_test_ordered_pending(struct inode *inode, + u64 file_offset, u64 io_size); +int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, + u64 start, u64 len); +int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum); +struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, + u64 file_offset); +void btrfs_wait_ordered_extent(struct inode *inode, + struct btrfs_ordered_extent *entry); +void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); +struct btrfs_ordered_extent * +btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); +int btrfs_add_ordered_pending(struct inode *inode, + struct btrfs_ordered_extent *ordered, + u64 start, u64 len); #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index a8a3cb03de59..86a5acc19ce7 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -67,7 +67,6 @@ static noinline int join_transaction(struct btrfs_root *root) cur_trans->start_time = get_seconds(); INIT_LIST_HEAD(&cur_trans->pending_snapshots); list_add_tail(&cur_trans->list, &root->fs_info->trans_list); - btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree); extent_io_tree_init(&cur_trans->dirty_pages, root->fs_info->btree_inode->i_mapping, GFP_NOFS); @@ -158,10 +157,12 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, wake_up(&cur_trans->writer_wait); if (cur_trans->in_commit && throttle) { - int ret; + DEFINE_WAIT(wait); mutex_unlock(&root->fs_info->trans_mutex); - ret = wait_for_commit(root, cur_trans); - BUG_ON(ret); + prepare_to_wait(&root->fs_info->transaction_throttle, &wait, + 
TASK_UNINTERRUPTIBLE); + schedule(); + finish_wait(&root->fs_info->transaction_throttle, &wait); mutex_lock(&root->fs_info->trans_mutex); } @@ -486,58 +487,6 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, return ret; } -int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, - struct btrfs_root *root) -{ - struct btrfs_transaction *cur_trans = trans->transaction; - struct inode *inode; - u64 root_objectid = 0; - u64 objectid = 0; - int ret; - - atomic_inc(&root->fs_info->throttles); - while(1) { - ret = btrfs_find_first_ordered_inode( - &cur_trans->ordered_inode_tree, - &root_objectid, &objectid, &inode); - if (!ret) - break; - - mutex_unlock(&root->fs_info->trans_mutex); - - if (S_ISREG(inode->i_mode)) { - atomic_inc(&BTRFS_I(inode)->ordered_writeback); - filemap_fdatawrite(inode->i_mapping); - atomic_dec(&BTRFS_I(inode)->ordered_writeback); - } - iput(inode); - - mutex_lock(&root->fs_info->trans_mutex); - } - while(1) { - root_objectid = 0; - objectid = 0; - ret = btrfs_find_del_first_ordered_inode( - &cur_trans->ordered_inode_tree, - &root_objectid, &objectid, &inode); - if (!ret) - break; - mutex_unlock(&root->fs_info->trans_mutex); - - if (S_ISREG(inode->i_mode)) { - atomic_inc(&BTRFS_I(inode)->ordered_writeback); - filemap_write_and_wait(inode->i_mapping); - atomic_dec(&BTRFS_I(inode)->ordered_writeback); - } - atomic_dec(&inode->i_count); - iput(inode); - - mutex_lock(&root->fs_info->trans_mutex); - } - atomic_dec(&root->fs_info->throttles); - return 0; -} - static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, struct btrfs_pending_snapshot *pending) @@ -666,6 +615,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, extent_io_tree_init(pinned_copy, root->fs_info->btree_inode->i_mapping, GFP_NOFS); +printk("commit trans %Lu\n", trans->transid); trans->transaction->in_commit = 1; cur_trans = trans->transaction; if (cur_trans->list.prev != 
&root->fs_info->trans_list) { @@ -699,8 +649,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->trans_mutex); finish_wait(&cur_trans->writer_wait, &wait); - ret = btrfs_write_ordered_inodes(trans, root); - } while (cur_trans->num_writers > 1 || (cur_trans->num_joined != joined)); @@ -736,6 +684,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_copy_pinned(root, pinned_copy); + wake_up(&root->fs_info->transaction_throttle); + mutex_unlock(&root->fs_info->trans_mutex); ret = btrfs_write_and_wait_transaction(trans, root); BUG_ON(ret); @@ -758,6 +708,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots); mutex_unlock(&root->fs_info->trans_mutex); +printk("done commit trans %Lu\n", trans->transid); kmem_cache_free(btrfs_trans_handle_cachep, trans); if (root->fs_info->closing) { diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 9ccd5a5b170f..910350cd4cf0 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -19,7 +19,6 @@ #ifndef __BTRFS_TRANSACTION__ #define __BTRFS_TRANSACTION__ #include "btrfs_inode.h" -#include "ordered-data.h" struct btrfs_transaction { u64 transid; @@ -31,7 +30,6 @@ struct btrfs_transaction { struct list_head list; struct extent_io_tree dirty_pages; unsigned long start_time; - struct btrfs_ordered_inode_tree ordered_inode_tree; wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; struct list_head pending_snapshots; @@ -88,8 +86,6 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); int btrfs_clean_old_snapshots(struct btrfs_root *root); int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, - struct btrfs_root *root); int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, struct btrfs_root *root); #endif -- cgit v1.2.3-70-g09d2 From 
7f3c74fb831fa19bafe087e817c0a5ff3883f1ea Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 18 Jul 2008 12:01:11 -0400 Subject: Btrfs: Keep extent mappings in ram until pending ordered extents are done It was possible for stale mappings from disk to be used instead of the new pending ordered extent. This adds a flag to the extent map struct to keep it pinned until the pending ordered extent is actually on disk. Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 27 +++++++++++++++------------ fs/btrfs/extent_map.c | 4 ++++ fs/btrfs/extent_map.h | 3 +++ fs/btrfs/file-item.c | 5 +---- fs/btrfs/file.c | 14 ++++++++++---- fs/btrfs/inode.c | 15 +++++++++++++++ 6 files changed, 48 insertions(+), 20 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3153b4fdd0b3..d4a63ae7ed1b 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2000,7 +2000,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, struct block_device *bdev; int ret; int nr = 0; - size_t page_offset = 0; + size_t pg_offset = 0; size_t blocksize; loff_t i_size = i_size_read(inode); unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; @@ -2008,9 +2008,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, u64 delalloc_end; WARN_ON(!PageLocked(page)); - page_offset = i_size & (PAGE_CACHE_SIZE - 1); + pg_offset = i_size & (PAGE_CACHE_SIZE - 1); if (page->index > end_index || - (page->index == end_index && !page_offset)) { + (page->index == end_index && !pg_offset)) { page->mapping->a_ops->invalidatepage(page, 0); unlock_page(page); return 0; @@ -2020,12 +2020,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, char *userpage; userpage = kmap_atomic(page, KM_USER0); - memset(userpage + page_offset, 0, - PAGE_CACHE_SIZE - page_offset); + memset(userpage + pg_offset, 0, + PAGE_CACHE_SIZE - pg_offset); kunmap_atomic(userpage, KM_USER0); 
flush_dcache_page(page); } - page_offset = 0; + pg_offset = 0; set_page_extent_mapped(page); @@ -2088,7 +2088,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, unlock_start = page_end + 1; break; } - em = epd->get_extent(inode, page, page_offset, cur, + em = epd->get_extent(inode, page, pg_offset, cur, end - cur + 1, 1); if (IS_ERR(em) || !em) { SetPageError(page); @@ -2113,12 +2113,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, unlock_extent(tree, unlock_start, cur + iosize -1, GFP_NOFS); + if (tree->ops && tree->ops->writepage_end_io_hook) tree->ops->writepage_end_io_hook(page, cur, cur + iosize - 1, NULL, 1); cur = cur + iosize; - page_offset += iosize; + pg_offset += iosize; unlock_start = cur; continue; } @@ -2127,7 +2128,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, if (0 && !test_range_bit(tree, cur, cur + iosize - 1, EXTENT_DIRTY, 0)) { cur = cur + iosize; - page_offset += iosize; + pg_offset += iosize; continue; } clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); @@ -2141,6 +2142,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, SetPageError(page); } else { unsigned long max_nr = end_index + 1; + set_range_writeback(tree, cur, cur + iosize - 1); if (!PageWriteback(page)) { printk("warning page %lu not writeback, " @@ -2150,14 +2152,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, } ret = submit_extent_page(WRITE, tree, page, sector, - iosize, page_offset, bdev, + iosize, pg_offset, bdev, &epd->bio, max_nr, end_bio_extent_writepage, 0); if (ret) SetPageError(page); } cur = cur + iosize; - page_offset += iosize; + pg_offset += iosize; nr++; } done: @@ -2579,7 +2581,8 @@ int try_release_extent_mapping(struct extent_map_tree *map, spin_unlock(&map->lock); break; } - if (em->start != start) { + if (test_bit(EXTENT_FLAG_PINNED, &em->flags) || + em->start != start) { 
spin_unlock(&map->lock); free_extent_map(em); break; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 81123277c2b8..71b1ac155355 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -173,6 +173,9 @@ static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) static int mergable_maps(struct extent_map *prev, struct extent_map *next) { + if (test_bit(EXTENT_FLAG_PINNED, &prev->flags)) + return 0; + if (extent_map_end(prev) == next->start && prev->flags == next->flags && prev->bdev == next->bdev && @@ -320,6 +323,7 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) { int ret = 0; + WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags)); BUG_ON(spin_trylock(&tree->lock)); rb_erase(&em->rb_node, &tree->map); em->in_tree = 0; diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 56314217cfc0..a3978ec27846 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -8,6 +8,9 @@ #define EXTENT_MAP_INLINE (u64)-2 #define EXTENT_MAP_DELALLOC (u64)-1 +/* bits for the flags field */ +#define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */ + struct extent_map { struct rb_node rb_node; diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index e02f1e5acb0a..d9c69e16d368 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -192,7 +192,6 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, (char *)&sector_sum->sum); sector_sum->offset = page_offset(bvec->bv_page) + bvec->bv_offset; - sector_sum++; bio_index++; total_bytes += bvec->bv_len; @@ -201,9 +200,6 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, } btrfs_add_ordered_sum(inode, ordered, sums); btrfs_put_ordered_extent(ordered); - if (total_bytes != bio->bi_size) { -printk("warning, total bytes %lu bio size %u\n", total_bytes, bio->bi_size); - } return 0; } @@ -372,6 +368,7 @@ next_sector: write_extent_buffer(leaf, &sector_sum->sum, (unsigned long)item, 
BTRFS_CRC32_SIZE); } + total_bytes += root->sectorsize; sector_sum++; if (total_bytes < sums->len) { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 40ad1b2958cb..eccdb9562ba8 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -358,9 +358,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) struct extent_map *split = NULL; struct extent_map *split2 = NULL; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - struct extent_map *tmp; u64 len = end - start + 1; - u64 next_start; int ret; int testend = 1; @@ -381,8 +379,16 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) spin_unlock(&em_tree->lock); break; } - tmp = rb_entry(&em->rb_node, struct extent_map, rb_node); - next_start = tmp->start; + if (test_bit(EXTENT_FLAG_PINNED, &em->flags)) { + start = em->start + em->len; + free_extent_map(em); + spin_unlock(&em_tree->lock); + if (start < end) { + len = end - start + 1; + continue; + } + break; + } remove_extent_mapping(em_tree, em); if (em->block_start < EXTENT_MAP_LAST_BYTE && diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8803abc89bb8..08dbe738b512 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -144,6 +144,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) em->len = ins.offset; em->block_start = ins.objectid; em->bdev = root->fs_info->fs_devices->latest_bdev; + set_bit(EXTENT_FLAG_PINNED, &em->flags); while(1) { spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); @@ -483,6 +484,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) struct btrfs_trans_handle *trans; struct btrfs_ordered_extent *ordered_extent; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_map *em; u64 alloc_hint = 0; struct list_head list; struct btrfs_key ins; @@ -524,6 +527,17 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) 
ordered_extent->len, ordered_extent->len, 0); BUG_ON(ret); + + + spin_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, ordered_extent->file_offset, + ordered_extent->len); + if (em) { + clear_bit(EXTENT_FLAG_PINNED, &em->flags); + free_extent_map(em); + } + spin_unlock(&em_tree->lock); + btrfs_drop_extent_cache(inode, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1); @@ -538,6 +552,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) btrfs_ordered_update_i_size(inode, ordered_extent); btrfs_remove_ordered_extent(inode, ordered_extent); + /* once for us */ btrfs_put_ordered_extent(ordered_extent); /* once for the tree */ -- cgit v1.2.3-70-g09d2 From f421950f86bf96a11fef932e167ab2e70d4c43a0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 22 Jul 2008 11:18:09 -0400 Subject: Btrfs: Fix some data=ordered related data corruptions Stress testing was showing data checksum errors, most of which were caused by a lookup bug in the extent_map tree. The tree was caching the last pointer returned, and searches would check the last pointer first. But, search callers also expect the search to return the very first matching extent in the range, which wasn't always true with the last pointer usage. For now, the code to cache the last return value is just removed. It is easy to fix, but I think lookups are rare enough that it isn't required anymore. This commit also replaces do_sync_mapping_range with a local copy of the related functions. 
Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 + fs/btrfs/extent_io.c | 20 --------- fs/btrfs/extent_io.h | 1 - fs/btrfs/extent_map.c | 9 ---- fs/btrfs/extent_map.h | 1 - fs/btrfs/file.c | 15 +++---- fs/btrfs/inode.c | 54 +++++++++++++---------- fs/btrfs/ordered-data.c | 115 ++++++++++++++++++++++++++++++++++++++++-------- fs/btrfs/ordered-data.h | 4 ++ fs/btrfs/transaction.c | 2 - 10 files changed, 140 insertions(+), 83 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 96ab2797c09a..f8fccdac3055 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1590,6 +1590,8 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 isize); /* inode.c */ +int btrfs_writepages(struct address_space *mapping, + struct writeback_control *wbc); int btrfs_create_subvol_root(struct btrfs_root *new_root, struct btrfs_trans_handle *trans, u64 new_dirid, struct btrfs_block_group_cache *block_group); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 7380449cb5b3..9965993748d0 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -97,7 +97,6 @@ void extent_io_tree_init(struct extent_io_tree *tree, spin_lock_init(&tree->lock); spin_lock_init(&tree->buffer_lock); tree->mapping = mapping; - tree->last = NULL; } EXPORT_SYMBOL(extent_io_tree_init); @@ -173,12 +172,6 @@ static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset, struct tree_entry *entry; struct tree_entry *prev_entry = NULL; - if (tree->last) { - struct extent_state *state; - state = tree->last; - if (state->start <= offset && offset <= state->end) - return &tree->last->rb_node; - } while(n) { entry = rb_entry(n, struct tree_entry, rb_node); prev = n; @@ -189,7 +182,6 @@ static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset, else if (offset > entry->end) n = n->rb_right; else { - tree->last = rb_entry(n, struct extent_state, rb_node); return n; 
} } @@ -223,10 +215,6 @@ static inline struct rb_node *tree_search(struct extent_io_tree *tree, ret = __etree_search(tree, offset, &prev, NULL); if (!ret) { - if (prev) { - tree->last = rb_entry(prev, struct extent_state, - rb_node); - } return prev; } return ret; @@ -301,8 +289,6 @@ static int merge_state(struct extent_io_tree *tree, other->state == state->state) { state->start = other->start; other->tree = NULL; - if (tree->last == other) - tree->last = state; rb_erase(&other->rb_node, &tree->state); free_extent_state(other); } @@ -314,8 +300,6 @@ static int merge_state(struct extent_io_tree *tree, other->state == state->state) { other->start = state->start; state->tree = NULL; - if (tree->last == state) - tree->last = other; rb_erase(&state->rb_node, &tree->state); free_extent_state(state); } @@ -378,7 +362,6 @@ static int insert_state(struct extent_io_tree *tree, return -EEXIST; } state->tree = tree; - tree->last = state; merge_state(tree, state); return 0; } @@ -444,9 +427,6 @@ static int clear_state_bit(struct extent_io_tree *tree, if (delete || state->state == 0) { if (state->tree) { clear_state_cb(tree, state, state->state); - if (tree->last == state) { - tree->last = extent_state_next(state); - } rb_erase(&state->rb_node, &tree->state); state->tree = NULL; free_extent_state(state); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 6c03e6a19938..315cfceae312 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -60,7 +60,6 @@ struct extent_io_tree { spinlock_t lock; spinlock_t buffer_lock; struct extent_io_ops *ops; - struct extent_state *last; }; struct extent_state { diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 71b1ac155355..8a502ee2f231 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -42,7 +42,6 @@ void extent_map_exit(void) void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) { tree->map.rb_node = NULL; - tree->last = NULL; spin_lock_init(&tree->lock); } 
EXPORT_SYMBOL(extent_map_tree_init); @@ -239,7 +238,6 @@ int add_extent_mapping(struct extent_map_tree *tree, merge->in_tree = 0; free_extent_map(merge); } - tree->last = em; out: return ret; } @@ -273,10 +271,6 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, u64 end = range_end(start, len); BUG_ON(spin_trylock(&tree->lock)); - em = tree->last; - if (em && end > em->start && start < extent_map_end(em)) - goto found; - rb_node = __tree_search(&tree->map, start, &prev, &next); if (!rb_node && prev) { em = rb_entry(prev, struct extent_map, rb_node); @@ -305,7 +299,6 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, found: atomic_inc(&em->refs); - tree->last = em; out: return em; } @@ -327,8 +320,6 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) BUG_ON(spin_trylock(&tree->lock)); rb_erase(&em->rb_node, &tree->map); em->in_tree = 0; - if (tree->last == em) - tree->last = NULL; return ret; } EXPORT_SYMBOL(remove_extent_mapping); diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index a3978ec27846..26ac6fe0b268 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -26,7 +26,6 @@ struct extent_map { struct extent_map_tree { struct rb_root map; - struct extent_map *last; spinlock_t lock; }; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 591a30208acd..e5ffb66ad320 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -381,14 +381,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) break; } if (test_bit(EXTENT_FLAG_PINNED, &em->flags)) { - start = em->start + em->len; - free_extent_map(em); - spin_unlock(&em_tree->lock); - if (start < end) { - len = end - start + 1; - continue; - } - break; + printk(KERN_CRIT "inode %lu trying to drop pinned " + "extent start %llu end %llu, em [%llu %llu]\n", + inode->i_ino, + (unsigned long long)start, + (unsigned long long)end, + (unsigned long long)em->start, + (unsigned long long)em->len); } 
remove_extent_mapping(em_tree, em); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 60852ada658e..3da12a4d913d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -485,7 +485,7 @@ int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) fixup = kzalloc(sizeof(*fixup), GFP_NOFS); if (!fixup) return -EAGAIN; -printk("queueing worker to fixup page %lu %Lu\n", inode->i_ino, page_offset(page)); + SetPageChecked(page); page_cache_get(page); fixup->work.func = btrfs_writepage_fixup_worker; @@ -502,11 +502,13 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct extent_map *em; + struct extent_map *em_orig; u64 alloc_hint = 0; u64 clear_start; u64 clear_end; struct list_head list; struct btrfs_key ins; + struct rb_node *rb; int ret; ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1); @@ -535,6 +537,22 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) mutex_lock(&BTRFS_I(inode)->extent_mutex); + spin_lock(&em_tree->lock); + clear_start = ordered_extent->file_offset; + clear_end = ordered_extent->file_offset + ordered_extent->len; + em = lookup_extent_mapping(em_tree, clear_start, + ordered_extent->len); + em_orig = em; + while(em && clear_start < extent_map_end(em) && clear_end > em->start) { + clear_bit(EXTENT_FLAG_PINNED, &em->flags); + rb = rb_next(&em->rb_node); + if (!rb) + break; + em = rb_entry(rb, struct extent_map, rb_node); + } + free_extent_map(em_orig); + spin_unlock(&em_tree->lock); + ret = btrfs_drop_extents(trans, root, inode, ordered_extent->file_offset, ordered_extent->file_offset + @@ -548,22 +566,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ordered_extent->len, 0); BUG_ON(ret); - spin_lock(&em_tree->lock); - clear_start = ordered_extent->file_offset; - clear_end = ordered_extent->file_offset + 
ordered_extent->len; - while(clear_start < clear_end) { - em = lookup_extent_mapping(em_tree, clear_start, - clear_end - clear_start); - if (em) { - clear_bit(EXTENT_FLAG_PINNED, &em->flags); - clear_start = em->start + em->len; - free_extent_map(em); - } else { - break; - } - } - spin_unlock(&em_tree->lock); - btrfs_drop_extent_cache(inode, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1); @@ -2318,7 +2320,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, u64 extent_end = 0; u64 objectid = inode->i_ino; u32 found_type; - struct btrfs_path *path; + struct btrfs_path *path = NULL; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_file_extent_item *item; struct extent_buffer *leaf; @@ -2328,9 +2330,6 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_trans_handle *trans = NULL; - path = btrfs_alloc_path(); - BUG_ON(!path); - again: spin_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); @@ -2354,6 +2353,12 @@ again: em->bdev = root->fs_info->fs_devices->latest_bdev; em->start = EXTENT_MAP_HOLE; em->len = (u64)-1; + + if (!path) { + path = btrfs_alloc_path(); + BUG_ON(!path); + } + ret = btrfs_lookup_file_extent(trans, root, path, objectid, start, trans != NULL); if (ret < 0) { @@ -2530,7 +2535,8 @@ insert: } spin_unlock(&em_tree->lock); out: - btrfs_free_path(path); + if (path) + btrfs_free_path(path); if (trans) { ret = btrfs_end_transaction(trans, root); if (!err) { @@ -2643,8 +2649,8 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc) return extent_write_full_page(tree, page, btrfs_get_extent, wbc); } -static int btrfs_writepages(struct address_space *mapping, - struct writeback_control *wbc) +int btrfs_writepages(struct address_space *mapping, + struct writeback_control *wbc) { struct extent_io_tree *tree; tree = 
&BTRFS_I(mapping->host)->io_tree; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 0d87795fdd8f..830dbaea6853 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include "ctree.h" #include "transaction.h" #include "btrfs_inode.h" @@ -307,12 +309,7 @@ void btrfs_start_ordered_extent(struct inode *inode, * start IO on any dirty ones so the wait doesn't stall waiting * for pdflush to find them */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) - do_sync_file_range(file, start, end, SYNC_FILE_RANGE_WRITE); -#else - do_sync_mapping_range(inode->i_mapping, start, end, - SYNC_FILE_RANGE_WRITE); -#endif + btrfs_fdatawrite_range(inode->i_mapping, start, end, WB_SYNC_NONE); if (wait) wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, &entry->flags)); @@ -327,28 +324,26 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) u64 orig_end; u64 wait_end; struct btrfs_ordered_extent *ordered; - u64 mask = BTRFS_I(inode)->root->sectorsize - 1; if (start + len < start) { - wait_end = (inode->i_size + mask) & ~mask; - orig_end = (u64)-1; + orig_end = INT_LIMIT(loff_t); } else { orig_end = start + len - 1; - wait_end = orig_end; + if (orig_end > INT_LIMIT(loff_t)) + orig_end = INT_LIMIT(loff_t); } + wait_end = orig_end; again: /* start IO across the range first to instantiate any delalloc * extents */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) - do_sync_file_range(file, start, wait_end, SYNC_FILE_RANGE_WRITE); -#else - do_sync_mapping_range(inode->i_mapping, start, wait_end, - SYNC_FILE_RANGE_WRITE); -#endif - end = orig_end; - wait_on_extent_writeback(&BTRFS_I(inode)->io_tree, start, orig_end); + btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_NONE); + + btrfs_wait_on_page_writeback_range(inode->i_mapping, + start >> PAGE_CACHE_SHIFT, + orig_end >> PAGE_CACHE_SHIFT); + end = orig_end; while(1) { ordered = 
btrfs_lookup_first_ordered_extent(inode, end); if (!ordered) { @@ -565,3 +560,87 @@ out: return ret; } + +/** + * taken from mm/filemap.c because it isn't exported + * + * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range + * @mapping: address space structure to write + * @start: offset in bytes where the range starts + * @end: offset in bytes where the range ends (inclusive) + * @sync_mode: enable synchronous operation + * + * Start writeback against all of a mapping's dirty pages that lie + * within the byte offsets inclusive. + * + * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as + * opposed to a regular memory cleansing writeback. The difference between + * these two operations is that if a dirty page/buffer is encountered, it must + * be waited upon, and not just skipped over. + */ +int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, + loff_t end, int sync_mode) +{ + struct writeback_control wbc = { + .sync_mode = sync_mode, + .nr_to_write = mapping->nrpages * 2, + .range_start = start, + .range_end = end, + .for_writepages = 1, + }; + return btrfs_writepages(mapping, &wbc); +} + +/** + * taken from mm/filemap.c because it isn't exported + * + * wait_on_page_writeback_range - wait for writeback to complete + * @mapping: target address_space + * @start: beginning page index + * @end: ending page index + * + * Wait for writeback to complete against pages indexed by start->end + * inclusive + */ +int btrfs_wait_on_page_writeback_range(struct address_space *mapping, + pgoff_t start, pgoff_t end) +{ + struct pagevec pvec; + int nr_pages; + int ret = 0; + pgoff_t index; + + if (end < start) + return 0; + + pagevec_init(&pvec, 0); + index = start; + while ((index <= end) && + (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_WRITEBACK, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) { + unsigned i; + + for (i = 0; i < nr_pages; i++) { + struct page *page = 
pvec.pages[i]; + + /* until radix tree lookup accepts end_index */ + if (page->index > end) + continue; + + wait_on_page_writeback(page); + if (PageError(page)) + ret = -EIO; + } + pagevec_release(&pvec); + cond_resched(); + } + + /* Check for outstanding write errors */ + if (test_and_clear_bit(AS_ENOSPC, &mapping->flags)) + ret = -ENOSPC; + if (test_and_clear_bit(AS_EIO, &mapping->flags)) + ret = -EIO; + + return ret; +} diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 1794efd13ca3..8e8e3c0404f3 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -132,4 +132,8 @@ btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); int btrfs_ordered_update_i_size(struct inode *inode, struct btrfs_ordered_extent *ordered); int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum); +int btrfs_wait_on_page_writeback_range(struct address_space *mapping, + pgoff_t start, pgoff_t end); +int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, + loff_t end, int sync_mode); #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 05823904ecba..38c75a0256cb 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -649,7 +649,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, extent_io_tree_init(pinned_copy, root->fs_info->btree_inode->i_mapping, GFP_NOFS); -printk("commit trans %Lu\n", trans->transid); trans->transaction->in_commit = 1; trans->transaction->blocked = 1; cur_trans = trans->transaction; @@ -745,7 +744,6 @@ printk("commit trans %Lu\n", trans->transid); list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots); mutex_unlock(&root->fs_info->trans_mutex); -printk("done commit trans %Lu\n", trans->transid); kmem_cache_free(btrfs_trans_handle_cachep, trans); if (root->fs_info->closing) { -- cgit v1.2.3-70-g09d2 From 64f26f745084872b916cd1bef6054e21b15c5784 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 24 Jul 2008 10:09:43 -0400 Subject: 
Btrfs: Use assert_spin_locked instead of spin_trylock On UP systems spin_trylock always succeeds Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 8a502ee2f231..954b047639ab 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -208,7 +208,7 @@ int add_extent_mapping(struct extent_map_tree *tree, struct extent_map *merge = NULL; struct rb_node *rb; - BUG_ON(spin_trylock(&tree->lock)); + assert_spin_locked(&tree->lock); rb = tree_insert(&tree->map, em->start, &em->rb_node); if (rb) { ret = -EEXIST; @@ -270,7 +270,7 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, struct rb_node *next = NULL; u64 end = range_end(start, len); - BUG_ON(spin_trylock(&tree->lock)); + assert_spin_locked(&tree->lock); rb_node = __tree_search(&tree->map, start, &prev, &next); if (!rb_node && prev) { em = rb_entry(prev, struct extent_map, rb_node); @@ -317,7 +317,7 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) int ret = 0; WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags)); - BUG_ON(spin_trylock(&tree->lock)); + assert_spin_locked(&tree->lock); rb_erase(&em->rb_node, &tree->map); em->in_tree = 0; return ret; -- cgit v1.2.3-70-g09d2 From 7c2fe32a238eb12422beca5cbd5194a594baa559 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 20 Aug 2008 08:51:50 -0400 Subject: Btrfs: Fix add_extent_mapping to check for duplicates across the whole range add_extent_mapping was allowing the insertion of overlapping extents. This never used to happen because it only inserted the extents from disk and those were never overlapping. But, with the data=ordered code, the disk and memory representations of the file are not the same. add_extent_mapping needs to ensure a new extent does not overlap before it inserts. 
Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 7 +++++++ fs/btrfs/inode.c | 21 +++++++++++++++++---- 2 files changed, 24 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 954b047639ab..78ced11d18c7 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -207,7 +207,14 @@ int add_extent_mapping(struct extent_map_tree *tree, int ret = 0; struct extent_map *merge = NULL; struct rb_node *rb; + struct extent_map *exist; + exist = lookup_extent_mapping(tree, em->start, em->len); + if (exist) { + free_extent_map(exist); + ret = -EEXIST; + goto out; + } assert_spin_locked(&tree->lock); rb = tree_insert(&tree->map, em->start, &em->rb_node); if (rb) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 65107894a5bb..6c778043207d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -641,8 +641,9 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) if (ret == -ENOENT || ret == -EFBIG) ret = 0; csum = 0; - printk("no csum found for inode %lu start %Lu\n", inode->i_ino, - start); + if (printk_ratelimit()) + printk("no csum found for inode %lu start %Lu\n", inode->i_ino, + start); goto out; } read_extent_buffer(path->nodes[0], &csum, (unsigned long)item, @@ -1653,8 +1654,20 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) btrfs_truncate_page(inode->i_mapping, inode->i_size); hole_size = block_end - hole_start; - btrfs_wait_ordered_range(inode, hole_start, hole_size); - lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); + while(1) { + struct btrfs_ordered_extent *ordered; + btrfs_wait_ordered_range(inode, hole_start, hole_size); + + lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); + ordered = btrfs_lookup_ordered_extent(inode, hole_start); + if (ordered) { + unlock_extent(io_tree, hole_start, + block_end - 1, GFP_NOFS); + btrfs_put_ordered_extent(ordered); + } else { + break; + } + } trans = btrfs_start_transaction(root, 
1); btrfs_set_trans_block_group(trans, inode); -- cgit v1.2.3-70-g09d2 From d352ac68148b69937d39ca5d48bcc4478e118dbf Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 29 Sep 2008 15:18:18 -0400 Subject: Btrfs: add and improve comments This improves the comments at the top of many functions. It didn't dive into the guts of functions because I was trying to avoid merging problems with the new allocator and back reference work. extent-tree.c and volumes.c were both skipped, and there is definitely more work todo in cleaning and commenting the code. Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/TODO | 20 ----- fs/btrfs/async-thread.c | 10 ++- fs/btrfs/async-thread.h | 7 +- fs/btrfs/bit-radix.c | 130 --------------------------------- fs/btrfs/bit-radix.h | 33 --------- fs/btrfs/btrfs_inode.h | 54 +++++++++++++- fs/btrfs/crc32c.h | 18 +++++ fs/btrfs/ctree.c | 127 +++++++++++++++++++++++++++++--- fs/btrfs/ctree.h | 1 - fs/btrfs/dir-item.c | 41 +++++++++++ fs/btrfs/disk-io.c | 33 ++++++++- fs/btrfs/extent_io.c | 34 ++++++++- fs/btrfs/extent_map.c | 10 +++ fs/btrfs/file.c | 44 ++++++++++- fs/btrfs/inode.c | 189 +++++++++++++++++++++++++++++++----------------- fs/btrfs/locking.c | 13 ++++ fs/btrfs/ordered-data.c | 19 ++++- fs/btrfs/ref-cache.c | 26 +++++++ fs/btrfs/ref-cache.h | 3 + fs/btrfs/root-tree.c | 21 +++++- fs/btrfs/struct-funcs.c | 21 ++++++ fs/btrfs/super.c | 3 + fs/btrfs/transaction.c | 67 ++++++++++++++++- fs/btrfs/tree-defrag.c | 4 + 25 files changed, 653 insertions(+), 277 deletions(-) delete mode 100644 fs/btrfs/TODO delete mode 100644 fs/btrfs/bit-radix.c delete mode 100644 fs/btrfs/bit-radix.h (limited to 'fs/btrfs/extent_map.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index d5c28557fba9..48b7909ca8d1 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -4,7 +4,7 @@ ifneq ($(KERNELRELEASE),) obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ file-item.o inode-item.o 
inode-map.o disk-io.o \ - transaction.o bit-radix.o inode.o file.o tree-defrag.o \ + transaction.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ ref-cache.o export.o tree-log.o acl.o free-space-cache.o diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO deleted file mode 100644 index d9b6d38c603a..000000000000 --- a/fs/btrfs/TODO +++ /dev/null @@ -1,20 +0,0 @@ -* cleanup, add more error checking, get rid of BUG_ONs -* Fix ENOSPC handling -* Make allocator smarter -* add a block group to struct inode -* Do actual block accounting -* Check compat and incompat flags on the inode -* Get rid of struct ctree_path, limiting tree levels held at one time -* Add generation number to key pointer in nodes -* Add generation number to inode -* forbid cross subvolume renames and hardlinks -* Release -* Do real tree locking -* Add extent mirroring (backup copies of blocks) -* Add fancy interface to get access to incremental backups -* Add fancy striped extents to make big reads faster -* Use relocation to try and fix write errors -* Make allocator much smarter -* xattrs (directory streams for regular files) -* Scrub & defrag - diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 4e780b279de6..04fb9702d14c 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -231,17 +231,25 @@ static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers) /* * if we pick a busy task, move the task to the end of the list. - * hopefully this will keep things somewhat evenly balanced + * hopefully this will keep things somewhat evenly balanced. + * Do the move in batches based on the sequence number. This groups + * requests submitted at roughly the same time onto the same worker. 
*/ next = workers->worker_list.next; worker = list_entry(next, struct btrfs_worker_thread, worker_list); atomic_inc(&worker->num_pending); worker->sequence++; + if (worker->sequence % workers->idle_thresh == 0) list_move_tail(next, &workers->worker_list); return worker; } +/* + * selects a worker thread to take the next job. This will either find + * an idle worker, start a new worker up to the max count, or just return + * one of the existing busy workers. + */ static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers) { struct btrfs_worker_thread *worker; diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 43e44d115dd1..4ec9a2ee0f9d 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -63,14 +63,17 @@ struct btrfs_workers { /* once a worker has this many requests or fewer, it is idle */ int idle_thresh; - /* list with all the work threads */ + /* list with all the work threads. The workers on the idle thread + * may be actively servicing jobs, but they haven't yet hit the + * idle thresh limit above. + */ struct list_head worker_list; struct list_head idle_list; /* lock for finding the next worker thread to queue on */ spinlock_t lock; - /* extra name for this worker */ + /* extra name for this worker, used for current->name */ char *name; }; diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c deleted file mode 100644 index e8bf876db393..000000000000 --- a/fs/btrfs/bit-radix.c +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
- * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ - -#include "bit-radix.h" - -#define BIT_ARRAY_BYTES 256 -#define BIT_RADIX_BITS_PER_ARRAY ((BIT_ARRAY_BYTES - sizeof(unsigned long)) * 8) - -extern struct kmem_cache *btrfs_bit_radix_cachep; -int set_radix_bit(struct radix_tree_root *radix, unsigned long bit) -{ - unsigned long *bits; - unsigned long slot; - int bit_slot; - int ret; - - slot = bit / BIT_RADIX_BITS_PER_ARRAY; - bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY; - - bits = radix_tree_lookup(radix, slot); - if (!bits) { - bits = kmem_cache_alloc(btrfs_bit_radix_cachep, GFP_NOFS); - if (!bits) - return -ENOMEM; - memset(bits + 1, 0, BIT_ARRAY_BYTES - sizeof(unsigned long)); - bits[0] = slot; - ret = radix_tree_insert(radix, slot, bits); - if (ret) - return ret; - } - ret = test_and_set_bit(bit_slot, bits + 1); - if (ret < 0) - ret = 1; - return ret; -} - -int test_radix_bit(struct radix_tree_root *radix, unsigned long bit) -{ - unsigned long *bits; - unsigned long slot; - int bit_slot; - - slot = bit / BIT_RADIX_BITS_PER_ARRAY; - bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY; - - bits = radix_tree_lookup(radix, slot); - if (!bits) - return 0; - return test_bit(bit_slot, bits + 1); -} - -int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit) -{ - unsigned long *bits; - unsigned long slot; - int bit_slot; - int i; - int empty = 1; - - slot = bit / BIT_RADIX_BITS_PER_ARRAY; - bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY; - - bits = radix_tree_lookup(radix, slot); - if (!bits) - return 0; - clear_bit(bit_slot, bits + 1); - for (i = 1; i < BIT_ARRAY_BYTES / sizeof(unsigned long); i++) { - if (bits[i]) { - empty = 0; - break; - } - } - if (empty) { - bits = radix_tree_delete(radix, slot); - BUG_ON(!bits); - kmem_cache_free(btrfs_bit_radix_cachep, bits); - } - return 0; -} - -int 
find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, - unsigned long start, int nr) -{ - unsigned long *bits; - unsigned long *gang[4]; - int found; - int ret; - int i; - int total_found = 0; - unsigned long slot; - - slot = start / BIT_RADIX_BITS_PER_ARRAY; - ret = radix_tree_gang_lookup(radix, (void **)gang, slot, - ARRAY_SIZE(gang)); - found = start % BIT_RADIX_BITS_PER_ARRAY; - for (i = 0; i < ret && nr > 0; i++) { - bits = gang[i]; - while(nr > 0) { - found = find_next_bit(bits + 1, - BIT_RADIX_BITS_PER_ARRAY, - found); - if (found < BIT_RADIX_BITS_PER_ARRAY) { - *retbits = bits[0] * - BIT_RADIX_BITS_PER_ARRAY + found; - retbits++; - nr--; - total_found++; - found++; - } else - break; - } - found = 0; - } - return total_found; -} diff --git a/fs/btrfs/bit-radix.h b/fs/btrfs/bit-radix.h deleted file mode 100644 index c100f54d5c32..000000000000 --- a/fs/btrfs/bit-radix.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. 
- */ - -#ifndef __BIT_RADIX__ -#define __BIT_RADIX__ -#include - -int set_radix_bit(struct radix_tree_root *radix, unsigned long bit); -int test_radix_bit(struct radix_tree_root *radix, unsigned long bit); -int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit); -int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, - unsigned long start, int nr); - -static inline void init_bit_radix(struct radix_tree_root *radix) -{ - INIT_RADIX_TREE(radix, GFP_NOFS); -} -#endif diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 0577fda2168a..0b2e623cf421 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -25,27 +25,58 @@ /* in memory btrfs inode */ struct btrfs_inode { + /* which subvolume this inode belongs to */ struct btrfs_root *root; + + /* the block group preferred for allocations. This pointer is buggy + * and needs to be replaced with a bytenr instead + */ struct btrfs_block_group_cache *block_group; + + /* key used to find this inode on disk. 
This is used by the code + * to read in roots of subvolumes + */ struct btrfs_key location; + + /* the extent_tree has caches of all the extent mappings to disk */ struct extent_map_tree extent_tree; + + /* the io_tree does range state (DIRTY, LOCKED etc) */ struct extent_io_tree io_tree; + + /* special utility tree used to record which mirrors have already been + * tried when checksums fail for a given block + */ struct extent_io_tree io_failure_tree; + + /* held while inserting checksums to avoid races */ struct mutex csum_mutex; + + /* held while inesrting or deleting extents from files */ struct mutex extent_mutex; + + /* held while logging the inode in tree-log.c */ struct mutex log_mutex; - struct inode vfs_inode; + + /* used to order data wrt metadata */ struct btrfs_ordered_inode_tree ordered_tree; + /* standard acl pointers */ struct posix_acl *i_acl; struct posix_acl *i_default_acl; /* for keeping track of orphaned inodes */ struct list_head i_orphan; + /* list of all the delalloc inodes in the FS. There are times we need + * to write all the delalloc pages to disk, and this list is used + * to walk them all. + */ struct list_head delalloc_inodes; - /* full 64 bit generation number */ + /* full 64 bit generation number, struct vfs_inode doesn't have a big + * enough field for this. + */ u64 generation; /* @@ -57,10 +88,25 @@ struct btrfs_inode { */ u64 logged_trans; - /* trans that last made a change that should be fully fsync'd */ + /* + * trans that last made a change that should be fully fsync'd. This + * gets reset to zero each time the inode is logged + */ u64 log_dirty_trans; + + /* total number of bytes pending delalloc, used by stat to calc the + * real block usage of the file + */ u64 delalloc_bytes; + + /* + * the size of the file stored in the metadata on disk. data=ordered + * means the in-memory i_size might be larger than the size on disk + * because not all the blocks are written yet. 
+ */ u64 disk_i_size; + + /* flags field from the on disk inode */ u32 flags; /* @@ -68,6 +114,8 @@ struct btrfs_inode { * number for new files that are created */ u64 index_cnt; + + struct inode vfs_inode; }; static inline struct btrfs_inode *BTRFS_I(struct inode *inode) diff --git a/fs/btrfs/crc32c.h b/fs/btrfs/crc32c.h index 4f0fefed132a..1eaf11d334fd 100644 --- a/fs/btrfs/crc32c.h +++ b/fs/btrfs/crc32c.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __BTRFS_CRC32C__ #define __BTRFS_CRC32C__ #include diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 50e81f43e6d4..ff3261ff2e19 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007 Oracle. All rights reserved. + * Copyright (C) 2007,2008 Oracle. All rights reserved. 
* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public @@ -54,12 +54,19 @@ struct btrfs_path *btrfs_alloc_path(void) return path; } +/* this also releases the path */ void btrfs_free_path(struct btrfs_path *p) { btrfs_release_path(NULL, p); kmem_cache_free(btrfs_path_cachep, p); } +/* + * path release drops references on the extent buffers in the path + * and it drops any locks held by this path + * + * It is safe to call this on paths that no locks or extent buffers held. + */ void noinline btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) { int i; @@ -77,6 +84,16 @@ void noinline btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) } } +/* + * safely gets a reference on the root node of a tree. A lock + * is not taken, so a concurrent writer may put a different node + * at the root of the tree. See btrfs_lock_root_node for the + * looping required. + * + * The extent buffer returned by this has a reference taken, so + * it won't disappear. It may stop being the root of the tree + * at any time because there are no locks held. + */ struct extent_buffer *btrfs_root_node(struct btrfs_root *root) { struct extent_buffer *eb; @@ -87,6 +104,10 @@ struct extent_buffer *btrfs_root_node(struct btrfs_root *root) return eb; } +/* loop around taking references on and locking the root node of the + * tree until you end up with a lock on the root. A locked buffer + * is returned, with a reference held. + */ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root) { struct extent_buffer *eb; @@ -108,6 +129,10 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root) return eb; } +/* cowonly root (everything not a reference counted cow subvolume), just get + * put onto a simple dirty list. transaction.c walks this to make sure they + * get properly updated on disk. 
+ */ static void add_root_to_dirty_list(struct btrfs_root *root) { if (root->track_dirty && list_empty(&root->dirty_list)) { @@ -116,6 +141,11 @@ static void add_root_to_dirty_list(struct btrfs_root *root) } } +/* + * used by snapshot creation to make a copy of a root for a tree with + * a given objectid. The buffer with the new root node is returned in + * cow_ret, and this func returns zero on success or a negative error code. + */ int btrfs_copy_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, @@ -167,6 +197,22 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, return 0; } +/* + * does the dirty work in cow of a single block. The parent block + * (if supplied) is updated to point to the new cow copy. The new + * buffer is marked dirty and returned locked. If you modify the block + * it needs to be marked dirty again. + * + * search_start -- an allocation hint for the new block + * + * empty_size -- a hint that you plan on doing more cow. This is the size in bytes + * the allocator should try to find free next to the block it returns. This is + * just a hint and may be ignored by the allocator. + * + * prealloc_dest -- if you have already reserved a destination for the cow, + * this uses that block instead of allocating a new one. btrfs_alloc_reserved_extent + * is used to finish the allocation. + */ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, @@ -311,6 +357,11 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, return 0; } +/* + * cows a single block, see __btrfs_cow_block for the real work. 
+ * This version of it has extra checks so that a block isn't cow'd more than + * once per transaction, as long as it hasn't been written yet + */ int noinline btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, @@ -347,6 +398,10 @@ int noinline btrfs_cow_block(struct btrfs_trans_handle *trans, return ret; } +/* + * helper function for defrag to decide if two blocks pointed to by a + * node are actually close by + */ static int close_blocks(u64 blocknr, u64 other, u32 blocksize) { if (blocknr < other && other - (blocknr + blocksize) < 32768) @@ -381,6 +436,11 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) } +/* + * this is used by the defrag code to go through all the + * leaves pointed to by a node and reallocate them so that + * disk order is close to key order + */ int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *parent, int start_slot, int cache_only, u64 *last_ret, @@ -521,6 +581,10 @@ static inline unsigned int leaf_data_end(struct btrfs_root *root, return btrfs_item_offset_nr(leaf, nr - 1); } +/* + * extra debugging checks to make sure all the items in a key are + * well formed and in the proper order + */ static int check_node(struct btrfs_root *root, struct btrfs_path *path, int level) { @@ -561,6 +625,10 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path, return 0; } +/* + * extra checking to make sure all the items in a leaf are + * well formed and in the proper order + */ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, int level) { @@ -782,6 +850,10 @@ static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, return -1; } +/* given a node and slot number, this reads the blocks it points to. The + * extent buffer is returned with a reference taken (but unlocked). + * NULL is returned on error. 
+ */ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root, struct extent_buffer *parent, int slot) { @@ -798,6 +870,11 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root, btrfs_node_ptr_generation(parent, slot)); } +/* + * node level balancing, used to make sure nodes are in proper order for + * item deletion. We balance from the top down, so we have to make sure + * that a deletion won't leave a node completely empty later on. + */ static noinline int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) @@ -1024,7 +1101,10 @@ enospc: return ret; } -/* returns zero if the push worked, non-zero otherwise */ +/* Node balancing for insertion. Here we only split or push nodes around + * when they are completely full. This is also done top down, so we + * have to be pessimistic. + */ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) @@ -1150,7 +1230,8 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, } /* - * readahead one full node of leaves + * readahead one full node of leaves, finding things that are close + * to the block in 'slot', and triggering ra on them. */ static noinline void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, @@ -1226,6 +1307,19 @@ static noinline void reada_for_search(struct btrfs_root *root, } } +/* + * when we walk down the tree, it is usually safe to unlock the higher layers in + * the tree. The exceptions are when our path goes through slot 0, because operations + * on the tree might require changing key pointers higher up in the tree. + * + * callers might also have set path->keep_locks, which tells this code to + * keep the lock if the path points to the last slot in the block. This is + * part of walking through the tree, and selecting the next slot in the higher + * block.
+ * + * lowest_unlock sets the lowest level in the tree we're allowed to unlock. + * so if lowest_unlock is 1, level 0 won't be unlocked + */ static noinline void unlock_up(struct btrfs_path *path, int level, int lowest_unlock) { @@ -2705,6 +2799,12 @@ again: return ret; } +/* + * make the item pointed to by the path smaller. new_size indicates + * how small to make it, and from_end tells us if we just chop bytes + * off the end of the item or if we shift the item to chop bytes off + * the front. + */ int btrfs_truncate_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, @@ -2818,6 +2918,9 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, return ret; } +/* + * make the item pointed to by the path bigger, data_size is the new size. + */ int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u32 data_size) @@ -2897,7 +3000,7 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, } /* - * Given a key and some data, insert an item into the tree. + * Given a key and some data, insert items into the tree. * This does all the path init required, making room in the tree if needed. */ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, @@ -3046,9 +3149,8 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root /* * delete the pointer from a given node. * - * If the delete empties a node, the node is removed from the tree, - * continuing all the way the root if required. The root is converted into - * a leaf if all the nodes are emptied. + * the tree should have been previously balanced so the deletion does not + * empty a node. 
*/ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot) @@ -3233,6 +3335,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, * search the tree again to find a leaf with lesser keys * returns 0 if it found something or 1 if there are no lesser leaves. * returns < 0 on io errors. + * + * This may release the path, and so you may lose any locks held at the + * time you call it. */ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) { @@ -3265,9 +3370,7 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) /* * A helper function to walk down the tree starting at min_key, and looking * for nodes or leaves that are either in cache or have a minimum - * transaction id. This is used by the btree defrag code, but could - * also be used to search for blocks that have changed since a given - * transaction id. + * transaction id. This is used by the btree defrag code, and tree logging * * This does not cow, but it does stuff the starting key it finds back * into min_key, so you can call btrfs_search_slot with cow=1 on the @@ -3279,6 +3382,10 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) * This honors path->lowest_level to prevent descent past a given level * of the tree. * + * min_trans indicates the oldest transaction that you are interested + * in walking through. Any nodes or leaves older than min_trans are + * skipped over (without reading them). + * * returns zero if something useful was found, < 0 on error and 1 if there * was nothing in the tree that matched the search criteria. 
*/ diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0079b60b18f3..ded1643c0273 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -27,7 +27,6 @@ #include #include #include -#include "bit-radix.h" #include "extent_io.h" #include "extent_map.h" #include "async-thread.h" diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index e4f30090d640..5040b71f1900 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -21,6 +21,14 @@ #include "hash.h" #include "transaction.h" +/* + * insert a name into a directory, doing overflow properly if there is a hash + * collision. data_size indicates how big the item inserted should be. On + * success a struct btrfs_dir_item pointer is returned, otherwise it is + * an ERR_PTR. + * + * The name is not copied into the dir item, you have to do that yourself. + */ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -55,6 +63,10 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle return (struct btrfs_dir_item *)ptr; } +/* + * xattrs work a lot like directories, this inserts an xattr item + * into the tree + */ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, u16 name_len, const void *data, u16 data_len, @@ -109,6 +121,13 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, return ret; } +/* + * insert a directory item in the tree, doing all the magic for + * both indexes. 'dir' indicates which objectid to insert it into, + * 'location' is the key to stuff into the directory item, 'type' is the + * type of the inode we're pointing to, and 'index' is the sequence number + * to use for the second index (if one is created). 
+ */ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, u64 dir, struct btrfs_key *location, u8 type, u64 index) @@ -184,6 +203,11 @@ out: return 0; } +/* + * lookup a directory item based on name. 'dir' is the objectid + * we're searching in, and 'mod' tells us if you plan on deleting the + * item (use mod < 0) or changing the options (use mod > 0) + */ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 dir, @@ -222,6 +246,14 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, return btrfs_match_dir_item_name(root, path, name, name_len); } +/* + * lookup a directory item based on index. 'dir' is the objectid + * we're searching in, and 'mod' tells us if you plan on deleting the + * item (use mod < 0) or changing the options (use mod > 0) + * + * The name is used to make sure the index really points to the name you were + * looking for. + */ struct btrfs_dir_item * btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -282,6 +314,11 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, return btrfs_match_dir_item_name(root, path, name, name_len); } +/* + * helper function to look at the directory item pointed to by 'path' + * this walks through all the entries in a dir item and finds one + * for a specific name. + */ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, const char *name, int name_len) @@ -313,6 +350,10 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, return NULL; } +/* + * given a pointer into a directory item, delete it. This + * handles items that have more than one entry in them. 
+ */ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 45b4f7285275..5ee10d3136f5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -55,6 +55,11 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) static struct extent_io_ops btree_extent_io_ops; static void end_workqueue_fn(struct btrfs_work *work); +/* + * end_io_wq structs are used to do processing in task context when an IO is + * complete. This is used during reads to verify checksums, and it is used + * by writes to insert metadata for new file extents after IO is complete. + */ struct end_io_wq { struct bio *bio; bio_end_io_t *end_io; @@ -66,6 +71,11 @@ struct end_io_wq { struct btrfs_work work; }; +/* + * async submit bios are used to offload expensive checksumming + * onto the worker threads. They checksum file and metadata bios + * just before they are sent down the IO stack. + */ struct async_submit_bio { struct inode *inode; struct bio *bio; @@ -76,6 +86,10 @@ struct async_submit_bio { struct btrfs_work work; }; +/* + * extents on the btree inode are pretty simple, there's one extent + * that covers the entire device + */ struct extent_map *btree_get_extent(struct inode *inode, struct page *page, size_t page_offset, u64 start, u64 len, int create) @@ -151,6 +165,10 @@ void btrfs_csum_final(u32 crc, char *result) *(__le32 *)result = ~cpu_to_le32(crc); } +/* + * compute the csum for a btree block, and either verify it or write it + * into the csum field of the block. + */ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, int verify) { @@ -204,6 +222,12 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, return 0; } +/* + * we can't consider a given block up to date unless the transid of the + * block matches the transid in the parent node's pointer. 
This is how we + * detect blocks that either didn't get written at all or got written + * in the wrong place. + */ static int verify_parent_transid(struct extent_io_tree *io_tree, struct extent_buffer *eb, u64 parent_transid) { @@ -228,9 +252,12 @@ out: unlock_extent(io_tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); return ret; - } +/* + * helper to read a given tree block, doing retries as required when + * the checksums don't match and we have alternate mirrors to try. + */ static int btree_read_extent_buffer_pages(struct btrfs_root *root, struct extent_buffer *eb, u64 start, u64 parent_transid) @@ -260,6 +287,10 @@ printk("read extent buffer pages failed with ret %d mirror no %d\n", ret, mirror return -EIO; } +/* + * checksum a dirty tree block before IO. This has extra checks to make + * sure we only fill in the checksum field in the first page of a multi-page block + */ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) { struct extent_io_tree *tree; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8bd1b402f3fd..563b2d12f4f2 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -914,6 +914,10 @@ int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) } EXPORT_SYMBOL(wait_on_extent_writeback); +/* + * either insert or lock state struct between start and end. Use mask to tell + * us if waiting is desired. + */ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { int err; @@ -982,6 +986,13 @@ int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) } EXPORT_SYMBOL(set_range_writeback); +/* + * find the first offset in the io tree with 'bits' set. zero is + * returned if we find something, and *start_ret and *end_ret are + * set to reflect the state struct that was found.
+ * + * If nothing was found, 1 is returned, < 0 on error + */ int find_first_extent_bit(struct extent_io_tree *tree, u64 start, u64 *start_ret, u64 *end_ret, int bits) { @@ -1017,6 +1028,10 @@ out: } EXPORT_SYMBOL(find_first_extent_bit); +/* find the first state struct with 'bits' set after 'start', and + * return it. tree->lock must be held. NULL will be returned if + * nothing was found after 'start' + */ struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, u64 start, int bits) { @@ -1046,8 +1061,14 @@ out: } EXPORT_SYMBOL(find_first_extent_bit_state); -u64 find_lock_delalloc_range(struct extent_io_tree *tree, - u64 *start, u64 *end, u64 max_bytes) +/* + * find a contiguous range of bytes in the file marked as delalloc, not + * more than 'max_bytes'. start and end are used to return the range. + * + * 1 is returned if we find something, 0 if nothing was in the tree + */ +static noinline u64 find_lock_delalloc_range(struct extent_io_tree *tree, + u64 *start, u64 *end, u64 max_bytes) { struct rb_node *node; struct extent_state *state; @@ -1130,6 +1151,11 @@ out: return found; } +/* + * count the number of bytes in the tree that have a given bit(s) + * set. This can be fairly slow, except for EXTENT_DIRTY which is + * cached. The total number found is returned. + */ u64 count_range_bits(struct extent_io_tree *tree, u64 *start, u64 search_end, u64 max_bytes, unsigned long bits) @@ -1245,6 +1271,10 @@ int unlock_range(struct extent_io_tree *tree, u64 start, u64 end) } EXPORT_SYMBOL(unlock_range); +/* + * set the private field for a given byte offset in the tree. If there isn't + * an extent_state there already, this does nothing.
+ */ int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) { struct rb_node *node; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 78ced11d18c7..74b2a29880d3 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -114,6 +114,10 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset, return NULL; } +/* + * search through the tree for an extent_map with a given offset. If + * it can't be found, try to find some neighboring extents + */ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, struct rb_node **prev_ret, struct rb_node **next_ret) @@ -160,6 +164,10 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, return NULL; } +/* + * look for an offset in the tree, and if it can't be found, return + * the first offset we can find smaller than 'offset'. + */ static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) { struct rb_node *prev; @@ -170,6 +178,7 @@ static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) return ret; } +/* check to see if two extent_map structs are adjacent and safe to merge */ static int mergable_maps(struct extent_map *prev, struct extent_map *next) { if (test_bit(EXTENT_FLAG_PINNED, &prev->flags)) @@ -250,6 +259,7 @@ out: } EXPORT_SYMBOL(add_extent_mapping); +/* simple helper to do math around the end of an extent, handling wrap */ static u64 range_end(u64 start, u64 len) { if (start + len < start) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1b7e51a9db0f..3088a1184483 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -41,6 +41,9 @@ #include "compat.h" +/* simple helper to fault in pages and copy. This should go away + * and be replaced with calls into generic code. + */ static int noinline btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes, struct page **prepared_pages, @@ -72,12 +75,19 @@ static int noinline btrfs_copy_from_user(loff_t pos, int num_pages, return page_fault ? 
-EFAULT : 0; } +/* + * unlocks pages after btrfs_file_write is done with them + */ static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages) { size_t i; for (i = 0; i < num_pages; i++) { if (!pages[i]) break; + /* page checked is some magic around finding pages that + * have been modified without going through btrfs_set_page_dirty + * clear it here + */ ClearPageChecked(pages[i]); unlock_page(pages[i]); mark_page_accessed(pages[i]); @@ -85,6 +95,10 @@ static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages) } } +/* this does all the hard work for inserting an inline extent into + * the btree. Any existing inline extent is extended as required to make room, + * otherwise things are inserted as required into the btree + */ static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 offset, size_t size, @@ -228,6 +242,14 @@ fail: return err; } +/* + * after copy_from_user, pages need to be dirtied and we need to make + * sure holes are created between the current EOF and the start of + * any next extents (if required). + * + * this also makes the decision about creating an inline extent vs + * doing real data extents, marking pages dirty and delalloc as required. + */ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct file *file, @@ -362,6 +384,10 @@ out_unlock: return err; } +/* + * this drops all the extents in the cache that intersect the range + * [start, end]. Existing extents are split as required. + */ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, int skip_pinned) { @@ -536,6 +562,9 @@ out: * If an extent intersects the range but is not entirely inside the range * it is either truncated or split. Anything entirely inside the range * is deleted from the tree. + * + * inline_limit is used to tell this code which offsets in the file to keep + * if they contain inline extents. 
*/ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, @@ -796,7 +825,9 @@ out: } /* - * this gets pages into the page cache and locks them down + * this gets pages into the page cache and locks them down, it also properly + * waits for data=ordered extents to finish before allowing the pages to be + * modified. */ static int noinline prepare_pages(struct btrfs_root *root, struct file *file, struct page **pages, size_t num_pages, @@ -1034,6 +1065,17 @@ int btrfs_release_file(struct inode * inode, struct file * filp) return 0; } +/* + * fsync call for both files and directories. This logs the inode into + * the tree log instead of forcing full commits whenever possible. + * + * It needs to call filemap_fdatawait so that all ordered extent updates are + * in the metadata btree are up to date for copying to the log. + * + * It drops the inode mutex before doing the tree log commit. This is an + * important optimization for directories because holding the mutex prevents + * new operations on the dir while we write to disk. + */ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 404704d26822..f3abecc2d14c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -83,6 +83,10 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { static void btrfs_truncate(struct inode *inode); +/* + * a very lame attempt at stopping writes when the FS is 85% full. There + * are countless ways this is incorrect, but it is better than nothing. + */ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, int for_del) { @@ -108,6 +112,12 @@ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, return ret; } +/* + * when extent_io.c finds a delayed allocation range in the file, + * the call backs end up in this code. 
The basic idea is to + * allocate extents on disk for the range, and create ordered data structs + * in ram to track those extents. + */ static int cow_file_range(struct inode *inode, u64 start, u64 end) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -185,6 +195,13 @@ out: return ret; } +/* + * the nocow writeback call back. This checks for snapshots or COW copies + * of the extents that exist in the file, and COWs the file as required. + * + * If no cow copies or snapshots exist, we write directly to the existing + * blocks on disk + */ static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end) { u64 extent_start; @@ -291,6 +308,9 @@ out: return err; } +/* + * extent_io.c call back to do delayed allocation processing + */ static int run_delalloc_range(struct inode *inode, u64 start, u64 end) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -305,6 +325,11 @@ static int run_delalloc_range(struct inode *inode, u64 start, u64 end) return ret; } +/* + * extent_io.c set_bit_hook, used to track delayed allocation + * bytes in this file, and to maintain the list of inodes that + * have pending delalloc work to be done.
+ */ int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, unsigned long old, unsigned long bits) { @@ -323,6 +348,9 @@ int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, return 0; } +/* + * extent_io.c clear_bit_hook, see set_bit_hook for why + */ int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, unsigned long old, unsigned long bits) { @@ -349,6 +377,10 @@ int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, return 0; } +/* + * extent_io.c merge_bio_hook, this must check the chunk tree to make sure + * we don't create bios that span stripes or chunks + */ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, size_t size, struct bio *bio) { @@ -371,6 +403,14 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, return 0; } +/* + * in order to insert checksums into the metadata in large chunks, + * we wait until bio submission time. All the pages in the bio are + * checksummed and sums are attached onto the ordered extent record. + * + * At IO completion time the csums attached on the ordered extent record + * are inserted into the btree + */ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, int mirror_num) { @@ -383,6 +423,10 @@ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, return btrfs_map_bio(root, rw, bio, mirror_num, 1); } +/* + * extent_io.c submission hook. This does the right thing for csum calculation on write, + * or reading the csums from the tree before a read + */ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, int mirror_num) { @@ -408,6 +452,10 @@ mapit: return btrfs_map_bio(root, rw, bio, mirror_num, 0); } +/* + * given a list of ordered sums record them in the inode. This happens + * at IO completion time based on sums calculated at bio submission time.
+ */ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, struct inode *inode, u64 file_offset, struct list_head *list) @@ -430,12 +478,12 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end) GFP_NOFS); } +/* see btrfs_writepage_start_hook for details on why this is required */ struct btrfs_writepage_fixup { struct page *page; struct btrfs_work work; }; -/* see btrfs_writepage_start_hook for details on why this is required */ void btrfs_writepage_fixup_worker(struct btrfs_work *work) { struct btrfs_writepage_fixup *fixup; @@ -522,6 +570,10 @@ int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) return -EAGAIN; } +/* as ordered data IO finishes, this gets called so we can finish + * an ordered extent if the range of bytes in the file it covers are + * fully written. + */ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -631,6 +683,14 @@ int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, return btrfs_finish_ordered_io(page->mapping->host, start, end); } +/* + * When IO fails, either with EIO or csum verification fails, we + * try other mirrors that might have a good copy of the data. This + * io_failure_record is used to record state as we go through all the + * mirrors. If another mirror has good data, the page is set up to date + * and things continue. If a good mirror can't be found, the original + * bio end_io callback is called to indicate things have failed. 
+ */ struct io_failure_record { struct page *page; u64 start; @@ -725,6 +785,10 @@ int btrfs_io_failed_hook(struct bio *failed_bio, return 0; } +/* + * each time an IO finishes, we do a fast check in the IO failure tree + * to see if we need to process or clean up an io_failure_record + */ int btrfs_clean_io_failures(struct inode *inode, u64 start) { u64 private; @@ -753,6 +817,11 @@ int btrfs_clean_io_failures(struct inode *inode, u64 start) return 0; } +/* + * when reads are done, we need to check csums to verify the data is correct + * if there's a match, we allow the bio to finish. If not, we go through + * the io_failure_record routines to find good copies + */ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state) { @@ -990,6 +1059,9 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) btrfs_free_path(path); } +/* + * read an inode from the btree into the in-memory inode + */ void btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path *path; @@ -1083,6 +1155,9 @@ make_bad: make_bad_inode(inode); } +/* + * given a leaf and an inode, copy the inode fields into the leaf + */ static void fill_inode_item(struct btrfs_trans_handle *trans, struct extent_buffer *leaf, struct btrfs_inode_item *item, @@ -1118,6 +1193,9 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, BTRFS_I(inode)->block_group->key.objectid); } +/* + * copy everything in the in-memory inode into the btree. + */ int noinline btrfs_update_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode) @@ -1151,6 +1229,11 @@ failed: } +/* + * unlink helper that gets used here in inode.c and in the tree logging + * recovery code. 
It removes a link in a directory with a given name, and + * also drops the back refs in the inode to the directory + */ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *dir, struct inode *inode, @@ -1309,7 +1392,7 @@ fail: /* * this can truncate away extent items, csum items and directory items. * It starts at a high offset and removes keys until it can't find - * any higher than i_size. + * any higher than new_size * * csum items that cross the new i_size are truncated to the new size * as well. @@ -2123,6 +2206,11 @@ void btrfs_dirty_inode(struct inode *inode) btrfs_end_transaction(trans, root); } +/* + * find the highest existing sequence number in a directory + * and then set the in-memory index_cnt variable to reflect + * free sequence numbers + */ static int btrfs_set_inode_index_count(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -2175,6 +2263,10 @@ out: return ret; } +/* + * helper to find a free sequence number in a given directory. This current + * code is very simple, later versions will do smarter things in the btree + */ static int btrfs_set_inode_index(struct inode *dir, struct inode *inode, u64 *index) { @@ -2305,6 +2397,12 @@ static inline u8 btrfs_inode_type(struct inode *inode) return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT]; } +/* + * utility function to add 'inode' into 'parent_inode' with + * a given name and a given sequence number. + * if 'add_backref' is true, also insert a backref from the + * inode to the parent directory. + */ int btrfs_add_link(struct btrfs_trans_handle *trans, struct inode *parent_inode, struct inode *inode, const char *name, int name_len, int add_backref, u64 index) @@ -2611,6 +2709,10 @@ out_unlock: return err; } +/* helper for btrfs_get_extent. Given an existing extent in the tree, + * and an extent that you want to insert, deal with overlap and insert + * the new extent into the tree.
+ */ static int merge_extent_mapping(struct extent_map_tree *em_tree, struct extent_map *existing, struct extent_map *em, @@ -2627,6 +2729,14 @@ static int merge_extent_mapping(struct extent_map_tree *em_tree, return add_extent_mapping(em_tree, em); } +/* + * a bit scary, this does extent mapping from logical file offset to the disk. + * the ugly parts come from merging extents from the disk with the + * in-ram representation. This gets more complex because of the data=ordered code, + * where the in-ram extents might be locked pending data=ordered completion. + * + * This also copies inline extents directly into the page. + */ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, size_t pg_offset, u64 start, u64 len, int create) @@ -2869,76 +2979,11 @@ out: return em; } -#if 0 /* waiting for O_DIRECT reads */ -static int btrfs_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) -{ - struct extent_map *em; - u64 start = (u64)iblock << inode->i_blkbits; - struct btrfs_multi_bio *multi = NULL; - struct btrfs_root *root = BTRFS_I(inode)->root; - u64 len; - u64 logical; - u64 map_length; - int ret = 0; - - em = btrfs_get_extent(inode, NULL, 0, start, bh_result->b_size, 0); - - if (!em || IS_ERR(em)) - goto out; - - if (em->start > start || em->start + em->len <= start) { - goto out; - } - - if (em->block_start == EXTENT_MAP_INLINE) { - ret = -EINVAL; - goto out; - } - - len = em->start + em->len - start; - len = min_t(u64, len, INT_LIMIT(typeof(bh_result->b_size))); - - if (em->block_start == EXTENT_MAP_HOLE || - em->block_start == EXTENT_MAP_DELALLOC) { - bh_result->b_size = len; - goto out; - } - - logical = start - em->start; - logical = em->block_start + logical; - - map_length = len; - ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, - logical, &map_length, &multi, 0); - BUG_ON(ret); - bh_result->b_blocknr = multi->stripes[0].physical >> inode->i_blkbits; - bh_result->b_size = min(map_length, 
len); - - bh_result->b_bdev = multi->stripes[0].dev->bdev; - set_buffer_mapped(bh_result); - kfree(multi); -out: - free_extent_map(em); - return ret; -} -#endif - static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) { return -EINVAL; -#if 0 - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; - - if (rw == WRITE) - return -EINVAL; - - return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, - offset, nr_segs, btrfs_get_block, NULL); -#endif } static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock) @@ -3202,6 +3247,9 @@ void btrfs_invalidate_dcache_root(struct btrfs_root *root, char *name, } } +/* + * create a new subvolume directory/inode (helper for the ioctl). + */ int btrfs_create_subvol_root(struct btrfs_root *new_root, struct btrfs_trans_handle *trans, u64 new_dirid, struct btrfs_block_group_cache *block_group) @@ -3223,6 +3271,9 @@ int btrfs_create_subvol_root(struct btrfs_root *new_root, return btrfs_update_inode(trans, new_root, inode); } +/* helper function for file defrag and space balancing. This + * forces readahead on a given range of bytes in an inode + */ unsigned long btrfs_force_ra(struct address_space *mapping, struct file_ra_state *ra, struct file *file, pgoff_t offset, pgoff_t last_index) @@ -3424,6 +3475,10 @@ out_unlock: return ret; } +/* + * some fairly slow code that needs optimization. This walks the list + * of all the inodes with pending delalloc and forces them to disk. + */ int btrfs_start_delalloc_inodes(struct btrfs_root *root) { struct list_head *head = &root->fs_info->delalloc_inodes; diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 0cc314c10d66..e30aa6e2958f 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -25,6 +25,15 @@ #include "extent_io.h" #include "locking.h" +/* + * locks the per buffer mutex in an extent buffer. 
This uses adaptive locks + * and the spin is not tuned very extensively. The spinning does make a big + * difference in almost every workload, but spinning for the right amount of + * time needs some help. + * + * In general, we want to spin as long as the lock holder is doing btree searches, + * and we should give up if they are in more expensive code. + */ int btrfs_tree_lock(struct extent_buffer *eb) { int i; @@ -57,6 +66,10 @@ int btrfs_tree_locked(struct extent_buffer *eb) return mutex_is_locked(&eb->mutex); } +/* + * btrfs_search_slot uses this to decide if it should drop its locks + * before doing something expensive like allocating free blocks for cow. + */ int btrfs_path_lock_waiting(struct btrfs_path *path, int level) { int i; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 951eacff2420..dcc1730dd837 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -26,7 +26,6 @@ #include "btrfs_inode.h" #include "extent_io.h" - static u64 entry_end(struct btrfs_ordered_extent *entry) { if (entry->file_offset + entry->len < entry->file_offset) @@ -34,6 +33,9 @@ static u64 entry_end(struct btrfs_ordered_extent *entry) return entry->file_offset + entry->len; } +/* returns NULL if the insertion worked, or it returns the node it did find + * in the tree + */ static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset, struct rb_node *node) { @@ -58,6 +60,10 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset, return NULL; } +/* + * look for a given offset in the tree, and if it can't be found return the + * first lesser offset + */ static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset, struct rb_node **prev_ret) { @@ -108,6 +114,9 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset, return NULL; } +/* + * helper to check if a given offset is inside a given entry + */ static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset) { if 
(file_offset < entry->file_offset || @@ -116,6 +125,10 @@ static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset) return 1; } +/* + * find the first ordered struct that has this offset, otherwise + * the first one less than this offset + */ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, u64 file_offset) { @@ -305,6 +318,10 @@ int btrfs_remove_ordered_extent(struct inode *inode, return 0; } +/* + * wait for all the ordered extents in a root. This is done when balancing + * space between drives. + */ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) { struct list_head splice; diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c index 30fcb7aea5b5..a50ebb67055d 100644 --- a/fs/btrfs/ref-cache.c +++ b/fs/btrfs/ref-cache.c @@ -21,6 +21,16 @@ #include "ref-cache.h" #include "transaction.h" +/* + * leaf refs are used to cache the information about which extents + * a given leaf has references on. This allows us to process that leaf + * in btrfs_drop_snapshot without needing to read it back from disk. + */ + +/* + * kmalloc a leaf reference struct and update the counters for the + * total ref cache size + */ struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root, int nr_extents) { @@ -40,6 +50,10 @@ struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root, return ref; } +/* + * free a leaf reference struct and update the counters for the + * total ref cache size + */ void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) { if (!ref) @@ -135,6 +149,10 @@ int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen, return 0; } +/* + * find the leaf ref for a given extent.
This returns the ref struct with + * a usage reference incremented + */ struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root, u64 bytenr) { @@ -160,6 +178,10 @@ again: return NULL; } +/* + * add a fully filled in leaf ref struct + * remove all the refs older than a given root generation + */ int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref, int shared) { @@ -184,6 +206,10 @@ int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref, return ret; } +/* + * remove a single leaf ref from the tree. This drops the ref held by the tree + * only + */ int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) { struct btrfs_leaf_ref_tree *tree; diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h index 617564787f52..16f3183d7c59 100644 --- a/fs/btrfs/ref-cache.h +++ b/fs/btrfs/ref-cache.h @@ -19,8 +19,11 @@ #define __REFCACHE__ struct btrfs_extent_info { + /* bytenr and num_bytes find the extent in the extent allocation tree */ u64 bytenr; u64 num_bytes; + + /* objectid and offset find the back reference for the file */ u64 objectid; u64 offset; }; diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 0091c01abb06..eb7f7655e9d5 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -22,8 +22,10 @@ #include "print-tree.h" /* - * returns 0 on finding something, 1 if no more roots are there - * and < 0 on error + * search forward for a root, starting with objectid 'search_start' + * if a root key is found, the objectid we find is filled into 'found_objectid' + * and 0 is returned. < 0 is returned on error, 1 if there is nothing + * left in the tree. */ int btrfs_search_root(struct btrfs_root *root, u64 search_start, u64 *found_objectid) @@ -66,6 +68,11 @@ out: return ret; } +/* + * lookup the root with the highest offset for a given objectid. The key we do + * find is copied into 'key'. If we find something return 0, otherwise 1, < 0 + * on error. 
+ */ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct btrfs_root_item *item, struct btrfs_key *key) { @@ -104,6 +111,9 @@ out: return ret; } +/* + * copy the data in 'item' into the btree + */ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_root_item *item) @@ -147,6 +157,12 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } +/* + * at mount time we want to find all the old transaction snapshots that were in + * the process of being deleted if we crashed. This is any root item with an offset + * lower than the latest root. They need to be queued for deletion to finish + * what was happening when we crashed. + */ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, struct btrfs_root *latest) { @@ -227,6 +243,7 @@ err: return ret; } +/* drop the root item for 'key' from 'root' */ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key) { diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c index ad03a32d1116..cdedbe144d45 100644 --- a/fs/btrfs/struct-funcs.c +++ b/fs/btrfs/struct-funcs.c @@ -17,6 +17,27 @@ */ #include + +/* this is some deeply nasty code. ctree.h has a different + * definition for this BTRFS_SETGET_FUNCS macro, behind a #ifndef + * + * The end result is that anyone who #includes ctree.h gets a + * declaration for the btrfs_set_foo functions and btrfs_foo functions + * + * This file declares the macros and then #includes ctree.h, which results + * in cpp creating the function here based on the template below. + * + * These setget functions do all the extent_buffer related mapping + * required to efficiently read and write specific fields in the extent + * buffers. Every pointer to metadata items in btrfs is really just + * an unsigned long offset into the extent buffer which has been + * cast to a specific type. This gives us all the gcc type checking. 
+ * + * The extent buffer api is used to do all the kmapping and page + * spanning work required to get extent buffers in highmem and have + * a metadata blocksize different from the page size. + */ + #define BTRFS_SETGET_FUNCS(name, type, member, bits) \ u##bits btrfs_##name(struct extent_buffer *eb, \ type *s) \ diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8399d6d05d63..2e6039825b7b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -519,6 +519,9 @@ static struct file_system_type btrfs_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; +/* + * used by btrfsctl to scan devices when no FS is mounted + */ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 444abe0796ae..11266d68a6c9 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -46,6 +46,9 @@ static noinline void put_transaction(struct btrfs_transaction *transaction) } } +/* + * either allocate a new transaction or hop into the existing one + */ static noinline int join_transaction(struct btrfs_root *root) { struct btrfs_transaction *cur_trans; @@ -85,6 +88,12 @@ static noinline int join_transaction(struct btrfs_root *root) return 0; } +/* + * this does all the record keeping required to make sure that a + * reference counted root is properly recorded in a given transaction. + * This is required to make sure the old root from before we joined the transaction + * is deleted when the transaction commits + */ noinline int btrfs_record_root_in_trans(struct btrfs_root *root) { struct btrfs_dirty_root *dirty; @@ -127,6 +136,10 @@ noinline int btrfs_record_root_in_trans(struct btrfs_root *root) return 0; } +/* wait for commit against the current transaction to become unblocked + * when this is done, it is safe to start a new transaction, but the current + * transaction might not be fully on disk. 
+ */ static void wait_current_trans(struct btrfs_root *root) { struct btrfs_transaction *cur_trans; @@ -198,7 +211,7 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, return start_transaction(r, num_blocks, 2); } - +/* wait for a transaction commit to be fully complete */ static noinline int wait_for_commit(struct btrfs_root *root, struct btrfs_transaction *commit) { @@ -218,6 +231,10 @@ static noinline int wait_for_commit(struct btrfs_root *root, return 0; } +/* + * rate limit against the drop_snapshot code. This helps to slow down new operations + * if the drop_snapshot code isn't able to keep up. + */ static void throttle_on_drops(struct btrfs_root *root) { struct btrfs_fs_info *info = root->fs_info; @@ -302,7 +319,11 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, return __btrfs_end_transaction(trans, root, 1); } - +/* + * when btree blocks are allocated, they have some corresponding bits set for + * them in one of two extent_io trees. This is used to make sure all of + * those extents are on disk for transaction or log commit + */ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, struct extent_io_tree *dirty_pages) { @@ -393,6 +414,16 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, &trans->transaction->dirty_pages); } +/* + * this is used to update the root pointer in the tree of tree roots. + * + * But, in the case of the extent allocation tree, updating the root + * pointer may allocate blocks which may change the root of the extent + * allocation tree. + * + * So, this loops and repeats and makes sure the cowonly root didn't + * change while the root pointer was being updated in the metadata. 
+ */ static int update_cowonly_root(struct btrfs_trans_handle *trans, struct btrfs_root *root) { @@ -418,6 +449,9 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, return 0; } +/* + * update all the cowonly tree roots on disk + */ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_root *root) { @@ -433,6 +467,11 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, return 0; } +/* + * dead roots are old snapshots that need to be deleted. This allocates + * a dirty root struct and adds it into the list of dead roots that need to + * be deleted + */ int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest) { struct btrfs_dirty_root *dirty; @@ -449,6 +488,12 @@ int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest) return 0; } +/* + * at transaction commit time we need to schedule the old roots for + * deletion via btrfs_drop_snapshot. This runs through all the + * reference counted roots that were modified in the current + * transaction and puts them into the drop list + */ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, struct radix_tree_root *radix, struct list_head *list) @@ -541,6 +586,10 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, return err; } +/* + * defrag a given btree. If cacheonly == 1, this won't read from the disk, + * otherwise every leaf in the btree is read and defragged. 
+ */ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) { struct btrfs_fs_info *info = root->fs_info; @@ -570,6 +619,10 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) return 0; } +/* + * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on + * all of them + */ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, struct list_head *list) { @@ -664,6 +717,10 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, return ret; } +/* + * new snapshots need to be created at a very specific time in the + * transaction commit. This does the actual creation + */ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, struct btrfs_pending_snapshot *pending) @@ -734,6 +791,9 @@ fail: return ret; } +/* + * create all the snapshots we've scheduled for creation + */ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info) { @@ -944,6 +1004,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, return ret; } +/* + * interface function to delete all the snapshots we have scheduled for deletion + */ int btrfs_clean_old_snapshots(struct btrfs_root *root) { struct list_head dirty_roots; diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index b3bb5bbad76e..6f57d0889b1e 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -23,6 +23,10 @@ #include "transaction.h" #include "locking.h" +/* defrag all the leaves in a given btree. If cache_only == 1, don't read things + * from disk, otherwise read all the leaves and try to get key order to + * better reflect disk order + */ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, struct btrfs_root *root, int cache_only) { -- cgit v1.2.3-70-g09d2