Commit afcdaea3 authored by Sage Weil
Browse files

ceph: flush dirty caps via the cap_dirty list



Previously we were flushing dirty caps by passing an extra flag
when traversing the delayed caps list.  Besides being a bit ugly,
that can also miss caps that are dirty but didn't result in a
cap requeue: notably, mark_caps_dirty().

Move the flushing into a separate helper, ceph_flush_dirty_caps(),
which traverses the cap_dirty list.

This also brings i_dirty_item in line with i_dirty_caps: the inode is
on the cap_dirty list IFF i_dirty_caps != 0.  We carry an inode ref IFF
(i_dirty_caps | i_flushing_caps) != 0.

Lose the unused return value from __ceph_mark_dirty_caps().

Signed-off-by: Sage Weil <sage@newdream.net>
parent cdc35f96
Loading
Loading
Loading
Loading
+53 −23
Original line number Original line Diff line number Diff line
@@ -1292,14 +1292,20 @@ static int __mark_caps_flushing(struct inode *inode,
	     ceph_cap_string(ci->i_flushing_caps | flushing));
	     ceph_cap_string(ci->i_flushing_caps | flushing));
	ci->i_flushing_caps |= flushing;
	ci->i_flushing_caps |= flushing;
	ci->i_dirty_caps = 0;
	ci->i_dirty_caps = 0;
	dout(" inode %p now !dirty\n", inode);


	spin_lock(&mdsc->cap_dirty_lock);
	spin_lock(&mdsc->cap_dirty_lock);
	if (list_empty(&ci->i_flushing_item)) {
	list_del_init(&ci->i_dirty_item);
	list_del_init(&ci->i_dirty_item);

	ci->i_cap_flush_seq = ++mdsc->cap_flush_seq;
	if (list_empty(&ci->i_flushing_item)) {
		list_add_tail(&ci->i_flushing_item, &session->s_cap_flushing);
		list_add_tail(&ci->i_flushing_item, &session->s_cap_flushing);
		mdsc->num_cap_flushing++;
		mdsc->num_cap_flushing++;
		ci->i_cap_flush_seq = ++mdsc->cap_flush_seq;
		dout(" inode %p now flushing seq %lld\n", inode,
		dout(" inode %p now flushing seq %lld\n", &ci->vfs_inode,
		     ci->i_cap_flush_seq);
	} else {
		list_move_tail(&ci->i_flushing_item, &session->s_cap_flushing);
		dout(" inode %p now flushing (more) seq %lld\n", inode,
		     ci->i_cap_flush_seq);
		     ci->i_cap_flush_seq);
	}
	}
	spin_unlock(&mdsc->cap_dirty_lock);
	spin_unlock(&mdsc->cap_dirty_lock);
@@ -1555,32 +1561,33 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 * Mark caps dirty.  If inode is newly dirty, add to the global dirty
 * Mark caps dirty.  If inode is newly dirty, add to the global dirty
 * list.
 * list.
 */
 */
int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
{
{
	struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc;
	struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc;
	struct inode *inode = &ci->vfs_inode;
	struct inode *inode = &ci->vfs_inode;
	int was = __ceph_caps_dirty(ci);
	int was_dirty = ci->i_dirty_caps;
	int dirty = 0;
	int dirty = 0;


	dout("__mark_dirty_caps %p %s dirty %s -> %s\n", &ci->vfs_inode,
	dout("__mark_dirty_caps %p %s dirty %s -> %s\n", &ci->vfs_inode,
	     ceph_cap_string(mask), ceph_cap_string(ci->i_dirty_caps),
	     ceph_cap_string(mask), ceph_cap_string(ci->i_dirty_caps),
	     ceph_cap_string(ci->i_dirty_caps | mask));
	     ceph_cap_string(ci->i_dirty_caps | mask));
	ci->i_dirty_caps |= mask;
	ci->i_dirty_caps |= mask;
	if (!was) {
	if (!was_dirty) {
		dout(" inode %p now dirty\n", &ci->vfs_inode);
		dout(" inode %p now dirty\n", &ci->vfs_inode);
		spin_lock(&mdsc->cap_dirty_lock);
		spin_lock(&mdsc->cap_dirty_lock);
		list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
		list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
		spin_unlock(&mdsc->cap_dirty_lock);
		spin_unlock(&mdsc->cap_dirty_lock);
		if (ci->i_flushing_caps == 0) {
			igrab(inode);
			igrab(inode);
			dirty |= I_DIRTY_SYNC;
			dirty |= I_DIRTY_SYNC;
		}
		}
	if ((was & CEPH_CAP_FILE_BUFFER) &&
	}
	if (((was_dirty | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) &&
	    (mask & CEPH_CAP_FILE_BUFFER))
	    (mask & CEPH_CAP_FILE_BUFFER))
		dirty |= I_DIRTY_DATASYNC;
		dirty |= I_DIRTY_DATASYNC;
	if (dirty)
	if (dirty)
		__mark_inode_dirty(inode, dirty);
		__mark_inode_dirty(inode, dirty);
	__cap_delay_requeue(mdsc, ci);
	__cap_delay_requeue(mdsc, ci);
	return was;
}
}


/*
/*
@@ -2327,7 +2334,7 @@ static void handle_cap_flush_ack(struct inode *inode,
	int dirty = le32_to_cpu(m->dirty);
	int dirty = le32_to_cpu(m->dirty);
	int cleaned = 0;
	int cleaned = 0;
	u64 flush_tid = le64_to_cpu(m->client_tid);
	u64 flush_tid = le64_to_cpu(m->client_tid);
	int old_dirty = 0, new_dirty = 0;
	int drop = 0;
	int i;
	int i;


	for (i = 0; i < CEPH_CAP_BITS; i++)
	for (i = 0; i < CEPH_CAP_BITS; i++)
@@ -2344,9 +2351,7 @@ static void handle_cap_flush_ack(struct inode *inode,
	if (ci->i_flushing_caps == (ci->i_flushing_caps & ~cleaned))
	if (ci->i_flushing_caps == (ci->i_flushing_caps & ~cleaned))
		goto out;
		goto out;


	old_dirty = ci->i_dirty_caps | ci->i_flushing_caps;
	ci->i_flushing_caps &= ~cleaned;
	ci->i_flushing_caps &= ~cleaned;
	new_dirty = ci->i_dirty_caps | ci->i_flushing_caps;


	spin_lock(&mdsc->cap_dirty_lock);
	spin_lock(&mdsc->cap_dirty_lock);
	if (ci->i_flushing_caps == 0) {
	if (ci->i_flushing_caps == 0) {
@@ -2360,17 +2365,19 @@ static void handle_cap_flush_ack(struct inode *inode,
		mdsc->num_cap_flushing--;
		mdsc->num_cap_flushing--;
		wake_up(&mdsc->cap_flushing_wq);
		wake_up(&mdsc->cap_flushing_wq);
		dout(" inode %p now !flushing\n", inode);
		dout(" inode %p now !flushing\n", inode);
	}

	if (old_dirty && !new_dirty) {
		if (ci->i_dirty_caps == 0) {
			dout(" inode %p now clean\n", inode);
			dout(" inode %p now clean\n", inode);
		list_del_init(&ci->i_dirty_item);
			BUG_ON(!list_empty(&ci->i_dirty_item));
			drop = 1;
		}
	}
	}
	spin_unlock(&mdsc->cap_dirty_lock);
	spin_unlock(&mdsc->cap_dirty_lock);
	wake_up(&ci->i_cap_wq);
	wake_up(&ci->i_cap_wq);


out:
out:
	spin_unlock(&inode->i_lock);
	spin_unlock(&inode->i_lock);
	if (old_dirty && !new_dirty)
	if (drop)
		iput(inode);
		iput(inode);
}
}


@@ -2676,14 +2683,11 @@ void ceph_handle_caps(struct ceph_mds_session *session,
/*
/*
 * Delayed work handler to process end of delayed cap release LRU list.
 * Delayed work handler to process end of delayed cap release LRU list.
 */
 */
void ceph_check_delayed_caps(struct ceph_mds_client *mdsc, int flushdirty)
void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
{
{
	struct ceph_inode_info *ci;
	struct ceph_inode_info *ci;
	int flags = CHECK_CAPS_NODELAY;
	int flags = CHECK_CAPS_NODELAY;


	if (flushdirty)
		flags |= CHECK_CAPS_FLUSH;

	dout("check_delayed_caps\n");
	dout("check_delayed_caps\n");
	while (1) {
	while (1) {
		spin_lock(&mdsc->cap_delay_lock);
		spin_lock(&mdsc->cap_delay_lock);
@@ -2703,6 +2707,32 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc, int flushdirty)
	spin_unlock(&mdsc->cap_delay_lock);
	spin_unlock(&mdsc->cap_delay_lock);
}
}


/*
 * Flush all dirty caps to the mds.
 *
 * Repeatedly takes the inode at the head of mdsc->cap_dirty and runs
 * ceph_check_caps() on it with CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH and
 * no session, until the list is empty.
 *
 * Locking: cap_dirty_lock is held only while inspecting the list and
 * grabbing the inode reference; it is dropped around ceph_check_caps()
 * and iput(), then retaken before the list is examined again.
 *
 * NOTE(review): nothing in this function removes entries from
 * cap_dirty, so the loop only makes progress if ceph_check_caps()
 * causes the inode to leave the list (presumably via
 * __mark_caps_flushing(), which does list_del_init(&ci->i_dirty_item)
 * -- confirm).  If igrab() returns NULL, or ceph_check_caps() leaves
 * the inode dirty, the same head entry is picked again on the next
 * iteration; verify this cannot spin.
 */
void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
{
	struct ceph_inode_info *ci;
	struct inode *inode;

	dout("flush_dirty_caps\n");
	spin_lock(&mdsc->cap_dirty_lock);
	while (!list_empty(&mdsc->cap_dirty)) {
		/* always work on the current head of the dirty list */
		ci = list_first_entry(&mdsc->cap_dirty,
				      struct ceph_inode_info,
				      i_dirty_item);
		/*
		 * Take our own inode reference while still under the list
		 * lock, so ci can be used after the lock is dropped.
		 */
		inode = igrab(&ci->vfs_inode);
		spin_unlock(&mdsc->cap_dirty_lock);
		if (inode) {
			/* called without cap_dirty_lock held */
			ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH,
					NULL);
			iput(inode);
		}
		/* retake the lock before re-testing list_empty() */
		spin_lock(&mdsc->cap_dirty_lock);
	}
	spin_unlock(&mdsc->cap_dirty_lock);
}

/*
/*
 * Drop open file reference.  If we were the last open file,
 * Drop open file reference.  If we were the last open file,
 * we may need to release capabilities to the MDS (or schedule
 * we may need to release capabilities to the MDS (or schedule
+3 −3
Original line number Original line Diff line number Diff line
@@ -2504,7 +2504,7 @@ static void delayed_work(struct work_struct *work)
	int renew_caps;
	int renew_caps;


	dout("mdsc delayed_work\n");
	dout("mdsc delayed_work\n");
	ceph_check_delayed_caps(mdsc, 0);
	ceph_check_delayed_caps(mdsc);


	mutex_lock(&mdsc->mutex);
	mutex_lock(&mdsc->mutex);
	renew_interval = mdsc->mdsmap->m_session_timeout >> 2;
	renew_interval = mdsc->mdsmap->m_session_timeout >> 2;
@@ -2627,7 +2627,7 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
	mdsc->stopping = 1;
	mdsc->stopping = 1;


	drop_leases(mdsc);
	drop_leases(mdsc);
	ceph_check_delayed_caps(mdsc, 1);
	ceph_flush_dirty_caps(mdsc);
	wait_requests(mdsc);
	wait_requests(mdsc);
}
}


@@ -2677,7 +2677,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
	mutex_unlock(&mdsc->mutex);
	mutex_unlock(&mdsc->mutex);
	dout("sync want tid %lld flush_seq %lld\n", want_tid, want_flush);
	dout("sync want tid %lld flush_seq %lld\n", want_tid, want_flush);


	ceph_check_delayed_caps(mdsc, 1);
	ceph_flush_dirty_caps(mdsc);


	wait_unsafe_requests(mdsc, want_tid);
	wait_unsafe_requests(mdsc, want_tid);
	wait_event(mdsc->cap_flushing_wq, check_cap_flush(mdsc, want_flush));
	wait_event(mdsc->cap_flushing_wq, check_cap_flush(mdsc, want_flush));
+3 −3
Original line number Original line Diff line number Diff line
@@ -524,7 +524,7 @@ static inline int __ceph_caps_dirty(struct ceph_inode_info *ci)
{
{
	return ci->i_dirty_caps | ci->i_flushing_caps;
	return ci->i_dirty_caps | ci->i_flushing_caps;
}
}
extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask);
extern void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask);


extern int ceph_caps_revoking(struct ceph_inode_info *ci, int mask);
extern int ceph_caps_revoking(struct ceph_inode_info *ci, int mask);
extern int __ceph_caps_used(struct ceph_inode_info *ci);
extern int __ceph_caps_used(struct ceph_inode_info *ci);
@@ -814,8 +814,8 @@ extern void __ceph_flush_snaps(struct ceph_inode_info *ci,
			       struct ceph_mds_session **psession);
			       struct ceph_mds_session **psession);
extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
			    struct ceph_mds_session *session);
			    struct ceph_mds_session *session);
extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc,
extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
				    int flushdirty);
extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc);


extern int ceph_encode_inode_release(void **p, struct inode *inode,
extern int ceph_encode_inode_release(void **p, struct inode *inode,
				     int mds, int drop, int unless, int force);
				     int mds, int drop, int unless, int force);