Commit 46ff82f9 authored by Jinshan Xiong, committed by Greg Kroah-Hartman
Browse files

staging: lustre: ldlm: handle ldlm lock cancel race when evicting client.



An ldlm lock could be canceled simultaneously by the ldlm bl thread and
cleanup_resource(). In this case, only one side will win the race
and the other side should wait for the work to complete. Eviction
on group lock is now well supported.

Signed-off-by: Jinshan Xiong <jinshan.xiong@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-6271
Reviewed-on: http://review.whamcloud.com/16456


Reviewed-by: Bobi Jam <bobijam@hotmail.com>
Reviewed-by: John L. Hammond <john.hammond@intel.com>
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 5fd8f8b8
Loading
Loading
Loading
Loading
+6 −1
Original line number Diff line number Diff line
@@ -1639,10 +1639,15 @@ enum cl_enq_flags {
	 * enqueue a lock to test DLM lock existence.
	 */
	CEF_PEEK	= 0x00000040,
	/**
	 * Lock match only. Used by group lock in I/O as group lock
	 * is known to exist.
	 */
	CEF_LOCK_MATCH	= BIT(7),
	/**
	 * mask of enq_flags.
	 */
	CEF_MASK         = 0x0000007f,
	CEF_MASK	= 0x000000ff,
};

/**
+3 −0
Original line number Diff line number Diff line
@@ -121,6 +121,9 @@
#define ldlm_set_test_lock(_l)          LDLM_SET_FLAG((_l), 1ULL << 19)
#define ldlm_clear_test_lock(_l)        LDLM_CLEAR_FLAG((_l), 1ULL << 19)

/** match lock only */
#define LDLM_FL_MATCH_LOCK		0x0000000000100000ULL /* bit  20 */

/**
 * Immediately cancel such locks when they block some other locks. Send
 * cancel notification to original lock holder, but expect no reply. This
+35 −11
Original line number Diff line number Diff line
@@ -771,19 +771,11 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, enum ldlm_mode mode)

	ldlm_lock_decref_internal_nolock(lock, mode);

	if (ldlm_is_local(lock) &&
	if ((ldlm_is_local(lock) || lock->l_req_mode == LCK_GROUP) &&
	    !lock->l_readers && !lock->l_writers) {
		/* If this is a local lock on a server namespace and this was
		 * the last reference, cancel the lock.
		 */
		CDEBUG(D_INFO, "forcing cancel of local lock\n");
		ldlm_set_cbpending(lock);
	}

	if (!lock->l_readers && !lock->l_writers &&
	    (ldlm_is_cbpending(lock) || lock->l_req_mode == LCK_GROUP)) {
		/* If we received a blocked AST and this was the last reference,
		 * run the callback.
		 *
		 * Group locks are special:
		 * They must not go in LRU, but they are not called back
		 * like non-group locks, instead they are manually released.
@@ -791,6 +783,13 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, enum ldlm_mode mode)
		 * they are manually released, so we remove them when they have
		 * no more reader or writer references. - LU-6368
		 */
		ldlm_set_cbpending(lock);
	}

	if (!lock->l_readers && !lock->l_writers && ldlm_is_cbpending(lock)) {
		/* If we received a blocked AST and this was the last reference,
		 * run the callback.
		 */
		LDLM_DEBUG(lock, "final decref done on cbpending lock");

		LDLM_LOCK_GET(lock); /* dropped by bl thread */
@@ -1882,6 +1881,19 @@ int ldlm_run_ast_work(struct ldlm_namespace *ns, struct list_head *rpc_list,
	return rc;
}

/**
 * Report whether the bl_done flag is set on \a lock.
 *
 * The flag is first tested without taking any lock. Only when that
 * test fails is it read a second time while holding
 * lock_res_and_lock(), and the value seen under the lock is what gets
 * returned.
 *
 * \param lock	LDLM lock to inspect
 *
 * \retval true	 the bl_done flag was observed set
 * \retval false the flag was still clear when re-read under the lock
 */
static bool is_bl_done(struct ldlm_lock *lock)
{
	bool done;

	/* Fast path: flag already visible, no locking needed. */
	if (ldlm_is_bl_done(lock))
		return true;

	/* Not seen yet: sample the flag again under the lock. */
	lock_res_and_lock(lock);
	done = ldlm_is_bl_done(lock);
	unlock_res_and_lock(lock);

	return done;
}

/**
 * Helper function to call blocking AST for LDLM lock \a lock in a
 * "cancelling" mode.
@@ -1899,8 +1911,20 @@ void ldlm_cancel_callback(struct ldlm_lock *lock)
		} else {
			LDLM_DEBUG(lock, "no blocking ast");
		}
	}
		/* only canceller can set bl_done bit */
		ldlm_set_bl_done(lock);
		wake_up_all(&lock->l_waitq);
	} else if (!ldlm_is_bl_done(lock)) {
		struct l_wait_info lwi = { 0 };

		/*
		 * The lock is guaranteed to have been canceled once
		 * returning from this function.
		 */
		unlock_res_and_lock(lock);
		l_wait_event(lock->l_waitq, is_bl_done(lock), &lwi);
		lock_res_and_lock(lock);
	}
}

/**
+12 −2
Original line number Diff line number Diff line
@@ -1029,13 +1029,23 @@ int ldlm_cli_cancel(const struct lustre_handle *lockh,
	struct ldlm_lock *lock;
	LIST_HEAD(cancels);

	/* concurrent cancels on the same handle can happen */
	lock = ldlm_handle2lock_long(lockh, LDLM_FL_CANCELING);
	lock = ldlm_handle2lock_long(lockh, 0);
	if (!lock) {
		LDLM_DEBUG_NOLOCK("lock is already being destroyed");
		return 0;
	}

	lock_res_and_lock(lock);
	/* Lock is being canceled and the caller doesn't want to wait */
	if (ldlm_is_canceling(lock) && (cancel_flags & LCF_ASYNC)) {
		unlock_res_and_lock(lock);
		LDLM_LOCK_RELEASE(lock);
		return 0;
	}

	ldlm_set_canceling(lock);
	unlock_res_and_lock(lock);

	rc = ldlm_cli_cancel_local(lock);
	if (rc == LDLM_FL_LOCAL_ONLY || cancel_flags & LCF_LOCAL) {
		LDLM_LOCK_RELEASE(lock);
+1 −1
Original line number Diff line number Diff line
@@ -806,7 +806,7 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,

		unlock_res(res);
		ldlm_lock2handle(lock, &lockh);
		rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
		rc = ldlm_cli_cancel(&lockh, LCF_LOCAL);
		if (rc)
			CERROR("ldlm_cli_cancel: %d\n", rc);
		LDLM_LOCK_RELEASE(lock);
Loading