Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit e1a4541

Browse files
lxbsz and idryomov
authored and committed
ceph: flush the mdlog before waiting on unsafe reqs
For client requests that receive both unsafe and safe replies from MDS daemons, the MDS daemons won't flush the mdlog (journal log) immediately, because they consider it unnecessary. That's true in most cases but not all, such as the fsync request. The fsync will wait until all the requests that have received unsafe replies have been safely replied to. Normally, if multiple threads or clients are running, the whole mdlog in the MDS daemons can be flushed in time, since any request may trigger the mdlog submit thread. So usually we won't see normal operations getting stuck for a long time. But if there is only one client with only one thread running, the stall may be obvious, and in the worst case it must wait up to 5 seconds for the mdlog to be flushed periodically by the MDS's tick thread. This patch triggers a flush of the mdlog in the relevant and auth MDSes, to which the in-flight requests were sent, just before waiting for the unsafe requests to finish. Signed-off-by: Xiubo Li <[email protected]> Reviewed-by: Jeff Layton <[email protected]> Signed-off-by: Ilya Dryomov <[email protected]>
1 parent d095559 commit e1a4541

File tree

1 file changed

+76
-0
lines changed

1 file changed

+76
-0
lines changed

fs/ceph/caps.c

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2219,6 +2219,7 @@ static int caps_are_flushed(struct inode *inode, u64 flush_tid)
22192219
*/
22202220
static int unsafe_request_wait(struct inode *inode)
22212221
{
2222+
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
22222223
struct ceph_inode_info *ci = ceph_inode(inode);
22232224
struct ceph_mds_request *req1 = NULL, *req2 = NULL;
22242225
int ret, err = 0;
@@ -2238,6 +2239,81 @@ static int unsafe_request_wait(struct inode *inode)
22382239
}
22392240
spin_unlock(&ci->i_unsafe_lock);
22402241

2242+
/*
2243+
* Manually trigger a flush of the journal logs in all the relevant
2244+
* MDSes; otherwise, in the worst case, we must wait up to 5 seconds
2245+
* for the MDSes to flush their journal logs periodically.
2246+
*/
2247+
if (req1 || req2) {
2248+
struct ceph_mds_session **sessions = NULL;
2249+
struct ceph_mds_session *s;
2250+
struct ceph_mds_request *req;
2251+
unsigned int max;
2252+
int i;
2253+
2254+
/*
2255+
* The mdsc->max_sessions is unlikely to change in most cases,
2256+
* so here we retry by reallocating the sessions array memory
2257+
* in order to avoid taking the mdsc->mutex
2258+
* lock.
2259+
*/
2260+
retry:
2261+
max = mdsc->max_sessions;
2262+
sessions = krealloc(sessions, max * sizeof(s), __GFP_ZERO);
2263+
if (!sessions)
2264+
return -ENOMEM;
2265+
2266+
spin_lock(&ci->i_unsafe_lock);
2267+
if (req1) {
2268+
list_for_each_entry(req, &ci->i_unsafe_dirops,
2269+
r_unsafe_dir_item) {
2270+
s = req->r_session;
2271+
if (unlikely(s->s_mds > max)) {
2272+
spin_unlock(&ci->i_unsafe_lock);
2273+
goto retry;
2274+
}
2275+
if (!sessions[s->s_mds]) {
2276+
s = ceph_get_mds_session(s);
2277+
sessions[s->s_mds] = s;
2278+
}
2279+
}
2280+
}
2281+
if (req2) {
2282+
list_for_each_entry(req, &ci->i_unsafe_iops,
2283+
r_unsafe_target_item) {
2284+
s = req->r_session;
2285+
if (unlikely(s->s_mds > max)) {
2286+
spin_unlock(&ci->i_unsafe_lock);
2287+
goto retry;
2288+
}
2289+
if (!sessions[s->s_mds]) {
2290+
s = ceph_get_mds_session(s);
2291+
sessions[s->s_mds] = s;
2292+
}
2293+
}
2294+
}
2295+
spin_unlock(&ci->i_unsafe_lock);
2296+
2297+
/* the auth MDS */
2298+
spin_lock(&ci->i_ceph_lock);
2299+
if (ci->i_auth_cap) {
2300+
s = ci->i_auth_cap->session;
2301+
if (!sessions[s->s_mds])
2302+
sessions[s->s_mds] = ceph_get_mds_session(s);
2303+
}
2304+
spin_unlock(&ci->i_ceph_lock);
2305+
2306+
/* send flush mdlog request to MDSes */
2307+
for (i = 0; i < max; i++) {
2308+
s = sessions[i];
2309+
if (s) {
2310+
send_flush_mdlog(s);
2311+
ceph_put_mds_session(s);
2312+
}
2313+
}
2314+
kfree(sessions);
2315+
}
2316+
22412317
dout("unsafe_request_wait %p wait on tid %llu %llu\n",
22422318
inode, req1 ? req1->r_tid : 0ULL, req2 ? req2->r_tid : 0ULL);
22432319
if (req1) {

0 commit comments

Comments
 (0)