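// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright 2004-2011 Red Hat, Inc.
 */
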
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/fs.h>
#include <linux/dlm.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/delay.h>
#include <linux/gfs2_ondisk.h>
#include <linux/sched/signal.h>

#include "incore.h"
#include "glock.h"
#include "glops.h"
#include "recovery.h"
#include "util.h"
#include "sys.h"
#include "trace_gfs2.h"

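/**
 * gfs2_update_stats - Update time based stats
 * @s: The stats to update (local or global)
 * @index: The index inside @s
 * @sample: New data to include
 */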
static inline void gfs2_update_stats(struct gfs2_lkstats *s, unsigned index,
				     s64 sample)
{
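	/*
	 * @delta is the difference between the current rtt sample and the
	 * running average srtt. We add 1/8 of that to the srtt in order to
	 * update the current srtt estimate. The variance estimate is a bit
	 * more complicated. We subtract the current variance estimate from
	 * the abs value of the @delta and add 1/4 of that to the running
	 * total. That's equivalent to 3/4 of the current variance
	 * estimate plus 1/4 of the abs of @delta.
	 *
	 * Note that the index points at the array entry containing the
	 * smoothed mean value, and the variance is always in the following
	 * entry.
	 *
	 * Reference: TCP/IP Illustrated, vol 2, p. 831,832
	 * All times are in units of integer nanoseconds. Unlike the TCP/IP
	 * case, they are not scaled fixed point.
	 */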
	s64 delta = sample - s->stats[index];
	s->stats[index] += (delta >> 3);
	index++;
	s->stats[index] += (s64)(abs(delta) - s->stats[index]) >> 2;
}

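/**
 * gfs2_update_reply_times - Update locking statistics
 * @gl: The glock to update
 *
 * This assumes that gl->gl_dstamp has been set earlier.
 *
 * The rtt (lock round trip time) is an estimate of the time
 * taken to perform a dlm lock request. We update it on each
 * reply from the dlm.
 *
 * The blocking flag is set on the glock for all dlm requests
 * which may potentially block due to lock requests from other nodes.
 * DLM requests where the current lock state is exclusive, the
 * requested state is null (or unlocked) or where the TRY or
 * TRY_1CB flags are set are classified as non-blocking. All
 * other DLM requests are counted as (potentially) blocking.
 */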
static inline void gfs2_update_reply_times(struct gfs2_glock *gl)
{
	struct gfs2_pcpu_lkstats *lks;
	const unsigned gltype = gl->gl_name.ln_type;
	unsigned index = test_bit(GLF_BLOCKING, &gl->gl_flags) ?
			 GFS2_LKS_SRTTB : GFS2_LKS_SRTT;
	s64 rtt;

	preempt_disable();
	rtt = ktime_to_ns(ktime_sub(ktime_get_real(), gl->gl_dstamp));
	lks = this_cpu_ptr(gl->gl_name.ln_sbd->sd_lkstats);
	gfs2_update_stats(&gl->gl_stats, index, rtt);		/* Local */
	gfs2_update_stats(&lks->lkstats[gltype], index, rtt);	/* Global */
	preempt_enable();

	trace_gfs2_glock_lock_time(gl, rtt);
}

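/**
 * gfs2_update_request_times - Update locking statistics
 * @gl: The glock to update
 *
 * The irt (lock inter-request time) measures the average time
 * between requests to the dlm. It is updated immediately before
 * each dlm call.
 */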
static inline void gfs2_update_request_times(struct gfs2_glock *gl)
{
	struct gfs2_pcpu_lkstats *lks;
	const unsigned gltype = gl->gl_name.ln_type;
	ktime_t dstamp;
	s64 irt;

	preempt_disable();
	dstamp = gl->gl_dstamp;
	gl->gl_dstamp = ktime_get_real();
	irt = ktime_to_ns(ktime_sub(gl->gl_dstamp, dstamp));
	lks = this_cpu_ptr(gl->gl_name.ln_sbd->sd_lkstats);
	gfs2_update_stats(&gl->gl_stats, GFS2_LKS_SIRT, irt);		/* Local */
	gfs2_update_stats(&lks->lkstats[gltype], GFS2_LKS_SIRT, irt);	/* Global */
	preempt_enable();
}

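/*
 * gdlm_ast - dlm completion callback
 *
 * Translates the dlm status into a glock result: a completed unlock frees
 * the glock, cancel/try-failure/deadlock/timeout are reported through the
 * LM_OUT_* flags, and on success the granted mode (possibly swapped by
 * ALTMODE) is handed to gfs2_glock_complete().
 */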
static void gdlm_ast(void *arg)
{
	struct gfs2_glock *gl = arg;
	unsigned ret = gl->gl_state;

	gfs2_update_reply_times(gl);
	BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);

	if ((gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID) && gl->gl_lksb.sb_lvbptr)
		memset(gl->gl_lksb.sb_lvbptr, 0, GDLM_LVB_SIZE);

	switch (gl->gl_lksb.sb_status) {
	case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */
		if (gl->gl_ops->go_free)
			gl->gl_ops->go_free(gl);
		gfs2_glock_free(gl);
		return;
	case -DLM_ECANCEL: /* Cancel while getting lock */
		ret |= LM_OUT_CANCELED;
		goto out;
	case -EAGAIN: /* Try lock fails */
	case -EDEADLK: /* Deadlock detected */
		goto out;
	case -ETIMEDOUT: /* Canceled due to timeout */
		ret |= LM_OUT_ERROR;
		goto out;
	case 0: /* Success */
		break;
	default: /* Something unexpected */
		BUG();
	}

	ret = gl->gl_req;
	if (gl->gl_lksb.sb_flags & DLM_SBF_ALTMODE) {
		if (gl->gl_req == LM_ST_SHARED)
			ret = LM_ST_DEFERRED;
		else if (gl->gl_req == LM_ST_DEFERRED)
			ret = LM_ST_SHARED;
		else
			BUG();
	}

	set_bit(GLF_INITIAL, &gl->gl_flags);
	gfs2_glock_complete(gl, ret);
	return;
out:
	if (!test_bit(GLF_INITIAL, &gl->gl_flags))
		gl->gl_lksb.sb_lkid = 0;
	gfs2_glock_complete(gl, ret);
}

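/*
 * gdlm_bast - dlm blocking callback
 *
 * Another node needs the lock in @mode; ask the glock layer to demote
 * our holding to a compatible state.
 */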
static void gdlm_bast(void *arg, int mode)
{
	struct gfs2_glock *gl = arg;

	switch (mode) {
	case DLM_LOCK_EX:
		gfs2_glock_cb(gl, LM_ST_UNLOCKED);
		break;
	case DLM_LOCK_CW:
		gfs2_glock_cb(gl, LM_ST_DEFERRED);
		break;
	case DLM_LOCK_PR:
		gfs2_glock_cb(gl, LM_ST_SHARED);
		break;
	default:
		fs_err(gl->gl_name.ln_sbd, "unknown bast mode %d\n", mode);
		BUG();
	}
}
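
/* convert gfs lock-state to dlm lock-mode */
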
static int make_mode(struct gfs2_sbd *sdp, const unsigned int lmstate)
{
	switch (lmstate) {
	case LM_ST_UNLOCKED:
		return DLM_LOCK_NL;
	case LM_ST_EXCLUSIVE:
		return DLM_LOCK_EX;
	case LM_ST_DEFERRED:
		return DLM_LOCK_CW;
	case LM_ST_SHARED:
		return DLM_LOCK_PR;
	}
	fs_err(sdp, "unknown LM state %d\n", lmstate);
	BUG();
	return -1;
}
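
/* convert gfs2 LM_FLAG_* request flags to dlm DLM_LKF_* flags */
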
static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags,
		      const int req)
{
	u32 lkf = 0;

	if (gl->gl_lksb.sb_lvbptr)
		lkf |= DLM_LKF_VALBLK;

	if (gfs_flags & LM_FLAG_TRY)
		lkf |= DLM_LKF_NOQUEUE;

	if (gfs_flags & LM_FLAG_TRY_1CB) {
		lkf |= DLM_LKF_NOQUEUE;
		lkf |= DLM_LKF_NOQUEUEBAST;
	}

	if (gfs_flags & LM_FLAG_PRIORITY) {
		lkf |= DLM_LKF_NOORDER;
		lkf |= DLM_LKF_HEADQUE;
	}

	if (gfs_flags & LM_FLAG_ANY) {
		if (req == DLM_LOCK_PR)
			lkf |= DLM_LKF_ALTCW;
		else if (req == DLM_LOCK_CW)
			lkf |= DLM_LKF_ALTPR;
		else
			BUG();
	}

	if (gl->gl_lksb.sb_lkid != 0) {
		lkf |= DLM_LKF_CONVERT;
		if (test_bit(GLF_BLOCKING, &gl->gl_flags))
			lkf |= DLM_LKF_QUECVT;
	}

	return lkf;
}

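/* write the hex digits of @value right-to-left, least significant at @c */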
static void gfs2_reverse_hex(char *c, u64 value)
{
	*c = '0';
	while (value) {
		*c-- = hex_asc[value & 0x0f];
		value >>= 4;
	}
}
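
/*
 * gdlm_lock - submit a dlm lock or convert request for a glock
 *
 * The first request on a glock (no lkid yet) builds the dlm resource name:
 * the glock type and number as fixed-width hex in a space-padded string,
 * matching the "%8x%16x" layout used by sync_lock() below.
 */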
static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
		     unsigned int flags)
{
	struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct;
	int req;
	u32 lkf;
	char strname[GDLM_STRNAME_BYTES] = "";

	req = make_mode(gl->gl_name.ln_sbd, req_state);
	lkf = make_flags(gl, flags, req);
	gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
	gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
	if (gl->gl_lksb.sb_lkid) {
		gfs2_update_request_times(gl);
	} else {
		memset(strname, ' ', GDLM_STRNAME_BYTES - 1);
		strname[GDLM_STRNAME_BYTES - 1] = '\0';
		gfs2_reverse_hex(strname + 7, gl->gl_name.ln_type);
		gfs2_reverse_hex(strname + 23, gl->gl_name.ln_number);
		gl->gl_dstamp = ktime_get_real();
	}
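
	/*
	 * Submit the actual lock request.
	 */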
	return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname,
			GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
}

static void gdlm_put_lock(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	int error;

	if (gl->gl_lksb.sb_lkid == 0) {
		gfs2_glock_free(gl);
		return;
	}

	clear_bit(GLF_BLOCKING, &gl->gl_flags);
	gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
	gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
	gfs2_update_request_times(gl);
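
	/*
	 * When SDF_SKIP_DLM_UNLOCK is set (e.g. after a withdraw), avoid
	 * calling dlm, but don't skip dlm_unlock when the lock has an lvb
	 * that must be written back.
	 */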
	if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
	    !gl->gl_lksb.sb_lvbptr) {
		gfs2_glock_free(gl);
		return;
	}

	error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK,
			   NULL, gl);
	if (error) {
		fs_err(sdp, "gdlm_unlock %x,%llx err=%d\n",
		       gl->gl_name.ln_type,
		       (unsigned long long)gl->gl_name.ln_number, error);
		return;
	}
}

static void gdlm_cancel(struct gfs2_glock *gl)
{
	struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct;
	dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl);
}
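
/*
 * dlm/gfs2 recovery coordination using the dlm_recover callbacks
 *
 * Two dlm locks coordinate mounting and recovery between nodes:
 *
 * mounted_lock: each mounted node holds this lock in PR mode. A mounting
 * node requests EX; success means no other node is mounted, and the node
 * becomes the "first mounter" (DFL_FIRST_MOUNT), responsible for checking
 * all journals before other nodes are allowed to mount.
 *
 * control_lock: its lvb holds a 32-bit recovery generation followed, at
 * JID_BITMAP_OFFSET, by a bitmap of journal ids (jids) needing recovery.
 * The dlm reports failed nodes via recover_slot(), which records their
 * jids in ls_recover_submit[]; after recover_done(), gfs2_control_func()
 * takes control_lock in EX, sets lvb bits for jids needing recovery,
 * clears bits for jids recovered successfully (ls_recover_result[]), and
 * writes the new generation. Every node then calls gfs2_recover_set() for
 * the jids still set, reports the outcome through gdlm_recovery_result(),
 * and clears DFL_BLOCK_LOCKS once no jid bits remain.
 */
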
#define JID_BITMAP_OFFSET 8

static void control_lvb_read(struct lm_lockstruct *ls, uint32_t *lvb_gen,
			     char *lvb_bits)
{
	__le32 gen;
	memcpy(lvb_bits, ls->ls_control_lvb, GDLM_LVB_SIZE);
	memcpy(&gen, lvb_bits, sizeof(__le32));
	*lvb_gen = le32_to_cpu(gen);
}

static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen,
			      char *lvb_bits)
{
	__le32 gen;
	memcpy(ls->ls_control_lvb, lvb_bits, GDLM_LVB_SIZE);
	gen = cpu_to_le32(lvb_gen);
	memcpy(ls->ls_control_lvb, &gen, sizeof(__le32));
}

static int all_jid_bits_clear(char *lvb)
{
	return !memchr_inv(lvb + JID_BITMAP_OFFSET, 0,
			   GDLM_LVB_SIZE - JID_BITMAP_OFFSET);
}

static void sync_wait_cb(void *arg)
{
	struct lm_lockstruct *ls = arg;
	complete(&ls->ls_sync_wait);
}

static int sync_unlock(struct gfs2_sbd *sdp, struct dlm_lksb *lksb, char *name)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	int error;

	error = dlm_unlock(ls->ls_dlm, lksb->sb_lkid, 0, lksb, ls);
	if (error) {
		fs_err(sdp, "%s lkid %x error %d\n",
		       name, lksb->sb_lkid, error);
		return error;
	}

	wait_for_completion(&ls->ls_sync_wait);

	if (lksb->sb_status != -DLM_EUNLOCK) {
		fs_err(sdp, "%s lkid %x status %d\n",
		       name, lksb->sb_lkid, lksb->sb_status);
		return -1;
	}
	return 0;
}

static int sync_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags,
		     unsigned int num, struct dlm_lksb *lksb, char *name)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	char strname[GDLM_STRNAME_BYTES];
	int error, status;

	memset(strname, 0, GDLM_STRNAME_BYTES);
	snprintf(strname, GDLM_STRNAME_BYTES, "%8x%16x", LM_TYPE_NONDISK, num);

	error = dlm_lock(ls->ls_dlm, mode, lksb, flags,
			 strname, GDLM_STRNAME_BYTES - 1,
			 0, sync_wait_cb, ls, NULL);
	if (error) {
		fs_err(sdp, "%s lkid %x flags %x mode %d error %d\n",
		       name, lksb->sb_lkid, flags, mode, error);
		return error;
	}

	wait_for_completion(&ls->ls_sync_wait);

	status = lksb->sb_status;

	if (status && status != -EAGAIN) {
		fs_err(sdp, "%s lkid %x flags %x mode %d status %d\n",
		       name, lksb->sb_lkid, flags, mode, status);
	}

	return status;
}

static int mounted_unlock(struct gfs2_sbd *sdp)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	return sync_unlock(sdp, &ls->ls_mounted_lksb, "mounted_lock");
}

static int mounted_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	return sync_lock(sdp, mode, flags, GFS2_MOUNTED_LOCK,
			 &ls->ls_mounted_lksb, "mounted_lock");
}

static int control_unlock(struct gfs2_sbd *sdp)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	return sync_unlock(sdp, &ls->ls_control_lksb, "control_lock");
}

static int control_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	return sync_lock(sdp, mode, flags, GFS2_CONTROL_LOCK,
			 &ls->ls_control_lksb, "control_lock");
}

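/**
 * remote_withdraw - react to a node withdrawing from the file system
 * @sdp: The superblock
 */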
static void remote_withdraw(struct gfs2_sbd *sdp)
{
	struct gfs2_jdesc *jd;
	int ret = 0, count = 0;

	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
		if (jd->jd_jid == sdp->sd_lockstruct.ls_jid)
			continue;
		ret = gfs2_recover_journal(jd, true);
		if (ret)
			break;
		count++;
	}

	fs_err(sdp, "Journals checked: %d, ret = %d.\n", count, ret);
}

static void gfs2_control_func(struct work_struct *work)
{
	struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_control_work.work);
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	uint32_t block_gen, start_gen, lvb_gen, flags;
	int recover_set = 0;
	int write_lvb = 0;
	int recover_size;
	int i, error;

	/* First check for other nodes that may have done a withdraw. */
	if (test_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags)) {
		remote_withdraw(sdp);
		clear_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags);
		return;
	}

	spin_lock(&ls->ls_recover_spin);
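	/*
	 * No MOUNT_DONE means we're still mounting; control_mount()
	 * will set this flag, after which this thread will take over
	 * all further clearing of BLOCK_LOCKS.
	 *
	 * FIRST_MOUNT means this node is doing first mounter recovery,
	 * for which recovery control is handled by
	 * control_mount()/control_first_done(), not this thread.
	 */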
	if (!test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) ||
	     test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
		spin_unlock(&ls->ls_recover_spin);
		return;
	}
	block_gen = ls->ls_recover_block;
	start_gen = ls->ls_recover_start;
	spin_unlock(&ls->ls_recover_spin);
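
	/*
	 * Equal block_gen and start_gen implies we are between
	 * recover_prep and recover_done callbacks, which means
	 * dlm recovery is in progress and dlm locking is blocked.
	 * We should wait for recover_done to be called again anyway.
	 */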
	if (block_gen == start_gen)
		return;
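
	/*
	 * Propagate recover_submit[] and recover_result[] to lvb:
	 * dlm_recoverd adds to recover_submit[] jids needing recovery
	 * gfs2_recover adds to recover_result[] journal recovery results
	 *
	 * set lvb bit for jids in recover_submit[] if the lvb has not
	 * yet been updated for the generation of the failure
	 *
	 * clear lvb bit for jids in recover_result[] if the result of
	 * the journal recovery is SUCCESS
	 */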
	error = control_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_VALBLK);
	if (error) {
		fs_err(sdp, "control lock EX error %d\n", error);
		return;
	}

	control_lvb_read(ls, &lvb_gen, ls->ls_lvb_bits);

	spin_lock(&ls->ls_recover_spin);
	if (block_gen != ls->ls_recover_block ||
	    start_gen != ls->ls_recover_start) {
		fs_info(sdp, "recover generation %u block1 %u %u\n",
			start_gen, block_gen, ls->ls_recover_block);
		spin_unlock(&ls->ls_recover_spin);
		control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT);
		return;
	}

	recover_size = ls->ls_recover_size;

	if (lvb_gen <= start_gen) {
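		/*
		 * Clear lvb bits for jids we've successfully recovered.
		 * Because all nodes attempt to recover failed journals,
		 * a journal can be recovered multiple times successfully
		 * in succession.  Only the first will really do recovery,
		 * the others find it clean, but still report a successful
		 * recovery.  So, another node may have already recovered
		 * the jid and cleared the lvb bit for it.
		 */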
		for (i = 0; i < recover_size; i++) {
			if (ls->ls_recover_result[i] != LM_RD_SUCCESS)
				continue;

			ls->ls_recover_result[i] = 0;

			if (!test_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET))
				continue;

			__clear_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET);
			write_lvb = 1;
		}
	}

	if (lvb_gen == start_gen) {
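		/*
		 * Failed slots before start_gen are already set in lvb.
		 */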
		for (i = 0; i < recover_size; i++) {
			if (!ls->ls_recover_submit[i])
				continue;
			if (ls->ls_recover_submit[i] < lvb_gen)
				ls->ls_recover_submit[i] = 0;
		}
	} else if (lvb_gen < start_gen) {
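		/*
		 * Failed slots before start_gen are not yet set in lvb.
		 */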
		for (i = 0; i < recover_size; i++) {
			if (!ls->ls_recover_submit[i])
				continue;
			if (ls->ls_recover_submit[i] < start_gen) {
				ls->ls_recover_submit[i] = 0;
				__set_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET);
			}
		}
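		/* even if there are no bits to set, we need to write the
		   latest generation to the lvb */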
		write_lvb = 1;
	} else {
		/*
		 * we should be getting a recover_done() for lvb_gen soon
		 */
	}
	spin_unlock(&ls->ls_recover_spin);

	if (write_lvb) {
		control_lvb_write(ls, start_gen, ls->ls_lvb_bits);
		flags = DLM_LKF_CONVERT | DLM_LKF_VALBLK;
	} else {
		flags = DLM_LKF_CONVERT;
	}

	error = control_lock(sdp, DLM_LOCK_NL, flags);
	if (error) {
		fs_err(sdp, "control lock NL error %d\n", error);
		return;
	}

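	/*
	 * Everyone will see jid bits set in the lvb, run gfs2_recover_set(),
	 * and clear a jid bit in the lvb if the recovery is a success.
	 * Eventually all journals will be recovered, all jid bits will
	 * be cleared in the lvb, and everyone will clear BLOCK_LOCKS.
	 */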
	for (i = 0; i < recover_size; i++) {
		if (test_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET)) {
			fs_info(sdp, "recover generation %u jid %d\n",
				start_gen, i);
			gfs2_recover_set(sdp, i);
			recover_set++;
		}
	}
	if (recover_set)
		return;

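	/*
	 * No more jid bits set in lvb, all recovery is done, unblock locks
	 * (unless a new recover_prep callback has occurred blocking locks
	 * again while working above).
	 */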
	spin_lock(&ls->ls_recover_spin);
	if (ls->ls_recover_block == block_gen &&
	    ls->ls_recover_start == start_gen) {
		clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
		spin_unlock(&ls->ls_recover_spin);
		fs_info(sdp, "recover generation %u done\n", start_gen);
		gfs2_glock_thaw(sdp);
	} else {
		fs_info(sdp, "recover generation %u block2 %u %u\n",
			start_gen, block_gen, ls->ls_recover_block);
		spin_unlock(&ls->ls_recover_spin);
	}
}

static int control_mount(struct gfs2_sbd *sdp)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	uint32_t start_gen, block_gen, mount_gen, lvb_gen;
	int mounted_mode;
	int retries = 0;
	int error;

	memset(&ls->ls_mounted_lksb, 0, sizeof(struct dlm_lksb));
	memset(&ls->ls_control_lksb, 0, sizeof(struct dlm_lksb));
	memset(&ls->ls_control_lvb, 0, GDLM_LVB_SIZE);
	ls->ls_control_lksb.sb_lvbptr = ls->ls_control_lvb;
	init_completion(&ls->ls_sync_wait);

	set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);

	error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_VALBLK);
	if (error) {
		fs_err(sdp, "control_mount control_lock NL error %d\n", error);
		return error;
	}

	error = mounted_lock(sdp, DLM_LOCK_NL, 0);
	if (error) {
		fs_err(sdp, "control_mount mounted_lock NL error %d\n", error);
		control_unlock(sdp);
		return error;
	}
	mounted_mode = DLM_LOCK_NL;

restart:
	if (retries++ && signal_pending(current)) {
		error = -EINTR;
		goto fail;
	}
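
	/*
	 * We always begin a pass with both locks in NL; the control lock
	 * is already NL here, so only the mounted lock may need demoting
	 * from a previous pass.
	 */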
	if (mounted_mode != DLM_LOCK_NL) {
		error = mounted_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT);
		if (error)
			goto fail;
		mounted_mode = DLM_LOCK_NL;
	}
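
	/*
	 * Other nodes need to do some work in dlm recovery and gfs2_control
	 * before the recover_done and control_lock will be ready for us
	 * below. A delay here is not required but often avoids having to
	 * retry.
	 */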
	msleep_interruptible(500);

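	/*
	 * Try to take control_lock in EX without queueing; -EAGAIN means
	 * another node holds it (mounting or recovering), so back off and
	 * retry.
	 */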
	error = control_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE|DLM_LKF_VALBLK);
	if (error == -EAGAIN) {
		goto restart;
	} else if (error) {
		fs_err(sdp, "control_mount control_lock EX error %d\n", error);
		goto fail;
	}
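
	/*
	 * If we're a spectator, we don't want to take the mounted lock in
	 * EX because we cannot do the first-mounter recovery it implies.
	 */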
	if (sdp->sd_args.ar_spectator)
		goto locks_done;

	error = mounted_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE);
	if (!error) {
		mounted_mode = DLM_LOCK_EX;
		goto locks_done;
	} else if (error != -EAGAIN) {
		fs_err(sdp, "control_mount mounted_lock EX error %d\n", error);
		goto fail;
	}

	error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE);
	if (!error) {
		mounted_mode = DLM_LOCK_PR;
		goto locks_done;
	} else {
		/* not even -EAGAIN should happen */
		fs_err(sdp, "control_mount mounted_lock PR error %d\n", error);
		goto fail;
	}

locks_done:
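	/*
	 * If we got both locks above in EX, then we're the first mounter.
	 * If not, then we need to wait for the control_lock lvb to be
	 * updated by other mounted nodes to reflect our mount generation.
	 *
	 * In simple first mounter cases, first mounter will see zero lvb_gen,
	 * but in cases where all existing nodes leave/fail before mounting
	 * nodes finish control_mount, then all nodes will be mounting and
	 * lvb_gen will be non-zero.
	 */
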
	control_lvb_read(ls, &lvb_gen, ls->ls_lvb_bits);

	if (lvb_gen == 0xFFFFFFFF) {
		/* special value to force mount attempts to fail */
		fs_err(sdp, "control_mount control_lock disabled\n");
		error = -EINVAL;
		goto fail;
	}

	if (mounted_mode == DLM_LOCK_EX) {
		/* first mounter, keep both EX while doing first recovery */
		spin_lock(&ls->ls_recover_spin);
		clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
		set_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags);
		set_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags);
		spin_unlock(&ls->ls_recover_spin);
		fs_info(sdp, "first mounter control generation %u\n", lvb_gen);
		return 0;
	}

	error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT);
	if (error)
		goto fail;
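
	/*
	 * We are not the first mounter; if any jid bits are still set in
	 * the lvb, journals still need recovery by other nodes, so wait
	 * and retry.
	 */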
	if (!all_jid_bits_clear(ls->ls_lvb_bits)) {
		/* journals need recovery, wait until all are clear */
		fs_info(sdp, "control_mount wait for journal recovery\n");
		goto restart;
	}

	spin_lock(&ls->ls_recover_spin);
	block_gen = ls->ls_recover_block;
	start_gen = ls->ls_recover_start;
	mount_gen = ls->ls_recover_mount;

	if (lvb_gen < mount_gen) {
		/* wait for mounted nodes to update control_lock lvb to our
		   generation, which might include new recovery bits set */
		if (sdp->sd_args.ar_spectator) {
			fs_info(sdp, "Recovery is required. Waiting for a "
				"non-spectator to mount.\n");
			msleep_interruptible(1000);
		} else {
			fs_info(sdp, "control_mount wait1 block %u start %u "
				"mount %u lvb %u flags %lx\n", block_gen,
				start_gen, mount_gen, lvb_gen,
				ls->ls_recover_flags);
		}
		spin_unlock(&ls->ls_recover_spin);
		goto restart;
	}

	if (lvb_gen != start_gen) {
		/* wait for mounted nodes to update control_lock lvb to the
		   latest recovery generation */
		fs_info(sdp, "control_mount wait2 block %u start %u mount %u "
			"lvb %u flags %lx\n", block_gen, start_gen, mount_gen,
			lvb_gen, ls->ls_recover_flags);
		spin_unlock(&ls->ls_recover_spin);
		goto restart;
	}

	if (block_gen == start_gen) {
		/* dlm recovery in progress, wait for it to finish */
		fs_info(sdp, "control_mount wait3 block %u start %u mount %u "
			"lvb %u flags %lx\n", block_gen, start_gen, mount_gen,
			lvb_gen, ls->ls_recover_flags);
		spin_unlock(&ls->ls_recover_spin);
		goto restart;
	}

	clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
	set_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags);
	memset(ls->ls_recover_submit, 0, ls->ls_recover_size*sizeof(uint32_t));
	memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t));
	spin_unlock(&ls->ls_recover_spin);
	return 0;

fail:
	mounted_unlock(sdp);
	control_unlock(sdp);
	return error;
}

static int control_first_done(struct gfs2_sbd *sdp)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	uint32_t start_gen, block_gen;
	int error;

restart:
	spin_lock(&ls->ls_recover_spin);
	start_gen = ls->ls_recover_start;
	block_gen = ls->ls_recover_block;

	if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags) ||
	    !test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) ||
	    !test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
		/* sanity check, should not happen */
		fs_err(sdp, "control_first_done start %u block %u flags %lx\n",
		       start_gen, block_gen, ls->ls_recover_flags);
		spin_unlock(&ls->ls_recover_spin);
		control_unlock(sdp);
		return -1;
	}

	if (start_gen == block_gen) {
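		/*
		 * Wait for the end of a dlm recovery cycle to switch from
		 * first mounter recovery.  We can ignore any recover_slot
		 * callbacks between the recover_prep and next recover_done
		 * because we are still the first mounter and any failed
		 * nodes have not fully mounted, so they don't need recovery.
		 */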
		spin_unlock(&ls->ls_recover_spin);
		fs_info(sdp, "control_first_done wait gen %u\n", start_gen);

		wait_on_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY,
			    TASK_UNINTERRUPTIBLE);
		goto restart;
	}

	clear_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags);
	set_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags);
	memset(ls->ls_recover_submit, 0, ls->ls_recover_size*sizeof(uint32_t));
	memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t));
	spin_unlock(&ls->ls_recover_spin);

	memset(ls->ls_lvb_bits, 0, GDLM_LVB_SIZE);
	control_lvb_write(ls, start_gen, ls->ls_lvb_bits);

	error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT);
	if (error)
		fs_err(sdp, "control_first_done mounted PR error %d\n", error);

	error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT|DLM_LKF_VALBLK);
	if (error)
		fs_err(sdp, "control_first_done control NL error %d\n", error);

	return error;
}

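/*
 * Expand static jid arrays if necessary (by increments of RECOVER_SIZE_INC)
 * to accommodate the largest slot number.  (NB dlm slot numbers start at 1,
 * gfs2 jids start at 0, so jid = slot - 1)
 */
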
#define RECOVER_SIZE_INC 16

static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots,
			    int num_slots)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	uint32_t *submit = NULL;
	uint32_t *result = NULL;
	uint32_t old_size, new_size;
	int i, max_jid;

	if (!ls->ls_lvb_bits) {
		ls->ls_lvb_bits = kzalloc(GDLM_LVB_SIZE, GFP_NOFS);
		if (!ls->ls_lvb_bits)
			return -ENOMEM;
	}

	max_jid = 0;
	for (i = 0; i < num_slots; i++) {
		if (max_jid < slots[i].slot - 1)
			max_jid = slots[i].slot - 1;
	}

	old_size = ls->ls_recover_size;
	new_size = old_size;
	while (new_size < max_jid + 1)
		new_size += RECOVER_SIZE_INC;
	if (new_size == old_size)
		return 0;

	submit = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS);
	result = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS);
	if (!submit || !result) {
		kfree(submit);
		kfree(result);
		return -ENOMEM;
	}

	spin_lock(&ls->ls_recover_spin);
	memcpy(submit, ls->ls_recover_submit, old_size * sizeof(uint32_t));
	memcpy(result, ls->ls_recover_result, old_size * sizeof(uint32_t));
	kfree(ls->ls_recover_submit);
	kfree(ls->ls_recover_result);
	ls->ls_recover_submit = submit;
	ls->ls_recover_result = result;
	ls->ls_recover_size = new_size;
	spin_unlock(&ls->ls_recover_spin);
	return 0;
}

static void free_recover_size(struct lm_lockstruct *ls)
{
	kfree(ls->ls_lvb_bits);
	kfree(ls->ls_recover_submit);
	kfree(ls->ls_recover_result);
	ls->ls_recover_submit = NULL;
	ls->ls_recover_result = NULL;
	ls->ls_recover_size = 0;
	ls->ls_lvb_bits = NULL;
}

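/* dlm calls before it does lock recovery */
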
static void gdlm_recover_prep(void *arg)
{
	struct gfs2_sbd *sdp = arg;
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;

	if (gfs2_withdrawn(sdp)) {
		fs_err(sdp, "recover_prep ignored due to withdraw.\n");
		return;
	}
	spin_lock(&ls->ls_recover_spin);
	ls->ls_recover_block = ls->ls_recover_start;
	set_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags);

	if (!test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) ||
	     test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
		spin_unlock(&ls->ls_recover_spin);
		return;
	}
	set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
	spin_unlock(&ls->ls_recover_spin);
}

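/* dlm calls after recover_prep has been completed on all lockspace members;
   identifies slot/jid of failed member */
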
static void gdlm_recover_slot(void *arg, struct dlm_slot *slot)
{
	struct gfs2_sbd *sdp = arg;
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	int jid = slot->slot - 1;

	if (gfs2_withdrawn(sdp)) {
		fs_err(sdp, "recover_slot jid %d ignored due to withdraw.\n",
		       jid);
		return;
	}
	spin_lock(&ls->ls_recover_spin);
	if (ls->ls_recover_size < jid + 1) {
		fs_err(sdp, "recover_slot jid %d gen %u short size %d\n",
		       jid, ls->ls_recover_block, ls->ls_recover_size);
		spin_unlock(&ls->ls_recover_spin);
		return;
	}

	if (ls->ls_recover_submit[jid]) {
		fs_info(sdp, "recover_slot jid %d gen %u prev %u\n",
			jid, ls->ls_recover_block, ls->ls_recover_submit[jid]);
	}
	ls->ls_recover_submit[jid] = ls->ls_recover_block;
	spin_unlock(&ls->ls_recover_spin);
}

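/* dlm calls after recover_slot and after it completes lock recovery */
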
static void gdlm_recover_done(void *arg, struct dlm_slot *slots, int num_slots,
			      int our_slot, uint32_t generation)
{
	struct gfs2_sbd *sdp = arg;
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;

	if (gfs2_withdrawn(sdp)) {
		fs_err(sdp, "recover_done ignored due to withdraw.\n");
		return;
	}

	/* ensure the ls jid arrays are large enough */
	set_recover_size(sdp, slots, num_slots);

	spin_lock(&ls->ls_recover_spin);
	ls->ls_recover_start = generation;

	if (!ls->ls_recover_mount) {
		ls->ls_recover_mount = generation;
		ls->ls_jid = our_slot - 1;
	}

	if (!test_bit(DFL_UNMOUNT, &ls->ls_recover_flags))
		queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, 0);

	clear_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags);
	smp_mb__after_atomic();
	wake_up_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY);
	spin_unlock(&ls->ls_recover_spin);
}

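/* gfs2_recover thread has a journal recovery result */
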
static void gdlm_recovery_result(struct gfs2_sbd *sdp, unsigned int jid,
				 unsigned int result)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;

	if (gfs2_withdrawn(sdp)) {
		fs_err(sdp, "recovery_result jid %d ignored due to withdraw.\n",
		       jid);
		return;
	}
	if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags))
		return;

	/* don't care about the recovery of own journal during mount */
	if (jid == ls->ls_jid)
		return;

	spin_lock(&ls->ls_recover_spin);
	if (test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
		spin_unlock(&ls->ls_recover_spin);
		return;
	}
	if (ls->ls_recover_size < jid + 1) {
		fs_err(sdp, "recovery_result jid %d short size %d\n",
		       jid, ls->ls_recover_size);
		spin_unlock(&ls->ls_recover_spin);
		return;
	}

	fs_info(sdp, "recover jid %d result %s\n", jid,
		result == LM_RD_GAVEUP ? "busy" : "success");

	ls->ls_recover_result[jid] = result;

	/*
	 * GAVEUP means the journal was busy, so requeue the control work
	 * with a delay to give the busy node a chance to finish with it.
	 */
	if (!test_bit(DFL_UNMOUNT, &ls->ls_recover_flags))
		queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work,
				   result == LM_RD_GAVEUP ? HZ : 0);
	spin_unlock(&ls->ls_recover_spin);
}

static const struct dlm_lockspace_ops gdlm_lockspace_ops = {
	.recover_prep = gdlm_recover_prep,
	.recover_slot = gdlm_recover_slot,
	.recover_done = gdlm_recover_done,
};

static int gdlm_mount(struct gfs2_sbd *sdp, const char *table)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	char cluster[GFS2_LOCKNAME_LEN];
	const char *fsname;
	uint32_t flags;
	int error, ops_result;
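
	/*
	 * Initialize recovery state before dlm_new_lockspace() can start
	 * delivering recovery callbacks.
	 */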
	INIT_DELAYED_WORK(&sdp->sd_control_work, gfs2_control_func);
	spin_lock_init(&ls->ls_recover_spin);
	ls->ls_recover_flags = 0;
	ls->ls_recover_mount = 0;
	ls->ls_recover_start = 0;
	ls->ls_recover_block = 0;
	ls->ls_recover_size = 0;
	ls->ls_recover_submit = NULL;
	ls->ls_recover_result = NULL;
	ls->ls_lvb_bits = NULL;

	error = set_recover_size(sdp, NULL, 0);
	if (error)
		goto fail;

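	/*
	 * The table string has the form "clustername:fsname"; split it
	 * into the cluster name and the lockspace (fs) name.
	 */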
	fsname = strchr(table, ':');
	if (!fsname) {
		fs_info(sdp, "no fsname found\n");
		error = -EINVAL;
		goto fail_free;
	}
	memset(cluster, 0, sizeof(cluster));
	memcpy(cluster, table, strlen(table) - strlen(fsname));
	fsname++;

	flags = DLM_LSFL_FS | DLM_LSFL_NEWEXCL;
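
	/*
	 * create/join lockspace
	 */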
	error = dlm_new_lockspace(fsname, cluster, flags, GDLM_LVB_SIZE,
				  &gdlm_lockspace_ops, sdp, &ops_result,
				  &ls->ls_dlm);
	if (error) {
		fs_err(sdp, "dlm_new_lockspace error %d\n", error);
		goto fail_free;
	}

	if (ops_result < 0) {
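		/*
		 * dlm does not support ops callbacks,
		 * old dlm_controld/gfs_controld are used, try without ops.
		 */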
		fs_info(sdp, "dlm lockspace ops not used\n");
		free_recover_size(ls);
		set_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags);
		return 0;
	}

	if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags)) {
		fs_err(sdp, "dlm lockspace ops disallow jid preset\n");
		error = -EINVAL;
		goto fail_release;
	}

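	/*
	 * control_mount() uses control_lock to determine first mounter,
	 * and for later mounts, waits for any recoveries to be cleared.
	 */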
	error = control_mount(sdp);
	if (error) {
		fs_err(sdp, "mount control error %d\n", error);
		goto fail_release;
	}

	ls->ls_first = !!test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags);
	clear_bit(SDF_NOJOURNALID, &sdp->sd_flags);
	smp_mb__after_atomic();
	wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID);
	return 0;

fail_release:
	dlm_release_lockspace(ls->ls_dlm, 2);
fail_free:
	free_recover_size(ls);
fail:
	return error;
}

static void gdlm_first_done(struct gfs2_sbd *sdp)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	int error;

	if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags))
		return;

	error = control_first_done(sdp);
	if (error)
		fs_err(sdp, "mount first_done error %d\n", error);
}

static void gdlm_unmount(struct gfs2_sbd *sdp)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;

	if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags))
		goto release;

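	/* wait for gfs2_control_wq to be done with this fs */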
	spin_lock(&ls->ls_recover_spin);
	set_bit(DFL_UNMOUNT, &ls->ls_recover_flags);
	spin_unlock(&ls->ls_recover_spin);
	flush_delayed_work(&sdp->sd_control_work);

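	/* mounted_lock and control_lock will be purged in dlm recovery */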
release:
	if (ls->ls_dlm) {
		dlm_release_lockspace(ls->ls_dlm, 2);
		ls->ls_dlm = NULL;
	}

	free_recover_size(ls);
}

static const match_table_t dlm_tokens = {
	{ Opt_jid, "jid=%d"},
	{ Opt_id, "id=%d"},
	{ Opt_first, "first=%d"},
	{ Opt_nodir, "nodir=%d"},
	{ Opt_err, NULL },
};

const struct lm_lockops gfs2_dlm_ops = {
	.lm_proto_name = "lock_dlm",
	.lm_mount = gdlm_mount,
	.lm_first_done = gdlm_first_done,
	.lm_recovery_result = gdlm_recovery_result,
	.lm_unmount = gdlm_unmount,
	.lm_put_lock = gdlm_put_lock,
	.lm_lock = gdlm_lock,
	.lm_cancel = gdlm_cancel,
	.lm_tokens = &dlm_tokens,
};