1
2
3
4
5
6
7
8#include "dm-exception-store.h"
9
10#include <linux/mm.h>
11#include <linux/pagemap.h>
12#include <linux/vmalloc.h>
13#include <linux/slab.h>
14#include <linux/dm-io.h>
15
16#define DM_MSG_PREFIX "persistent snapshot"
17#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
/*
 * Magic number stored in the first word of the header chunk.  Written
 * little-endian, its bytes spell "SnAp".
 */
#define SNAP_MAGIC 0x70416e53

/*
 * The only on-disk format version this file accepts when reading an
 * existing snapshot, and the version stamped on new ones.
 */
#define SNAPSHOT_DISK_VERSION 1

/*
 * Layout of the header kept at the start of chunk 0 of the COW device.
 * Every field is converted with cpu_to_le32()/le32_to_cpu() on access.
 */
struct disk_header {
	/* SNAP_MAGIC for an initialised store; zero is treated as "new". */
	uint32_t magic;

	/* Non-zero while the snapshot is usable; cleared when it is dropped. */
	uint32_t valid;

	/* On-disk format version, compared against SNAPSHOT_DISK_VERSION. */
	uint32_t version;

	/* Chunk size in sectors that the store was created with. */
	uint32_t chunk_size;
};
76
/*
 * One exception record: a mapping from a chunk on the origin to the
 * chunk on the COW device that holds its copy.  A metadata area is an
 * array of these; a new_chunk of zero marks the first unused slot.
 */
struct disk_exception {
	uint64_t old_chunk;	/* chunk number being remapped */
	uint64_t new_chunk;	/* chunk number it is remapped to */
};
81
/*
 * A completion callback queued by persistent_commit_exception() until
 * the metadata area holding its exception has been written out.
 */
struct commit_callback {
	/* Invoked with @context and the store's current 'valid' flag. */
	void (*callback)(void *, int success);
	/* Opaque argument handed back to @callback. */
	void *context;
};
86
87
88
89
90struct pstore {
91 struct dm_exception_store *store;
92 int version;
93 int valid;
94 uint32_t exceptions_per_area;
95
96
97
98
99
100
101 void *area;
102
103
104
105
106 void *zero_area;
107
108
109
110
111
112
113 void *header_area;
114
115
116
117
118
119 chunk_t current_area;
120
121
122
123
124 chunk_t next_free;
125
126
127
128
129
130 uint32_t current_committed;
131
132 atomic_t pending_count;
133 uint32_t callback_count;
134 struct commit_callback *callbacks;
135 struct dm_io_client *io_client;
136
137 struct workqueue_struct *metadata_wq;
138};
139
140static unsigned sectors_to_pages(unsigned sectors)
141{
142 return DIV_ROUND_UP(sectors, PAGE_SIZE >> 9);
143}
144
145static int alloc_area(struct pstore *ps)
146{
147 int r = -ENOMEM;
148 size_t len;
149
150 len = ps->store->chunk_size << SECTOR_SHIFT;
151
152
153
154
155
156 ps->area = vmalloc(len);
157 if (!ps->area)
158 goto err_area;
159
160 ps->zero_area = vmalloc(len);
161 if (!ps->zero_area)
162 goto err_zero_area;
163 memset(ps->zero_area, 0, len);
164
165 ps->header_area = vmalloc(len);
166 if (!ps->header_area)
167 goto err_header_area;
168
169 return 0;
170
171err_header_area:
172 vfree(ps->zero_area);
173
174err_zero_area:
175 vfree(ps->area);
176
177err_area:
178 return r;
179}
180
181static void free_area(struct pstore *ps)
182{
183 if (ps->area)
184 vfree(ps->area);
185 ps->area = NULL;
186
187 if (ps->zero_area)
188 vfree(ps->zero_area);
189 ps->zero_area = NULL;
190
191 if (ps->header_area)
192 vfree(ps->header_area);
193 ps->header_area = NULL;
194}
195
196struct mdata_req {
197 struct dm_io_region *where;
198 struct dm_io_request *io_req;
199 struct work_struct work;
200 int result;
201};
202
203static void do_metadata(struct work_struct *work)
204{
205 struct mdata_req *req = container_of(work, struct mdata_req, work);
206
207 req->result = dm_io(req->io_req, 1, req->where, NULL);
208}
209
210
211
212
213static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw,
214 int metadata)
215{
216 struct dm_io_region where = {
217 .bdev = ps->store->cow->bdev,
218 .sector = ps->store->chunk_size * chunk,
219 .count = ps->store->chunk_size,
220 };
221 struct dm_io_request io_req = {
222 .bi_rw = rw,
223 .mem.type = DM_IO_VMA,
224 .mem.ptr.vma = area,
225 .client = ps->io_client,
226 .notify.fn = NULL,
227 };
228 struct mdata_req req;
229
230 if (!metadata)
231 return dm_io(&io_req, 1, &where, NULL);
232
233 req.where = &where;
234 req.io_req = &io_req;
235
236
237
238
239
240 INIT_WORK(&req.work, do_metadata);
241 queue_work(ps->metadata_wq, &req.work);
242 flush_workqueue(ps->metadata_wq);
243
244 return req.result;
245}
246
247
248
249
250static chunk_t area_location(struct pstore *ps, chunk_t area)
251{
252 return 1 + ((ps->exceptions_per_area + 1) * area);
253}
254
255
256
257
258
259static int area_io(struct pstore *ps, int rw)
260{
261 int r;
262 chunk_t chunk;
263
264 chunk = area_location(ps, ps->current_area);
265
266 r = chunk_io(ps, ps->area, chunk, rw, 0);
267 if (r)
268 return r;
269
270 return 0;
271}
272
273static void zero_memory_area(struct pstore *ps)
274{
275 memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT);
276}
277
278static int zero_disk_area(struct pstore *ps, chunk_t area)
279{
280 return chunk_io(ps, ps->zero_area, area_location(ps, area), WRITE, 0);
281}
282
283static int read_header(struct pstore *ps, int *new_snapshot)
284{
285 int r;
286 struct disk_header *dh;
287 unsigned chunk_size;
288 int chunk_size_supplied = 1;
289 char *chunk_err;
290
291
292
293
294
295 if (!ps->store->chunk_size) {
296 ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
297 bdev_logical_block_size(ps->store->cow->bdev) >> 9);
298 ps->store->chunk_mask = ps->store->chunk_size - 1;
299 ps->store->chunk_shift = ffs(ps->store->chunk_size) - 1;
300 chunk_size_supplied = 0;
301 }
302
303 ps->io_client = dm_io_client_create(sectors_to_pages(ps->store->
304 chunk_size));
305 if (IS_ERR(ps->io_client))
306 return PTR_ERR(ps->io_client);
307
308 r = alloc_area(ps);
309 if (r)
310 return r;
311
312 r = chunk_io(ps, ps->header_area, 0, READ, 1);
313 if (r)
314 goto bad;
315
316 dh = ps->header_area;
317
318 if (le32_to_cpu(dh->magic) == 0) {
319 *new_snapshot = 1;
320 return 0;
321 }
322
323 if (le32_to_cpu(dh->magic) != SNAP_MAGIC) {
324 DMWARN("Invalid or corrupt snapshot");
325 r = -ENXIO;
326 goto bad;
327 }
328
329 *new_snapshot = 0;
330 ps->valid = le32_to_cpu(dh->valid);
331 ps->version = le32_to_cpu(dh->version);
332 chunk_size = le32_to_cpu(dh->chunk_size);
333
334 if (ps->store->chunk_size == chunk_size)
335 return 0;
336
337 if (chunk_size_supplied)
338 DMWARN("chunk size %u in device metadata overrides "
339 "table chunk size of %u.",
340 chunk_size, ps->store->chunk_size);
341
342
343 free_area(ps);
344
345 r = dm_exception_store_set_chunk_size(ps->store, chunk_size,
346 &chunk_err);
347 if (r) {
348 DMERR("invalid on-disk chunk size %u: %s.",
349 chunk_size, chunk_err);
350 return r;
351 }
352
353 r = dm_io_client_resize(sectors_to_pages(ps->store->chunk_size),
354 ps->io_client);
355 if (r)
356 return r;
357
358 r = alloc_area(ps);
359 return r;
360
361bad:
362 free_area(ps);
363 return r;
364}
365
366static int write_header(struct pstore *ps)
367{
368 struct disk_header *dh;
369
370 memset(ps->header_area, 0, ps->store->chunk_size << SECTOR_SHIFT);
371
372 dh = ps->header_area;
373 dh->magic = cpu_to_le32(SNAP_MAGIC);
374 dh->valid = cpu_to_le32(ps->valid);
375 dh->version = cpu_to_le32(ps->version);
376 dh->chunk_size = cpu_to_le32(ps->store->chunk_size);
377
378 return chunk_io(ps, ps->header_area, 0, WRITE, 1);
379}
380
381
382
383
384static struct disk_exception *get_exception(struct pstore *ps, uint32_t index)
385{
386 BUG_ON(index >= ps->exceptions_per_area);
387
388 return ((struct disk_exception *) ps->area) + index;
389}
390
391static void read_exception(struct pstore *ps,
392 uint32_t index, struct disk_exception *result)
393{
394 struct disk_exception *e = get_exception(ps, index);
395
396
397 result->old_chunk = le64_to_cpu(e->old_chunk);
398 result->new_chunk = le64_to_cpu(e->new_chunk);
399}
400
401static void write_exception(struct pstore *ps,
402 uint32_t index, struct disk_exception *de)
403{
404 struct disk_exception *e = get_exception(ps, index);
405
406
407 e->old_chunk = cpu_to_le64(de->old_chunk);
408 e->new_chunk = cpu_to_le64(de->new_chunk);
409}
410
411
412
413
414
415
416static int insert_exceptions(struct pstore *ps,
417 int (*callback)(void *callback_context,
418 chunk_t old, chunk_t new),
419 void *callback_context,
420 int *full)
421{
422 int r;
423 unsigned int i;
424 struct disk_exception de;
425
426
427 *full = 1;
428
429 for (i = 0; i < ps->exceptions_per_area; i++) {
430 read_exception(ps, i, &de);
431
432
433
434
435
436
437
438 if (de.new_chunk == 0LL) {
439 ps->current_committed = i;
440 *full = 0;
441 break;
442 }
443
444
445
446
447 if (ps->next_free <= de.new_chunk)
448 ps->next_free = de.new_chunk + 1;
449
450
451
452
453 r = callback(callback_context, de.old_chunk, de.new_chunk);
454 if (r)
455 return r;
456 }
457
458 return 0;
459}
460
461static int read_exceptions(struct pstore *ps,
462 int (*callback)(void *callback_context, chunk_t old,
463 chunk_t new),
464 void *callback_context)
465{
466 int r, full = 1;
467
468
469
470
471
472 for (ps->current_area = 0; full; ps->current_area++) {
473 r = area_io(ps, READ);
474 if (r)
475 return r;
476
477 r = insert_exceptions(ps, callback, callback_context, &full);
478 if (r)
479 return r;
480 }
481
482 ps->current_area--;
483
484 return 0;
485}
486
487static struct pstore *get_info(struct dm_exception_store *store)
488{
489 return (struct pstore *) store->context;
490}
491
492static void persistent_fraction_full(struct dm_exception_store *store,
493 sector_t *numerator, sector_t *denominator)
494{
495 *numerator = get_info(store)->next_free * store->chunk_size;
496 *denominator = get_dev_size(store->cow->bdev);
497}
498
499static void persistent_dtr(struct dm_exception_store *store)
500{
501 struct pstore *ps = get_info(store);
502
503 destroy_workqueue(ps->metadata_wq);
504
505
506 if (ps->io_client)
507 dm_io_client_destroy(ps->io_client);
508 free_area(ps);
509
510
511 if (ps->callbacks)
512 vfree(ps->callbacks);
513
514 kfree(ps);
515}
516
517static int persistent_read_metadata(struct dm_exception_store *store,
518 int (*callback)(void *callback_context,
519 chunk_t old, chunk_t new),
520 void *callback_context)
521{
522 int r, uninitialized_var(new_snapshot);
523 struct pstore *ps = get_info(store);
524
525
526
527
528 r = read_header(ps, &new_snapshot);
529 if (r)
530 return r;
531
532
533
534
535 ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) /
536 sizeof(struct disk_exception);
537 ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
538 sizeof(*ps->callbacks));
539 if (!ps->callbacks)
540 return -ENOMEM;
541
542
543
544
545 if (new_snapshot) {
546 r = write_header(ps);
547 if (r) {
548 DMWARN("write_header failed");
549 return r;
550 }
551
552 ps->current_area = 0;
553 zero_memory_area(ps);
554 r = zero_disk_area(ps, 0);
555 if (r) {
556 DMWARN("zero_disk_area(0) failed");
557 return r;
558 }
559 } else {
560
561
562
563 if (ps->version != SNAPSHOT_DISK_VERSION) {
564 DMWARN("unable to handle snapshot disk version %d",
565 ps->version);
566 return -EINVAL;
567 }
568
569
570
571
572 if (!ps->valid)
573 return 1;
574
575
576
577
578 r = read_exceptions(ps, callback, callback_context);
579 if (r)
580 return r;
581 }
582
583 return 0;
584}
585
586static int persistent_prepare_exception(struct dm_exception_store *store,
587 struct dm_snap_exception *e)
588{
589 struct pstore *ps = get_info(store);
590 uint32_t stride;
591 chunk_t next_free;
592 sector_t size = get_dev_size(store->cow->bdev);
593
594
595 if (size < ((ps->next_free + 1) * store->chunk_size))
596 return -ENOSPC;
597
598 e->new_chunk = ps->next_free;
599
600
601
602
603
604 stride = (ps->exceptions_per_area + 1);
605 next_free = ++ps->next_free;
606 if (sector_div(next_free, stride) == 1)
607 ps->next_free++;
608
609 atomic_inc(&ps->pending_count);
610 return 0;
611}
612
613static void persistent_commit_exception(struct dm_exception_store *store,
614 struct dm_snap_exception *e,
615 void (*callback) (void *, int success),
616 void *callback_context)
617{
618 unsigned int i;
619 struct pstore *ps = get_info(store);
620 struct disk_exception de;
621 struct commit_callback *cb;
622
623 de.old_chunk = e->old_chunk;
624 de.new_chunk = e->new_chunk;
625 write_exception(ps, ps->current_committed++, &de);
626
627
628
629
630
631
632
633 cb = ps->callbacks + ps->callback_count++;
634 cb->callback = callback;
635 cb->context = callback_context;
636
637
638
639
640
641 if (!atomic_dec_and_test(&ps->pending_count) &&
642 (ps->current_committed != ps->exceptions_per_area))
643 return;
644
645
646
647
648 if ((ps->current_committed == ps->exceptions_per_area) &&
649 zero_disk_area(ps, ps->current_area + 1))
650 ps->valid = 0;
651
652
653
654
655 if (ps->valid && area_io(ps, WRITE_BARRIER))
656 ps->valid = 0;
657
658
659
660
661 if (ps->current_committed == ps->exceptions_per_area) {
662 ps->current_committed = 0;
663 ps->current_area++;
664 zero_memory_area(ps);
665 }
666
667 for (i = 0; i < ps->callback_count; i++) {
668 cb = ps->callbacks + i;
669 cb->callback(cb->context, ps->valid);
670 }
671
672 ps->callback_count = 0;
673}
674
675static void persistent_drop_snapshot(struct dm_exception_store *store)
676{
677 struct pstore *ps = get_info(store);
678
679 ps->valid = 0;
680 if (write_header(ps))
681 DMWARN("write header failed");
682}
683
684static int persistent_ctr(struct dm_exception_store *store,
685 unsigned argc, char **argv)
686{
687 struct pstore *ps;
688
689
690 ps = kzalloc(sizeof(*ps), GFP_KERNEL);
691 if (!ps)
692 return -ENOMEM;
693
694 ps->store = store;
695 ps->valid = 1;
696 ps->version = SNAPSHOT_DISK_VERSION;
697 ps->area = NULL;
698 ps->zero_area = NULL;
699 ps->header_area = NULL;
700 ps->next_free = 2;
701 ps->current_committed = 0;
702
703 ps->callback_count = 0;
704 atomic_set(&ps->pending_count, 0);
705 ps->callbacks = NULL;
706
707 ps->metadata_wq = create_singlethread_workqueue("ksnaphd");
708 if (!ps->metadata_wq) {
709 kfree(ps);
710 DMERR("couldn't start header metadata update thread");
711 return -ENOMEM;
712 }
713
714 store->context = ps;
715
716 return 0;
717}
718
719static unsigned persistent_status(struct dm_exception_store *store,
720 status_type_t status, char *result,
721 unsigned maxlen)
722{
723 unsigned sz = 0;
724
725 switch (status) {
726 case STATUSTYPE_INFO:
727 break;
728 case STATUSTYPE_TABLE:
729 DMEMIT(" %s P %llu", store->cow->name,
730 (unsigned long long)store->chunk_size);
731 }
732
733 return sz;
734}
735
736static struct dm_exception_store_type _persistent_type = {
737 .name = "persistent",
738 .module = THIS_MODULE,
739 .ctr = persistent_ctr,
740 .dtr = persistent_dtr,
741 .read_metadata = persistent_read_metadata,
742 .prepare_exception = persistent_prepare_exception,
743 .commit_exception = persistent_commit_exception,
744 .drop_snapshot = persistent_drop_snapshot,
745 .fraction_full = persistent_fraction_full,
746 .status = persistent_status,
747};
748
749static struct dm_exception_store_type _persistent_compat_type = {
750 .name = "P",
751 .module = THIS_MODULE,
752 .ctr = persistent_ctr,
753 .dtr = persistent_dtr,
754 .read_metadata = persistent_read_metadata,
755 .prepare_exception = persistent_prepare_exception,
756 .commit_exception = persistent_commit_exception,
757 .drop_snapshot = persistent_drop_snapshot,
758 .fraction_full = persistent_fraction_full,
759 .status = persistent_status,
760};
761
762int dm_persistent_snapshot_init(void)
763{
764 int r;
765
766 r = dm_exception_store_type_register(&_persistent_type);
767 if (r) {
768 DMERR("Unable to register persistent exception store type");
769 return r;
770 }
771
772 r = dm_exception_store_type_register(&_persistent_compat_type);
773 if (r) {
774 DMERR("Unable to register old-style persistent exception "
775 "store type");
776 dm_exception_store_type_unregister(&_persistent_type);
777 return r;
778 }
779
780 return r;
781}
782
783void dm_persistent_snapshot_exit(void)
784{
785 dm_exception_store_type_unregister(&_persistent_type);
786 dm_exception_store_type_unregister(&_persistent_compat_type);
787}
788