1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25#include <linux/slab.h>
26#include <linux/module.h>
27#include <asm/div64.h>
28#include <linux/lcm.h>
29
30#include "ore_raid.h"
31
32MODULE_AUTHOR("Boaz Harrosh <bharrosh@panasas.com>");
33MODULE_DESCRIPTION("Objects Raid Engine ore.ko");
34MODULE_LICENSE("GPL");
35
36
37
38
39
40
41
42
43
44
45
/*
 * Number of struct bio_vec entries that fit in one page next to the
 * struct bio header itself.  ore_verify_layout() uses it to bound
 * layout->max_io_length.
 */
enum { BIO_MAX_PAGES_KMALLOC =
		(PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),};
48
49int ore_verify_layout(unsigned total_comps, struct ore_layout *layout)
50{
51 u64 stripe_length;
52
53 switch (layout->raid_algorithm) {
54 case PNFS_OSD_RAID_0:
55 layout->parity = 0;
56 break;
57 case PNFS_OSD_RAID_5:
58 layout->parity = 1;
59 break;
60 case PNFS_OSD_RAID_PQ:
61 case PNFS_OSD_RAID_4:
62 default:
63 ORE_ERR("Only RAID_0/5 for now\n");
64 return -EINVAL;
65 }
66 if (0 != (layout->stripe_unit & ~PAGE_MASK)) {
67 ORE_ERR("Stripe Unit(0x%llx)"
68 " must be Multples of PAGE_SIZE(0x%lx)\n",
69 _LLU(layout->stripe_unit), PAGE_SIZE);
70 return -EINVAL;
71 }
72 if (layout->group_width) {
73 if (!layout->group_depth) {
74 ORE_ERR("group_depth == 0 && group_width != 0\n");
75 return -EINVAL;
76 }
77 if (total_comps < (layout->group_width * layout->mirrors_p1)) {
78 ORE_ERR("Data Map wrong, "
79 "numdevs=%d < group_width=%d * mirrors=%d\n",
80 total_comps, layout->group_width,
81 layout->mirrors_p1);
82 return -EINVAL;
83 }
84 layout->group_count = total_comps / layout->mirrors_p1 /
85 layout->group_width;
86 } else {
87 if (layout->group_depth) {
88 printk(KERN_NOTICE "Warning: group_depth ignored "
89 "group_width == 0 && group_depth == %lld\n",
90 _LLU(layout->group_depth));
91 }
92 layout->group_width = total_comps / layout->mirrors_p1;
93 layout->group_depth = -1;
94 layout->group_count = 1;
95 }
96
97 stripe_length = (u64)layout->group_width * layout->stripe_unit;
98 if (stripe_length >= (1ULL << 32)) {
99 ORE_ERR("Stripe_length(0x%llx) >= 32bit is not supported\n",
100 _LLU(stripe_length));
101 return -EINVAL;
102 }
103
104 layout->max_io_length =
105 (BIO_MAX_PAGES_KMALLOC * PAGE_SIZE - layout->stripe_unit) *
106 layout->group_width;
107 if (layout->parity) {
108 unsigned stripe_length =
109 (layout->group_width - layout->parity) *
110 layout->stripe_unit;
111
112 layout->max_io_length /= stripe_length;
113 layout->max_io_length *= stripe_length;
114 }
115 return 0;
116}
117EXPORT_SYMBOL(ore_verify_layout);
118
119static u8 *_ios_cred(struct ore_io_state *ios, unsigned index)
120{
121 return ios->oc->comps[index & ios->oc->single_comp].cred;
122}
123
124static struct osd_obj_id *_ios_obj(struct ore_io_state *ios, unsigned index)
125{
126 return &ios->oc->comps[index & ios->oc->single_comp].obj;
127}
128
129static struct osd_dev *_ios_od(struct ore_io_state *ios, unsigned index)
130{
131 ORE_DBGMSG2("oc->first_dev=%d oc->numdevs=%d i=%d oc->ods=%p\n",
132 ios->oc->first_dev, ios->oc->numdevs, index,
133 ios->oc->ods);
134
135 return ore_comp_dev(ios->oc, index);
136}
137
/*
 * _ore_get_io_state - allocate and initialise a zeroed ore_io_state
 * @layout:        stored in ios->layout
 * @oc:            stored in ios->oc
 * @numdevs:       number of per_dev[] slots to allocate
 * @sgs_per_dev:   number of osd_sg_entry slots per device (0 for none)
 * @num_par_pages: number of struct page pointers to allocate (0 for none)
 * @pios:          receives the new io_state, or NULL on failure
 *
 * The sg list and the page-pointer array share one union, so their storage
 * overlaps.
 *
 * If everything fits in one page a single allocation is made.  Otherwise
 * the io_state + per_dev[] part and the sglist/pages part are allocated
 * separately and ios->extra_part_alloc is set.
 *
 * Returns 0 on success or -ENOMEM.
 */
int _ore_get_io_state(struct ore_layout *layout,
			struct ore_components *oc, unsigned numdevs,
			unsigned sgs_per_dev, unsigned num_par_pages,
			struct ore_io_state **pios)
{
	struct ore_io_state *ios;
	struct page **pages;
	struct osd_sg_entry *sgilist;
	/*
	 * Single-allocation layout.
	 * NOTE(review): per_dev[] presumably backs a trailing array member
	 * of struct ore_io_state (it is accessed as ios->per_dev[d] below)
	 * - confirm against the header.
	 */
	struct __alloc_all_io_state {
		struct ore_io_state ios;
		struct ore_per_dev_state per_dev[numdevs];
		union {
			struct osd_sg_entry sglist[sgs_per_dev * numdevs];
			struct page *pages[num_par_pages];
		};
	} *_aios;

	if (likely(sizeof(*_aios) <= PAGE_SIZE)) {
		/* Everything fits in one page: one allocation */
		_aios = kzalloc(sizeof(*_aios), GFP_KERNEL);
		if (unlikely(!_aios)) {
			ORE_DBGMSG("Failed kzalloc bytes=%zd\n",
				   sizeof(*_aios));
			*pios = NULL;
			return -ENOMEM;
		}
		pages = num_par_pages ? _aios->pages : NULL;
		sgilist = sgs_per_dev ? _aios->sglist : NULL;
		ios = &_aios->ios;
	} else {
		/* Too big for one page: split into two allocations */
		struct __alloc_small_io_state {
			struct ore_io_state ios;
			struct ore_per_dev_state per_dev[numdevs];
		} *_aio_small;
		union __extra_part {
			struct osd_sg_entry sglist[sgs_per_dev * numdevs];
			struct page *pages[num_par_pages];
		} *extra_part;

		_aio_small = kzalloc(sizeof(*_aio_small), GFP_KERNEL);
		if (unlikely(!_aio_small)) {
			ORE_DBGMSG("Failed alloc first part bytes=%zd\n",
				   sizeof(*_aio_small));
			*pios = NULL;
			return -ENOMEM;
		}
		extra_part = kzalloc(sizeof(*extra_part), GFP_KERNEL);
		if (unlikely(!extra_part)) {
			ORE_DBGMSG("Failed alloc second part bytes=%zd\n",
				   sizeof(*extra_part));
			kfree(_aio_small);
			*pios = NULL;
			return -ENOMEM;
		}

		pages = num_par_pages ? extra_part->pages : NULL;
		sgilist = sgs_per_dev ? extra_part->sglist : NULL;
		/*
		 * extra_part itself is not stored anywhere.  It stays
		 * reachable only through ios->parity_pages or
		 * ios->per_dev[0].sglist, both set below.  extra_part_alloc
		 * records that it is a separate allocation (presumably so
		 * that _ore_free_raid_stuff() can free it - confirm).
		 */
		ios = &_aio_small->ios;
		ios->extra_part_alloc = true;
	}

	if (pages) {
		ios->parity_pages = pages;
		ios->max_par_pages = num_par_pages;
	}
	if (sgilist) {
		unsigned d;

		/* Hand each device its own sgs_per_dev-sized slice */
		for (d = 0; d < numdevs; ++d) {
			ios->per_dev[d].sglist = sgilist;
			sgilist += sgs_per_dev;
		}
		ios->sgs_per_dev = sgs_per_dev;
	}

	ios->layout = layout;
	ios->oc = oc;
	*pios = ios;
	return 0;
}
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237int ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc,
238 bool is_reading, u64 offset, u64 length,
239 struct ore_io_state **pios)
240{
241 struct ore_io_state *ios;
242 unsigned numdevs = layout->group_width * layout->mirrors_p1;
243 unsigned sgs_per_dev = 0, max_par_pages = 0;
244 int ret;
245
246 if (layout->parity && length) {
247 unsigned data_devs = layout->group_width - layout->parity;
248 unsigned stripe_size = layout->stripe_unit * data_devs;
249 unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE;
250 u32 remainder;
251 u64 num_stripes;
252 u64 num_raid_units;
253
254 num_stripes = div_u64_rem(length, stripe_size, &remainder);
255 if (remainder)
256 ++num_stripes;
257
258 num_raid_units = num_stripes * layout->parity;
259
260 if (is_reading) {
261
262
263
264
265
266
267
268 num_raid_units += layout->group_width;
269 sgs_per_dev = div_u64(num_raid_units, data_devs) + 2;
270 } else {
271
272 max_par_pages = num_raid_units * pages_in_unit *
273 sizeof(struct page *);
274 }
275 }
276
277 ret = _ore_get_io_state(layout, oc, numdevs, sgs_per_dev, max_par_pages,
278 pios);
279 if (unlikely(ret))
280 return ret;
281
282 ios = *pios;
283 ios->reading = is_reading;
284 ios->offset = offset;
285
286 if (length) {
287 ore_calc_stripe_info(layout, offset, length, &ios->si);
288 ios->length = ios->si.length;
289 ios->nr_pages = (ios->length + PAGE_SIZE - 1) / PAGE_SIZE;
290 if (layout->parity)
291 _ore_post_alloc_raid_stuff(ios);
292 }
293
294 return 0;
295}
296EXPORT_SYMBOL(ore_get_rw_state);
297
298
299
300
301
302
303
304
305int ore_get_io_state(struct ore_layout *layout, struct ore_components *oc,
306 struct ore_io_state **pios)
307{
308 return _ore_get_io_state(layout, oc, oc->numdevs, 0, 0, pios);
309}
310EXPORT_SYMBOL(ore_get_io_state);
311
312void ore_put_io_state(struct ore_io_state *ios)
313{
314 if (ios) {
315 unsigned i;
316
317 for (i = 0; i < ios->numdevs; i++) {
318 struct ore_per_dev_state *per_dev = &ios->per_dev[i];
319
320 if (per_dev->or)
321 osd_end_request(per_dev->or);
322 if (per_dev->bio)
323 bio_put(per_dev->bio);
324 }
325
326 _ore_free_raid_stuff(ios);
327 kfree(ios);
328 }
329}
330EXPORT_SYMBOL(ore_put_io_state);
331
/* ios->done callback for synchronous execution: @p is the completion to signal */
static void _sync_done(struct ore_io_state *ios, void *p)
{
	complete((struct completion *)p);
}
338
339static void _last_io(struct kref *kref)
340{
341 struct ore_io_state *ios = container_of(
342 kref, struct ore_io_state, kref);
343
344 ios->done(ios, ios->private);
345}
346
347static void _done_io(struct osd_request *or, void *p)
348{
349 struct ore_io_state *ios = p;
350
351 kref_put(&ios->kref, _last_io);
352}
353
354int ore_io_execute(struct ore_io_state *ios)
355{
356 DECLARE_COMPLETION_ONSTACK(wait);
357 bool sync = (ios->done == NULL);
358 int i, ret;
359
360 if (sync) {
361 ios->done = _sync_done;
362 ios->private = &wait;
363 }
364
365 for (i = 0; i < ios->numdevs; i++) {
366 struct osd_request *or = ios->per_dev[i].or;
367 if (unlikely(!or))
368 continue;
369
370 ret = osd_finalize_request(or, 0, _ios_cred(ios, i), NULL);
371 if (unlikely(ret)) {
372 ORE_DBGMSG("Failed to osd_finalize_request() => %d\n",
373 ret);
374 return ret;
375 }
376 }
377
378 kref_init(&ios->kref);
379
380 for (i = 0; i < ios->numdevs; i++) {
381 struct osd_request *or = ios->per_dev[i].or;
382 if (unlikely(!or))
383 continue;
384
385 kref_get(&ios->kref);
386 osd_execute_request_async(or, _done_io, ios);
387 }
388
389 kref_put(&ios->kref, _last_io);
390 ret = 0;
391
392 if (sync) {
393 wait_for_completion(&wait);
394 ret = ore_check_io(ios, NULL);
395 }
396 return ret;
397}
398
399static void _clear_bio(struct bio *bio)
400{
401 struct bio_vec *bv;
402 unsigned i;
403
404 __bio_for_each_segment(bv, bio, i, 0) {
405 unsigned this_count = bv->bv_len;
406
407 if (likely(PAGE_SIZE == this_count))
408 clear_highpage(bv->bv_page);
409 else
410 zero_user(bv->bv_page, bv->bv_offset, this_count);
411 }
412}
413
414int ore_check_io(struct ore_io_state *ios, ore_on_dev_error on_dev_error)
415{
416 enum osd_err_priority acumulated_osd_err = 0;
417 int acumulated_lin_err = 0;
418 int i;
419
420 for (i = 0; i < ios->numdevs; i++) {
421 struct osd_sense_info osi;
422 struct ore_per_dev_state *per_dev = &ios->per_dev[i];
423 struct osd_request *or = per_dev->or;
424 int ret;
425
426 if (unlikely(!or))
427 continue;
428
429 ret = osd_req_decode_sense(or, &osi);
430 if (likely(!ret))
431 continue;
432
433 if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) {
434
435 _clear_bio(per_dev->bio);
436 ORE_DBGMSG("start read offset passed end of file "
437 "offset=0x%llx, length=0x%llx\n",
438 _LLU(per_dev->offset),
439 _LLU(per_dev->length));
440
441 continue;
442 }
443
444 if (on_dev_error) {
445 u64 residual = ios->reading ?
446 or->in.residual : or->out.residual;
447 u64 offset = (ios->offset + ios->length) - residual;
448 unsigned dev = per_dev->dev - ios->oc->first_dev;
449 struct ore_dev *od = ios->oc->ods[dev];
450
451 on_dev_error(ios, od, dev, osi.osd_err_pri,
452 offset, residual);
453 }
454 if (osi.osd_err_pri >= acumulated_osd_err) {
455 acumulated_osd_err = osi.osd_err_pri;
456 acumulated_lin_err = ret;
457 }
458 }
459
460 return acumulated_lin_err;
461}
462EXPORT_SYMBOL(ore_check_io);
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset,
517 u64 length, struct ore_striping_info *si)
518{
519 u32 stripe_unit = layout->stripe_unit;
520 u32 group_width = layout->group_width;
521 u64 group_depth = layout->group_depth;
522 u32 parity = layout->parity;
523
524 u32 D = group_width - parity;
525 u32 U = D * stripe_unit;
526 u64 T = U * group_depth;
527 u64 S = T * layout->group_count;
528 u64 M = div64_u64(file_offset, S);
529
530
531
532
533
534 u64 LmodS = file_offset - M * S;
535 u32 G = div64_u64(LmodS, T);
536 u64 H = LmodS - G * T;
537
538 u32 N = div_u64(H, U);
539
540
541 u32 C = (u32)(H - (N * U)) / stripe_unit + G * group_width;
542
543 div_u64_rem(file_offset, stripe_unit, &si->unit_off);
544
545 si->obj_offset = si->unit_off + (N * stripe_unit) +
546 (M * group_depth * stripe_unit);
547
548 if (parity) {
549 u32 LCMdP = lcm(group_width, parity) / parity;
550
551 u32 RxP = (N % LCMdP) * parity;
552 u32 first_dev = C - C % group_width;
553
554 si->par_dev = (group_width + group_width - parity - RxP) %
555 group_width + first_dev;
556 si->dev = (group_width + C - RxP) % group_width + first_dev;
557 si->bytes_in_stripe = U;
558 si->first_stripe_start = M * S + G * T + N * U;
559 } else {
560
561 si->par_dev = group_width;
562 si->dev = C;
563 }
564
565 si->dev *= layout->mirrors_p1;
566 si->par_dev *= layout->mirrors_p1;
567 si->offset = file_offset;
568 si->length = T - H;
569 if (si->length > length)
570 si->length = length;
571 si->M = M;
572}
573EXPORT_SYMBOL(ore_calc_stripe_info);
574
/*
 * _ore_add_stripe_unit - add up to one stripe unit of pages to a device bio
 * @ios:     io_state of the IO
 * @cur_pg:  in/out index into @pages; advanced only on success
 * @pgbase:  byte offset inside the first page
 * @pages:   page array to take the pages from
 * @per_dev: device state whose bio receives the pages
 * @cur_len: number of bytes to add
 *
 * The device bio is allocated on first use.  On success per_dev->length
 * grows by @cur_len.
 *
 * Returns 0 on success, -ENOMEM if the bio cannot be allocated or a page
 * cannot be added.
 */
int _ore_add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg,
		unsigned pgbase, struct page **pages,
		struct ore_per_dev_state *per_dev, int cur_len)
{
	unsigned pg = *cur_pg;
	struct request_queue *q =
			osd_request_queue(_ios_od(ios, per_dev->dev));
	unsigned len = cur_len;
	int ret;

	if (per_dev->bio == NULL) {
		/*
		 * First unit for this device: size the bio from the IO's
		 * page count (scaled up by group_width / data devices) plus
		 * one stripe of pages, divided evenly over group_width.
		 */
		unsigned pages_in_stripe = ios->layout->group_width *
					(ios->layout->stripe_unit / PAGE_SIZE);
		unsigned nr_pages = ios->nr_pages * ios->layout->group_width /
					(ios->layout->group_width -
					 ios->layout->parity);
		unsigned bio_size = (nr_pages + pages_in_stripe) /
					ios->layout->group_width;

		per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
		if (unlikely(!per_dev->bio)) {
			ORE_DBGMSG("Failed to allocate BIO size=%u\n",
				     bio_size);
			ret = -ENOMEM;
			goto out;
		}
	}

	while (cur_len > 0) {
		/* Only the first page may start at a non-zero pgbase */
		unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len);
		unsigned added_len;

		cur_len -= pglen;

		added_len = bio_add_pc_page(q, per_dev->bio, pages[pg],
					    pglen, pgbase);
		if (unlikely(pglen != added_len)) {
			ORE_DBGMSG("Failed bio_add_pc_page bi_vcnt=%u\n",
				   per_dev->bio->bi_vcnt);
			ret = -ENOMEM;
			goto out;
		}
		_add_stripe_page(ios->sp2d, &ios->si, pages[pg]);

		pgbase = 0;
		++pg;
	}
	BUG_ON(cur_len);

	per_dev->length += len;
	*cur_pg = pg;
	ret = 0;
out:
	/*
	 * On failure neither per_dev->length nor *cur_pg is advanced, so
	 * the bio may hold more pages than per_dev->length accounts for.
	 */
	return ret;
}
634
/*
 * _prepare_for_striping - distribute ios->pages over the per-device bios
 * @ios: io_state with ->si already calculated
 *
 * Starting at the device in si->dev, the IO is cut into stripe-unit
 * sized pieces that are added round-robin (in steps of mirrors_p1) to
 * the devices of the group.  When the walk reaches the parity device,
 * or the data ends while ios->sp2d is set, _ore_add_parity_unit() is
 * called and the parity device is rotated for the next stripe.
 *
 * Without ios->pages only ios->numdevs is set (to mirrors_p1) and 0 is
 * returned.  Otherwise ios->numdevs becomes the number of devices in
 * the group and ios->pages_consumed is updated, also on error.
 *
 * Returns 0 or the error from the add-unit helpers.
 */
static int _prepare_for_striping(struct ore_io_state *ios)
{
	struct ore_striping_info *si = &ios->si;
	unsigned stripe_unit = ios->layout->stripe_unit;
	unsigned mirrors_p1 = ios->layout->mirrors_p1;
	unsigned group_width = ios->layout->group_width;
	unsigned devs_in_group = group_width * mirrors_p1;
	unsigned dev = si->dev;
	unsigned first_dev = dev - (dev % devs_in_group);
	unsigned dev_order;
	unsigned cur_pg = ios->pages_consumed;
	u64 length = ios->length;
	int ret = 0;

	if (!ios->pages) {
		/* No page data: only one set of mirrors is involved */
		ios->numdevs = ios->layout->mirrors_p1;
		return 0;
	}

	BUG_ON(length > si->length);

	dev_order = _dev_order(devs_in_group, mirrors_p1, si->par_dev, dev);
	si->cur_comp = dev_order;
	si->cur_pg = si->unit_off / PAGE_SIZE;

	while (length) {
		/* Index of this device inside the group's per_dev[] */
		unsigned comp = dev - first_dev;
		struct ore_per_dev_state *per_dev = &ios->per_dev[comp];
		unsigned cur_len, page_off = 0;

		if (!per_dev->length) {
			/* First piece for this device: set dev and offset */
			per_dev->dev = dev;
			if (dev == si->dev) {
				/* The device the IO starts on */
				WARN_ON(dev == si->par_dev);
				per_dev->offset = si->obj_offset;
				cur_len = stripe_unit - si->unit_off;
				page_off = si->unit_off & ~PAGE_MASK;
				BUG_ON(page_off && (page_off != ios->pgbase));
			} else {
				/*
				 * Other devices start on a stripe-unit
				 * boundary: the current unit if ordered
				 * after the first device, else the next.
				 */
				if (si->cur_comp > dev_order)
					per_dev->offset =
						si->obj_offset - si->unit_off;
				else /* si->cur_comp < dev_order */
					per_dev->offset =
						si->obj_offset + stripe_unit -
								   si->unit_off;
				cur_len = stripe_unit;
			}
		} else {
			cur_len = stripe_unit;
		}
		if (cur_len >= length)
			cur_len = length;

		ret = _ore_add_stripe_unit(ios, &cur_pg, page_off, ios->pages,
					   per_dev, cur_len);
		if (unlikely(ret))
			goto out;

		/* Advance to the next device, wrapping inside the group */
		dev += mirrors_p1;
		dev = (dev % devs_in_group) + first_dev;

		length -= cur_len;

		si->cur_comp = (si->cur_comp + 1) % group_width;
		if (unlikely((dev == si->par_dev) || (!length && ios->sp2d))) {
			if (!length && ios->sp2d) {
				/*
				 * Data ended before the parity device was
				 * reached: jump to it directly.
				 */
				dev = si->par_dev;
			}
			if (ios->sp2d)
				/*
				 * With sp2d set, cur_len is replaced by the
				 * remaining length before it is handed to
				 * _ore_add_parity_unit().
				 */
				cur_len = length;
			per_dev = &ios->per_dev[dev - first_dev];
			if (!per_dev->length) {
				/*
				 * Parity device not touched yet: it starts
				 * at the beginning of the current unit.
				 */
				per_dev->dev = dev;
				per_dev->offset = si->obj_offset - si->unit_off;
			}

			ret = _ore_add_parity_unit(ios, si, per_dev, cur_len);
			if (unlikely(ret))
					goto out;

			/* Rotate the parity device for the next stripe */
			si->par_dev = (devs_in_group + si->par_dev -
				       ios->layout->parity * mirrors_p1) %
				      devs_in_group + first_dev;
			/* Next stripe starts at its first component/page */
			si->cur_comp = 0;
			si->cur_pg = 0;
		}
	}
out:
	ios->numdevs = devs_in_group;
	ios->pages_consumed = cur_pg;
	return ret;
}
740
741int ore_create(struct ore_io_state *ios)
742{
743 int i, ret;
744
745 for (i = 0; i < ios->oc->numdevs; i++) {
746 struct osd_request *or;
747
748 or = osd_start_request(_ios_od(ios, i), GFP_KERNEL);
749 if (unlikely(!or)) {
750 ORE_ERR("%s: osd_start_request failed\n", __func__);
751 ret = -ENOMEM;
752 goto out;
753 }
754 ios->per_dev[i].or = or;
755 ios->numdevs++;
756
757 osd_req_create_object(or, _ios_obj(ios, i));
758 }
759 ret = ore_io_execute(ios);
760
761out:
762 return ret;
763}
764EXPORT_SYMBOL(ore_create);
765
766int ore_remove(struct ore_io_state *ios)
767{
768 int i, ret;
769
770 for (i = 0; i < ios->oc->numdevs; i++) {
771 struct osd_request *or;
772
773 or = osd_start_request(_ios_od(ios, i), GFP_KERNEL);
774 if (unlikely(!or)) {
775 ORE_ERR("%s: osd_start_request failed\n", __func__);
776 ret = -ENOMEM;
777 goto out;
778 }
779 ios->per_dev[i].or = or;
780 ios->numdevs++;
781
782 osd_req_remove_object(or, _ios_obj(ios, i));
783 }
784 ret = ore_io_execute(ios);
785
786out:
787 return ret;
788}
789EXPORT_SYMBOL(ore_remove);
790
/*
 * _write_mirror - set up the write requests for one set of mirrors
 * @ios:      io_state being written
 * @cur_comp: per_dev[] index of the first (master) mirror
 *
 * A request is started for each of the mirrors_p1 consecutive components
 * beginning at @cur_comp.  Depending on the ios this is a page write
 * (ios->pages), a kernel-buffer write (ios->kern_buff) or a pure
 * set-attributes request.  For page writes the master's bio is cloned
 * for each additional mirror.  ios->out_attr / ios->in_attr are attached
 * to every request when set.
 *
 * Returns 0 on success (also when there is nothing to write for this
 * master), -ENOMEM on allocation failure, or the osd_req_write_kern()
 * error.
 */
static int _write_mirror(struct ore_io_state *ios, int cur_comp)
{
	struct ore_per_dev_state *master_dev = &ios->per_dev[cur_comp];
	unsigned dev = ios->per_dev[cur_comp].dev;
	unsigned last_comp = cur_comp + ios->layout->mirrors_p1;
	int ret = 0;

	/* Page IO that placed no data on this device: nothing to do */
	if (ios->pages && !master_dev->length)
		return 0; /* Just an empty slot */

	for (; cur_comp < last_comp; ++cur_comp, ++dev) {
		struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
		struct osd_request *or;

		or = osd_start_request(_ios_od(ios, dev), GFP_KERNEL);
		if (unlikely(!or)) {
			ORE_ERR("%s: osd_start_request failed\n", __func__);
			ret = -ENOMEM;
			goto out;
		}
		per_dev->or = or;

		if (ios->pages) {
			struct bio *bio;

			if (per_dev != master_dev) {
				/* Mirror: gets its own clone of the master bio */
				bio = bio_kmalloc(GFP_KERNEL,
						  master_dev->bio->bi_max_vecs);
				if (unlikely(!bio)) {
					ORE_DBGMSG(
					      "Failed to allocate BIO size=%u\n",
					      master_dev->bio->bi_max_vecs);
					ret = -ENOMEM;
					goto out;
				}

				__bio_clone(bio, master_dev->bio);
				bio->bi_bdev = NULL;
				bio->bi_next = NULL;
				per_dev->offset = master_dev->offset;
				per_dev->length = master_dev->length;
				per_dev->bio =  bio;
				per_dev->dev = dev;
			} else {
				/*
				 * The master runs first, so the clones made
				 * afterwards copy a bi_rw that already has
				 * REQ_WRITE set.
				 */
				bio = master_dev->bio;
				/* FIXME: bio_set_dir() */
				bio->bi_rw |= REQ_WRITE;
			}

			osd_req_write(or, _ios_obj(ios, cur_comp),
				      per_dev->offset, bio, per_dev->length);
			ORE_DBGMSG("write(0x%llx) offset=0x%llx "
				      "length=0x%llx dev=%d\n",
				     _LLU(_ios_obj(ios, cur_comp)->id),
				     _LLU(per_dev->offset),
				     _LLU(per_dev->length), dev);
		} else if (ios->kern_buff) {
			per_dev->offset = ios->si.obj_offset;
			per_dev->dev = ios->si.dev + dev;

			/* A kernel-buffer write must not cross a stripe unit */
			BUG_ON((ios->layout->group_width > 1) &&
			       (ios->si.unit_off + ios->length >
				ios->layout->stripe_unit));

			ret = osd_req_write_kern(or, _ios_obj(ios, cur_comp),
						 per_dev->offset,
						 ios->kern_buff, ios->length);
			if (unlikely(ret))
				goto out;
			ORE_DBGMSG2("write_kern(0x%llx) offset=0x%llx "
				      "length=0x%llx dev=%d\n",
				     _LLU(_ios_obj(ios, cur_comp)->id),
				     _LLU(per_dev->offset),
				     _LLU(ios->length), per_dev->dev);
		} else {
			/* No data at all: attributes-only request */
			osd_req_set_attributes(or, _ios_obj(ios, cur_comp));
			ORE_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n",
				     _LLU(_ios_obj(ios, cur_comp)->id),
				     ios->out_attr_len, dev);
		}

		if (ios->out_attr)
			osd_req_add_set_attr_list(or, ios->out_attr,
						  ios->out_attr_len);

		if (ios->in_attr)
			osd_req_add_get_attr_list(or, ios->in_attr,
						  ios->in_attr_len);
	}

out:
	return ret;
}
885
886int ore_write(struct ore_io_state *ios)
887{
888 int i;
889 int ret;
890
891 if (unlikely(ios->sp2d && !ios->r4w)) {
892
893
894
895 WARN_ON_ONCE(1);
896 return -ENOTSUPP;
897 }
898
899 ret = _prepare_for_striping(ios);
900 if (unlikely(ret))
901 return ret;
902
903 for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
904 ret = _write_mirror(ios, i);
905 if (unlikely(ret))
906 return ret;
907 }
908
909 ret = ore_io_execute(ios);
910 return ret;
911}
912EXPORT_SYMBOL(ore_write);
913
/*
 * _ore_read_mirror - set up the read request for one set of mirrors
 * @ios:      io_state being read
 * @cur_comp: per_dev[] index of the set's first component
 *
 * Exactly one device of the set is read.  Which one is chosen from the
 * object id modulo mirrors_p1, added to per_dev->dev.  With ios->pages
 * the request is a plain read or, when sg entries were collected, an sg
 * read.  Without pages it is a get-attributes request.  ios->out_attr /
 * ios->in_attr are attached when set.
 *
 * Returns 0 on success (also when nothing needs to be read from this
 * set) or -ENOMEM.
 */
int _ore_read_mirror(struct ore_io_state *ios, unsigned cur_comp)
{
	struct osd_request *or;
	struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
	struct osd_obj_id *obj = _ios_obj(ios, cur_comp);
	unsigned first_dev = (unsigned)obj->id;

	/* Page IO that placed nothing on this device: nothing to do */
	if (ios->pages && !per_dev->length)
		return 0; /* Just an empty slot */

	/* Pick one of the mirrors based on the object id */
	first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1;
	or = osd_start_request(_ios_od(ios, first_dev), GFP_KERNEL);
	if (unlikely(!or)) {
		ORE_ERR("%s: osd_start_request failed\n", __func__);
		return -ENOMEM;
	}
	per_dev->or = or;

	if (ios->pages) {
		if (per_dev->cur_sg) {
			/* Close the last sg entry before using the list */
			_ore_add_sg_seg(per_dev, 0, false);
			/*
			 * NOTE(review): cur_sg can apparently drop to zero
			 * here; the already started request then stays in
			 * per_dev->or with no command set up - confirm this
			 * is intended.
			 */
			if (unlikely(!per_dev->cur_sg))
				return 0; /* Skip parity only device */

			osd_req_read_sg(or, obj, per_dev->bio,
					per_dev->sglist, per_dev->cur_sg);
		} else {
			/* No sg entries: one contiguous read */
			osd_req_read(or, obj, per_dev->offset,
				     per_dev->bio, per_dev->length);
		}

		ORE_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx"
			     " dev=%d sg_len=%d\n", _LLU(obj->id),
			     _LLU(per_dev->offset), _LLU(per_dev->length),
			     first_dev, per_dev->cur_sg);
	} else {
		/* Reads into a kernel buffer are not supported */
		BUG_ON(ios->kern_buff);

		osd_req_get_attributes(or, obj);
		ORE_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n",
			      _LLU(obj->id),
			      ios->in_attr_len, first_dev);
	}
	if (ios->out_attr)
		osd_req_add_set_attr_list(or, ios->out_attr, ios->out_attr_len);

	if (ios->in_attr)
		osd_req_add_get_attr_list(or, ios->in_attr, ios->in_attr_len);

	return 0;
}
967
968int ore_read(struct ore_io_state *ios)
969{
970 int i;
971 int ret;
972
973 ret = _prepare_for_striping(ios);
974 if (unlikely(ret))
975 return ret;
976
977 for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
978 ret = _ore_read_mirror(ios, i);
979 if (unlikely(ret))
980 return ret;
981 }
982
983 ret = ore_io_execute(ios);
984 return ret;
985}
986EXPORT_SYMBOL(ore_read);
987
988int extract_attr_from_ios(struct ore_io_state *ios, struct osd_attr *attr)
989{
990 struct osd_attr cur_attr = {.attr_page = 0};
991 void *iter = NULL;
992 int nelem;
993
994 do {
995 nelem = 1;
996 osd_req_decode_get_attr_list(ios->per_dev[0].or,
997 &cur_attr, &nelem, &iter);
998 if ((cur_attr.attr_page == attr->attr_page) &&
999 (cur_attr.attr_id == attr->attr_id)) {
1000 attr->len = cur_attr.len;
1001 attr->val_ptr = cur_attr.val_ptr;
1002 return 0;
1003 }
1004 } while (iter);
1005
1006 return -EIO;
1007}
1008EXPORT_SYMBOL(extract_attr_from_ios);
1009
1010static int _truncate_mirrors(struct ore_io_state *ios, unsigned cur_comp,
1011 struct osd_attr *attr)
1012{
1013 int last_comp = cur_comp + ios->layout->mirrors_p1;
1014
1015 for (; cur_comp < last_comp; ++cur_comp) {
1016 struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
1017 struct osd_request *or;
1018
1019 or = osd_start_request(_ios_od(ios, cur_comp), GFP_KERNEL);
1020 if (unlikely(!or)) {
1021 ORE_ERR("%s: osd_start_request failed\n", __func__);
1022 return -ENOMEM;
1023 }
1024 per_dev->or = or;
1025
1026 osd_req_set_attributes(or, _ios_obj(ios, cur_comp));
1027 osd_req_add_set_attr_list(or, attr, 1);
1028 }
1029
1030 return 0;
1031}
1032
/* Per-truncate values calculated once by _calc_trunk_info() */
struct _trunc_info {
	/* striping info of the truncate offset */
	struct ore_striping_info si;
	/* object size ore_truncate() applies to devices before first_group_dev */
	u64 prev_group_obj_off;
	/* object size ore_truncate() applies to devices at/after nex_group_dev */
	u64 next_group_obj_off;

	/* si.dev rounded down to a multiple of group_width */
	unsigned first_group_dev;
	/* first_group_dev + group_width */
	unsigned nex_group_dev;
};
1041
1042static void _calc_trunk_info(struct ore_layout *layout, u64 file_offset,
1043 struct _trunc_info *ti)
1044{
1045 unsigned stripe_unit = layout->stripe_unit;
1046
1047 ore_calc_stripe_info(layout, file_offset, 0, &ti->si);
1048
1049 ti->prev_group_obj_off = ti->si.M * stripe_unit;
1050 ti->next_group_obj_off = ti->si.M ? (ti->si.M - 1) * stripe_unit : 0;
1051
1052 ti->first_group_dev = ti->si.dev - (ti->si.dev % layout->group_width);
1053 ti->nex_group_dev = ti->first_group_dev + layout->group_width;
1054}
1055
/**
 * ore_truncate - set the logical length of every component object
 * @layout: layout of the file
 * @oc:     components of the file
 * @size:   new file size
 *
 * The per-device object size for @size is calculated and sent to each
 * device as a logical-length set-attribute request.  All requests are
 * then executed together.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or the error from
 * ore_get_io_state() / _truncate_mirrors() / ore_io_execute().
 */
int ore_truncate(struct ore_layout *layout, struct ore_components *oc,
		   u64 size)
{
	struct ore_io_state *ios;
	/* One attribute plus the big-endian value its val_ptr points at */
	struct exofs_trunc_attr {
		struct osd_attr attr;
		__be64 newsize;
	} *size_attrs;
	struct _trunc_info ti;
	int i, ret;

	ret = ore_get_io_state(layout, oc, &ios);
	if (unlikely(ret))
		return ret;

	_calc_trunk_info(ios->layout, size, &ti);

	size_attrs = kcalloc(ios->oc->numdevs, sizeof(*size_attrs),
			     GFP_KERNEL);
	if (unlikely(!size_attrs)) {
		ret = -ENOMEM;
		goto out;
	}

	ios->numdevs = ios->oc->numdevs;

	for (i = 0; i < ios->numdevs; ++i) {
		struct exofs_trunc_attr *size_attr = &size_attrs[i];
		u64 obj_size;

		if (i < ti.first_group_dev)
			obj_size = ti.prev_group_obj_off;
		else if (i >= ti.nex_group_dev)
			obj_size = ti.next_group_obj_off;
		else if (i < ti.si.dev) /* dev within this group */
			obj_size = ti.si.obj_offset +
				      ios->layout->stripe_unit - ti.si.unit_off;
		else if (i == ti.si.dev)
			obj_size = ti.si.obj_offset;
		else /* i > ti.dev */
			obj_size = ti.si.obj_offset - ti.si.unit_off;

		/* Start from the template, then point it at this value */
		size_attr->newsize = cpu_to_be64(obj_size);
		size_attr->attr = g_attr_logical_length;
		size_attr->attr.val_ptr = &size_attr->newsize;

		ORE_DBGMSG("trunc(0x%llx) obj_offset=0x%llx dev=%d\n",
			     _LLU(oc->comps->obj.id), _LLU(obj_size), i);
		/*
		 * NOTE(review): i runs over all oc->numdevs devices and is
		 * scaled by mirrors_p1 here, while the ios was allocated
		 * with only oc->numdevs per_dev slots.  With mirrors_p1 > 1
		 * this looks like it can index past per_dev[] - confirm.
		 */
		ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1,
					&size_attr->attr);
		if (unlikely(ret))
			goto out;
	}
	ret = ore_io_execute(ios);

out:
	/* kfree(NULL) is fine when the kcalloc above failed */
	kfree(size_attrs);
	ore_put_io_state(ios);
	return ret;
}
EXPORT_SYMBOL(ore_truncate);
1117
/*
 * Attribute descriptor for an object's logical length
 * (OSD_APAGE_OBJECT_INFORMATION / OSD_ATTR_OI_LOGICAL_LENGTH).
 * ore_truncate() copies it as the template for its set-attribute
 * requests.  The trailing 8 is presumably the value length in bytes -
 * confirm against ATTR_DEF.
 */
const struct osd_attr g_attr_logical_length = ATTR_DEF(
	OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
EXPORT_SYMBOL(g_attr_logical_length);
1121