1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25#include <linux/slab.h>
26#include <linux/module.h>
27#include <asm/div64.h>
28#include <linux/lcm.h>
29
30#include "ore_raid.h"
31
/* Module metadata: author, one-line description and license. */
MODULE_AUTHOR("Boaz Harrosh <bharrosh@panasas.com>");
MODULE_DESCRIPTION("Objects Raid Engine ore.ko");
MODULE_LICENSE("GPL");
35
36
37
38
39
40
41
42
43
44
45
/*
 * Number of struct bio_vec entries that fit in one page once a struct bio
 * has been subtracted from it. ore_verify_layout() uses it to bound
 * layout->max_io_length.
 */
enum { BIO_MAX_PAGES_KMALLOC =
	(PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),};
48
49int ore_verify_layout(unsigned total_comps, struct ore_layout *layout)
50{
51 u64 stripe_length;
52
53 switch (layout->raid_algorithm) {
54 case PNFS_OSD_RAID_0:
55 layout->parity = 0;
56 break;
57 case PNFS_OSD_RAID_5:
58 layout->parity = 1;
59 break;
60 case PNFS_OSD_RAID_PQ:
61 case PNFS_OSD_RAID_4:
62 default:
63 ORE_ERR("Only RAID_0/5 for now\n");
64 return -EINVAL;
65 }
66 if (0 != (layout->stripe_unit & ~PAGE_MASK)) {
67 ORE_ERR("Stripe Unit(0x%llx)"
68 " must be Multples of PAGE_SIZE(0x%lx)\n",
69 _LLU(layout->stripe_unit), PAGE_SIZE);
70 return -EINVAL;
71 }
72 if (layout->group_width) {
73 if (!layout->group_depth) {
74 ORE_ERR("group_depth == 0 && group_width != 0\n");
75 return -EINVAL;
76 }
77 if (total_comps < (layout->group_width * layout->mirrors_p1)) {
78 ORE_ERR("Data Map wrong, "
79 "numdevs=%d < group_width=%d * mirrors=%d\n",
80 total_comps, layout->group_width,
81 layout->mirrors_p1);
82 return -EINVAL;
83 }
84 layout->group_count = total_comps / layout->mirrors_p1 /
85 layout->group_width;
86 } else {
87 if (layout->group_depth) {
88 printk(KERN_NOTICE "Warning: group_depth ignored "
89 "group_width == 0 && group_depth == %lld\n",
90 _LLU(layout->group_depth));
91 }
92 layout->group_width = total_comps / layout->mirrors_p1;
93 layout->group_depth = -1;
94 layout->group_count = 1;
95 }
96
97 stripe_length = (u64)layout->group_width * layout->stripe_unit;
98 if (stripe_length >= (1ULL << 32)) {
99 ORE_ERR("Stripe_length(0x%llx) >= 32bit is not supported\n",
100 _LLU(stripe_length));
101 return -EINVAL;
102 }
103
104 layout->max_io_length =
105 (BIO_MAX_PAGES_KMALLOC * PAGE_SIZE - layout->stripe_unit) *
106 layout->group_width;
107 if (layout->parity) {
108 unsigned stripe_length =
109 (layout->group_width - layout->parity) *
110 layout->stripe_unit;
111
112 layout->max_io_length /= stripe_length;
113 layout->max_io_length *= stripe_length;
114 }
115 return 0;
116}
117EXPORT_SYMBOL(ore_verify_layout);
118
119static u8 *_ios_cred(struct ore_io_state *ios, unsigned index)
120{
121 return ios->oc->comps[index & ios->oc->single_comp].cred;
122}
123
124static struct osd_obj_id *_ios_obj(struct ore_io_state *ios, unsigned index)
125{
126 return &ios->oc->comps[index & ios->oc->single_comp].obj;
127}
128
129static struct osd_dev *_ios_od(struct ore_io_state *ios, unsigned index)
130{
131 ORE_DBGMSG2("oc->first_dev=%d oc->numdevs=%d i=%d oc->ods=%p\n",
132 ios->oc->first_dev, ios->oc->numdevs, index,
133 ios->oc->ods);
134
135 return ore_comp_dev(ios->oc, index);
136}
137
/*
 * _ore_get_io_state - allocate and initialise an ore_io_state.
 *
 * @layout, @oc:    stored in the new state
 * @numdevs:        number of per_dev[] slots to allocate
 * @sgs_per_dev:    sg entries per device, 0 for no sg lists
 * @num_par_pages:  number of parity page pointers, 0 for none
 * @pios:           receives the new state, or NULL on allocation failure
 *
 * The state, its per_dev[] array and the sglist/pages area are allocated
 * in one kzalloc() when they fit in a page, otherwise in two.
 *
 * Returns 0 on success or -ENOMEM.
 */
int _ore_get_io_state(struct ore_layout *layout,
			struct ore_components *oc, unsigned numdevs,
			unsigned sgs_per_dev, unsigned num_par_pages,
			struct ore_io_state **pios)
{
	struct ore_io_state *ios;
	struct page **pages;
	struct osd_sg_entry *sgilist;
	/* Variable-sized layout of the single-allocation case */
	struct __alloc_all_io_state {
		struct ore_io_state ios;
		struct ore_per_dev_state per_dev[numdevs];
		union {
			struct osd_sg_entry sglist[sgs_per_dev * numdevs];
			struct page *pages[num_par_pages];
		};
	} *_aios;

	if (likely(sizeof(*_aios) <= PAGE_SIZE)) {
		_aios = kzalloc(sizeof(*_aios), GFP_KERNEL);
		if (unlikely(!_aios)) {
			ORE_DBGMSG("Failed kzalloc bytes=%zd\n",
				   sizeof(*_aios));
			*pios = NULL;
			return -ENOMEM;
		}
		pages = num_par_pages ? _aios->pages : NULL;
		sgilist = sgs_per_dev ? _aios->sglist : NULL;
		ios = &_aios->ios;
	} else {
		/* Too big for one page: split state+per_dev from the rest */
		struct __alloc_small_io_state {
			struct ore_io_state ios;
			struct ore_per_dev_state per_dev[numdevs];
		} *_aio_small;
		union __extra_part {
			struct osd_sg_entry sglist[sgs_per_dev * numdevs];
			struct page *pages[num_par_pages];
		} *extra_part;

		_aio_small = kzalloc(sizeof(*_aio_small), GFP_KERNEL);
		if (unlikely(!_aio_small)) {
			ORE_DBGMSG("Failed alloc first part bytes=%zd\n",
				   sizeof(*_aio_small));
			*pios = NULL;
			return -ENOMEM;
		}
		extra_part = kzalloc(sizeof(*extra_part), GFP_KERNEL);
		if (unlikely(!extra_part)) {
			ORE_DBGMSG("Failed alloc second part bytes=%zd\n",
				   sizeof(*extra_part));
			kfree(_aio_small);
			*pios = NULL;
			return -ENOMEM;
		}

		pages = num_par_pages ? extra_part->pages : NULL;
		sgilist = sgs_per_dev ? extra_part->sglist : NULL;
		/* extra_part stays reachable only through the pointers
		 * stored below (parity_pages / per_dev[0].sglist).
		 * extra_part_alloc records that it is a separate allocation;
		 * presumably it is released in _ore_free_raid_stuff() -
		 * TODO confirm.
		 */
		ios = &_aio_small->ios;
		ios->extra_part_alloc = true;
	}

	if (pages) {
		ios->parity_pages = pages;
		ios->max_par_pages = num_par_pages;
	}
	if (sgilist) {
		unsigned d;

		/* Hand each device its own sgs_per_dev-sized slice */
		for (d = 0; d < numdevs; ++d) {
			ios->per_dev[d].sglist = sgilist;
			sgilist += sgs_per_dev;
		}
		ios->sgs_per_dev = sgs_per_dev;
	}

	ios->layout = layout;
	ios->oc = oc;
	*pios = ios;
	return 0;
}
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237int ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc,
238 bool is_reading, u64 offset, u64 length,
239 struct ore_io_state **pios)
240{
241 struct ore_io_state *ios;
242 unsigned numdevs = layout->group_width * layout->mirrors_p1;
243 unsigned sgs_per_dev = 0, max_par_pages = 0;
244 int ret;
245
246 if (layout->parity && length) {
247 unsigned data_devs = layout->group_width - layout->parity;
248 unsigned stripe_size = layout->stripe_unit * data_devs;
249 unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE;
250 u32 remainder;
251 u64 num_stripes;
252 u64 num_raid_units;
253
254 num_stripes = div_u64_rem(length, stripe_size, &remainder);
255 if (remainder)
256 ++num_stripes;
257
258 num_raid_units = num_stripes * layout->parity;
259
260 if (is_reading) {
261
262
263
264
265
266
267
268 num_raid_units += layout->group_width;
269 sgs_per_dev = div_u64(num_raid_units, data_devs) + 2;
270 } else {
271
272 max_par_pages = num_raid_units * pages_in_unit *
273 sizeof(struct page *);
274 }
275 }
276
277 ret = _ore_get_io_state(layout, oc, numdevs, sgs_per_dev, max_par_pages,
278 pios);
279 if (unlikely(ret))
280 return ret;
281
282 ios = *pios;
283 ios->reading = is_reading;
284 ios->offset = offset;
285
286 if (length) {
287 ore_calc_stripe_info(layout, offset, length, &ios->si);
288 ios->length = ios->si.length;
289 ios->nr_pages = (ios->length + PAGE_SIZE - 1) / PAGE_SIZE;
290 if (layout->parity)
291 _ore_post_alloc_raid_stuff(ios);
292 }
293
294 return 0;
295}
296EXPORT_SYMBOL(ore_get_rw_state);
297
298
299
300
301
302
303
304
305int ore_get_io_state(struct ore_layout *layout, struct ore_components *oc,
306 struct ore_io_state **pios)
307{
308 return _ore_get_io_state(layout, oc, oc->numdevs, 0, 0, pios);
309}
310EXPORT_SYMBOL(ore_get_io_state);
311
312void ore_put_io_state(struct ore_io_state *ios)
313{
314 if (ios) {
315 unsigned i;
316
317 for (i = 0; i < ios->numdevs; i++) {
318 struct ore_per_dev_state *per_dev = &ios->per_dev[i];
319
320 if (per_dev->or)
321 osd_end_request(per_dev->or);
322 if (per_dev->bio)
323 bio_put(per_dev->bio);
324 }
325
326 _ore_free_raid_stuff(ios);
327 kfree(ios);
328 }
329}
330EXPORT_SYMBOL(ore_put_io_state);
331
/* Done-callback of synchronous IO: @p is the completion to signal. */
static void _sync_done(struct ore_io_state *ios, void *p)
{
	complete((struct completion *)p);
}
338
339static void _last_io(struct kref *kref)
340{
341 struct ore_io_state *ios = container_of(
342 kref, struct ore_io_state, kref);
343
344 ios->done(ios, ios->private);
345}
346
347static void _done_io(struct osd_request *or, void *p)
348{
349 struct ore_io_state *ios = p;
350
351 kref_put(&ios->kref, _last_io);
352}
353
354int ore_io_execute(struct ore_io_state *ios)
355{
356 DECLARE_COMPLETION_ONSTACK(wait);
357 bool sync = (ios->done == NULL);
358 int i, ret;
359
360 if (sync) {
361 ios->done = _sync_done;
362 ios->private = &wait;
363 }
364
365 for (i = 0; i < ios->numdevs; i++) {
366 struct osd_request *or = ios->per_dev[i].or;
367 if (unlikely(!or))
368 continue;
369
370 ret = osd_finalize_request(or, 0, _ios_cred(ios, i), NULL);
371 if (unlikely(ret)) {
372 ORE_DBGMSG("Failed to osd_finalize_request() => %d\n",
373 ret);
374 return ret;
375 }
376 }
377
378 kref_init(&ios->kref);
379
380 for (i = 0; i < ios->numdevs; i++) {
381 struct osd_request *or = ios->per_dev[i].or;
382 if (unlikely(!or))
383 continue;
384
385 kref_get(&ios->kref);
386 osd_execute_request_async(or, _done_io, ios);
387 }
388
389 kref_put(&ios->kref, _last_io);
390 ret = 0;
391
392 if (sync) {
393 wait_for_completion(&wait);
394 ret = ore_check_io(ios, NULL);
395 }
396 return ret;
397}
398
399static void _clear_bio(struct bio *bio)
400{
401 struct bio_vec *bv;
402 unsigned i;
403
404 __bio_for_each_segment(bv, bio, i, 0) {
405 unsigned this_count = bv->bv_len;
406
407 if (likely(PAGE_SIZE == this_count))
408 clear_highpage(bv->bv_page);
409 else
410 zero_user(bv->bv_page, bv->bv_offset, this_count);
411 }
412}
413
414int ore_check_io(struct ore_io_state *ios, ore_on_dev_error on_dev_error)
415{
416 enum osd_err_priority acumulated_osd_err = 0;
417 int acumulated_lin_err = 0;
418 int i;
419
420 for (i = 0; i < ios->numdevs; i++) {
421 struct osd_sense_info osi;
422 struct ore_per_dev_state *per_dev = &ios->per_dev[i];
423 struct osd_request *or = per_dev->or;
424 int ret;
425
426 if (unlikely(!or))
427 continue;
428
429 ret = osd_req_decode_sense(or, &osi);
430 if (likely(!ret))
431 continue;
432
433 if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) {
434
435 _clear_bio(per_dev->bio);
436 ORE_DBGMSG("start read offset passed end of file "
437 "offset=0x%llx, length=0x%llx\n",
438 _LLU(per_dev->offset),
439 _LLU(per_dev->length));
440
441 continue;
442 }
443
444 if (on_dev_error) {
445 u64 residual = ios->reading ?
446 or->in.residual : or->out.residual;
447 u64 offset = (ios->offset + ios->length) - residual;
448 unsigned dev = per_dev->dev - ios->oc->first_dev;
449 struct ore_dev *od = ios->oc->ods[dev];
450
451 on_dev_error(ios, od, dev, osi.osd_err_pri,
452 offset, residual);
453 }
454 if (osi.osd_err_pri >= acumulated_osd_err) {
455 acumulated_osd_err = osi.osd_err_pri;
456 acumulated_lin_err = ret;
457 }
458 }
459
460 return acumulated_lin_err;
461}
462EXPORT_SYMBOL(ore_check_io);
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
/*
 * ore_calc_stripe_info - translate a file offset into striping coordinates.
 *
 * @layout:      striping parameters
 * @file_offset: byte offset in the file
 * @length:      requested length; si->length is clamped to it
 * @si:          output
 *
 * Symbols, as computed below:
 *   D = group_width - parity
 *   U = D * stripe_unit      (stored in si->bytes_in_stripe when parity)
 *   T = U * group_depth
 *   S = T * group_count
 *   M = file_offset / S
 *   G = (file_offset % S) / T
 *   H = (file_offset % S) % T
 *   N = H / U
 *   C = (H % U) / stripe_unit + G * group_width
 *
 * si->length is set to min(T - H, @length). The divisors S, T, U and
 * stripe_unit must be non-zero.
 */
void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset,
			  u64 length, struct ore_striping_info *si)
{
	u32	stripe_unit = layout->stripe_unit;
	u32	group_width = layout->group_width;
	u64	group_depth = layout->group_depth;
	u32	parity      = layout->parity;

	u32	D = group_width - parity;
	u32	U = D * stripe_unit;
	u64	T = U * group_depth;
	u64	S = T * layout->group_count;
	u64	M = div64_u64(file_offset, S);

	/*
	 * G = (L - (M * S)) / T
	 * H = (L - (M * S)) % T
	 * with L being file_offset.
	 */
	u64	LmodS = file_offset - M * S;
	u32	G = div64_u64(LmodS, T);
	u64	H = LmodS - G * T;

	u32	N = div_u64(H, U);

	/* "H - (N * U)" equals "H % U", which is smaller than the u32 U */
	u32	C = (u32)(H - (N * U)) / stripe_unit + G * group_width;

	div_u64_rem(file_offset, stripe_unit, &si->unit_off);

	/* NOTE(review): N * stripe_unit is evaluated in 32 bits (both u32)
	 * before being added to the other terms - confirm it cannot wrap.
	 */
	si->obj_offset = si->unit_off + (N * stripe_unit) +
				  (M * group_depth * stripe_unit);

	if (parity) {
		u32 LCMdP = lcm(group_width, parity) / parity;
		/* RxP is the rotation applied to the devices of stripe N */
		u32 RxP   = (N % LCMdP) * parity;
		u32 first_dev = C - C % group_width;

		si->par_dev = (group_width + group_width - parity - RxP) %
			      group_width + first_dev;
		si->dev = (group_width + C - RxP) % group_width + first_dev;
		si->bytes_in_stripe = U;
		si->first_stripe_start = M * S + G * T + N * U;
	} else {
		/* No parity: par_dev is set to group_width, a value that no
		 * device index within the group reaches.
		 */
		si->par_dev = group_width;
		si->dev = C;
	}

	/* Scale the device indexes by the number of mirrors */
	si->dev *= layout->mirrors_p1;
	si->par_dev *= layout->mirrors_p1;
	si->offset = file_offset;
	si->length = T - H;
	if (si->length > length)
		si->length = length;
	si->M = M;
}
EXPORT_SYMBOL(ore_calc_stripe_info);
574
575int _ore_add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg,
576 unsigned pgbase, struct page **pages,
577 struct ore_per_dev_state *per_dev, int cur_len)
578{
579 unsigned pg = *cur_pg;
580 struct request_queue *q =
581 osd_request_queue(_ios_od(ios, per_dev->dev));
582 unsigned len = cur_len;
583 int ret;
584
585 if (per_dev->bio == NULL) {
586 unsigned pages_in_stripe = ios->layout->group_width *
587 (ios->layout->stripe_unit / PAGE_SIZE);
588 unsigned nr_pages = ios->nr_pages * ios->layout->group_width /
589 (ios->layout->group_width -
590 ios->layout->parity);
591 unsigned bio_size = (nr_pages + pages_in_stripe) /
592 ios->layout->group_width;
593
594 per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
595 if (unlikely(!per_dev->bio)) {
596 ORE_DBGMSG("Failed to allocate BIO size=%u\n",
597 bio_size);
598 ret = -ENOMEM;
599 goto out;
600 }
601 }
602
603 while (cur_len > 0) {
604 unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len);
605 unsigned added_len;
606
607 cur_len -= pglen;
608
609 added_len = bio_add_pc_page(q, per_dev->bio, pages[pg],
610 pglen, pgbase);
611 if (unlikely(pglen != added_len)) {
612 ORE_DBGMSG("Failed bio_add_pc_page bi_vcnt=%u\n",
613 per_dev->bio->bi_vcnt);
614 ret = -ENOMEM;
615 goto out;
616 }
617 _add_stripe_page(ios->sp2d, &ios->si, pages[pg]);
618
619 pgbase = 0;
620 ++pg;
621 }
622 BUG_ON(cur_len);
623
624 per_dev->length += len;
625 *cur_pg = pg;
626 ret = 0;
627out:
628
629
630
631
632 return ret;
633}
634
/*
 * _prepare_for_striping - distribute ios->pages over the per-device bios
 * according to ios->si, one stripe unit at a time.
 *
 * Without a page array only ios->numdevs is set (to mirrors_p1). Otherwise
 * ios->numdevs is set to the number of devices in a group and
 * ios->pages_consumed is advanced.
 *
 * Returns an error only if nothing could be prepared. On a partial failure
 * ios->length is shortened to what was prepared and 0 is returned.
 */
static int _prepare_for_striping(struct ore_io_state *ios)
{
	struct ore_striping_info *si = &ios->si;
	unsigned stripe_unit = ios->layout->stripe_unit;
	unsigned mirrors_p1 = ios->layout->mirrors_p1;
	unsigned group_width = ios->layout->group_width;
	unsigned devs_in_group = group_width * mirrors_p1;
	unsigned dev = si->dev;
	unsigned first_dev = dev - (dev % devs_in_group);
	unsigned dev_order;
	unsigned cur_pg = ios->pages_consumed;
	u64 length = ios->length;
	int ret = 0;

	if (!ios->pages) {
		/* No page array: nothing to stripe */
		ios->numdevs = ios->layout->mirrors_p1;
		return 0;
	}

	BUG_ON(length > si->length);

	dev_order = _dev_order(devs_in_group, mirrors_p1, si->par_dev, dev);
	si->cur_comp = dev_order;
	si->cur_pg = si->unit_off / PAGE_SIZE;

	while (length) {
		unsigned comp = dev - first_dev;
		struct ore_per_dev_state *per_dev = &ios->per_dev[comp];
		unsigned cur_len, page_off = 0;

		if (!per_dev->length) {
			/* First unit on this device: set its dev and offset */
			per_dev->dev = dev;
			if (dev == si->dev) {
				/* The device the IO starts on */
				WARN_ON(dev == si->par_dev);
				per_dev->offset = si->obj_offset;
				cur_len = stripe_unit - si->unit_off;
				page_off = si->unit_off & ~PAGE_MASK;
				BUG_ON(page_off && (page_off != ios->pgbase));
			} else {
				if (si->cur_comp > dev_order)
					per_dev->offset =
						si->obj_offset - si->unit_off;
				else /* si->cur_comp <= dev_order */
					per_dev->offset =
						si->obj_offset + stripe_unit -
								   si->unit_off;
				cur_len = stripe_unit;
			}
		} else {
			cur_len = stripe_unit;
		}
		if (cur_len >= length)
			cur_len = length;

		ret = _ore_add_stripe_unit(ios, &cur_pg, page_off, ios->pages,
					   per_dev, cur_len);
		if (unlikely(ret))
			goto out;

		/* Advance to the next device, wrapping inside the group */
		dev += mirrors_p1;
		dev = (dev % devs_in_group) + first_dev;

		length -= cur_len;

		si->cur_comp = (si->cur_comp + 1) % group_width;
		if (unlikely((dev == si->par_dev) || (!length && ios->sp2d))) {
			if (!length && ios->sp2d) {
				/* All data is consumed and sp2d is set:
				 * operate on the parity device.
				 */
				dev = si->par_dev;
			}
			if (ios->sp2d)
				/* Pass the remaining length on; it is 0 for
				 * the last unit. How it is used is up to
				 * _ore_add_parity_unit().
				 */
				cur_len = length;
			per_dev = &ios->per_dev[dev - first_dev];
			if (!per_dev->length) {
				/* Parity device not used yet: initialize its
				 * per_dev info.
				 */
				per_dev->dev = dev;
				per_dev->offset = si->obj_offset - si->unit_off;
			}

			ret = _ore_add_parity_unit(ios, si, per_dev, cur_len);
			if (unlikely(ret))
					goto out;

			/* Rotate par_dev backwards by parity * mirrors_p1,
			 * wrapping inside the group.
			 */
			si->par_dev = (devs_in_group + si->par_dev -
				       ios->layout->parity * mirrors_p1) %
				      devs_in_group + first_dev;
			/* Start of a new stripe */
			si->cur_comp = 0;
			si->cur_pg = 0;
		}
	}
out:
	ios->numdevs = devs_in_group;
	ios->pages_consumed = cur_pg;
	if (unlikely(ret)) {
		if (length == ios->length)
			return ret;	/* nothing was prepared */
		else
			ios->length -= length;	/* keep the prepared part */
	}
	return 0;
}
746
747int ore_create(struct ore_io_state *ios)
748{
749 int i, ret;
750
751 for (i = 0; i < ios->oc->numdevs; i++) {
752 struct osd_request *or;
753
754 or = osd_start_request(_ios_od(ios, i), GFP_KERNEL);
755 if (unlikely(!or)) {
756 ORE_ERR("%s: osd_start_request failed\n", __func__);
757 ret = -ENOMEM;
758 goto out;
759 }
760 ios->per_dev[i].or = or;
761 ios->numdevs++;
762
763 osd_req_create_object(or, _ios_obj(ios, i));
764 }
765 ret = ore_io_execute(ios);
766
767out:
768 return ret;
769}
770EXPORT_SYMBOL(ore_create);
771
772int ore_remove(struct ore_io_state *ios)
773{
774 int i, ret;
775
776 for (i = 0; i < ios->oc->numdevs; i++) {
777 struct osd_request *or;
778
779 or = osd_start_request(_ios_od(ios, i), GFP_KERNEL);
780 if (unlikely(!or)) {
781 ORE_ERR("%s: osd_start_request failed\n", __func__);
782 ret = -ENOMEM;
783 goto out;
784 }
785 ios->per_dev[i].or = or;
786 ios->numdevs++;
787
788 osd_req_remove_object(or, _ios_obj(ios, i));
789 }
790 ret = ore_io_execute(ios);
791
792out:
793 return ret;
794}
795EXPORT_SYMBOL(ore_remove);
796
797static int _write_mirror(struct ore_io_state *ios, int cur_comp)
798{
799 struct ore_per_dev_state *master_dev = &ios->per_dev[cur_comp];
800 unsigned dev = ios->per_dev[cur_comp].dev;
801 unsigned last_comp = cur_comp + ios->layout->mirrors_p1;
802 int ret = 0;
803
804 if (ios->pages && !master_dev->length)
805 return 0;
806
807 for (; cur_comp < last_comp; ++cur_comp, ++dev) {
808 struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
809 struct osd_request *or;
810
811 or = osd_start_request(_ios_od(ios, dev), GFP_KERNEL);
812 if (unlikely(!or)) {
813 ORE_ERR("%s: osd_start_request failed\n", __func__);
814 ret = -ENOMEM;
815 goto out;
816 }
817 per_dev->or = or;
818
819 if (ios->pages) {
820 struct bio *bio;
821
822 if (per_dev != master_dev) {
823 bio = bio_kmalloc(GFP_KERNEL,
824 master_dev->bio->bi_max_vecs);
825 if (unlikely(!bio)) {
826 ORE_DBGMSG(
827 "Failed to allocate BIO size=%u\n",
828 master_dev->bio->bi_max_vecs);
829 ret = -ENOMEM;
830 goto out;
831 }
832
833 __bio_clone(bio, master_dev->bio);
834 bio->bi_bdev = NULL;
835 bio->bi_next = NULL;
836 per_dev->offset = master_dev->offset;
837 per_dev->length = master_dev->length;
838 per_dev->bio = bio;
839 per_dev->dev = dev;
840 } else {
841 bio = master_dev->bio;
842
843 bio->bi_rw |= REQ_WRITE;
844 }
845
846 osd_req_write(or, _ios_obj(ios, dev), per_dev->offset,
847 bio, per_dev->length);
848 ORE_DBGMSG("write(0x%llx) offset=0x%llx "
849 "length=0x%llx dev=%d\n",
850 _LLU(_ios_obj(ios, dev)->id),
851 _LLU(per_dev->offset),
852 _LLU(per_dev->length), dev);
853 } else if (ios->kern_buff) {
854 per_dev->offset = ios->si.obj_offset;
855 per_dev->dev = ios->si.dev + dev;
856
857
858 BUG_ON((ios->layout->group_width > 1) &&
859 (ios->si.unit_off + ios->length >
860 ios->layout->stripe_unit));
861
862 ret = osd_req_write_kern(or, _ios_obj(ios, per_dev->dev),
863 per_dev->offset,
864 ios->kern_buff, ios->length);
865 if (unlikely(ret))
866 goto out;
867 ORE_DBGMSG2("write_kern(0x%llx) offset=0x%llx "
868 "length=0x%llx dev=%d\n",
869 _LLU(_ios_obj(ios, dev)->id),
870 _LLU(per_dev->offset),
871 _LLU(ios->length), per_dev->dev);
872 } else {
873 osd_req_set_attributes(or, _ios_obj(ios, dev));
874 ORE_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n",
875 _LLU(_ios_obj(ios, dev)->id),
876 ios->out_attr_len, dev);
877 }
878
879 if (ios->out_attr)
880 osd_req_add_set_attr_list(or, ios->out_attr,
881 ios->out_attr_len);
882
883 if (ios->in_attr)
884 osd_req_add_get_attr_list(or, ios->in_attr,
885 ios->in_attr_len);
886 }
887
888out:
889 return ret;
890}
891
892int ore_write(struct ore_io_state *ios)
893{
894 int i;
895 int ret;
896
897 if (unlikely(ios->sp2d && !ios->r4w)) {
898
899
900
901 WARN_ON_ONCE(1);
902 return -ENOTSUPP;
903 }
904
905 ret = _prepare_for_striping(ios);
906 if (unlikely(ret))
907 return ret;
908
909 for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
910 ret = _write_mirror(ios, i);
911 if (unlikely(ret))
912 return ret;
913 }
914
915 ret = ore_io_execute(ios);
916 return ret;
917}
918EXPORT_SYMBOL(ore_write);
919
920int _ore_read_mirror(struct ore_io_state *ios, unsigned cur_comp)
921{
922 struct osd_request *or;
923 struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
924 struct osd_obj_id *obj = _ios_obj(ios, cur_comp);
925 unsigned first_dev = (unsigned)obj->id;
926
927 if (ios->pages && !per_dev->length)
928 return 0;
929
930 first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1;
931 or = osd_start_request(_ios_od(ios, first_dev), GFP_KERNEL);
932 if (unlikely(!or)) {
933 ORE_ERR("%s: osd_start_request failed\n", __func__);
934 return -ENOMEM;
935 }
936 per_dev->or = or;
937
938 if (ios->pages) {
939 if (per_dev->cur_sg) {
940
941 _ore_add_sg_seg(per_dev, 0, false);
942 if (unlikely(!per_dev->cur_sg))
943 return 0;
944
945 osd_req_read_sg(or, obj, per_dev->bio,
946 per_dev->sglist, per_dev->cur_sg);
947 } else {
948
949 osd_req_read(or, obj, per_dev->offset,
950 per_dev->bio, per_dev->length);
951 }
952
953 ORE_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx"
954 " dev=%d sg_len=%d\n", _LLU(obj->id),
955 _LLU(per_dev->offset), _LLU(per_dev->length),
956 first_dev, per_dev->cur_sg);
957 } else {
958 BUG_ON(ios->kern_buff);
959
960 osd_req_get_attributes(or, obj);
961 ORE_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n",
962 _LLU(obj->id),
963 ios->in_attr_len, first_dev);
964 }
965 if (ios->out_attr)
966 osd_req_add_set_attr_list(or, ios->out_attr, ios->out_attr_len);
967
968 if (ios->in_attr)
969 osd_req_add_get_attr_list(or, ios->in_attr, ios->in_attr_len);
970
971 return 0;
972}
973
974int ore_read(struct ore_io_state *ios)
975{
976 int i;
977 int ret;
978
979 ret = _prepare_for_striping(ios);
980 if (unlikely(ret))
981 return ret;
982
983 for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
984 ret = _ore_read_mirror(ios, i);
985 if (unlikely(ret))
986 return ret;
987 }
988
989 ret = ore_io_execute(ios);
990 return ret;
991}
992EXPORT_SYMBOL(ore_read);
993
994int extract_attr_from_ios(struct ore_io_state *ios, struct osd_attr *attr)
995{
996 struct osd_attr cur_attr = {.attr_page = 0};
997 void *iter = NULL;
998 int nelem;
999
1000 do {
1001 nelem = 1;
1002 osd_req_decode_get_attr_list(ios->per_dev[0].or,
1003 &cur_attr, &nelem, &iter);
1004 if ((cur_attr.attr_page == attr->attr_page) &&
1005 (cur_attr.attr_id == attr->attr_id)) {
1006 attr->len = cur_attr.len;
1007 attr->val_ptr = cur_attr.val_ptr;
1008 return 0;
1009 }
1010 } while (iter);
1011
1012 return -EIO;
1013}
1014EXPORT_SYMBOL(extract_attr_from_ios);
1015
1016static int _truncate_mirrors(struct ore_io_state *ios, unsigned cur_comp,
1017 struct osd_attr *attr)
1018{
1019 int last_comp = cur_comp + ios->layout->mirrors_p1;
1020
1021 for (; cur_comp < last_comp; ++cur_comp) {
1022 struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
1023 struct osd_request *or;
1024
1025 or = osd_start_request(_ios_od(ios, cur_comp), GFP_KERNEL);
1026 if (unlikely(!or)) {
1027 ORE_ERR("%s: osd_start_request failed\n", __func__);
1028 return -ENOMEM;
1029 }
1030 per_dev->or = or;
1031
1032 osd_req_set_attributes(or, _ios_obj(ios, cur_comp));
1033 osd_req_add_set_attr_list(or, attr, 1);
1034 }
1035
1036 return 0;
1037}
1038
/* Truncate parameters derived from the new size by _calc_trunk_info(). */
struct _trunc_info {
	struct ore_striping_info si;	/* striping info of the new size */
	u64 prev_group_obj_off;		/* object size for devices below first_group_dev */
	u64 next_group_obj_off;		/* object size for devices at/after nex_group_dev */

	unsigned first_group_dev;	/* si.dev rounded down to a group_width multiple */
	unsigned nex_group_dev;		/* first_group_dev + group_width */
};
1047
1048static void _calc_trunk_info(struct ore_layout *layout, u64 file_offset,
1049 struct _trunc_info *ti)
1050{
1051 unsigned stripe_unit = layout->stripe_unit;
1052
1053 ore_calc_stripe_info(layout, file_offset, 0, &ti->si);
1054
1055 ti->prev_group_obj_off = ti->si.M * stripe_unit;
1056 ti->next_group_obj_off = ti->si.M ? (ti->si.M - 1) * stripe_unit : 0;
1057
1058 ti->first_group_dev = ti->si.dev - (ti->si.dev % layout->group_width);
1059 ti->nex_group_dev = ti->first_group_dev + layout->group_width;
1060}
1061
/*
 * ore_truncate - set the logical length of every object so that the
 * striped file ends at @size.
 *
 * @layout: striping layout of the file
 * @oc:     components holding the file's objects
 * @size:   new file size in bytes
 *
 * Allocates a temporary io_state and one size attribute per device, both
 * released before returning. Returns 0 on success or a negative error.
 */
int ore_truncate(struct ore_layout *layout, struct ore_components *oc,
		   u64 size)
{
	struct ore_io_state *ios;
	/* The attribute together with the big-endian value it points at */
	struct exofs_trunc_attr {
		struct osd_attr attr;
		__be64 newsize;
	} *size_attrs;
	struct _trunc_info ti;
	int i, ret;

	ret = ore_get_io_state(layout, oc, &ios);
	if (unlikely(ret))
		return ret;

	_calc_trunk_info(ios->layout, size, &ti);

	size_attrs = kcalloc(ios->oc->numdevs, sizeof(*size_attrs),
			     GFP_KERNEL);
	if (unlikely(!size_attrs)) {
		ret = -ENOMEM;
		goto out;
	}

	ios->numdevs = ios->oc->numdevs;

	for (i = 0; i < ios->numdevs; ++i) {
		struct exofs_trunc_attr *size_attr = &size_attrs[i];
		u64 obj_size;

		/* Pick the object size by where device i sits relative to
		 * the group and device that contain the new end of file.
		 */
		if (i < ti.first_group_dev)
			obj_size = ti.prev_group_obj_off;
		else if (i >= ti.nex_group_dev)
			obj_size = ti.next_group_obj_off;
		else if (i < ti.si.dev) /* device inside this group */
			obj_size = ti.si.obj_offset +
				      ios->layout->stripe_unit - ti.si.unit_off;
		else if (i == ti.si.dev)
			obj_size = ti.si.obj_offset;
		else /* i > ti.si.dev */
			obj_size = ti.si.obj_offset - ti.si.unit_off;

		size_attr->newsize = cpu_to_be64(obj_size);
		size_attr->attr = g_attr_logical_length;
		size_attr->attr.val_ptr = &size_attr->newsize;

		ORE_DBGMSG("trunc(0x%llx) obj_offset=0x%llx dev=%d\n",
			     _LLU(oc->comps->obj.id), _LLU(obj_size), i);
		/* NOTE(review): i runs up to numdevs - 1 and is scaled by
		 * mirrors_p1 here, so with mirrors_p1 > 1 the per_dev[]
		 * index used by _truncate_mirrors() can exceed numdevs -
		 * confirm against the per_dev[] allocation size.
		 */
		ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1,
					&size_attr->attr);
		if (unlikely(ret))
			goto out;
	}
	ret = ore_io_execute(ios);

out:
	/* kfree(NULL) is a no-op, so this also covers the kcalloc failure */
	kfree(size_attrs);
	ore_put_io_state(ios);
	return ret;
}
EXPORT_SYMBOL(ore_truncate);
1123
/*
 * Attribute descriptor for OSD_ATTR_OI_LOGICAL_LENGTH on the
 * OSD_APAGE_OBJECT_INFORMATION page, with a length of 8. ore_truncate()
 * copies it and points val_ptr at the new size.
 */
const struct osd_attr g_attr_logical_length = ATTR_DEF(
	OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
EXPORT_SYMBOL(g_attr_logical_length);
1127