// SPDX-License-Identifier: GPL-2.0
/*
 * Bad block management
 *
 * - Heavily based on MD badblocks code from Neil Brown
 *
 * Copyright (c) 2015, Intel Corporation.
 */

#include <linux/badblocks.h>
#include <linux/seqlock.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/slab.h>

/*
 * The purpose of badblocks set/clear is to manage bad block ranges which are
 * identified by LBA addresses.
 *
 * When the caller of badblocks_set() wants to set a range of bad blocks, the
 * setting range can be acked or unacked. The setting range may merge with,
 * overwrite, or skip already set ranges, depending on how the ranges overlap
 * or are adjacent and on their acknowledgment types. It gets more complicated
 * when the setting range covers multiple already set bad block ranges, with
 * restrictions on the maximum length of each bad range and on the bad table
 * space.
 *
 * It is difficult and unnecessary to take care of all the possible situations
 * at once. A large setting range can be handled by dividing it into smaller
 * pieces whenever an overlap, the maximum range length, or a full bad table
 * is encountered. Each iteration handles only a small piece of the setting
 * range, with a limited number of conditions describing how it interacts with
 * possibly overlapping or adjacent already set bad block ranges. The hard,
 * complicated problem then becomes much simpler to handle properly.
 *
 * When setting a range of bad blocks in the bad table, the simplified
 * situations to be considered are (the already set bad block ranges are named
 * with prefix E, and the setting bad block range is named with prefix S):
 *
 * 1) A setting range is not overlapped or adjacent to any other already set
 *    bad block range.
 *                          +--------+
 *                          |    S   |
 *                          +--------+
 *        +-------------+                    +-------------+
 *        |      E1     |                    |      E2     |
 *        +-------------+                    +-------------+
 *    For this situation, if the bad blocks table is not full, just allocate a
 *    free slot from the bad blocks table to mark the setting range S. The
 *    result is,
 *        +-------------+  +--------+  +-------------+
 *        |      E1     |  |    S   |  |      E2     |
 *        +-------------+  +--------+  +-------------+
 * 2) A setting range starts exactly at a start LBA of an already set bad
 *    blocks range.
 * 2.1) The setting range size < already set range size
 *        +--------+
 *        |    S   |
 *        +--------+
 *        +-------------+
 *        |      E      |
 *        +-------------+
 * 2.1.1) If S and E are both acked or both unacked ranges, the setting range S
 *    can be merged into the existing bad range E. The result is,
 *        +-------------+
 *        |      S      |
 *        +-------------+
 * 2.1.2) If S is an unacked setting and E is acked, the setting will be
 *    denied, and the result is,
 *        +-------------+
 *        |      E      |
 *        +-------------+
 * 2.1.3) If S is an acked setting and E is unacked, range S can overwrite part
 *    of E. An extra slot from the bad blocks table will be allocated for S,
 *    and the head of E will move to the end of the inserted range S. The
 *    result is,
 *        +--------+----+
 *        |    S   |  E |
 *        +--------+----+
 * 2.2) The setting range size == already set range size
 * 2.2.1) If S and E are both acked or both unacked ranges, the setting range S
 *    can be merged into the existing bad range E.
 *    The result is,
 *        +-------------+
 *        |      S      |
 *        +-------------+
 * 2.2.2) If S is an unacked setting and E is acked, the setting will be
 *    denied, and the result is,
 *        +-------------+
 *        |      E      |
 *        +-------------+
 * 2.2.3) If S is an acked setting and E is unacked, range S can overwrite all
 *    of bad blocks range E. The result is,
 *        +-------------+
 *        |      S      |
 *        +-------------+
 * 2.3) The setting range size > already set range size
 *        +-------------------+
 *        |          S        |
 *        +-------------------+
 *        +-------------+
 *        |      E      |
 *        +-------------+
 *    For this situation, the setting range S can be treated as two parts: the
 *    first part (S1) has the same size as the already set range E, the second
 *    part (S2) is the rest of the setting range.
 *        +-------------+-----+        +-------------+       +-----+
 *        |      S1     |  S2 |        |      S1     |       |  S2 |
 *        +-------------+-----+  ===>  +-------------+       +-----+
 *        +-------------+              +-------------+
 *        |      E      |              |      E      |
 *        +-------------+              +-------------+
 *    Now we only focus on how to handle the setting range S1 against the
 *    already set range E, which is already explained in 2.2); the rest part
 *    S2 will be handled later, in the next loop.
 * 3) A setting range starts before the start LBA of an already set bad blocks
 *    range.
 *        +-------------+
 *        |      S      |
 *        +-------------+
 *             +-------------+
 *             |      E      |
 *             +-------------+
 *    For this situation, the setting range S can be divided into two parts:
 *    the first part (S1) ends at the start LBA of the already set range E,
 *    the second part (S2) starts exactly at the start LBA of the already set
 *    range E.
 *        +----+---------+            +----+   +---------+
 *        | S1 |    S2   |            | S1 |   |    S2   |
 *        +----+---------+     ===>   +----+   +---------+
 *             +-------------+                 +-------------+
 *             |      E      |                 |      E      |
 *             +-------------+                 +-------------+
 *    Now only the first part S1 should be handled in this loop; it is in a
 *    similar condition as 1). The rest part S2 has exactly the same start LBA
 *    as the already set range E, and will be handled in the next loop as one
 *    of the situations in 2).
 * 4) A setting range starts after the start LBA of an already set bad blocks
 *    range.
 * 4.1) If the setting range S exactly matches the tail part of the already
 *    set bad blocks range E, as the following chart shows,
 *            +---------+
 *            |    S    |
 *            +---------+
 *        +-------------+
 *        |      E      |
 *        +-------------+
 * 4.1.1) If ranges S and E have the same acknowledge value (both acked or
 *    unacked), they will be merged into one; the result is,
 *        +-------------+
 *        |      S      |
 *        +-------------+
 * 4.1.2) If range E is acked and the setting range S is unacked, the setting
 *    request of S will be rejected; the result is,
 *        +-------------+
 *        |      E      |
 *        +-------------+
 * 4.1.3) If range E is unacked and the setting range S is acked, then S may
 *    overwrite the overlapped range of E; the result is,
 *        +---+---------+
 *        | E |    S    |
 *        +---+---------+
 * 4.2) If the setting range S stays in the middle of an already set range E,
 *    as the following chart shows,
 *             +----+
 *             | S  |
 *             +----+
 *        +--------------+
 *        |       E      |
 *        +--------------+
 * 4.2.1) If ranges S and E have the same acknowledge value (both acked or
 *    unacked), they will be merged into one; the result is,
 *        +--------------+
 *        |       S      |
 *        +--------------+
 * 4.2.2) If range E is acked and the setting range S is unacked, the setting
 *    request of S will be rejected; the result is also,
 *        +--------------+
 *        |       E      |
 *        +--------------+
 * 4.2.3) If range E is unacked and the setting range S is acked, then S will
 *    be inserted into the middle of E and split the previous range E into
 *    two parts (E1 and E2); the result is,
 *        +----+----+----+
 *        | E1 |  S | E2 |
 *        +----+----+----+
 * 4.3) If the setting bad blocks range S is overlapped with an already set bad
 *    blocks range E: range S starts after the start LBA of range E, and ends
 *    after the end LBA of range E, as the following chart shows,
 *              +-------------------+
 *              |          S        |
 *              +-------------------+
 *        +-------------+
 *        |      E      |
 *        +-------------+
 *    For this situation the range S can be divided into two parts: the first
 *    part (S1) ends at the end LBA of range E, and the second part (S2)
 *    covers the rest of the original S.
 *        +---------+---------+        +---------+     +---------+
 *        |    S1   |    S2   |        |    S1   |     |    S2   |
 *        +---------+---------+  ===>  +---------+     +---------+
 *        +-------------+              +-------------+
 *        |      E      |              |      E      |
 *        +-------------+              +-------------+
 *    Now in this loop the setting range S1 and the already set range E can be
 *    handled as in situation 4.1); the rest range S2 will be handled in the
 *    next loop and is ignored in this loop.
 * 5) A setting bad blocks range S is adjacent to one or more already set bad
 *    blocks range(s), and they have the same acknowledge value (all acked or
 *    all unacked).
 * 5.1) Front merge: the already set bad blocks range E is before the setting
 *    range S and they are adjacent,
 *                +------+
 *                |   S  |
 *                +------+
 *        +-------+
 *        |   E   |
 *        +-------+
 * 5.1.1) When the total size of ranges S and E <= BB_MAX_LEN, and their
 *    acknowledge values are the same, the setting range S can be front merged
 *    into range E. The result is,
 *        +--------------+
 *        |       S      |
 *        +--------------+
 * 5.1.2) Otherwise these two ranges cannot merge; just insert the setting
 *    range S right after the already set range E into the bad blocks table.
 *    The result is,
 *        +--------+------+
 *        |   E    |   S  |
 *        +--------+------+
 * 6) Special cases which the above conditions cannot handle
 * 6.1) Multiple already set ranges may merge into fewer ones in a full bad
 *    table
 *        +-------------------------------------------------------+
 *        |                           S                           |
 *        +-------------------------------------------------------+
 *        |<----- BB_MAX_LEN ----->|
 *                                 +-----+     +-----+   +-----+
 *                                 |  E1 |     |  E2 |   |  E3 |
 *                                 +-----+     +-----+   +-----+
 *    In the above example, when the bad blocks table is full, inserting the
 *    first part of the setting range S will fail because no more available
 *    slot can be allocated from the bad blocks table. In this situation a
 *    proper setting method should go through the whole setting bad blocks
 *    range and look for chances to merge already set ranges into fewer ones.
 *    When a slot becomes available in the bad blocks table, retry to handle
 *    as many of the setting bad blocks ranges as possible.
 *        +------------------------+
 *        |          S3            |
 *        +------------------------+
 *        |<----- BB_MAX_LEN ----->|
 *                                 +-----+-----+-----+---+-----+--+
 *                                 |       S1        |     S2     |
 *                                 +-----+-----+-----+---+-----+--+
 *    The above chart shows that although the first part (S3) cannot be
 *    inserted due to no space in the bad blocks table, the following ranges
 *    E1, E2 and E3 can be merged with the rest of S into fewer ranges S1 and
 *    S2. Now there is one free slot in the bad blocks table.
 *        +------------------------+-----+-----+-----+---+-----+--+
 *        |          S3            |       S1        |     S2     |
 *        +------------------------+-----+-----+-----+---+-----+--+
 *    Since the bad blocks table is not full anymore, retry the original
 *    setting range S. Now the setting range S3 can be inserted into the bad
 *    blocks table using the slot freed by the previous merge of multiple
 *    ranges.
 * 6.2) Front merge after overwrite
 *    In the following example, in the bad blocks table, E1 is an acked bad
 *    blocks range and E2 is an unacked bad blocks range, therefore they are
 *    not able to merge into a larger range. The setting bad blocks range S is
 *    acked, therefore part of E2 can be overwritten by S.
 *        +--------+
 *        |    S   |                           acknowledged
 *        +--------+                       S:       1
 *          +-------+-------------+        E1:      1
 *          |   E1  |      E2     |        E2:      0
 *          +-------+-------------+
 *    With the previous simplified routines, after overwriting part of E2 with
 *    S, the bad blocks table should be (E3 is the remaining part of E2 which
 *    is not overwritten by S),
 *                                             acknowledged
 *          +-------+--------+----+        S:       1
 *          |   E1  |    S   | E3 |        E1:      1
 *          +-------+--------+----+        E3:      0
 *    The above result is correct but not perfect. Ranges E1 and S in the bad
 *    blocks table are both acked; merging them into one larger range may
 *    occupy less bad blocks table space and make badblocks_check() faster.
 *    Therefore in such a situation, after overwriting range S, the previous
 *    range E1 should be checked for a possible front combination. Then the
 *    ideal result can be,
 *          +----------------+----+        acknowledged
 *          |       E1       | E3 |        E1:      1
 *          +----------------+----+        E3:      0
 * 6.3) Behind merge: the already set bad blocks range E is behind the setting
 *    range S and they are adjacent. Normally we don't need to care about this
 *    because front merge handles it while going through range S from head to
 *    tail, except for the tail part of range S.
 *    When the setting range S is fully handled, none of the above simplified
 *    routines checks whether the tail LBA of range S is adjacent to the next
 *    already set range, so they will not merge them even when it is possible.
 *        +------+
 *        |   S  |
 *        +------+
 *               +-------+
 *               |   E   |
 *               +-------+
 *    For the above special situation, when the setting range S is fully
 *    handled and the loop ends, an extra check is necessary for whether the
 *    next already set range E starts right after S and is mergeable.
 * 6.3.1) When the total size of ranges E and S <= BB_MAX_LEN, and their
 *    acknowledge values are the same, the setting range S can be behind
 *    merged into range E. The result is,
 *        +--------------+
 *        |       S      |
 *        +--------------+
 * 6.3.2) Otherwise these two ranges cannot merge; just insert the setting
 *    range S in front of the already set range E in the bad blocks table.
 *    The result is,
 *        +------+-------+
 *        |   S  |   E   |
 *        +------+-------+
 *
 * All the above 5 simplified situations and 3 special cases may cover 99%+ of
 * the bad block range setting conditions. There may be some rare corner cases
 * which are not considered and optimized; it does not hurt if badblocks_set()
 * fails due to no space, or if some ranges are not merged to save bad blocks
 * table space.
 *
 * Inside badblocks_set() each loop iteration starts by jumping to the
 * re_insert label; every new iteration calls prev_badblocks() to find an
 * already set range which starts before or at the current setting range.
 * Since the setting bad blocks range is handled from head to tail, in most
 * cases it is unnecessary to do the binary search inside prev_badblocks(); a
 * hint can be provided to prev_badblocks() for a fast path, so the expensive
 * binary search can be avoided. In testing with the hint to prev_badblocks(),
 * except for the first iteration, all remaining calls to prev_badblocks() hit
 * the fast path and return the correct bad blocks table index immediately.
 *
 *
 * Clearing a bad blocks range from the bad block table follows a similar idea
 * to setting, but is much simpler. The only thing that needs to be noticed is
 * that when the clearing range hits the middle of a bad block range, the
 * existing bad block range will be split into two, and one more item has to
 * be added into the bad block table. The simplified situations to be
 * considered are (the already set bad block ranges in the bad block table are
 * named with prefix E, and the clearing bad blocks range is named with
 * prefix C):
 *
 * 1) A clearing range is not overlapped with any already set range in the bad
 *    block table.
 *        +-----+         |   +-----+               |        +-----+
 *        |  C  |         |   |  C  |               |        |  C  |
 *        +-----+    or   |   +-----+          or   |        +-----+
 *               +---+    |  +----+    +----+       |  +---+
 *               | E |    |  | E1 |    | E2 |       |  | E |
 *               +---+    |  +----+    +----+       |  +---+
 *    For the above situations, there is no bad block to be cleared and no
 *    failure happens; simply return 0.
 * 2) The clearing range hits the middle of an already set bad blocks range in
 *    the bad block table.
 *           +---+
 *           | C |
 *           +---+
 *        +-----------------+
 *        |         E       |
 *        +-----------------+
 *    In this situation, if the bad block table is not full, the range E will
 *    be split into two ranges E1 and E2. The result is,
 *        +------+    +------+
 *        |  E1  |    |  E2  |
 *        +------+    +------+
 * 3) The clearing range starts at exactly the same LBA as an already set bad
 *    block range from the bad block table.
 * 3.1) Partially covered at the head part
 *        +------------+
 *        |      C     |
 *        +------------+
 *        +-----------------+
 *        |         E       |
 *        +-----------------+
 *    For this situation, the overlapped already set range will have its start
 *    LBA updated to the end of C, and its length shrinks to
 *    BB_LEN(E) - BB_LEN(C). No item is deleted from the bad block table. The
 *    result is,
 *                     +----+
 *                     | E1 |
 *                     +----+
 * 3.2) Exactly fully covered
 *        +-----------------+
 *        |         C       |
 *        +-----------------+
 *        +-----------------+
 *        |         E       |
 *        +-----------------+
 *    For this situation the whole bad blocks range E will be cleared and its
 *    corresponding item is deleted from the bad block table.
 * 4) The clearing range ends at exactly the same LBA as an already set bad
 *    block range.
 *                  +-------+
 *                  |   C   |
 *                  +-------+
 *        +-----------------+
 *        |         E       |
 *        +-----------------+
 *    For the above situation, the already set range E is updated to shrink
 *    its end to the start of C, and its length is reduced to
 *    BB_LEN(E) - BB_LEN(C). The result is,
 *        +---------+
 *        |    E    |
 *        +---------+
 * 5) The clearing range is partially overlapped with an already set bad block
 *    range from the bad block table.
 * 5.1) The already set bad block range is front overlapped with the clearing
 *    range.
 *        +----------+
 *        |    C     |
 *        +----------+
 *             +------------+
 *             |      E     |
 *             +------------+
 *    For such a situation, the clearing range C can be treated as two parts.
 *    The first part C1 ends at the start LBA of range E, and the second part
 *    C2 starts at the same LBA as range E.
 *        +----+-----+              +----+   +-----+
 *        | C1 |  C2 |              | C1 |   |  C2 |
 *        +----+-----+      ===>    +----+   +-----+
 *             +------------+                +------------+
 *             |      E     |                |      E     |
 *             +------------+                +------------+
 *    Now the first part C1 can be handled as condition 1), and the second
 *    part C2 can be handled as condition 3.1) in the next loop.
 * 5.2) The already set bad block range is behind overlapped with the clearing
 *    range.
 *             +----------+
 *             |    C     |
 *             +----------+
 *        +------------+
 *        |      E     |
 *        +------------+
 *    For such a situation, the clearing range C can be treated as two parts.
 *    The first part C1 ends at the same end LBA as range E, and the second
 *    part C2 starts at the end LBA of range E.
 *             +----+-----+             +----+   +-----+
 *             | C1 |  C2 |             | C1 |   |  C2 |
 *             +----+-----+    ===>     +----+   +-----+
 *        +------------+            +------------+
 *        |      E     |            |      E     |
 *        +------------+            +------------+
 *    Now the first part clearing range C1 can be handled as condition 4), and
 *    the second part clearing range C2 can be handled as condition 1) in the
 *    next loop.
 *
 * All bad blocks range clearing can be simplified into the above 5 situations
 * by only handling the head part of the clearing range in each run of the
 * while-loop. The idea is similar to bad blocks range setting but much
 * simpler. An illustrative sketch of how a caller may exercise the set, check
 * and clear interfaces follows this comment block.
 */

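/*
 * Illustrative sketch only (not part of the badblocks API; the LBA values
 * are arbitrary assumptions made up for illustration): one possible way a
 * caller could exercise the set/check/clear interfaces whose behaviour is
 * documented in the comment block above.
 */
static int __maybe_unused badblocks_usage_sketch(struct badblocks *bb)
{
	sector_t first_bad;
	int bad_sectors;

	/* Record 8 sectors starting at LBA 1024 as bad and unacknowledged. */
	if (badblocks_set(bb, 1024, 8, 0))
		return -ENOSPC;

	/*
	 * A check overlapping the range above returns -1 (unacknowledged bad
	 * blocks found) and reports the first overlapping bad table entry
	 * through first_bad/bad_sectors.
	 */
	if (badblocks_check(bb, 1020, 16, &first_bad, &bad_sectors) < 0)
		pr_debug("unacked bad range: %llu+%d\n",
			 (unsigned long long)first_bad, bad_sectors);

	/* Remove the range again; badblocks_clear() returns 0 on success. */
	return badblocks_clear(bb, 1024, 8) ? -ENOSPC : 0;
}
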
/*
 * Find the range that starts at-or-before 's' from the bad table. The
 * search starts from index 'hint' and stops at index 'hint_end' of the
 * bad table.
 */
static int prev_by_hint(struct badblocks *bb, sector_t s, int hint)
{
	int hint_end = hint + 2;
	u64 *p = bb->page;
	int ret = -1;

	while ((hint < hint_end) && ((hint + 1) <= bb->count) &&
	       (BB_OFFSET(p[hint]) <= s)) {
		if ((hint + 1) == bb->count || BB_OFFSET(p[hint + 1]) > s) {
			ret = hint;
			break;
		}
		hint++;
	}

	return ret;
}

/*
 * Find the range that starts at-or-before bad->start. If 'hint' is
 * provided (hint >= 0) then search in the bad table from hint first. It
 * is quite likely that the wanted bad range can be found from the hint
 * index, so the unnecessary while-loop iterations can be avoided.
 */
static int prev_badblocks(struct badblocks *bb, struct badblocks_context *bad,
			  int hint)
{
	sector_t s = bad->start;
	int ret = -1;
	int lo, hi;
	u64 *p;

	if (!bb->count)
		goto out;

	if (hint >= 0) {
		ret = prev_by_hint(bb, s, hint);
		if (ret >= 0)
			goto out;
	}

	lo = 0;
	hi = bb->count;
	p = bb->page;

	/* The following bisect search might be unnecessary */
	if (BB_OFFSET(p[lo]) > s)
		return -1;
	if (BB_OFFSET(p[hi - 1]) <= s)
		return hi - 1;

	/* Do bisect search in bad table */
	while (hi - lo > 1) {
		int mid = (lo + hi)/2;
		sector_t a = BB_OFFSET(p[mid]);

		if (a == s) {
			ret = mid;
			goto out;
		}

		if (a < s)
			lo = mid;
		else
			hi = mid;
	}

	if (BB_OFFSET(p[lo]) <= s)
		ret = lo;
out:
	return ret;
}

/*
 * Return 'true' if the range indicated by 'bad' can be backward merged
 * with the bad range (from the bad table) indexed by 'behind'.
 */
static bool can_merge_behind(struct badblocks *bb,
			     struct badblocks_context *bad, int behind)
{
	sector_t sectors = bad->len;
	sector_t s = bad->start;
	u64 *p = bb->page;

	if ((s < BB_OFFSET(p[behind])) &&
	    ((s + sectors) >= BB_OFFSET(p[behind])) &&
	    ((BB_END(p[behind]) - s) <= BB_MAX_LEN) &&
	    BB_ACK(p[behind]) == bad->ack)
		return true;
	return false;
}

/*
 * Do backward merge for the range indicated by 'bad' and the bad range
 * (from the bad table) indexed by 'behind'. The return value is the
 * number of merged sectors from bad->len.
 */
static int behind_merge(struct badblocks *bb, struct badblocks_context *bad,
			int behind)
{
	sector_t sectors = bad->len;
	sector_t s = bad->start;
	u64 *p = bb->page;
	int merged = 0;

	WARN_ON(s >= BB_OFFSET(p[behind]));
	WARN_ON((s + sectors) < BB_OFFSET(p[behind]));

	if (s < BB_OFFSET(p[behind])) {
		merged = BB_OFFSET(p[behind]) - s;
		p[behind] = BB_MAKE(s, BB_LEN(p[behind]) + merged, bad->ack);

		WARN_ON((BB_LEN(p[behind]) + merged) >= BB_MAX_LEN);
	}

	return merged;
}

/*
 * Return 'true' if the range indicated by 'bad' can be forward
 * merged with the bad range (from the bad table) indexed by 'prev'.
 */
static bool can_merge_front(struct badblocks *bb, int prev,
			    struct badblocks_context *bad)
{
	sector_t s = bad->start;
	u64 *p = bb->page;

	if (BB_ACK(p[prev]) == bad->ack &&
	    (s < BB_END(p[prev]) ||
	     (s == BB_END(p[prev]) && (BB_LEN(p[prev]) < BB_MAX_LEN))))
		return true;
	return false;
}

/*
 * Do forward merge for the range indicated by 'bad' and the bad range
 * (from the bad table) indexed by 'prev'. The return value is the
 * number of sectors merged from bad->len.
 */
static int front_merge(struct badblocks *bb, int prev, struct badblocks_context *bad)
{
	sector_t sectors = bad->len;
	sector_t s = bad->start;
	u64 *p = bb->page;
	int merged = 0;

	WARN_ON(s > BB_END(p[prev]));

	if (s < BB_END(p[prev])) {
		merged = min_t(sector_t, sectors, BB_END(p[prev]) - s);
	} else {
		merged = min_t(sector_t, sectors, BB_MAX_LEN - BB_LEN(p[prev]));
		if ((prev + 1) < bb->count &&
		    merged > (BB_OFFSET(p[prev + 1]) - BB_END(p[prev]))) {
			merged = BB_OFFSET(p[prev + 1]) - BB_END(p[prev]);
		}

		p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
				  BB_LEN(p[prev]) + merged, bad->ack);
	}

	return merged;
}

/*
 * 'Combine' is a special case that can_merge_front() is not able to
 * handle: if a bad range (indexed by 'prev' in the bad table) starts
 * exactly at bad->start, and the bad range ahead of 'prev' (indexed by
 * 'prev - 1' in the bad table) ends exactly where 'prev' starts, and
 * the sum of their lengths does not exceed the BB_MAX_LEN limitation,
 * then these two bad ranges (from the bad table) can be combined.
 *
 * Return 'true' if the bad ranges indexed by 'prev' and 'prev - 1' from
 * the bad table can be combined.
 */
static bool can_combine_front(struct badblocks *bb, int prev,
			      struct badblocks_context *bad)
{
	u64 *p = bb->page;

	if ((prev > 0) &&
	    (BB_OFFSET(p[prev]) == bad->start) &&
	    (BB_END(p[prev - 1]) == BB_OFFSET(p[prev])) &&
	    (BB_LEN(p[prev - 1]) + BB_LEN(p[prev]) <= BB_MAX_LEN) &&
	    (BB_ACK(p[prev - 1]) == BB_ACK(p[prev])))
		return true;
	return false;
}

/*
 * Combine the bad ranges indexed by 'prev' and 'prev - 1' (from the bad
 * table) into one larger bad range, and the new range is indexed by
 * 'prev - 1'.
 * The caller of front_combine() will decrease bb->count, therefore
 * it is unnecessary to clear p[prev] after the front combine.
 */
static void front_combine(struct badblocks *bb, int prev)
{
	u64 *p = bb->page;

	p[prev - 1] = BB_MAKE(BB_OFFSET(p[prev - 1]),
			      BB_LEN(p[prev - 1]) + BB_LEN(p[prev]),
			      BB_ACK(p[prev]));
	if ((prev + 1) < bb->count)
		memmove(p + prev, p + prev + 1, (bb->count - prev - 1) * 8);
}

/*
 * Return 'true' if the range indicated by 'bad' is exactly forward
 * overlapped with the bad range (from the bad table) indexed by 'front'.
 * Exactly forward overlap means the bad range (from the bad table)
 * indexed by 'front' does not cover the whole range indicated by 'bad'.
 */
static bool overlap_front(struct badblocks *bb, int front,
			  struct badblocks_context *bad)
{
	u64 *p = bb->page;

	if (bad->start >= BB_OFFSET(p[front]) &&
	    bad->start < BB_END(p[front]))
		return true;
	return false;
}

/*
 * Return 'true' if the range indicated by 'bad' is exactly backward
 * overlapped with the bad range (from the bad table) indexed by 'behind'.
 */
static bool overlap_behind(struct badblocks *bb, struct badblocks_context *bad,
			   int behind)
{
	u64 *p = bb->page;

	if (bad->start < BB_OFFSET(p[behind]) &&
	    (bad->start + bad->len) > BB_OFFSET(p[behind]))
		return true;
	return false;
}

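/*
 * Illustrative sketch only (the values are arbitrary assumptions): the
 * merge, combine and overlap helpers above all operate on packed 64-bit
 * bad table entries built with the BB_* macros from <linux/badblocks.h>.
 * A single entry encodes the start LBA, the length and the acknowledged
 * bit, and a range may never exceed BB_MAX_LEN sectors, which is why
 * longer setting requests are split over multiple entries.
 */
static u64 __maybe_unused bb_entry_encoding_sketch(void)
{
	/* A 16-sector acknowledged bad range starting at LBA 2048 ... */
	u64 entry = BB_MAKE(2048, 16, 1);

	/*
	 * ... decodes as BB_OFFSET(entry) == 2048, BB_LEN(entry) == 16,
	 * BB_END(entry) == 2064 and BB_ACK(entry) != 0.
	 */
	return entry;
}
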
/*
 * Return 'true' if the range indicated by 'bad' can overwrite the bad
 * range (from the bad table) indexed by 'prev'.
 *
 * The range indicated by 'bad' can overwrite the bad range indexed by
 * 'prev' when,
 * 1) The whole range indicated by 'bad' covers part or all of the bad
 *    range (from the bad table) indexed by 'prev'.
 * 2) The ack value of 'bad' is larger than the ack value of the bad
 *    range 'prev'.
 *
 * If the overwrite doesn't cover the whole bad range (from the bad table)
 * indexed by 'prev', new range(s) might be split off the existing bad
 * range,
 * 1) The overwrite covers the head or tail part of the existing bad
 *    range: 1 extra bad range will be split off and added into the bad
 *    table.
 * 2) The overwrite covers the middle of the existing bad range: 2 extra
 *    bad ranges will be split off (ahead of and after the overwritten
 *    range) and added into the bad table.
 * The number of extra split ranges of the overwrite is stored in 'extra'
 * and returned to the caller.
 */
static bool can_front_overwrite(struct badblocks *bb, int prev,
				struct badblocks_context *bad, int *extra)
{
	u64 *p = bb->page;
	int len;

	WARN_ON(!overlap_front(bb, prev, bad));

	if (BB_ACK(p[prev]) >= bad->ack)
		return false;

	if (BB_END(p[prev]) <= (bad->start + bad->len)) {
		len = BB_END(p[prev]) - bad->start;
		if (BB_OFFSET(p[prev]) == bad->start)
			*extra = 0;
		else
			*extra = 1;

		bad->len = len;
	} else {
		if (BB_OFFSET(p[prev]) == bad->start)
			*extra = 1;
		else
			/*
			 * prev range will be split into two; besides the
			 * overwritten one, an extra slot is needed from
			 * the bad table.
			 */
			*extra = 2;
	}

	if ((bb->count + (*extra)) >= MAX_BADBLOCKS)
		return false;

	return true;
}

/*
 * Do the overwrite from the range indicated by 'bad' to the bad range
 * (from the bad table) indexed by 'prev'.
 * The previously called can_front_overwrite() will provide how many
 * extra bad range(s) might be split off and added into the bad table.
 * All the splitting cases in the bad table will be handled here.
 */
static int front_overwrite(struct badblocks *bb, int prev,
			   struct badblocks_context *bad, int extra)
{
	u64 *p = bb->page;
	sector_t orig_end = BB_END(p[prev]);
	int orig_ack = BB_ACK(p[prev]);

	switch (extra) {
	case 0:
		p[prev] = BB_MAKE(BB_OFFSET(p[prev]), BB_LEN(p[prev]),
				  bad->ack);
		break;
	case 1:
		if (BB_OFFSET(p[prev]) == bad->start) {
			p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
					  bad->len, bad->ack);
			memmove(p + prev + 2, p + prev + 1,
				(bb->count - prev - 1) * 8);
			p[prev + 1] = BB_MAKE(bad->start + bad->len,
					      orig_end - BB_END(p[prev]),
					      orig_ack);
		} else {
			p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
					  bad->start - BB_OFFSET(p[prev]),
					  orig_ack);
			/*
			 * prev + 2 -> prev + 1 + 1, which is for,
			 * 1) prev + 1: the slot index of the previous one
			 * 2) + 1: one more slot for extra being 1.
			 */
			memmove(p + prev + 2, p + prev + 1,
				(bb->count - prev - 1) * 8);
			p[prev + 1] = BB_MAKE(bad->start, bad->len, bad->ack);
		}
		break;
	case 2:
		p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
				  bad->start - BB_OFFSET(p[prev]),
				  orig_ack);
		/*
		 * prev + 3 -> prev + 1 + 2, which is for,
		 * 1) prev + 1: the slot index of the previous one
		 * 2) + 2: two more slots for extra being 2.
		 */
		memmove(p + prev + 3, p + prev + 1,
			(bb->count - prev - 1) * 8);
		p[prev + 1] = BB_MAKE(bad->start, bad->len, bad->ack);
		p[prev + 2] = BB_MAKE(BB_END(p[prev + 1]),
				      orig_end - BB_END(p[prev + 1]),
				      orig_ack);
		break;
	default:
		break;
	}

	return bad->len;
}

/*
 * Explicitly insert a range indicated by 'bad' into the bad table, where
 * the location is indexed by 'at'.
 */
static int insert_at(struct badblocks *bb, int at, struct badblocks_context *bad)
{
	u64 *p = bb->page;
	int len;

	WARN_ON(badblocks_full(bb));

	len = min_t(sector_t, bad->len, BB_MAX_LEN);
	if (at < bb->count)
		memmove(p + at + 1, p + at, (bb->count - at) * 8);
	p[at] = BB_MAKE(bad->start, len, bad->ack);

	return len;
}

static void badblocks_update_acked(struct badblocks *bb)
{
	bool unacked = false;
	u64 *p = bb->page;
	int i;

	if (!bb->unacked_exist)
		return;

	for (i = 0; i < bb->count; i++) {
		if (!BB_ACK(p[i])) {
			unacked = true;
			break;
		}
	}

	if (!unacked)
		bb->unacked_exist = 0;
}

/* Do the exact work to set a bad block range into the bad block table */
static int _badblocks_set(struct badblocks *bb, sector_t s, int sectors,
			  int acknowledged)
{
	int retried = 0, space_desired = 0;
	int orig_len, len = 0, added = 0;
	struct badblocks_context bad;
	int prev = -1, hint = -1;
	sector_t orig_start;
	unsigned long flags;
	int rv = 0;
	u64 *p;

	if (bb->shift < 0)
		/* badblocks are disabled */
		return 1;

	if (sectors == 0)
		/* Invalid sectors number */
		return 1;

	if (bb->shift) {
		/* round the start down, and the end up */
		sector_t next = s + sectors;

		rounddown(s, bb->shift);
		roundup(next, bb->shift);
		sectors = next - s;
	}

	write_seqlock_irqsave(&bb->lock, flags);

	orig_start = s;
	orig_len = sectors;
	bad.ack = acknowledged;
	p = bb->page;

re_insert:
	bad.start = s;
	bad.len = sectors;
	len = 0;

	if (badblocks_empty(bb)) {
		len = insert_at(bb, 0, &bad);
		bb->count++;
		added++;
		goto update_sectors;
	}

	prev = prev_badblocks(bb, &bad, hint);

	/* start before all badblocks */
	if (prev < 0) {
		if (!badblocks_full(bb)) {
			/* insert on the first */
			if (bad.len > (BB_OFFSET(p[0]) - bad.start))
				bad.len = BB_OFFSET(p[0]) - bad.start;
			len = insert_at(bb, 0, &bad);
			bb->count++;
			added++;
			hint = 0;
			goto update_sectors;
		}

		/* No space, try to merge */
		if (overlap_behind(bb, &bad, 0)) {
			if (can_merge_behind(bb, &bad, 0)) {
				len = behind_merge(bb, &bad, 0);
				added++;
			} else {
				len = BB_OFFSET(p[0]) - s;
				space_desired = 1;
			}
			hint = 0;
			goto update_sectors;
		}

		/* no table space and give up */
		goto out;
	}

	/* in case p[prev-1] can be merged with p[prev] */
	if (can_combine_front(bb, prev, &bad)) {
		front_combine(bb, prev);
		bb->count--;
		added++;
		hint = prev;
		goto update_sectors;
	}

	if (overlap_front(bb, prev, &bad)) {
		if (can_merge_front(bb, prev, &bad)) {
			len = front_merge(bb, prev, &bad);
			added++;
		} else {
			int extra = 0;

			if (!can_front_overwrite(bb, prev, &bad, &extra)) {
				len = min_t(sector_t,
					    BB_END(p[prev]) - s, sectors);
				hint = prev;
				goto update_sectors;
			}

			len = front_overwrite(bb, prev, &bad, extra);
			added++;
			bb->count += extra;

			if (can_combine_front(bb, prev, &bad)) {
				front_combine(bb, prev);
				bb->count--;
			}
		}
		hint = prev;
		goto update_sectors;
	}

	if (can_merge_front(bb, prev, &bad)) {
		len = front_merge(bb, prev, &bad);
		added++;
		hint = prev;
		goto update_sectors;
	}

	/* if no space in table, still try to merge in the covered range */
	if (badblocks_full(bb)) {
		/* skip the cannot-merge range */
		if (((prev + 1) < bb->count) &&
		    overlap_behind(bb, &bad, prev + 1) &&
		    ((s + sectors) >= BB_END(p[prev + 1]))) {
			len = BB_END(p[prev + 1]) - s;
			hint = prev + 1;
			goto update_sectors;
		}

		/* no retry any more */
		len = sectors;
		space_desired = 1;
		hint = -1;
		goto update_sectors;
	}

	/* cannot merge and there is space in bad table */
	if ((prev + 1) < bb->count &&
	    overlap_behind(bb, &bad, prev + 1))
		bad.len = min_t(sector_t,
				bad.len, BB_OFFSET(p[prev + 1]) - bad.start);

	len = insert_at(bb, prev + 1, &bad);
	bb->count++;
	added++;
	hint = prev + 1;

update_sectors:
	s += len;
	sectors -= len;

	if (sectors > 0)
		goto re_insert;

	WARN_ON(sectors < 0);

	/*
	 * Check whether the following already set range can be
	 * merged. (prev < 0) condition is not handled here,
	 * because it's already complicated enough.
	 */
	if (prev >= 0 &&
	    (prev + 1) < bb->count &&
	    BB_END(p[prev]) == BB_OFFSET(p[prev + 1]) &&
	    (BB_LEN(p[prev]) + BB_LEN(p[prev + 1])) <= BB_MAX_LEN &&
	    BB_ACK(p[prev]) == BB_ACK(p[prev + 1])) {
		p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
				  BB_LEN(p[prev]) + BB_LEN(p[prev + 1]),
				  BB_ACK(p[prev]));

		if ((prev + 2) < bb->count)
			memmove(p + prev + 1, p + prev + 2,
				(bb->count - (prev + 2)) * 8);
		bb->count--;
	}

	if (space_desired && !badblocks_full(bb)) {
		s = orig_start;
		sectors = orig_len;
		space_desired = 0;
		if (retried++ < 3)
			goto re_insert;
	}

out:
	if (added) {
		set_changed(bb);

		if (!acknowledged)
			bb->unacked_exist = 1;
		else
			badblocks_update_acked(bb);
	}

	write_sequnlock_irqrestore(&bb->lock, flags);

	if (!added)
		rv = 1;

	return rv;
}

/*
 * Clear the bad block range from the bad block table which is front
 * overlapped with the clearing range. The return value is how many
 * sectors from an already set bad block range are cleared. If the whole
 * bad block range is covered by the clearing range and fully cleared,
 * 'deleted' is set to 1 for the caller to reduce bb->count.
 */
static int front_clear(struct badblocks *bb, int prev,
		       struct badblocks_context *bad, int *deleted)
{
	sector_t sectors = bad->len;
	sector_t s = bad->start;
	u64 *p = bb->page;
	int cleared = 0;

	*deleted = 0;
	if (s == BB_OFFSET(p[prev])) {
		if (BB_LEN(p[prev]) > sectors) {
			p[prev] = BB_MAKE(BB_OFFSET(p[prev]) + sectors,
					  BB_LEN(p[prev]) - sectors,
					  BB_ACK(p[prev]));
			cleared = sectors;
		} else {
			/* BB_LEN(p[prev]) <= sectors */
			cleared = BB_LEN(p[prev]);
			if ((prev + 1) < bb->count)
				memmove(p + prev, p + prev + 1,
					(bb->count - prev - 1) * 8);
			*deleted = 1;
		}
	} else if (s > BB_OFFSET(p[prev])) {
		if (BB_END(p[prev]) <= (s + sectors)) {
			cleared = BB_END(p[prev]) - s;
			p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
					  s - BB_OFFSET(p[prev]),
					  BB_ACK(p[prev]));
		} else {
			/* Splitting is handled in front_splitting_clear() */
			BUG();
		}
	}

	return cleared;
}

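/*
 * Illustrative sketch only (the LBA values are arbitrary assumptions):
 * the splitting case handled by front_splitting_clear() below. Clearing
 * the middle of a single bad range leaves two entries behind, which is
 * why the clear path needs one free slot in the bad table for this case.
 */
static void __maybe_unused bb_clear_split_sketch(struct badblocks *bb)
{
	/* One 64-sector unacknowledged bad range at LBA 4096 ... */
	badblocks_set(bb, 4096, 64, 0);

	/*
	 * ... then clearing 16 sectors from its middle splits the entry in
	 * two, covering [4096, 4112) and [4128, 4160), and consumes one
	 * extra table slot.
	 */
	badblocks_clear(bb, 4112, 16);
}
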
/*
 * Handle the condition that the clearing range hits the middle of an
 * already set bad block range from the bad block table. In this
 * condition the existing bad block range is split into two after the
 * middle part is cleared.
 */
static int front_splitting_clear(struct badblocks *bb, int prev,
				 struct badblocks_context *bad)
{
	u64 *p = bb->page;
	u64 end = BB_END(p[prev]);
	int ack = BB_ACK(p[prev]);
	sector_t sectors = bad->len;
	sector_t s = bad->start;

	p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
			  s - BB_OFFSET(p[prev]),
			  ack);
	memmove(p + prev + 2, p + prev + 1, (bb->count - prev - 1) * 8);
	p[prev + 1] = BB_MAKE(s + sectors, end - s - sectors, ack);
	return sectors;
}

/* Do the exact work to clear a bad block range from the bad block table */
static int _badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
{
	struct badblocks_context bad;
	int prev = -1, hint = -1;
	int len = 0, cleared = 0;
	int rv = 0;
	u64 *p;

	if (bb->shift < 0)
		/* badblocks are disabled */
		return 1;

	if (sectors == 0)
		/* Invalid sectors number */
		return 1;

	if (bb->shift) {
		sector_t target;

		/* When clearing we round the start up and the end down.
		 * This should not matter as the shift should align with
		 * the block size and no rounding should ever be needed.
		 * However it is better to think a block is bad when it
		 * isn't than to think a block is not bad when it is.
		 */
		target = s + sectors;
		roundup(s, bb->shift);
		rounddown(target, bb->shift);
		sectors = target - s;
	}

	write_seqlock_irq(&bb->lock);

	bad.ack = true;
	p = bb->page;

re_clear:
	bad.start = s;
	bad.len = sectors;

	if (badblocks_empty(bb)) {
		len = sectors;
		cleared++;
		goto update_sectors;
	}

	prev = prev_badblocks(bb, &bad, hint);

	/* Start before all badblocks */
	if (prev < 0) {
		if (overlap_behind(bb, &bad, 0)) {
			len = BB_OFFSET(p[0]) - s;
			hint = 0;
		} else {
			len = sectors;
		}
		/*
		 * Both situations are to clear non-bad range,
		 * should be treated as successful
		 */
		cleared++;
		goto update_sectors;
	}

	/* Start after all badblocks */
	if ((prev + 1) >= bb->count && !overlap_front(bb, prev, &bad)) {
		len = sectors;
		cleared++;
		goto update_sectors;
	}

	/* Clear will split a bad record but the table is full */
	if (badblocks_full(bb) && (BB_OFFSET(p[prev]) < bad.start) &&
	    (BB_END(p[prev]) > (bad.start + sectors))) {
		len = sectors;
		goto update_sectors;
	}

	if (overlap_front(bb, prev, &bad)) {
		if ((BB_OFFSET(p[prev]) < bad.start) &&
		    (BB_END(p[prev]) > (bad.start + bad.len))) {
			/* Splitting */
			if ((bb->count + 1) < MAX_BADBLOCKS) {
				len = front_splitting_clear(bb, prev, &bad);
				bb->count += 1;
				cleared++;
			} else {
				/* No space to split, give up */
				len = sectors;
			}
		} else {
			int deleted = 0;

			len = front_clear(bb, prev, &bad, &deleted);
			bb->count -= deleted;
			cleared++;
			hint = prev;
		}

		goto update_sectors;
	}

	/* Not front overlap, but behind overlap */
	if ((prev + 1) < bb->count && overlap_behind(bb, &bad, prev + 1)) {
		len = BB_OFFSET(p[prev + 1]) - bad.start;
		hint = prev + 1;
		/* Clear non-bad range should be treated as successful */
		cleared++;
		goto update_sectors;
	}

	/* Not cover any badblocks range in the table */
	len = sectors;
	/* Clear non-bad range should be treated as successful */
	cleared++;

update_sectors:
	s += len;
	sectors -= len;

	if (sectors > 0)
		goto re_clear;

	WARN_ON(sectors < 0);

	if (cleared) {
		badblocks_update_acked(bb);
		set_changed(bb);
	}

	write_sequnlock_irq(&bb->lock);

	if (!cleared)
		rv = 1;

	return rv;
}

/* Do the exact work to check a bad block range from the bad block table */
static int _badblocks_check(struct badblocks *bb, sector_t s, int sectors,
			    sector_t *first_bad, int *bad_sectors)
{
	int unacked_badblocks, acked_badblocks;
	int prev = -1, hint = -1, set = 0;
	struct badblocks_context bad;
	unsigned int seq;
	int len, rv;
	u64 *p;

	WARN_ON(bb->shift < 0 || sectors == 0);

	if (bb->shift > 0) {
		sector_t target;

		/* round the start down, and the end up */
		target = s + sectors;
		rounddown(s, bb->shift);
		roundup(target, bb->shift);
		sectors = target - s;
	}

retry:
	seq = read_seqbegin(&bb->lock);

	p = bb->page;
	unacked_badblocks = 0;
	acked_badblocks = 0;

re_check:
	bad.start = s;
	bad.len = sectors;

	if (badblocks_empty(bb)) {
		len = sectors;
		goto update_sectors;
	}

	prev = prev_badblocks(bb, &bad, hint);

	/* start after all badblocks */
	if ((prev >= 0) &&
	    ((prev + 1) >= bb->count) && !overlap_front(bb, prev, &bad)) {
		len = sectors;
		goto update_sectors;
	}

	/* Overlapped with front badblocks record */
	if ((prev >= 0) && overlap_front(bb, prev, &bad)) {
		if (BB_ACK(p[prev]))
			acked_badblocks++;
		else
			unacked_badblocks++;

		if (BB_END(p[prev]) >= (s + sectors))
			len = sectors;
		else
			len = BB_END(p[prev]) - s;

		if (set == 0) {
			*first_bad = BB_OFFSET(p[prev]);
			*bad_sectors = BB_LEN(p[prev]);
			set = 1;
		}
		goto update_sectors;
	}

	/* Not front overlap, but behind overlap */
	if ((prev + 1) < bb->count && overlap_behind(bb, &bad, prev + 1)) {
		len = BB_OFFSET(p[prev + 1]) - bad.start;
		hint = prev + 1;
		goto update_sectors;
	}

	/* not cover any badblocks range in the table */
	len = sectors;

update_sectors:
	s += len;
	sectors -= len;

	if (sectors > 0)
		goto re_check;

	WARN_ON(sectors < 0);

	if (unacked_badblocks > 0)
		rv = -1;
	else if (acked_badblocks > 0)
		rv = 1;
	else
		rv = 0;

	if (read_seqretry(&bb->lock, seq))
		goto retry;

	return rv;
}

/**
 * badblocks_check() - check a given range for bad sectors
 * @bb:		the badblocks structure that holds all badblock information
 * @s:		sector (start) at which to check for badblocks
 * @sectors:	number of sectors to check for badblocks
 * @first_bad:	pointer to store location of the first badblock
 * @bad_sectors: pointer to store number of badblocks after @first_bad
 *
 * We can record which blocks on each device are 'bad' and so just
 * fail those blocks, or that stripe, rather than the whole device.
 * Entries in the bad-block table are 64bits wide.  This comprises:
 * Length of bad-range, in sectors: 0-511 for lengths 1-512
 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
 *  A 'shift' can be set so that larger blocks are tracked and
 *  consequently larger devices can be covered.
 * 'Acknowledged' flag - 1 bit. - the most significant bit.
 *
 * Locking of the bad-block table uses a seqlock so badblocks_check
 * might need to retry if it is very unlucky.
 * We will sometimes want to check for bad blocks in a bi_end_io function,
 * so we use the write_seqlock_irq variant.
 *
 * When looking for a bad block we specify a range and want to
 * know if any block in the range is bad.  So we binary-search
 * to the last range that starts at-or-before the given endpoint,
 * (or "before the sector after the target range")
 * then see if it ends after the given start.
 *
 * Return:
 *  0: there are no known bad blocks in the range
 *  1: there are known bad blocks which are all acknowledged
 * -1: there are bad blocks which have not yet been acknowledged in metadata.
 * plus the start/length of the first bad section we overlap.
 */
int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
		    sector_t *first_bad, int *bad_sectors)
{
	return _badblocks_check(bb, s, sectors, first_bad, bad_sectors);
}
EXPORT_SYMBOL_GPL(badblocks_check);

/**
 * badblocks_set() - Add a range of bad blocks to the table.
 * @bb:		the badblocks structure that holds all badblock information
 * @s:		first sector to mark as bad
 * @sectors:	number of sectors to mark as bad
 * @acknowledged: whether to mark the bad sectors as acknowledged
 *
 * This might extend the table, or might contract it if two adjacent ranges
 * can be merged.  We binary-search to find the 'insertion' point, then
 * decide how best to handle it.
 *
 * Return:
 *  0: success
 *  1: failed to set badblocks (out of space)
 */
int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
		  int acknowledged)
{
	return _badblocks_set(bb, s, sectors, acknowledged);
}
EXPORT_SYMBOL_GPL(badblocks_set);

/**
 * badblocks_clear() - Remove a range of bad blocks from the table.
 * @bb:		the badblocks structure that holds all badblock information
 * @s:		first sector of the range to clear
 * @sectors:	number of sectors to clear
 *
 * This may involve extending the table if we split a region,
 * but it must not fail.  So if the table becomes full, we just
 * drop the remove request.
 *
 * Return:
 *  0: success
 *  1: failed to clear badblocks
 */
int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
{
	return _badblocks_clear(bb, s, sectors);
}
EXPORT_SYMBOL_GPL(badblocks_clear);

/**
 * ack_all_badblocks() - Acknowledge all bad blocks in a list.
 * @bb:		the badblocks structure that holds all badblock information
 *
 * This only succeeds if ->changed is clear.
 * It is used by in-kernel metadata updates.
 */
void ack_all_badblocks(struct badblocks *bb)
{
	if (bb->page == NULL || bb->changed)
		/* no point even trying */
		return;
	write_seqlock_irq(&bb->lock);

	if (bb->changed == 0 && bb->unacked_exist) {
		u64 *p = bb->page;
		int i;

		for (i = 0; i < bb->count; i++) {
			if (!BB_ACK(p[i])) {
				sector_t start = BB_OFFSET(p[i]);
				int len = BB_LEN(p[i]);

				p[i] = BB_MAKE(start, len, 1);
			}
		}
		bb->unacked_exist = 0;
	}
	write_sequnlock_irq(&bb->lock);
}
EXPORT_SYMBOL_GPL(ack_all_badblocks);

/**
 * badblocks_show() - sysfs access to bad-blocks list
 * @bb:		the badblocks structure that holds all badblock information
 * @page:	buffer received from sysfs
 * @unack:	whether to show unacknowledged badblocks
 *
 * Return:
 *  Length of returned data
 */
ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
{
	size_t len;
	int i;
	u64 *p = bb->page;
	unsigned seq;

	if (bb->shift < 0)
		return 0;

retry:
	seq = read_seqbegin(&bb->lock);

	len = 0;
	i = 0;

	while (len < PAGE_SIZE && i < bb->count) {
		sector_t s = BB_OFFSET(p[i]);
		unsigned int length = BB_LEN(p[i]);
		int ack = BB_ACK(p[i]);

		i++;

		if (unack && ack)
			continue;

		len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
				(unsigned long long)s << bb->shift,
				length << bb->shift);
	}
	if (unack && len == 0)
		bb->unacked_exist = 0;

	if (read_seqretry(&bb->lock, seq))
		goto retry;

	return len;
}
EXPORT_SYMBOL_GPL(badblocks_show);

/**
 * badblocks_store() - sysfs access to bad-blocks list
 * @bb:		the badblocks structure that holds all badblock information
 * @page:	buffer received from sysfs
 * @len:	length of data received from sysfs
 * @unack:	whether to show unacknowledged badblocks
 *
 * Return:
 *  Length of the buffer processed or -ve error.
 */
ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
			int unack)
{
	unsigned long long sector;
	int length;
	char newline;

	switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
	case 3:
		if (newline != '\n')
			return -EINVAL;
		fallthrough;
	case 2:
		if (length <= 0)
			return -EINVAL;
		break;
	default:
		return -EINVAL;
	}

	if (badblocks_set(bb, sector, length, !unack))
		return -ENOSPC;
	else
		return len;
}
EXPORT_SYMBOL_GPL(badblocks_store);

static int __badblocks_init(struct device *dev, struct badblocks *bb,
			    int enable)
{
	bb->dev = dev;
	bb->count = 0;
	if (enable)
		bb->shift = 0;
	else
		bb->shift = -1;
	if (dev)
		bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
	else
		bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!bb->page) {
		bb->shift = -1;
		return -ENOMEM;
	}
	seqlock_init(&bb->lock);

	return 0;
}

/**
 * badblocks_init() - initialize the badblocks structure
 * @bb:		the badblocks structure that holds all badblock information
 * @enable:	whether to enable badblocks accounting
 *
 * Return:
 *  0: success
 *  -ve errno: on error
 */
int badblocks_init(struct badblocks *bb, int enable)
{
	return __badblocks_init(NULL, bb, enable);
}
EXPORT_SYMBOL_GPL(badblocks_init);

int devm_init_badblocks(struct device *dev, struct badblocks *bb)
{
	if (!bb)
		return -EINVAL;
	return __badblocks_init(dev, bb, 1);
}
EXPORT_SYMBOL_GPL(devm_init_badblocks);

/**
 * badblocks_exit() - free the badblocks structure
 * @bb:		the badblocks structure that holds all badblock information
 */
void badblocks_exit(struct badblocks *bb)
{
	if (!bb)
		return;
	if (bb->dev)
		devm_kfree(bb->dev, bb->page);
	else
		kfree(bb->page);
	bb->page = NULL;
}
EXPORT_SYMBOL_GPL(badblocks_exit);
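
/*
 * Illustrative sketch only (the structure and function names here are made
 * up for illustration): a typical lifecycle for a badblocks instance that
 * is embedded in some driver-private structure, as described in the
 * badblocks_init()/badblocks_exit() kernel-doc above.
 */
struct bb_sketch_dev {
	struct badblocks bb;
};

static int __maybe_unused bb_sketch_dev_init(struct bb_sketch_dev *d)
{
	/* Enable accounting; this allocates one page for the bad table. */
	return badblocks_init(&d->bb, 1);
}

static void __maybe_unused bb_sketch_dev_exit(struct bb_sketch_dev *d)
{
	/* Frees the bad table page; the instance must not be used after. */
	badblocks_exit(&d->bb);
}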