linux/block/badblocks.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Bad block management
 *
 * - Heavily based on MD badblocks code from Neil Brown
 *
 * Copyright (c) 2015, Intel Corporation.
 */

#include <linux/badblocks.h>
#include <linux/seqlock.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/slab.h>

/*
 * The purpose of badblocks set/clear is to manage bad blocks ranges which are
 * identified by LBA addresses.
 *
 * When the caller of badblocks_set() wants to set a range of bad blocks, the
 * setting range can be acked or unacked. The setting range may merge with,
 * overwrite, or skip already set ranges, depending on how the ranges overlap
 * or are adjacent, and on the acknowledgment type of the ranges. It can be
 * more complicated when the setting range covers multiple already set bad
 * block ranges, with restrictions on the maximum length of each bad range and
 * the bad table space limitation.
 *
 * It is difficult and unnecessary to take care of all the possible situations
 * at once. For setting a large range of bad blocks, we can handle it by
 * dividing the large range into smaller ones when encountering overlap,
 * maximum range length or bad-table-full conditions. Each time only a smaller
 * piece of the bad range is handled, with a limited number of conditions for
 * how it interacts with possibly overlapping or adjacent already set bad
 * block ranges. Then the complicated problem becomes much simpler to handle
 * properly.
 *
 * When setting a range of bad blocks to the bad table, the simplified
 * situations to be considered are (the already set bad block ranges are named
 * with prefix E, and the setting bad blocks range is named with prefix S):
 *
 * 1) A setting range is not overlapped or adjacent to any other already set
 *    bad block range.
 *                         +--------+
 *                         |    S   |
 *                         +--------+
 *        +-------------+               +-------------+
 *        |      E1     |               |      E2     |
 *        +-------------+               +-------------+
 *    For this situation if the bad blocks table is not full, just allocate a
 *    free slot from the bad blocks table to mark the setting range S. The
 *    result is,
 *        +-------------+  +--------+   +-------------+
 *        |      E1     |  |    S   |   |      E2     |
 *        +-------------+  +--------+   +-------------+
 * 2) A setting range starts exactly at the start LBA of an already set bad
 *    blocks range.
 * 2.1) The setting range size < already set range size
 *        +--------+
 *        |    S   |
 *        +--------+
 *        +-------------+
 *        |      E      |
 *        +-------------+
 * 2.1.1) If S and E are both acked or both unacked ranges, the setting range
 *    S can be merged into the existing bad range E. The result is,
 *        +-------------+
 *        |      S      |
 *        +-------------+
 * 2.1.2) If S is unacked and E is acked, the setting will be denied, and the
 *    result is,
 *        +-------------+
 *        |      E      |
 *        +-------------+
 * 2.1.3) If S is acked and E is unacked, range S can overwrite the head of E.
 *    An extra slot from the bad blocks table will be allocated for S, and the
 *    head of E will move to the end of the inserted range S. The result is,
 *        +--------+----+
 *        |    S   | E  |
 *        +--------+----+
 * 2.2) The setting range size == already set range size
 * 2.2.1) If S and E are both acked or both unacked ranges, the setting range
 *    S can be merged into the existing bad range E. The result is,
 *        +-------------+
 *        |      S      |
 *        +-------------+
 * 2.2.2) If S is unacked and E is acked, the setting will be denied, and the
 *    result is,
 *        +-------------+
 *        |      E      |
 *        +-------------+
 * 2.2.3) If S is acked and E is unacked, range S can overwrite all of the bad
 *    blocks range E. The result is,
 *        +-------------+
 *        |      S      |
 *        +-------------+
 * 2.3) The setting range size > already set range size
 *        +-------------------+
 *        |          S        |
 *        +-------------------+
 *        +-------------+
 *        |      E      |
 *        +-------------+
 *    For such a situation, the setting range S can be treated as two parts:
 *    the first part (S1) has the same size as the already set range E, and
 *    the second part (S2) is the rest of the setting range.
 *        +-------------+-----+        +-------------+       +-----+
 *        |    S1       | S2  |        |     S1      |       | S2  |
 *        +-------------+-----+  ===>  +-------------+       +-----+
 *        +-------------+              +-------------+
 *        |      E      |              |      E      |
 *        +-------------+              +-------------+
 *    Now we only focus on how to handle the setting range S1 and the already
 *    set range E, which is already explained in 2.2); the rest (S2) will be
 *    handled later, in the next loop.
 * 3) A setting range starts before the start LBA of an already set bad blocks
 *    range.
 *        +-------------+
 *        |      S      |
 *        +-------------+
 *             +-------------+
 *             |      E      |
 *             +-------------+
 *    For this situation, the setting range S can be divided into two parts:
 *    the first (S1) ends at the start LBA of the already set range E, and the
 *    second part (S2) starts exactly at the start LBA of the already set
 *    range E.
 *        +----+---------+             +----+      +---------+
 *        | S1 |    S2   |             | S1 |      |    S2   |
 *        +----+---------+      ===>   +----+      +---------+
 *             +-------------+                     +-------------+
 *             |      E      |                     |      E      |
 *             +-------------+                     +-------------+
 *    Now only the first part S1 should be handled in this loop, which is in a
 *    similar condition to 1). The rest part S2 has exactly the same start LBA
 *    as the already set range E; it will be handled in the next loop, in one
 *    of the situations in 2).
 * 4) A setting range starts after the start LBA of an already set bad blocks
 *    range.
 * 4.1) If the setting range S exactly matches the tail part of the already
 *    set bad blocks range E, as the following chart shows,
 *            +---------+
 *            |   S     |
 *            +---------+
 *        +-------------+
 *        |      E      |
 *        +-------------+
 * 4.1.1) If ranges S and E have the same acknowledge value (both acked or
 *    both unacked), they will be merged into one. The result is,
 *        +-------------+
 *        |      S      |
 *        +-------------+
 * 4.1.2) If range E is acked and the setting range S is unacked, the setting
 *    request of S will be rejected. The result is,
 *        +-------------+
 *        |      E      |
 *        +-------------+
 * 4.1.3) If range E is unacked and the setting range S is acked, then S may
 *    overwrite the overlapped part of E. The result is,
 *        +---+---------+
 *        | E |    S    |
 *        +---+---------+
 * 4.2) If the setting range S stays in the middle of an already set range E,
 *    as the following chart shows,
 *             +----+
 *             | S  |
 *             +----+
 *        +--------------+
 *        |       E      |
 *        +--------------+
 * 4.2.1) If ranges S and E have the same acknowledge value (both acked or
 *    both unacked), they will be merged into one. The result is,
 *        +--------------+
 *        |       S      |
 *        +--------------+
 * 4.2.2) If range E is acked and the setting range S is unacked, the setting
 *    request of S will be rejected. The result is also,
 *        +--------------+
 *        |       E      |
 *        +--------------+
 * 4.2.3) If range E is unacked and the setting range S is acked, then S will
 *    be inserted into the middle of E and split the previous range E into two
 *    parts (E1 and E2). The result is,
 *        +----+----+----+
 *        | E1 |  S | E2 |
 *        +----+----+----+
 * 4.3) If the setting bad blocks range S is overlapped with an already set
 *    bad blocks range E, such that S starts after the start LBA of E and
 *    ends after the end LBA of E, as the following chart shows,
 *            +-------------------+
 *            |          S        |
 *            +-------------------+
 *        +-------------+
 *        |      E      |
 *        +-------------+
 *    For this situation the range S can be divided into two parts: the first
 *    part (S1) ends at the end LBA of range E, and the second part (S2) is
 *    the rest of the original S.
 *            +---------+---------+            +---------+      +---------+
 *            |    S1   |    S2   |            |    S1   |      |    S2   |
 *            +---------+---------+  ===>      +---------+      +---------+
 *        +-------------+                  +-------------+
 *        |      E      |                  |      E      |
 *        +-------------+                  +-------------+
 *     Now in this loop the setting range S1 and the already set range E can
 *     be handled as in situation 4.1); the rest range S2 will be handled in
 *     the next loop and is ignored in this one.
 * 5) A setting bad blocks range S is adjacent to one or more already set bad
 *    blocks range(s), and they are all acked or all unacked ranges.
 * 5.1) Front merge: if the already set bad blocks range E is before the
 *    setting range S and they are adjacent,
 *                +------+
 *                |  S   |
 *                +------+
 *        +-------+
 *        |   E   |
 *        +-------+
 * 5.1.1) When the total size of ranges S and E <= BB_MAX_LEN, and their
 *    acknowledge values are the same, the setting range S can be front
 *    merged into range E. The result is,
 *        +--------------+
 *        |       S      |
 *        +--------------+
 * 5.1.2) Otherwise these two ranges cannot merge; just insert the setting
 *    range S right after the already set range E into the bad blocks table.
 *    The result is,
 *        +--------+------+
 *        |   E    |   S  |
 *        +--------+------+
 * 6) Special cases which the above conditions cannot handle
 * 6.1) Multiple already set ranges may merge into fewer ones in a full bad
 *    table
 *        +-------------------------------------------------------+
 *        |                           S                           |
 *        +-------------------------------------------------------+
 *        |<----- BB_MAX_LEN ----->|
 *                                 +-----+     +-----+   +-----+
 *                                 | E1  |     | E2  |   | E3  |
 *                                 +-----+     +-----+   +-----+
 *     In the above example, when the bad blocks table is full, inserting the
 *     first part of the setting range S will fail because no more available
 *     slots can be allocated from the bad blocks table. In this situation a
 *     proper setting method should go through the whole setting bad blocks
 *     range and look for chances to merge already set ranges into fewer
 *     ones. When there are available slots in the bad blocks table, retry to
 *     handle as many of the remaining setting bad blocks ranges as possible.
 *        +------------------------+
 *        |          S3            |
 *        +------------------------+
 *        |<----- BB_MAX_LEN ----->|
 *                                 +-----+-----+-----+---+-----+--+
 *                                 |       S1        |     S2     |
 *                                 +-----+-----+-----+---+-----+--+
 *     The above chart shows that although the first part (S3) cannot be
 *     inserted due to no space in the bad blocks table, the following ranges
 *     E1, E2 and E3 can be merged with the rest of S into fewer ranges S1
 *     and S2. Now there is one free slot in the bad blocks table.
 *        +------------------------+-----+-----+-----+---+-----+--+
 *        |           S3           |       S1        |     S2     |
 *        +------------------------+-----+-----+-----+---+-----+--+
 *     Since the bad blocks table is not full anymore, retry the original
 *     setting range S. Now the setting range S3 can be inserted into the bad
 *     blocks table with the slot previously freed by the multiple-range
 *     merge.
 * 6.2) Front merge after overwrite
 *    In the following example, in the bad blocks table, E1 is an acked bad
 *    blocks range and E2 is an unacked bad blocks range, therefore they are
 *    not able to merge into a larger range. The setting bad blocks range S
 *    is acked, therefore part of E2 can be overwritten by S.
 *                      +--------+
 *                      |    S   |                             acknowledged
 *                      +--------+                         S:       1
 *              +-------+-------------+                   E1:       1
 *              |   E1  |    E2       |                   E2:       0
 *              +-------+-------------+
 *     With the previous simplified routines, after overwriting part of E2
 *     with S, the bad blocks table should be (E3 is the remaining part of E2
 *     which is not overwritten by S),
 *                                                             acknowledged
 *              +-------+--------+----+                    S:       1
 *              |   E1  |    S   | E3 |                   E1:       1
 *              +-------+--------+----+                   E3:       0
 *     The above result is correct but not perfect. Ranges E1 and S in the
 *     bad blocks table are both acked; merging them into one larger range
 *     may occupy less bad blocks table space and make badblocks_check()
 *     faster. Therefore in such a situation, after overwriting with range S,
 *     the previous range E1 should be checked for a possible front
 *     combination. Then the ideal result can be,
 *              +----------------+----+                        acknowledged
 *              |       E1       | E3 |                   E1:       1
 *              +----------------+----+                   E3:       0
 * 6.3) Behind merge: if the already set bad blocks range E is behind the
 *    setting range S and they are adjacent. Normally we don't need to care
 *    about this because the front merge handles it while going through range
 *    S from head to tail, except for the tail part of range S. When the
 *    setting range S is fully handled, none of the above simplified routines
 *    checks whether the tail LBA of range S is adjacent to the next already
 *    set range, so the two are not merged even when it is possible.
 *        +------+
 *        |  S   |
 *        +------+
 *               +-------+
 *               |   E   |
 *               +-------+
 *    For the above special situation, when the setting range S is fully
 *    handled and the loop ends, an extra check is necessary for whether the
 *    next already set range E is right after S and mergeable.
 * 6.3.1) When the total size of ranges E and S <= BB_MAX_LEN, and their
 *    acknowledge values are the same, the setting range S can be behind
 *    merged into range E. The result is,
 *        +--------------+
 *        |       S      |
 *        +--------------+
 * 6.3.2) Otherwise these two ranges cannot merge; just insert the setting
 *     range S in front of the already set range E in the bad blocks table.
 *     The result is,
 *        +------+-------+
 *        |  S   |   E   |
 *        +------+-------+
 *
 * All the above 5 simplified situations and 3 special cases may cover 99%+ of
 * the bad block range setting conditions. Maybe some rare corner case is not
 * considered and optimized; it won't hurt if badblocks_set() fails due to no
 * space, or if some ranges are not merged to save bad blocks table space.
 *
 * Inside badblocks_set() each loop starts by jumping to the re_insert label;
 * at the start of every loop prev_badblocks() is called to find an already
 * set range which starts before or at the current setting range. Since the
 * setting bad blocks range is handled from head to tail, in most cases it is
 * unnecessary to do the binary search inside prev_badblocks(); it is possible
 * to provide a hint to prev_badblocks() for a fast path, so that the
 * expensive binary search can be avoided. In my tests with the hint to
 * prev_badblocks(), except for the first loop, all the remaining calls to
 * prev_badblocks() go into the fast path and return the correct bad blocks
 * table index immediately.
 *
 *
 * Clearing a bad blocks range from the bad block table follows a similar idea
 * to setting, but is much simpler. The only thing to be noticed is that when
 * the clearing range hits the middle of a bad block range, the existing bad
 * block range will split into two, and one more item must be added into the
 * bad block table. The simplified situations to be considered are (the
 * already set bad block ranges in the bad block table are named with prefix
 * E, and the clearing bad blocks range is named with prefix C):
 *
 * 1) A clearing range is not overlapped with any already set range in the bad
 *    block table.
 *    +-----+         |          +-----+         |          +-----+
 *    |  C  |         |          |  C  |         |          |  C  |
 *    +-----+         or         +-----+         or         +-----+
 *            +---+   |   +----+         +----+  |  +---+
 *            | E |   |   | E1 |         | E2 |  |  | E |
 *            +---+   |   +----+         +----+  |  +---+
 *    For the above situations, there is no bad block to clear and no failure
 *    happens; simply return 0.
 * 2) The clearing range hits the middle of an already set bad blocks range in
 *    the bad block table.
 *            +---+
 *            | C |
 *            +---+
 *     +-----------------+
 *     |         E       |
 *     +-----------------+
 *    In this situation if the bad block table is not full, the range E will
 *    be split into two ranges E1 and E2. The result is,
 *     +------+   +------+
 *     |  E1  |   |  E2  |
 *     +------+   +------+
 * 3) The clearing range starts exactly at the same LBA as an already set bad
 *    block range from the bad block table.
 * 3.1) Partially covered at the head part
 *         +------------+
 *         |     C      |
 *         +------------+
 *         +-----------------+
 *         |         E       |
 *         +-----------------+
 *    For this situation, the overlapped already set range will update its
 *    start LBA to the end of C and shrink its length to
 *    BB_LEN(E) - BB_LEN(C). No item is deleted from the bad block table. The
 *    result is,
 *                      +----+
 *                      | E1 |
 *                      +----+
 * 3.2) Exactly fully covered
 *         +-----------------+
 *         |         C       |
 *         +-----------------+
 *         +-----------------+
 *         |         E       |
 *         +-----------------+
 *    For this situation the whole bad blocks range E will be cleared and its
 *    corresponding item is deleted from the bad block table.
 * 4) The clearing range exactly ends at the same LBA as an already set bad
 *    block range.
 *                   +-------+
 *                   |   C   |
 *                   +-------+
 *         +-----------------+
 *         |         E       |
 *         +-----------------+
 *    For the above situation, the already set range E is updated to shrink
 *    its end to the start of C, reducing its length to
 *    BB_LEN(E) - BB_LEN(C). The result is,
 *         +---------+
 *         |    E    |
 *         +---------+
 * 5) The clearing range is partially overlapped with an already set bad block
 *    range from the bad block table.
 * 5.1) The already set bad block range is front overlapped with the clearing
 *    range.
 *         +----------+
 *         |     C    |
 *         +----------+
 *              +------------+
 *              |      E     |
 *              +------------+
 *   For such a situation, the clearing range C can be treated as two parts:
 *   the first part (C1) ends at the start LBA of range E, and the second
 *   part (C2) starts at the same LBA as range E.
 *         +----+-----+               +----+   +-----+
 *         | C1 | C2  |               | C1 |   | C2  |
 *         +----+-----+         ===>  +----+   +-----+
 *              +------------+                 +------------+
 *              |      E     |                 |      E     |
 *              +------------+                 +------------+
 *   Now the first part C1 can be handled as condition 1), and the second part
 *   C2 can be handled as condition 3.1) in the next loop.
 * 5.2) The already set bad block range is behind overlapped with the clearing
 *   range.
 *                 +----------+
 *                 |     C    |
 *                 +----------+
 *         +------------+
 *         |      E     |
 *         +------------+
 *   For such a situation, the clearing range C can be treated as two parts:
 *   the first part (C1) ends at the same end LBA as range E, and the second
 *   part (C2) starts at the end LBA of range E.
 *                 +----+-----+                 +----+    +-----+
 *                 | C1 | C2  |                 | C1 |    | C2  |
 *                 +----+-----+  ===>           +----+    +-----+
 *         +------------+               +------------+
 *         |      E     |               |      E     |
 *         +------------+               +------------+
 *   Now the first part clearing range C1 can be handled as condition 4), and
 *   the second part clearing range C2 can be handled as condition 1) in the
 *   next loop.
 *
 *   All bad blocks range clearing can be simplified into the above 5
 *   situations by only handling the head part of the clearing range in each
 *   run of the while-loop. The idea is similar to bad blocks range setting
 *   but much simpler.
 */
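
/*
 * A minimal sketch of the table entry encoding (illustrative only),
 * assuming the BB_MAKE(), BB_OFFSET(), BB_LEN(), BB_END() and BB_ACK()
 * helpers from <linux/badblocks.h>; the length is stored as len - 1 and
 * the acknowledged flag is the most significant bit:
 *
 *        u64 e = BB_MAKE(1024, 8, 1);    // 8 bad sectors starting at LBA 1024
 *
 *        BB_OFFSET(e);   // 1024, start LBA of the range
 *        BB_LEN(e);      // 8, length in sectors
 *        BB_END(e);      // 1032, first LBA after the range
 *        BB_ACK(e);      // 1, acknowledged
 */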

/*
 * Find the range which starts at-or-before 's' from the bad table. The
 * search starts from index 'hint' and stops at index 'hint_end' of the
 * bad table.
 */
static int prev_by_hint(struct badblocks *bb, sector_t s, int hint)
{
        int hint_end = hint + 2;
        u64 *p = bb->page;
        int ret = -1;

        while ((hint < hint_end) && ((hint + 1) <= bb->count) &&
               (BB_OFFSET(p[hint]) <= s)) {
                if ((hint + 1) == bb->count || BB_OFFSET(p[hint + 1]) > s) {
                        ret = hint;
                        break;
                }
                hint++;
        }

        return ret;
}

/*
 * Find the range which starts at-or-before bad->start. If 'hint' is
 * provided (hint >= 0) then search the bad table from 'hint' first. It
 * is quite probable that the wanted bad range can be found from the
 * hint index, and then the unnecessary while-loop iteration can be
 * avoided.
 */
static int prev_badblocks(struct badblocks *bb, struct badblocks_context *bad,
                          int hint)
{
        sector_t s = bad->start;
        int ret = -1;
        int lo, hi;
        u64 *p;

        if (!bb->count)
                goto out;

        if (hint >= 0) {
                ret = prev_by_hint(bb, s, hint);
                if (ret >= 0)
                        goto out;
        }

        lo = 0;
        hi = bb->count;
        p = bb->page;

        /* The following bisect search might be unnecessary */
        if (BB_OFFSET(p[lo]) > s)
                return -1;
        if (BB_OFFSET(p[hi - 1]) <= s)
                return hi - 1;

        /* Do bisect search in bad table */
        while (hi - lo > 1) {
                int mid = (lo + hi)/2;
                sector_t a = BB_OFFSET(p[mid]);

                if (a == s) {
                        ret = mid;
                        goto out;
                }

                if (a < s)
                        lo = mid;
                else
                        hi = mid;
        }

        if (BB_OFFSET(p[lo]) <= s)
                ret = lo;
out:
        return ret;
}
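
/*
 * For example (values assumed for illustration): with two table entries
 * covering [10, 20) and [40, 50), prev_badblocks() returns the index of
 * the last range which starts at-or-before bad->start:
 *
 *        bad->start = 15  ->  0    (range [10, 20) starts before 15)
 *        bad->start = 45  ->  1    (range [40, 50) starts before 45)
 *        bad->start = 5   ->  -1   (no range starts at-or-before 5)
 */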

/*
 * Return 'true' if the range indicated by 'bad' can be backward merged
 * with the bad range (from the bad table) indexed by 'behind'.
 */
static bool can_merge_behind(struct badblocks *bb,
                             struct badblocks_context *bad, int behind)
{
        sector_t sectors = bad->len;
        sector_t s = bad->start;
        u64 *p = bb->page;

        if ((s < BB_OFFSET(p[behind])) &&
            ((s + sectors) >= BB_OFFSET(p[behind])) &&
            ((BB_END(p[behind]) - s) <= BB_MAX_LEN) &&
            BB_ACK(p[behind]) == bad->ack)
                return true;
        return false;
}

/*
 * Do backward merge for the range indicated by 'bad' and the bad range
 * (from the bad table) indexed by 'behind'. The return value is the
 * number of sectors merged from bad->len.
 */
static int behind_merge(struct badblocks *bb, struct badblocks_context *bad,
                        int behind)
{
        sector_t sectors = bad->len;
        sector_t s = bad->start;
        u64 *p = bb->page;
        int merged = 0;

        WARN_ON(s >= BB_OFFSET(p[behind]));
        WARN_ON((s + sectors) < BB_OFFSET(p[behind]));

        if (s < BB_OFFSET(p[behind])) {
                merged = BB_OFFSET(p[behind]) - s;
                p[behind] = BB_MAKE(s, BB_LEN(p[behind]) + merged, bad->ack);

                WARN_ON((BB_LEN(p[behind]) + merged) >= BB_MAX_LEN);
        }

        return merged;
}
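
/*
 * A worked example (values assumed for illustration): if p[behind]
 * covers [100, 120) and 'bad' is {start = 90, len = 40}, the entry is
 * extended backward to [90, 120) and behind_merge() returns 10, so the
 * caller advances the setting range to start at LBA 100.
 */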

/*
 * Return 'true' if the range indicated by 'bad' can be forward
 * merged with the bad range (from the bad table) indexed by 'prev'.
 */
static bool can_merge_front(struct badblocks *bb, int prev,
                            struct badblocks_context *bad)
{
        sector_t s = bad->start;
        u64 *p = bb->page;

        if (BB_ACK(p[prev]) == bad->ack &&
            (s < BB_END(p[prev]) ||
             (s == BB_END(p[prev]) && (BB_LEN(p[prev]) < BB_MAX_LEN))))
                return true;
        return false;
}

/*
 * Do forward merge for the range indicated by 'bad' and the bad range
 * (from the bad table) indexed by 'prev'. The return value is the
 * number of sectors merged from bad->len.
 */
static int front_merge(struct badblocks *bb, int prev, struct badblocks_context *bad)
{
        sector_t sectors = bad->len;
        sector_t s = bad->start;
        u64 *p = bb->page;
        int merged = 0;

        WARN_ON(s > BB_END(p[prev]));

        if (s < BB_END(p[prev])) {
                merged = min_t(sector_t, sectors, BB_END(p[prev]) - s);
        } else {
                merged = min_t(sector_t, sectors, BB_MAX_LEN - BB_LEN(p[prev]));
                if ((prev + 1) < bb->count &&
                    merged > (BB_OFFSET(p[prev + 1]) - BB_END(p[prev]))) {
                        merged = BB_OFFSET(p[prev + 1]) - BB_END(p[prev]);
                }

                p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
                                  BB_LEN(p[prev]) + merged, bad->ack);
        }

        return merged;
}
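
/*
 * A worked example (values assumed for illustration): if p[prev] covers
 * [100, 110) and 'bad' is {start = 110, len = 600} with a matching ack
 * value and no adjacent next entry, front_merge() extends p[prev] up to
 * the BB_MAX_LEN limit, i.e. to [100, 612), and returns 502; the
 * remaining 98 sectors are left for the caller's next loop.
 */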

/*
 * 'Combine' is a special case which can_merge_front() is not able to
 * handle: if a bad range (indexed by 'prev' from the bad table) exactly
 * starts at bad->start, and the bad range ahead of 'prev' (indexed by
 * 'prev - 1' from the bad table) exactly ends where 'prev' starts, and
 * the sum of their lengths does not exceed the BB_MAX_LEN limitation,
 * then these two bad ranges (from the bad table) can be combined.
 *
 * Return 'true' if the bad ranges indexed by 'prev' and 'prev - 1' from
 * the bad table can be combined.
 */
static bool can_combine_front(struct badblocks *bb, int prev,
                              struct badblocks_context *bad)
{
        u64 *p = bb->page;

        if ((prev > 0) &&
            (BB_OFFSET(p[prev]) == bad->start) &&
            (BB_END(p[prev - 1]) == BB_OFFSET(p[prev])) &&
            (BB_LEN(p[prev - 1]) + BB_LEN(p[prev]) <= BB_MAX_LEN) &&
            (BB_ACK(p[prev - 1]) == BB_ACK(p[prev])))
                return true;
        return false;
}

/*
 * Combine the bad ranges indexed by 'prev' and 'prev - 1' (from the bad
 * table) into one larger bad range, and the new range is indexed by
 * 'prev - 1'.
 * The caller of front_combine() will decrease bb->count, therefore
 * it is unnecessary to clear p[prev] after the front combination.
 */
static void front_combine(struct badblocks *bb, int prev)
{
        u64 *p = bb->page;

        p[prev - 1] = BB_MAKE(BB_OFFSET(p[prev - 1]),
                              BB_LEN(p[prev - 1]) + BB_LEN(p[prev]),
                              BB_ACK(p[prev]));
        if ((prev + 1) < bb->count)
                memmove(p + prev, p + prev + 1, (bb->count - prev - 1) * 8);
}
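
/*
 * A worked example (values assumed for illustration): with adjacent
 * entries p[prev - 1] = [100, 150) and p[prev] = [150, 180) sharing the
 * same ack value, front_combine() rewrites p[prev - 1] as [100, 180)
 * and shifts the following entries left; the caller then decreases
 * bb->count by one.
 */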

/*
 * Return 'true' if the range indicated by 'bad' starts within the bad
 * range (from the bad table) indexed by 'front', i.e. the two ranges
 * forward overlap. Note that the bad range indexed by 'front' does not
 * necessarily cover the whole range indicated by 'bad'.
 */
static bool overlap_front(struct badblocks *bb, int front,
                          struct badblocks_context *bad)
{
        u64 *p = bb->page;

        if (bad->start >= BB_OFFSET(p[front]) &&
            bad->start < BB_END(p[front]))
                return true;
        return false;
}

/*
 * Return 'true' if the range indicated by 'bad' starts before the bad
 * range (from the bad table) indexed by 'behind' and extends into it,
 * i.e. the two ranges backward overlap.
 */
static bool overlap_behind(struct badblocks *bb, struct badblocks_context *bad,
                           int behind)
{
        u64 *p = bb->page;

        if (bad->start < BB_OFFSET(p[behind]) &&
            (bad->start + bad->len) > BB_OFFSET(p[behind]))
                return true;
        return false;
}

/*
 * Return 'true' if the range indicated by 'bad' can overwrite the bad
 * range (from the bad table) indexed by 'prev'.
 *
 * The range indicated by 'bad' can overwrite the bad range indexed by
 * 'prev' when,
 * 1) The whole range indicated by 'bad' can cover partial or whole bad
 *    range (from the bad table) indexed by 'prev'.
 * 2) The ack value of 'bad' is larger than the ack value of the bad
 *    range 'prev'.
 *
 * If the overwrite doesn't cover the whole bad range (from the bad
 * table) indexed by 'prev', new range(s) might be split from the
 * existing bad range,
 * 1) The overwrite covers the head or tail part of the existing bad
 *    range: 1 extra bad range will be split off and added into the bad
 *    table.
 * 2) The overwrite covers the middle of the existing bad range: 2 extra
 *    bad ranges will be split off (ahead of and after the overwritten
 *    range) and added into the bad table.
 * The number of extra split ranges of the overwrite is stored in
 * 'extra' and returned to the caller.
 */
static bool can_front_overwrite(struct badblocks *bb, int prev,
                                struct badblocks_context *bad, int *extra)
{
        u64 *p = bb->page;
        int len;

        WARN_ON(!overlap_front(bb, prev, bad));

        if (BB_ACK(p[prev]) >= bad->ack)
                return false;

        if (BB_END(p[prev]) <= (bad->start + bad->len)) {
                len = BB_END(p[prev]) - bad->start;
                if (BB_OFFSET(p[prev]) == bad->start)
                        *extra = 0;
                else
                        *extra = 1;

                bad->len = len;
        } else {
                if (BB_OFFSET(p[prev]) == bad->start)
                        *extra = 1;
                else
                        /*
                         * The prev range will be split into two parts
                         * around the overwritten range, so two extra
                         * slots are needed from the bad table.
                         */
                        *extra = 2;
        }

        if ((bb->count + (*extra)) >= MAX_BADBLOCKS)
                return false;

        return true;
}

/*
 * Do the overwrite from the range indicated by 'bad' to the bad range
 * (from the bad table) indexed by 'prev'.
 * The previously called can_front_overwrite() has provided how many
 * extra bad range(s) might be split off and added into the bad table.
 * All the splitting cases in the bad table are handled here.
 */
static int front_overwrite(struct badblocks *bb, int prev,
                           struct badblocks_context *bad, int extra)
{
        u64 *p = bb->page;
        sector_t orig_end = BB_END(p[prev]);
        int orig_ack = BB_ACK(p[prev]);

        switch (extra) {
        case 0:
                p[prev] = BB_MAKE(BB_OFFSET(p[prev]), BB_LEN(p[prev]),
                                  bad->ack);
                break;
        case 1:
                if (BB_OFFSET(p[prev]) == bad->start) {
                        p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
                                          bad->len, bad->ack);
                        memmove(p + prev + 2, p + prev + 1,
                                (bb->count - prev - 1) * 8);
                        p[prev + 1] = BB_MAKE(bad->start + bad->len,
                                              orig_end - BB_END(p[prev]),
                                              orig_ack);
                } else {
                        p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
                                          bad->start - BB_OFFSET(p[prev]),
                                          orig_ack);
                        /*
                         * prev + 2 -> prev + 1 + 1, which is for,
                         * 1) prev + 1: the slot right after 'prev'
                         * 2) + 1: one more slot for extra being 1.
                         */
                        memmove(p + prev + 2, p + prev + 1,
                                (bb->count - prev - 1) * 8);
                        p[prev + 1] = BB_MAKE(bad->start, bad->len, bad->ack);
                }
                break;
        case 2:
                p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
                                  bad->start - BB_OFFSET(p[prev]),
                                  orig_ack);
                /*
                 * prev + 3 -> prev + 1 + 2, which is for,
                 * 1) prev + 1: the slot right after 'prev'
                 * 2) + 2: two more slots for extra being 2.
                 */
                memmove(p + prev + 3, p + prev + 1,
                        (bb->count - prev - 1) * 8);
                p[prev + 1] = BB_MAKE(bad->start, bad->len, bad->ack);
                p[prev + 2] = BB_MAKE(BB_END(p[prev + 1]),
                                      orig_end - BB_END(p[prev + 1]),
                                      orig_ack);
                break;
        default:
                break;
        }

        return bad->len;
}
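
/*
 * A worked example of the 'extra == 2' case (values assumed for
 * illustration): with p[prev] = [100, 200) unacked and an acked 'bad' =
 * {start = 120, len = 30}, the single entry is replaced by three:
 *
 *        p[prev]     = [100, 120)  unacked
 *        p[prev + 1] = [120, 150)  acked (the overwritten range)
 *        p[prev + 2] = [150, 200)  unacked
 *
 * The caller then adds 'extra' (2 here) to bb->count.
 */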

/*
 * Explicitly insert a range indicated by 'bad' into the bad table, where
 * the location is indexed by 'at'.
 */
static int insert_at(struct badblocks *bb, int at, struct badblocks_context *bad)
{
        u64 *p = bb->page;
        int len;

        WARN_ON(badblocks_full(bb));

        len = min_t(sector_t, bad->len, BB_MAX_LEN);
        if (at < bb->count)
                memmove(p + at + 1, p + at, (bb->count - at) * 8);
        p[at] = BB_MAKE(bad->start, len, bad->ack);

        return len;
}
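
/*
 * Note that insert_at() caps the inserted length at BB_MAX_LEN (512
 * sectors) and returns the number of sectors actually inserted. For
 * example (values assumed for illustration), inserting 'bad' =
 * {start = 0, len = 1000} writes one entry [0, 512) and returns 512, so
 * the caller loops again for the remaining 488 sectors.
 */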

static void badblocks_update_acked(struct badblocks *bb)
{
        bool unacked = false;
        u64 *p = bb->page;
        int i;

        if (!bb->unacked_exist)
                return;

        for (i = 0; i < bb->count; i++) {
                if (!BB_ACK(p[i])) {
                        unacked = true;
                        break;
                }
        }

        if (!unacked)
                bb->unacked_exist = 0;
}

/* Do the exact work to set a bad block range into the bad block table */
static int _badblocks_set(struct badblocks *bb, sector_t s, int sectors,
                          int acknowledged)
{
        int retried = 0, space_desired = 0;
        int orig_len, len = 0, added = 0;
        struct badblocks_context bad;
        int prev = -1, hint = -1;
        sector_t orig_start;
        unsigned long flags;
        int rv = 0;
        u64 *p;

        if (bb->shift < 0)
                /* badblocks are disabled */
                return 1;

        if (sectors == 0)
                /* Invalid sectors number */
                return 1;

        if (bb->shift) {
                /* round the start down, and the end up */
                sector_t next = s + sectors;

                s = rounddown(s, bb->shift);
                next = roundup(next, bb->shift);
                sectors = next - s;
        }

        write_seqlock_irqsave(&bb->lock, flags);

        orig_start = s;
        orig_len = sectors;
        bad.ack = acknowledged;
        p = bb->page;

re_insert:
        bad.start = s;
        bad.len = sectors;
        len = 0;

        if (badblocks_empty(bb)) {
                len = insert_at(bb, 0, &bad);
                bb->count++;
                added++;
                goto update_sectors;
        }

        prev = prev_badblocks(bb, &bad, hint);

        /* start before all badblocks */
        if (prev < 0) {
                if (!badblocks_full(bb)) {
                        /* insert on the first */
                        if (bad.len > (BB_OFFSET(p[0]) - bad.start))
                                bad.len = BB_OFFSET(p[0]) - bad.start;
                        len = insert_at(bb, 0, &bad);
                        bb->count++;
                        added++;
                        hint = 0;
                        goto update_sectors;
                }

                /* No space, try to merge */
                if (overlap_behind(bb, &bad, 0)) {
                        if (can_merge_behind(bb, &bad, 0)) {
                                len = behind_merge(bb, &bad, 0);
                                added++;
                        } else {
                                len = BB_OFFSET(p[0]) - s;
                                space_desired = 1;
                        }
                        hint = 0;
                        goto update_sectors;
                }

                /* no table space, give up */
                goto out;
        }

        /* in case p[prev-1] can be merged with p[prev] */
        if (can_combine_front(bb, prev, &bad)) {
                front_combine(bb, prev);
                bb->count--;
                added++;
                hint = prev;
                goto update_sectors;
        }

        if (overlap_front(bb, prev, &bad)) {
                if (can_merge_front(bb, prev, &bad)) {
                        len = front_merge(bb, prev, &bad);
                        added++;
                } else {
                        int extra = 0;

                        if (!can_front_overwrite(bb, prev, &bad, &extra)) {
                                len = min_t(sector_t,
                                            BB_END(p[prev]) - s, sectors);
                                hint = prev;
                                goto update_sectors;
                        }

                        len = front_overwrite(bb, prev, &bad, extra);
                        added++;
                        bb->count += extra;

                        if (can_combine_front(bb, prev, &bad)) {
                                front_combine(bb, prev);
                                bb->count--;
                        }
                }
                hint = prev;
                goto update_sectors;
        }

        if (can_merge_front(bb, prev, &bad)) {
                len = front_merge(bb, prev, &bad);
                added++;
                hint = prev;
                goto update_sectors;
        }

        /* if no space in table, still try to merge in the covered range */
        if (badblocks_full(bb)) {
                /* skip the cannot-merge range */
                if (((prev + 1) < bb->count) &&
                    overlap_behind(bb, &bad, prev + 1) &&
                    ((s + sectors) >= BB_END(p[prev + 1]))) {
                        len = BB_END(p[prev + 1]) - s;
                        hint = prev + 1;
                        goto update_sectors;
                }

                /* no retry anymore */
                len = sectors;
                space_desired = 1;
                hint = -1;
                goto update_sectors;
        }

        /* cannot merge and there is space in bad table */
        if ((prev + 1) < bb->count &&
            overlap_behind(bb, &bad, prev + 1))
                bad.len = min_t(sector_t,
                                bad.len, BB_OFFSET(p[prev + 1]) - bad.start);

        len = insert_at(bb, prev + 1, &bad);
        bb->count++;
        added++;
        hint = prev + 1;

update_sectors:
        s += len;
        sectors -= len;

        if (sectors > 0)
                goto re_insert;

        WARN_ON(sectors < 0);

        /*
         * Check whether the following already set range can be
         * merged. (prev < 0) condition is not handled here,
         * because it's already complicated enough.
         */
        if (prev >= 0 &&
            (prev + 1) < bb->count &&
            BB_END(p[prev]) == BB_OFFSET(p[prev + 1]) &&
            (BB_LEN(p[prev]) + BB_LEN(p[prev + 1])) <= BB_MAX_LEN &&
            BB_ACK(p[prev]) == BB_ACK(p[prev + 1])) {
                p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
                                  BB_LEN(p[prev]) + BB_LEN(p[prev + 1]),
                                  BB_ACK(p[prev]));

                if ((prev + 2) < bb->count)
                        memmove(p + prev + 1, p + prev + 2,
                                (bb->count - (prev + 2)) * 8);
                bb->count--;
        }

        if (space_desired && !badblocks_full(bb)) {
                s = orig_start;
                sectors = orig_len;
                space_desired = 0;
                if (retried++ < 3)
                        goto re_insert;
        }

out:
        if (added) {
                set_changed(bb);

                if (!acknowledged)
                        bb->unacked_exist = 1;
                else
                        badblocks_update_acked(bb);
        }

        write_sequnlock_irqrestore(&bb->lock, flags);

        if (!added)
                rv = 1;

        return rv;
}
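
/*
 * A worked end-to-end example (values assumed for illustration):
 * calling badblocks_set(bb, 0, 1024, 1) on an empty table needs two
 * passes of the re_insert loop, because each entry is capped at
 * BB_MAX_LEN sectors:
 *
 *        pass 1: insert [0, 512),     s = 512,  512 sectors remain
 *        pass 2: insert [512, 1024),  s = 1024, 0 sectors remain, done
 *
 * The table then holds two acked entries and the function returns 0.
 */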

/*
 * Clear a bad block range from the bad block table which is front
 * overlapped with the clearing range. The return value is how many
 * sectors from an already set bad block range are cleared. If the whole
 * bad block range is covered by the clearing range and fully cleared,
 * 'deleted' is set to 1 for the caller to reduce bb->count.
 */
static int front_clear(struct badblocks *bb, int prev,
                       struct badblocks_context *bad, int *deleted)
{
        sector_t sectors = bad->len;
        sector_t s = bad->start;
        u64 *p = bb->page;
        int cleared = 0;

        *deleted = 0;
        if (s == BB_OFFSET(p[prev])) {
                if (BB_LEN(p[prev]) > sectors) {
                        p[prev] = BB_MAKE(BB_OFFSET(p[prev]) + sectors,
                                          BB_LEN(p[prev]) - sectors,
                                          BB_ACK(p[prev]));
                        cleared = sectors;
                } else {
                        /* BB_LEN(p[prev]) <= sectors */
                        cleared = BB_LEN(p[prev]);
                        if ((prev + 1) < bb->count)
                                memmove(p + prev, p + prev + 1,
                                        (bb->count - prev - 1) * 8);
                        *deleted = 1;
                }
        } else if (s > BB_OFFSET(p[prev])) {
                if (BB_END(p[prev]) <= (s + sectors)) {
                        cleared = BB_END(p[prev]) - s;
                        p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
                                          s - BB_OFFSET(p[prev]),
                                          BB_ACK(p[prev]));
                } else {
                        /* Splitting is handled in front_splitting_clear() */
                        BUG();
                }
        }

        return cleared;
}

/*
 * Handle the condition that the clearing range hits the middle of an
 * already set bad block range from the bad block table. In this
 * condition the existing bad block range is split into two after the
 * middle part is cleared.
 */
static int front_splitting_clear(struct badblocks *bb, int prev,
                                 struct badblocks_context *bad)
{
        u64 *p = bb->page;
        u64 end = BB_END(p[prev]);
        int ack = BB_ACK(p[prev]);
        sector_t sectors = bad->len;
        sector_t s = bad->start;

        p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
                          s - BB_OFFSET(p[prev]),
                          ack);
        memmove(p + prev + 2, p + prev + 1, (bb->count - prev - 1) * 8);
        p[prev + 1] = BB_MAKE(s + sectors, end - s - sectors, ack);
        return sectors;
}
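
/*
 * A worked example (values assumed for illustration): with p[prev] =
 * [100, 200) and a clearing range {start = 120, len = 30}, the entry is
 * split into [100, 120) and [150, 200), and front_splitting_clear()
 * returns 30; the caller increases bb->count for the extra entry.
 */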

/* Do the exact work to clear a bad block range from the bad block table */
static int _badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
{
        struct badblocks_context bad;
        int prev = -1, hint = -1;
        int len = 0, cleared = 0;
        int rv = 0;
        u64 *p;

        if (bb->shift < 0)
                /* badblocks are disabled */
                return 1;

        if (sectors == 0)
                /* Invalid sectors number */
                return 1;

        if (bb->shift) {
                sector_t target;

                /* When clearing we round the start up and the end down.
                 * This should not matter as the shift should align with
                 * the block size and no rounding should ever be needed.
                 * However it is better to think a block is bad when it
                 * isn't than to think a block is not bad when it is.
                 */
                target = s + sectors;
                s = roundup(s, bb->shift);
                target = rounddown(target, bb->shift);
                sectors = target - s;
        }

        write_seqlock_irq(&bb->lock);

        bad.ack = true;
        p = bb->page;

re_clear:
        bad.start = s;
        bad.len = sectors;

        if (badblocks_empty(bb)) {
                len = sectors;
                cleared++;
                goto update_sectors;
        }

        prev = prev_badblocks(bb, &bad, hint);

        /* Start before all badblocks */
        if (prev < 0) {
                if (overlap_behind(bb, &bad, 0)) {
                        len = BB_OFFSET(p[0]) - s;
                        hint = 0;
                } else {
                        len = sectors;
                }
                /*
                 * Both situations clear a non-bad range and
                 * should be treated as successful
                 */
                cleared++;
                goto update_sectors;
        }

        /* Start after all badblocks */
        if ((prev + 1) >= bb->count && !overlap_front(bb, prev, &bad)) {
                len = sectors;
                cleared++;
                goto update_sectors;
        }

        /* Clear will split a bad record but the table is full */
        if (badblocks_full(bb) && (BB_OFFSET(p[prev]) < bad.start) &&
            (BB_END(p[prev]) > (bad.start + sectors))) {
                len = sectors;
                goto update_sectors;
        }

        if (overlap_front(bb, prev, &bad)) {
                if ((BB_OFFSET(p[prev]) < bad.start) &&
                    (BB_END(p[prev]) > (bad.start + bad.len))) {
                        /* Splitting */
                        if ((bb->count + 1) < MAX_BADBLOCKS) {
                                len = front_splitting_clear(bb, prev, &bad);
                                bb->count += 1;
                                cleared++;
                        } else {
                                /* No space to split, give up */
                                len = sectors;
                        }
                } else {
                        int deleted = 0;

                        len = front_clear(bb, prev, &bad, &deleted);
                        bb->count -= deleted;
                        cleared++;
                        hint = prev;
                }

                goto update_sectors;
        }

        /* Not front overlap, but behind overlap */
        if ((prev + 1) < bb->count && overlap_behind(bb, &bad, prev + 1)) {
                len = BB_OFFSET(p[prev + 1]) - bad.start;
                hint = prev + 1;
                /* Clearing a non-bad range should be treated as successful */
                cleared++;
                goto update_sectors;
        }

        /* Does not cover any badblocks range in the table */
        len = sectors;
        /* Clearing a non-bad range should be treated as successful */
        cleared++;

update_sectors:
        s += len;
        sectors -= len;

        if (sectors > 0)
                goto re_clear;

        WARN_ON(sectors < 0);

        if (cleared) {
                badblocks_update_acked(bb);
                set_changed(bb);
        }

        write_sequnlock_irq(&bb->lock);

        if (!cleared)
                rv = 1;

        return rv;
}

/* Do the exact work to check a bad block range from the bad block table */
static int _badblocks_check(struct badblocks *bb, sector_t s, int sectors,
                            sector_t *first_bad, int *bad_sectors)
{
        int unacked_badblocks, acked_badblocks;
        int prev = -1, hint = -1, set = 0;
        struct badblocks_context bad;
        unsigned int seq;
        int len, rv;
        u64 *p;

        WARN_ON(bb->shift < 0 || sectors == 0);

        if (bb->shift > 0) {
                sector_t target;

                /* round the start down, and the end up */
                target = s + sectors;
                s = rounddown(s, bb->shift);
                target = roundup(target, bb->shift);
                sectors = target - s;
        }

retry:
        seq = read_seqbegin(&bb->lock);

        p = bb->page;
        unacked_badblocks = 0;
        acked_badblocks = 0;

re_check:
        bad.start = s;
        bad.len = sectors;

        if (badblocks_empty(bb)) {
                len = sectors;
                goto update_sectors;
        }

        prev = prev_badblocks(bb, &bad, hint);

        /* start after all badblocks */
        if ((prev >= 0) &&
            ((prev + 1) >= bb->count) && !overlap_front(bb, prev, &bad)) {
                len = sectors;
                goto update_sectors;
        }

        /* Overlapped with front badblocks record */
        if ((prev >= 0) && overlap_front(bb, prev, &bad)) {
                if (BB_ACK(p[prev]))
                        acked_badblocks++;
                else
                        unacked_badblocks++;

                if (BB_END(p[prev]) >= (s + sectors))
                        len = sectors;
                else
                        len = BB_END(p[prev]) - s;

                if (set == 0) {
                        *first_bad = BB_OFFSET(p[prev]);
                        *bad_sectors = BB_LEN(p[prev]);
                        set = 1;
                }
                goto update_sectors;
        }

        /* Not front overlap, but behind overlap */
        if ((prev + 1) < bb->count && overlap_behind(bb, &bad, prev + 1)) {
                len = BB_OFFSET(p[prev + 1]) - bad.start;
                hint = prev + 1;
                goto update_sectors;
        }

        /* Does not cover any badblocks range in the table */
        len = sectors;

update_sectors:
        s += len;
        sectors -= len;

        if (sectors > 0)
                goto re_check;

        WARN_ON(sectors < 0);

        if (unacked_badblocks > 0)
                rv = -1;
        else if (acked_badblocks > 0)
                rv = 1;
        else
                rv = 0;

        if (read_seqretry(&bb->lock, seq))
                goto retry;

        return rv;
}
1372
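/*
 * A minimal sketch of the seqlock read-retry pattern _badblocks_check()
 * is built on: readers never block writers, they simply redo the read
 * if the sequence count moved underneath them. snapshot_state() is a
 * hypothetical stand-in for reading the table.
 */
static int snapshot_state(struct badblocks *table);

static int seqlock_read_sketch(struct badblocks *table)
{
        unsigned int seq;
        int state;

        do {
                seq = read_seqbegin(&table->lock);
                state = snapshot_state(table);
        } while (read_seqretry(&table->lock, seq));

        return state;
}
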
1373/**
1374 * badblocks_check() - check a given range for bad sectors
1375 * @bb:         the badblocks structure that holds all badblock information
1376 * @s:          sector (start) at which to check for badblocks
1377 * @sectors:    number of sectors to check for badblocks
1378 * @first_bad:  pointer to store location of the first badblock
1379 * @bad_sectors: pointer to store the length of the first bad range found
1380 *
1381 * We can record which blocks on each device are 'bad' and so just
1382 * fail those blocks, or that stripe, rather than the whole device.
1383 * Entries in the bad-block table are 64 bits wide.  Each comprises:
1384 * - Length of bad-range, in sectors: 0-511 for lengths 1-512
1385 * - Start of bad-range, as a sector offset: 54 bits (allows 8 exbibytes).
1386 *   A 'shift' can be set so that larger blocks are tracked and
1387 *   consequently larger devices can be covered.
1388 * - 'Acknowledged' flag: 1 bit, the most significant bit.
1389 *
1390 * Locking of the bad-block table uses a seqlock so badblocks_check
1391 * might need to retry if it is very unlucky.
1392 * We will sometimes want to check for bad blocks in a bi_end_io function,
1393 * so we use the write_seqlock_irq variant.
1394 *
1395 * When looking for a bad block we specify a range and want to
1396 * know if any block in the range is bad.  So we binary-search
1397 * to the last range that starts at or before the given endpoint
1398 * (i.e. before the sector just after the target range), then see
1399 * if it ends after the given start.
1400 *
1401 * Return:
1402 *  0: there are no known bad blocks in the range
1403 *  1: there are known bad blocks which are all acknowledged
1404 * -1: there are bad blocks which have not yet been acknowledged in metadata.
1405 * plus the start/length of the first bad section we overlap.
1406 */
1407int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
1408                        sector_t *first_bad, int *bad_sectors)
1409{
1410        return _badblocks_check(bb, s, sectors, first_bad, bad_sectors);
1411}
1412EXPORT_SYMBOL_GPL(badblocks_check);
1413
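/*
 * A worked example of the 64-bit entry encoding, using the BB_* macros
 * from <linux/badblocks.h>, plus how a caller reads badblocks_check()'s
 * result; the values are illustrative.
 */
static void bb_entry_example(struct badblocks *bb)
{
        u64 e = BB_MAKE(4096, 16, 1);   /* start 4096, 16 sectors, acked */
        sector_t first_bad;
        int bad_sectors;

        WARN_ON(BB_OFFSET(e) != 4096);  /* bits 9-62: start, shifted units */
        WARN_ON(BB_LEN(e) != 16);       /* bits 0-8 store len - 1: 1..512 */
        WARN_ON(BB_ACK(e) != 1);        /* bit 63: acknowledged flag */

        /* 0: clean, 1: only acked bad blocks, -1: unacked bad blocks */
        if (badblocks_check(bb, 4096, 16, &first_bad, &bad_sectors) == -1)
                pr_debug("unacked bad range at %llu+%d\n",
                         (unsigned long long)first_bad, bad_sectors);
}
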
1414/**
1415 * badblocks_set() - Add a range of bad blocks to the table.
1416 * @bb:         the badblocks structure that holds all badblock information
1417 * @s:          first sector to mark as bad
1418 * @sectors:    number of sectors to mark as bad
1419 * @acknowledged: whether to mark the bad sectors as acknowledged
1420 *
1421 * This might extend the table, or might contract it if two adjacent ranges
1422 * can be merged. We binary-search to find the 'insertion' point, then
1423 * decide how best to handle it.
1424 *
1425 * Return:
1426 *  0: success
1427 *  1: failed to set badblocks (out of space)
1428 */
1429int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
1430                        int acknowledged)
1431{
1432        return _badblocks_set(bb, s, sectors, acknowledged);
1433}
1434EXPORT_SYMBOL_GPL(badblocks_set);
1435
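/*
 * A minimal sketch of a typical badblocks_set() call from a driver's
 * media-error path. Passing acknowledged == 0 records the range as
 * unacked, so badblocks_check() reports -1 for it until the table is
 * persisted and ack_all_badblocks() runs. The helper name is
 * hypothetical.
 */
static void record_media_error_sketch(struct badblocks *bb, sector_t s,
                                      int sectors)
{
        if (badblocks_set(bb, s, sectors, 0))
                pr_warn("badblocks table full, %llu+%d not recorded\n",
                        (unsigned long long)s, sectors);
}
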
1436/**
1437 * badblocks_clear() - Remove a range of bad blocks from the table.
1438 * @bb:         the badblocks structure that holds all badblock information
1439 * @s:          first sector to clear
1440 * @sectors:    number of sectors to clear
1441 *
1442 * This may involve extending the table if we split a region,
1443 * but it must not fail.  So if the table becomes full, we just
1444 * drop the remove request.
1445 *
1446 * Return:
1447 *  0: success
1448 *  1: failed to clear badblocks
1449 */
1450int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
1451{
1452        return _badblocks_clear(bb, s, sectors);
1453}
1454EXPORT_SYMBOL_GPL(badblocks_clear);
1455
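/*
 * A worked example: clearing the middle of a recorded range splits it,
 * which is why badblocks_clear() may need a new table slot. With one
 * entry covering sectors 100..199,
 *
 *      badblocks_clear(bb, 140, 20);
 *
 * leaves two entries, 100..139 and 160..199. If no free slot exists for
 * the second piece, the rest of the clear request is simply dropped, as
 * the comment above notes.
 */
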
1456/**
1457 * ack_all_badblocks() - Acknowledge all bad blocks in a list.
1458 * @bb:         the badblocks structure that holds all badblock information
1459 *
1460 * This only succeeds if ->changed is clear.  It is used by
1461 * in-kernel metadata updates
1462 */
1463void ack_all_badblocks(struct badblocks *bb)
1464{
1465        if (bb->page == NULL || bb->changed)
1466                /* no point even trying */
1467                return;
1468        write_seqlock_irq(&bb->lock);
1469
1470        if (bb->changed == 0 && bb->unacked_exist) {
1471                u64 *p = bb->page;
1472                int i;
1473
1474                for (i = 0; i < bb->count ; i++) {
1475                        if (!BB_ACK(p[i])) {
1476                                sector_t start = BB_OFFSET(p[i]);
1477                                int len = BB_LEN(p[i]);
1478
1479                                p[i] = BB_MAKE(start, len, 1);
1480                        }
1481                }
1482                bb->unacked_exist = 0;
1483        }
1484        write_sequnlock_irq(&bb->lock);
1485}
1486EXPORT_SYMBOL_GPL(ack_all_badblocks);
1487
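/*
 * A minimal sketch of the metadata-writer pattern ack_all_badblocks()
 * is meant for, modelled on how md drives it; write_bb_metadata() is a
 * hypothetical stand-in. ->changed is cleared before writing so that a
 * new error arriving mid-write keeps the table unacked and forces
 * another metadata update.
 */
static void write_bb_metadata(struct badblocks *bb);

static void sync_badblocks_sketch(struct badblocks *bb)
{
        if (bb->changed) {
                bb->changed = 0;
                write_bb_metadata(bb);          /* persist the table */
                ack_all_badblocks(bb);          /* no-op if ->changed rose */
        }
}
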
1488/**
1489 * badblocks_show() - sysfs access to bad-blocks list
1490 * @bb:         the badblocks structure that holds all badblock information
1491 * @page:       buffer received from sysfs
1492 * @unack:      whether to show unacknowledged badblocks
1493 *
1494 * Return:
1495 *  Length of returned data
1496 */
1497ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
1498{
1499        size_t len;
1500        int i;
1501        u64 *p = bb->page;
1502        unsigned int seq;
1503
1504        if (bb->shift < 0)
1505                return 0;
1506
1507retry:
1508        seq = read_seqbegin(&bb->lock);
1509
1510        len = 0;
1511        i = 0;
1512
1513        while (len < PAGE_SIZE && i < bb->count) {
1514                sector_t s = BB_OFFSET(p[i]);
1515                unsigned int length = BB_LEN(p[i]);
1516                int ack = BB_ACK(p[i]);
1517
1518                i++;
1519
1520                if (unack && ack)
1521                        continue;
1522
1523                len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
1524                                (unsigned long long)s << bb->shift,
1525                                length << bb->shift);
1526        }
1527        if (unack && len == 0)
1528                bb->unacked_exist = 0;
1529
1530        if (read_seqretry(&bb->lock, seq))
1531                goto retry;
1532
1533        return len;
1534}
1535EXPORT_SYMBOL_GPL(badblocks_show);
1536
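/*
 * A worked example: badblocks_show() emits one "start length" pair per
 * line, converted back to plain sectors via bb->shift. A table holding
 * acked ranges 512..519 and 1024..1031 reads back as:
 *
 *      512 8
 *      1024 8
 */
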
1537/**
1538 * badblocks_store() - sysfs access to bad-blocks list
1539 * @bb:         the badblocks structure that holds all badblock information
1540 * @page:       buffer received from sysfs
1541 * @len:        length of data received from sysfs
1542 * @unack:      whether to store the new badblocks as unacknowledged
1543 *
1544 * Return:
1545 *  Length of the buffer processed or -ve error.
1546 */
1547ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
1548                        int unack)
1549{
1550        unsigned long long sector;
1551        int length;
1552        char newline;
1553
1554        switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
1555        case 3:
1556                if (newline != '\n')
1557                        return -EINVAL;
1558                fallthrough;
1559        case 2:
1560                if (length <= 0)
1561                        return -EINVAL;
1562                break;
1563        default:
1564                return -EINVAL;
1565        }
1566
1567        if (badblocks_set(bb, sector, length, !unack))
1568                return -ENOSPC;
1569        else
1570                return len;
1571}
1572EXPORT_SYMBOL_GPL(badblocks_store);
1573
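/*
 * A worked example: badblocks_store() accepts "sector length", with an
 * optional trailing newline, so from userspace (the path below is
 * illustrative):
 *
 *      echo "1040 8" > /sys/block/pmem0/badblocks
 *
 * A non-positive length or trailing junk yields -EINVAL; a full table
 * yields -ENOSPC.
 */
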
1574static int __badblocks_init(struct device *dev, struct badblocks *bb,
1575                int enable)
1576{
1577        bb->dev = dev;
1578        bb->count = 0;
1579        if (enable)
1580                bb->shift = 0;
1581        else
1582                bb->shift = -1;
1583        if (dev)
1584                bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
1585        else
1586                bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
1587        if (!bb->page) {
1588                bb->shift = -1;
1589                return -ENOMEM;
1590        }
1591        seqlock_init(&bb->lock);
1592
1593        return 0;
1594}
1595
1596/**
1597 * badblocks_init() - initialize the badblocks structure
1598 * @bb:         the badblocks structure that holds all badblock information
1599 * @enable:     whether to enable badblocks accounting
1600 *
1601 * Return:
1602 *  0: success
1603 *  -ve errno: on error
1604 */
1605int badblocks_init(struct badblocks *bb, int enable)
1606{
1607        return __badblocks_init(NULL, bb, enable);
1608}
1609EXPORT_SYMBOL_GPL(badblocks_init);
1610
1611int devm_init_badblocks(struct device *dev, struct badblocks *bb)
1612{
1613        if (!bb)
1614                return -EINVAL;
1615        return __badblocks_init(dev, bb, 1);
1616}
1617EXPORT_SYMBOL_GPL(devm_init_badblocks);
1618
1619/**
1620 * badblocks_exit() - free the badblocks structure
1621 * @bb:         the badblocks structure that holds all badblock information
1622 */
1623void badblocks_exit(struct badblocks *bb)
1624{
1625        if (!bb)
1626                return;
1627        if (bb->dev)
1628                devm_kfree(bb->dev, bb->page);
1629        else
1630                kfree(bb->page);
1631        bb->page = NULL;
1632}
1633EXPORT_SYMBOL_GPL(badblocks_exit);
1634
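/*
 * A minimal sketch of one full lifecycle using the non-devm entry
 * points; with a struct device at hand, devm_init_badblocks() would tie
 * the table page's lifetime to the device instead. The function name is
 * hypothetical and the sectors are illustrative.
 */
static int badblocks_lifecycle_sketch(void)
{
        struct badblocks bb;
        sector_t first_bad;
        int bad_sectors;
        int rv;

        rv = badblocks_init(&bb, 1);            /* enabled, shift == 0 */
        if (rv)
                return rv;

        badblocks_set(&bb, 1040, 8, 1);         /* record an acked range */

        /* expect 1: the range overlaps only acknowledged bad blocks */
        rv = badblocks_check(&bb, 1040, 8, &first_bad, &bad_sectors);

        badblocks_clear(&bb, 1040, 8);          /* the sectors were repaired */
        badblocks_exit(&bb);

        return rv == 1 ? 0 : -EIO;
}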