linux/drivers/scsi/scsi_error.c
<<
>>
Prefs
   1/*
   2 *  scsi_error.c Copyright (C) 1997 Eric Youngdale
   3 *
   4 *  SCSI error/timeout handling
   5 *      Initial versions: Eric Youngdale.  Based upon conversations with
   6 *                        Leonard Zubkoff and David Miller at Linux Expo, 
   7 *                        ideas originating from all over the place.
   8 *
   9 *      Restructured scsi_unjam_host and associated functions.
  10 *      September 04, 2002 Mike Anderson (andmike@us.ibm.com)
  11 *
  12 *      Forward port of Russell King's (rmk@arm.linux.org.uk) changes and
  13 *      minor  cleanups.
  14 *      September 30, 2002 Mike Anderson (andmike@us.ibm.com)
  15 */
  16
  17#include <linux/module.h>
  18#include <linux/sched.h>
  19#include <linux/timer.h>
  20#include <linux/string.h>
  21#include <linux/kernel.h>
  22#include <linux/freezer.h>
  23#include <linux/kthread.h>
  24#include <linux/interrupt.h>
  25#include <linux/blkdev.h>
  26#include <linux/delay.h>
  27
  28#include <scsi/scsi.h>
  29#include <scsi/scsi_cmnd.h>
  30#include <scsi/scsi_dbg.h>
  31#include <scsi/scsi_device.h>
  32#include <scsi/scsi_eh.h>
  33#include <scsi/scsi_transport.h>
  34#include <scsi/scsi_host.h>
  35#include <scsi/scsi_ioctl.h>
  36
  37#include "scsi_priv.h"
  38#include "scsi_logging.h"
  39#include "scsi_transport_api.h"
  40
  41#define SENSE_TIMEOUT           (10*HZ)
  42
  43/*
  44 * These should *probably* be handled by the host itself.
  45 * Since it is allowed to sleep, it probably should.
  46 */
  47#define BUS_RESET_SETTLE_TIME   (10)
  48#define HOST_RESET_SETTLE_TIME  (10)
  49
  50/* called with shost->host_lock held */
  51void scsi_eh_wakeup(struct Scsi_Host *shost)
  52{
  53        if (shost->host_busy == shost->host_failed) {
  54                wake_up_process(shost->ehandler);
  55                SCSI_LOG_ERROR_RECOVERY(5,
  56                                printk("Waking error handler thread\n"));
  57        }
  58}
  59
  60/**
  61 * scsi_schedule_eh - schedule EH for SCSI host
  62 * @shost:      SCSI host to invoke error handling on.
  63 *
  64 * Schedule SCSI EH without scmd.
  65 */
  66void scsi_schedule_eh(struct Scsi_Host *shost)
  67{
  68        unsigned long flags;
  69
  70        spin_lock_irqsave(shost->host_lock, flags);
  71
  72        if (scsi_host_set_state(shost, SHOST_RECOVERY) == 0 ||
  73            scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY) == 0) {
  74                shost->host_eh_scheduled++;
  75                scsi_eh_wakeup(shost);
  76        }
  77
  78        spin_unlock_irqrestore(shost->host_lock, flags);
  79}
  80EXPORT_SYMBOL_GPL(scsi_schedule_eh);
  81
  82/**
  83 * scsi_eh_scmd_add - add scsi cmd to error handling.
  84 * @scmd:       scmd to run eh on.
  85 * @eh_flag:    optional SCSI_EH flag.
  86 *
  87 * Return value:
  88 *      0 on failure.
  89 */
  90int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag)
  91{
  92        struct Scsi_Host *shost = scmd->device->host;
  93        unsigned long flags;
  94        int ret = 0;
  95
  96        if (!shost->ehandler)
  97                return 0;
  98
  99        spin_lock_irqsave(shost->host_lock, flags);
 100        if (scsi_host_set_state(shost, SHOST_RECOVERY))
 101                if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY))
 102                        goto out_unlock;
 103
 104        ret = 1;
 105        scmd->eh_eflags |= eh_flag;
 106        list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);
 107        shost->host_failed++;
 108        scsi_eh_wakeup(shost);
 109 out_unlock:
 110        spin_unlock_irqrestore(shost->host_lock, flags);
 111        return ret;
 112}
 113
 114/**
 115 * scsi_add_timer - Start timeout timer for a single scsi command.
 116 * @scmd:       scsi command that is about to start running.
 117 * @timeout:    amount of time to allow this command to run.
 118 * @complete:   timeout function to call if timer isn't canceled.
 119 *
 120 * Notes:
 121 *    This should be turned into an inline function.  Each scsi command
 122 *    has its own timer, and as it is added to the queue, we set up the
 123 *    timer.  When the command completes, we cancel the timer.
 124 */
 125void scsi_add_timer(struct scsi_cmnd *scmd, int timeout,
 126                    void (*complete)(struct scsi_cmnd *))
 127{
 128
 129        /*
 130         * If the clock was already running for this command, then
 131         * first delete the timer.  The timer handling code gets rather
 132         * confused if we don't do this.
 133         */
 134        if (scmd->eh_timeout.function)
 135                del_timer(&scmd->eh_timeout);
 136
 137        scmd->eh_timeout.data = (unsigned long)scmd;
 138        scmd->eh_timeout.expires = jiffies + timeout;
 139        scmd->eh_timeout.function = (void (*)(unsigned long)) complete;
 140
 141        SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p, time:"
 142                                          " %d, (%p)\n", __FUNCTION__,
 143                                          scmd, timeout, complete));
 144
 145        add_timer(&scmd->eh_timeout);
 146}
 147
 148/**
 149 * scsi_delete_timer - Delete/cancel timer for a given function.
 150 * @scmd:       Cmd that we are canceling timer for
 151 *
 152 * Notes:
 153 *     This should be turned into an inline function.
 154 *
 155 * Return value:
 156 *     1 if we were able to detach the timer.  0 if we blew it, and the
 157 *     timer function has already started to run.
 158 */
 159int scsi_delete_timer(struct scsi_cmnd *scmd)
 160{
 161        int rtn;
 162
 163        rtn = del_timer(&scmd->eh_timeout);
 164
 165        SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p,"
 166                                         " rtn: %d\n", __FUNCTION__,
 167                                         scmd, rtn));
 168
 169        scmd->eh_timeout.data = (unsigned long)NULL;
 170        scmd->eh_timeout.function = NULL;
 171
 172        return rtn;
 173}
 174
 175/**
 176 * scsi_times_out - Timeout function for normal scsi commands.
 177 * @scmd:       Cmd that is timing out.
 178 *
 179 * Notes:
 180 *     We do not need to lock this.  There is the potential for a race
 181 *     only in that the normal completion handling might run, but if the
 182 *     normal completion function determines that the timer has already
 183 *     fired, then it mustn't do anything.
 184 */
 185void scsi_times_out(struct scsi_cmnd *scmd)
 186{
 187        enum scsi_eh_timer_return (* eh_timed_out)(struct scsi_cmnd *);
 188
 189        scsi_log_completion(scmd, TIMEOUT_ERROR);
 190
 191        if (scmd->device->host->transportt->eh_timed_out)
 192                eh_timed_out = scmd->device->host->transportt->eh_timed_out;
 193        else if (scmd->device->host->hostt->eh_timed_out)
 194                eh_timed_out = scmd->device->host->hostt->eh_timed_out;
 195        else
 196                eh_timed_out = NULL;
 197
 198        if (eh_timed_out)
 199                switch (eh_timed_out(scmd)) {
 200                case EH_HANDLED:
 201                        __scsi_done(scmd);
 202                        return;
 203                case EH_RESET_TIMER:
 204                        scsi_add_timer(scmd, scmd->timeout_per_command,
 205                                       scsi_times_out);
 206                        return;
 207                case EH_NOT_HANDLED:
 208                        break;
 209                }
 210
 211        if (unlikely(!scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) {
 212                scmd->result |= DID_TIME_OUT << 16;
 213                __scsi_done(scmd);
 214        }
 215}
 216
 217/**
 218 * scsi_block_when_processing_errors - Prevent cmds from being queued.
 219 * @sdev:       Device on which we are performing recovery.
 220 *
 221 * Description:
 222 *     We block until the host is out of error recovery, and then check to
 223 *     see whether the host or the device is offline.
 224 *
 225 * Return value:
 226 *     0 when dev was taken offline by error recovery. 1 OK to proceed.
 227 */
 228int scsi_block_when_processing_errors(struct scsi_device *sdev)
 229{
 230        int online;
 231
 232        wait_event(sdev->host->host_wait, !scsi_host_in_recovery(sdev->host));
 233
 234        online = scsi_device_online(sdev);
 235
 236        SCSI_LOG_ERROR_RECOVERY(5, printk("%s: rtn: %d\n", __FUNCTION__,
 237                                          online));
 238
 239        return online;
 240}
 241EXPORT_SYMBOL(scsi_block_when_processing_errors);
 242
 243#ifdef CONFIG_SCSI_LOGGING
 244/**
 245 * scsi_eh_prt_fail_stats - Log info on failures.
 246 * @shost:      scsi host being recovered.
 247 * @work_q:     Queue of scsi cmds to process.
 248 */
 249static inline void scsi_eh_prt_fail_stats(struct Scsi_Host *shost,
 250                                          struct list_head *work_q)
 251{
 252        struct scsi_cmnd *scmd;
 253        struct scsi_device *sdev;
 254        int total_failures = 0;
 255        int cmd_failed = 0;
 256        int cmd_cancel = 0;
 257        int devices_failed = 0;
 258
 259        shost_for_each_device(sdev, shost) {
 260                list_for_each_entry(scmd, work_q, eh_entry) {
 261                        if (scmd->device == sdev) {
 262                                ++total_failures;
 263                                if (scmd->eh_eflags & SCSI_EH_CANCEL_CMD)
 264                                        ++cmd_cancel;
 265                                else 
 266                                        ++cmd_failed;
 267                        }
 268                }
 269
 270                if (cmd_cancel || cmd_failed) {
 271                        SCSI_LOG_ERROR_RECOVERY(3,
 272                                sdev_printk(KERN_INFO, sdev,
 273                                            "%s: cmds failed: %d, cancel: %d\n",
 274                                            __FUNCTION__, cmd_failed,
 275                                            cmd_cancel));
 276                        cmd_cancel = 0;
 277                        cmd_failed = 0;
 278                        ++devices_failed;
 279                }
 280        }
 281
 282        SCSI_LOG_ERROR_RECOVERY(2, printk("Total of %d commands on %d"
 283                                          " devices require eh work\n",
 284                                  total_failures, devices_failed));
 285}
 286#endif
 287
 288/**
 289 * scsi_check_sense - Examine scsi cmd sense
 290 * @scmd:       Cmd to have sense checked.
 291 *
 292 * Return value:
 293 *      SUCCESS or FAILED or NEEDS_RETRY
 294 *
 295 * Notes:
 296 *      When a deferred error is detected the current command has
 297 *      not been executed and needs retrying.
 298 */
 299static int scsi_check_sense(struct scsi_cmnd *scmd)
 300{
 301        struct scsi_sense_hdr sshdr;
 302
 303        if (! scsi_command_normalize_sense(scmd, &sshdr))
 304                return FAILED;  /* no valid sense data */
 305
 306        if (scsi_sense_is_deferred(&sshdr))
 307                return NEEDS_RETRY;
 308
 309        /*
 310         * Previous logic looked for FILEMARK, EOM or ILI which are
 311         * mainly associated with tapes and returned SUCCESS.
 312         */
 313        if (sshdr.response_code == 0x70) {
 314                /* fixed format */
 315                if (scmd->sense_buffer[2] & 0xe0)
 316                        return SUCCESS;
 317        } else {
 318                /*
 319                 * descriptor format: look for "stream commands sense data
 320                 * descriptor" (see SSC-3). Assume single sense data
 321                 * descriptor. Ignore ILI from SBC-2 READ LONG and WRITE LONG.
 322                 */
 323                if ((sshdr.additional_length > 3) &&
 324                    (scmd->sense_buffer[8] == 0x4) &&
 325                    (scmd->sense_buffer[11] & 0xe0))
 326                        return SUCCESS;
 327        }
 328
 329        switch (sshdr.sense_key) {
 330        case NO_SENSE:
 331                return SUCCESS;
 332        case RECOVERED_ERROR:
 333                return /* soft_error */ SUCCESS;
 334
 335        case ABORTED_COMMAND:
 336                return NEEDS_RETRY;
 337        case NOT_READY:
 338        case UNIT_ATTENTION:
 339                /*
 340                 * if we are expecting a cc/ua because of a bus reset that we
 341                 * performed, treat this just as a retry.  otherwise this is
 342                 * information that we should pass up to the upper-level driver
 343                 * so that we can deal with it there.
 344                 */
 345                if (scmd->device->expecting_cc_ua) {
 346                        scmd->device->expecting_cc_ua = 0;
 347                        return NEEDS_RETRY;
 348                }
 349                /*
 350                 * if the device is in the process of becoming ready, we 
 351                 * should retry.
 352                 */
 353                if ((sshdr.asc == 0x04) && (sshdr.ascq == 0x01))
 354                        return NEEDS_RETRY;
 355                /*
 356                 * if the device is not started, we need to wake
 357                 * the error handler to start the motor
 358                 */
 359                if (scmd->device->allow_restart &&
 360                    (sshdr.asc == 0x04) && (sshdr.ascq == 0x02))
 361                        return FAILED;
 362                return SUCCESS;
 363
 364                /* these three are not supported */
 365        case COPY_ABORTED:
 366        case VOLUME_OVERFLOW:
 367        case MISCOMPARE:
 368                return SUCCESS;
 369
 370        case MEDIUM_ERROR:
 371                if (sshdr.asc == 0x11 || /* UNRECOVERED READ ERR */
 372                    sshdr.asc == 0x13 || /* AMNF DATA FIELD */
 373                    sshdr.asc == 0x14) { /* RECORD NOT FOUND */
 374                        return SUCCESS;
 375                }
 376                return NEEDS_RETRY;
 377
 378        case HARDWARE_ERROR:
 379                if (scmd->device->retry_hwerror)
 380                        return NEEDS_RETRY;
 381                else
 382                        return SUCCESS;
 383
 384        case ILLEGAL_REQUEST:
 385        case BLANK_CHECK:
 386        case DATA_PROTECT:
 387        default:
 388                return SUCCESS;
 389        }
 390}
 391
 392/**
 393 * scsi_eh_completed_normally - Disposition a eh cmd on return from LLD.
 394 * @scmd:       SCSI cmd to examine.
 395 *
 396 * Notes:
 397 *    This is *only* called when we are examining the status of commands
 398 *    queued during error recovery.  the main difference here is that we
 399 *    don't allow for the possibility of retries here, and we are a lot
 400 *    more restrictive about what we consider acceptable.
 401 */
 402static int scsi_eh_completed_normally(struct scsi_cmnd *scmd)
 403{
 404        /*
 405         * first check the host byte, to see if there is anything in there
 406         * that would indicate what we need to do.
 407         */
 408        if (host_byte(scmd->result) == DID_RESET) {
 409                /*
 410                 * rats.  we are already in the error handler, so we now
 411                 * get to try and figure out what to do next.  if the sense
 412                 * is valid, we have a pretty good idea of what to do.
 413                 * if not, we mark it as FAILED.
 414                 */
 415                return scsi_check_sense(scmd);
 416        }
 417        if (host_byte(scmd->result) != DID_OK)
 418                return FAILED;
 419
 420        /*
 421         * next, check the message byte.
 422         */
 423        if (msg_byte(scmd->result) != COMMAND_COMPLETE)
 424                return FAILED;
 425
 426        /*
 427         * now, check the status byte to see if this indicates
 428         * anything special.
 429         */
 430        switch (status_byte(scmd->result)) {
 431        case GOOD:
 432        case COMMAND_TERMINATED:
 433                return SUCCESS;
 434        case CHECK_CONDITION:
 435                return scsi_check_sense(scmd);
 436        case CONDITION_GOOD:
 437        case INTERMEDIATE_GOOD:
 438        case INTERMEDIATE_C_GOOD:
 439                /*
 440                 * who knows?  FIXME(eric)
 441                 */
 442                return SUCCESS;
 443        case BUSY:
 444        case QUEUE_FULL:
 445        case RESERVATION_CONFLICT:
 446        default:
 447                return FAILED;
 448        }
 449        return FAILED;
 450}
 451
 452/**
 453 * scsi_eh_done - Completion function for error handling.
 454 * @scmd:       Cmd that is done.
 455 */
 456static void scsi_eh_done(struct scsi_cmnd *scmd)
 457{
 458        struct completion     *eh_action;
 459
 460        SCSI_LOG_ERROR_RECOVERY(3,
 461                printk("%s scmd: %p result: %x\n",
 462                        __FUNCTION__, scmd, scmd->result));
 463
 464        eh_action = scmd->device->host->eh_action;
 465        if (eh_action)
 466                complete(eh_action);
 467}
 468
 469/**
 470 * scsi_try_host_reset - ask host adapter to reset itself
 471 * @scmd:       SCSI cmd to send hsot reset.
 472 */
 473static int scsi_try_host_reset(struct scsi_cmnd *scmd)
 474{
 475        unsigned long flags;
 476        int rtn;
 477
 478        SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Host RST\n",
 479                                          __FUNCTION__));
 480
 481        if (!scmd->device->host->hostt->eh_host_reset_handler)
 482                return FAILED;
 483
 484        rtn = scmd->device->host->hostt->eh_host_reset_handler(scmd);
 485
 486        if (rtn == SUCCESS) {
 487                if (!scmd->device->host->hostt->skip_settle_delay)
 488                        ssleep(HOST_RESET_SETTLE_TIME);
 489                spin_lock_irqsave(scmd->device->host->host_lock, flags);
 490                scsi_report_bus_reset(scmd->device->host,
 491                                      scmd_channel(scmd));
 492                spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
 493        }
 494
 495        return rtn;
 496}
 497
 498/**
 499 * scsi_try_bus_reset - ask host to perform a bus reset
 500 * @scmd:       SCSI cmd to send bus reset.
 501 */
 502static int scsi_try_bus_reset(struct scsi_cmnd *scmd)
 503{
 504        unsigned long flags;
 505        int rtn;
 506
 507        SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Bus RST\n",
 508                                          __FUNCTION__));
 509
 510        if (!scmd->device->host->hostt->eh_bus_reset_handler)
 511                return FAILED;
 512
 513        rtn = scmd->device->host->hostt->eh_bus_reset_handler(scmd);
 514
 515        if (rtn == SUCCESS) {
 516                if (!scmd->device->host->hostt->skip_settle_delay)
 517                        ssleep(BUS_RESET_SETTLE_TIME);
 518                spin_lock_irqsave(scmd->device->host->host_lock, flags);
 519                scsi_report_bus_reset(scmd->device->host,
 520                                      scmd_channel(scmd));
 521                spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
 522        }
 523
 524        return rtn;
 525}
 526
 527/**
 528 * scsi_try_bus_device_reset - Ask host to perform a BDR on a dev
 529 * @scmd:       SCSI cmd used to send BDR
 530 *
 531 * Notes:
 532 *    There is no timeout for this operation.  if this operation is
 533 *    unreliable for a given host, then the host itself needs to put a
 534 *    timer on it, and set the host back to a consistent state prior to
 535 *    returning.
 536 */
 537static int scsi_try_bus_device_reset(struct scsi_cmnd *scmd)
 538{
 539        int rtn;
 540
 541        if (!scmd->device->host->hostt->eh_device_reset_handler)
 542                return FAILED;
 543
 544        rtn = scmd->device->host->hostt->eh_device_reset_handler(scmd);
 545        if (rtn == SUCCESS) {
 546                scmd->device->was_reset = 1;
 547                scmd->device->expecting_cc_ua = 1;
 548        }
 549
 550        return rtn;
 551}
 552
 553static int __scsi_try_to_abort_cmd(struct scsi_cmnd *scmd)
 554{
 555        if (!scmd->device->host->hostt->eh_abort_handler)
 556                return FAILED;
 557
 558        return scmd->device->host->hostt->eh_abort_handler(scmd);
 559}
 560
 561/**
 562 * scsi_try_to_abort_cmd - Ask host to abort a running command.
 563 * @scmd:       SCSI cmd to abort from Lower Level.
 564 *
 565 * Notes:
 566 *    This function will not return until the user's completion function
 567 *    has been called.  there is no timeout on this operation.  if the
 568 *    author of the low-level driver wishes this operation to be timed,
 569 *    they can provide this facility themselves.  helper functions in
 570 *    scsi_error.c can be supplied to make this easier to do.
 571 */
 572static int scsi_try_to_abort_cmd(struct scsi_cmnd *scmd)
 573{
 574        /*
 575         * scsi_done was called just after the command timed out and before
 576         * we had a chance to process it. (db)
 577         */
 578        if (scmd->serial_number == 0)
 579                return SUCCESS;
 580        return __scsi_try_to_abort_cmd(scmd);
 581}
 582
 583static void scsi_abort_eh_cmnd(struct scsi_cmnd *scmd)
 584{
 585        if (__scsi_try_to_abort_cmd(scmd) != SUCCESS)
 586                if (scsi_try_bus_device_reset(scmd) != SUCCESS)
 587                        if (scsi_try_bus_reset(scmd) != SUCCESS)
 588                                scsi_try_host_reset(scmd);
 589}
 590
 591/**
 592 * scsi_eh_prep_cmnd  - Save a scsi command info as part of error recory
 593 * @scmd:       SCSI command structure to hijack
 594 * @ses:        structure to save restore information
 595 * @cmnd:       CDB to send. Can be NULL if no new cmnd is needed
 596 * @cmnd_size:  size in bytes of @cmnd
 597 * @sense_bytes: size of sense data to copy. or 0 (if != 0 @cmnd is ignored)
 598 *
 599 * This function is used to save a scsi command information before re-execution
 600 * as part of the error recovery process.  If @sense_bytes is 0 the command
 601 * sent must be one that does not transfer any data.  If @sense_bytes != 0
 602 * @cmnd is ignored and this functions sets up a REQUEST_SENSE command
 603 * and cmnd buffers to read @sense_bytes into @scmd->sense_buffer.
 604 */
 605void scsi_eh_prep_cmnd(struct scsi_cmnd *scmd, struct scsi_eh_save *ses,
 606                        unsigned char *cmnd, int cmnd_size, unsigned sense_bytes)
 607{
 608        struct scsi_device *sdev = scmd->device;
 609
 610        /*
 611         * We need saved copies of a number of fields - this is because
 612         * error handling may need to overwrite these with different values
 613         * to run different commands, and once error handling is complete,
 614         * we will need to restore these values prior to running the actual
 615         * command.
 616         */
 617        ses->cmd_len = scmd->cmd_len;
 618        memcpy(ses->cmnd, scmd->cmnd, sizeof(scmd->cmnd));
 619        ses->data_direction = scmd->sc_data_direction;
 620        ses->sdb = scmd->sdb;
 621        ses->next_rq = scmd->request->next_rq;
 622        ses->result = scmd->result;
 623
 624        memset(&scmd->sdb, 0, sizeof(scmd->sdb));
 625        scmd->request->next_rq = NULL;
 626
 627        if (sense_bytes) {
 628                scmd->sdb.length = min_t(unsigned, SCSI_SENSE_BUFFERSIZE,
 629                                         sense_bytes);
 630                sg_init_one(&ses->sense_sgl, scmd->sense_buffer,
 631                            scmd->sdb.length);
 632                scmd->sdb.table.sgl = &ses->sense_sgl;
 633                scmd->sc_data_direction = DMA_FROM_DEVICE;
 634                scmd->sdb.table.nents = 1;
 635                memset(scmd->cmnd, 0, sizeof(scmd->cmnd));
 636                scmd->cmnd[0] = REQUEST_SENSE;
 637                scmd->cmnd[4] = scmd->sdb.length;
 638                scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]);
 639        } else {
 640                scmd->sc_data_direction = DMA_NONE;
 641                if (cmnd) {
 642                        memset(scmd->cmnd, 0, sizeof(scmd->cmnd));
 643                        memcpy(scmd->cmnd, cmnd, cmnd_size);
 644                        scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]);
 645                }
 646        }
 647
 648        scmd->underflow = 0;
 649
 650        if (sdev->scsi_level <= SCSI_2 && sdev->scsi_level != SCSI_UNKNOWN)
 651                scmd->cmnd[1] = (scmd->cmnd[1] & 0x1f) |
 652                        (sdev->lun << 5 & 0xe0);
 653
 654        /*
 655         * Zero the sense buffer.  The scsi spec mandates that any
 656         * untransferred sense data should be interpreted as being zero.
 657         */
 658        memset(scmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
 659}
 660EXPORT_SYMBOL(scsi_eh_prep_cmnd);
 661
 662/**
 663 * scsi_eh_restore_cmnd  - Restore a scsi command info as part of error recory
 664 * @scmd:       SCSI command structure to restore
 665 * @ses:        saved information from a coresponding call to scsi_prep_eh_cmnd
 666 *
 667 * Undo any damage done by above scsi_prep_eh_cmnd().
 668 */
 669void scsi_eh_restore_cmnd(struct scsi_cmnd* scmd, struct scsi_eh_save *ses)
 670{
 671        /*
 672         * Restore original data
 673         */
 674        scmd->cmd_len = ses->cmd_len;
 675        memcpy(scmd->cmnd, ses->cmnd, sizeof(scmd->cmnd));
 676        scmd->sc_data_direction = ses->data_direction;
 677        scmd->sdb = ses->sdb;
 678        scmd->request->next_rq = ses->next_rq;
 679        scmd->result = ses->result;
 680}
 681EXPORT_SYMBOL(scsi_eh_restore_cmnd);
 682
 683/**
 684 * scsi_send_eh_cmnd  - submit a scsi command as part of error recory
 685 * @scmd:       SCSI command structure to hijack
 686 * @cmnd:       CDB to send
 687 * @cmnd_size:  size in bytes of @cmnd
 688 * @timeout:    timeout for this request
 689 * @sense_bytes: size of sense data to copy or 0
 690 *
 691 * This function is used to send a scsi command down to a target device
 692 * as part of the error recovery process. See also scsi_eh_prep_cmnd() above.
 693 *
 694 * Return value:
 695 *    SUCCESS or FAILED or NEEDS_RETRY
 696 */
 697static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, unsigned char *cmnd,
 698                             int cmnd_size, int timeout, unsigned sense_bytes)
 699{
 700        struct scsi_device *sdev = scmd->device;
 701        struct Scsi_Host *shost = sdev->host;
 702        DECLARE_COMPLETION_ONSTACK(done);
 703        unsigned long timeleft;
 704        unsigned long flags;
 705        struct scsi_eh_save ses;
 706        int rtn;
 707
 708        scsi_eh_prep_cmnd(scmd, &ses, cmnd, cmnd_size, sense_bytes);
 709        shost->eh_action = &done;
 710
 711        spin_lock_irqsave(shost->host_lock, flags);
 712        scsi_log_send(scmd);
 713        shost->hostt->queuecommand(scmd, scsi_eh_done);
 714        spin_unlock_irqrestore(shost->host_lock, flags);
 715
 716        timeleft = wait_for_completion_timeout(&done, timeout);
 717
 718        shost->eh_action = NULL;
 719
 720        scsi_log_completion(scmd, SUCCESS);
 721
 722        SCSI_LOG_ERROR_RECOVERY(3,
 723                printk("%s: scmd: %p, timeleft: %ld\n",
 724                        __FUNCTION__, scmd, timeleft));
 725
 726        /*
 727         * If there is time left scsi_eh_done got called, and we will
 728         * examine the actual status codes to see whether the command
 729         * actually did complete normally, else tell the host to forget
 730         * about this command.
 731         */
 732        if (timeleft) {
 733                rtn = scsi_eh_completed_normally(scmd);
 734                SCSI_LOG_ERROR_RECOVERY(3,
 735                        printk("%s: scsi_eh_completed_normally %x\n",
 736                               __FUNCTION__, rtn));
 737
 738                switch (rtn) {
 739                case SUCCESS:
 740                case NEEDS_RETRY:
 741                case FAILED:
 742                        break;
 743                default:
 744                        rtn = FAILED;
 745                        break;
 746                }
 747        } else {
 748                scsi_abort_eh_cmnd(scmd);
 749                rtn = FAILED;
 750        }
 751
 752        scsi_eh_restore_cmnd(scmd, &ses);
 753        return rtn;
 754}
 755
 756/**
 757 * scsi_request_sense - Request sense data from a particular target.
 758 * @scmd:       SCSI cmd for request sense.
 759 *
 760 * Notes:
 761 *    Some hosts automatically obtain this information, others require
 762 *    that we obtain it on our own. This function will *not* return until
 763 *    the command either times out, or it completes.
 764 */
 765static int scsi_request_sense(struct scsi_cmnd *scmd)
 766{
 767        return scsi_send_eh_cmnd(scmd, NULL, 0, SENSE_TIMEOUT, ~0);
 768}
 769
 770/**
 771 * scsi_eh_finish_cmd - Handle a cmd that eh is finished with.
 772 * @scmd:       Original SCSI cmd that eh has finished.
 773 * @done_q:     Queue for processed commands.
 774 *
 775 * Notes:
 776 *    We don't want to use the normal command completion while we are are
 777 *    still handling errors - it may cause other commands to be queued,
 778 *    and that would disturb what we are doing.  Thus we really want to
 779 *    keep a list of pending commands for final completion, and once we
 780 *    are ready to leave error handling we handle completion for real.
 781 */
 782void scsi_eh_finish_cmd(struct scsi_cmnd *scmd, struct list_head *done_q)
 783{
 784        scmd->device->host->host_failed--;
 785        scmd->eh_eflags = 0;
 786        list_move_tail(&scmd->eh_entry, done_q);
 787}
 788EXPORT_SYMBOL(scsi_eh_finish_cmd);
 789
 790/**
 791 * scsi_eh_get_sense - Get device sense data.
 792 * @work_q:     Queue of commands to process.
 793 * @done_q:     Queue of processed commands.
 794 *
 795 * Description:
 796 *    See if we need to request sense information.  if so, then get it
 797 *    now, so we have a better idea of what to do.  
 798 *
 799 * Notes:
 800 *    This has the unfortunate side effect that if a shost adapter does
 801 *    not automatically request sense information, we end up shutting
 802 *    it down before we request it.
 803 *
 804 *    All drivers should request sense information internally these days,
 805 *    so for now all I have to say is tough noogies if you end up in here.
 806 *
 807 *    XXX: Long term this code should go away, but that needs an audit of
 808 *         all LLDDs first.
 809 */
 810int scsi_eh_get_sense(struct list_head *work_q,
 811                      struct list_head *done_q)
 812{
 813        struct scsi_cmnd *scmd, *next;
 814        int rtn;
 815
 816        list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
 817                if ((scmd->eh_eflags & SCSI_EH_CANCEL_CMD) ||
 818                    SCSI_SENSE_VALID(scmd))
 819                        continue;
 820
 821                SCSI_LOG_ERROR_RECOVERY(2, scmd_printk(KERN_INFO, scmd,
 822                                                  "%s: requesting sense\n",
 823                                                  current->comm));
 824                rtn = scsi_request_sense(scmd);
 825                if (rtn != SUCCESS)
 826                        continue;
 827
 828                SCSI_LOG_ERROR_RECOVERY(3, printk("sense requested for %p"
 829                                                  " result %x\n", scmd,
 830                                                  scmd->result));
 831                SCSI_LOG_ERROR_RECOVERY(3, scsi_print_sense("bh", scmd));
 832
 833                rtn = scsi_decide_disposition(scmd);
 834
 835                /*
 836                 * if the result was normal, then just pass it along to the
 837                 * upper level.
 838                 */
 839                if (rtn == SUCCESS)
 840                        /* we don't want this command reissued, just
 841                         * finished with the sense data, so set
 842                         * retries to the max allowed to ensure it
 843                         * won't get reissued */
 844                        scmd->retries = scmd->allowed;
 845                else if (rtn != NEEDS_RETRY)
 846                        continue;
 847
 848                scsi_eh_finish_cmd(scmd, done_q);
 849        }
 850
 851        return list_empty(work_q);
 852}
 853EXPORT_SYMBOL_GPL(scsi_eh_get_sense);
 854
 855/**
 856 * scsi_eh_tur - Send TUR to device.
 857 * @scmd:       &scsi_cmnd to send TUR
 858 *
 859 * Return value:
 860 *    0 - Device is ready. 1 - Device NOT ready.
 861 */
 862static int scsi_eh_tur(struct scsi_cmnd *scmd)
 863{
 864        static unsigned char tur_command[6] = {TEST_UNIT_READY, 0, 0, 0, 0, 0};
 865        int retry_cnt = 1, rtn;
 866
 867retry_tur:
 868        rtn = scsi_send_eh_cmnd(scmd, tur_command, 6, SENSE_TIMEOUT, 0);
 869
 870        SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd %p rtn %x\n",
 871                __FUNCTION__, scmd, rtn));
 872
 873        switch (rtn) {
 874        case NEEDS_RETRY:
 875                if (retry_cnt--)
 876                        goto retry_tur;
 877                /*FALLTHRU*/
 878        case SUCCESS:
 879                return 0;
 880        default:
 881                return 1;
 882        }
 883}
 884
 885/**
 886 * scsi_eh_abort_cmds - abort pending commands.
 887 * @work_q:     &list_head for pending commands.
 888 * @done_q:     &list_head for processed commands.
 889 *
 890 * Decription:
 891 *    Try and see whether or not it makes sense to try and abort the
 892 *    running command.  This only works out to be the case if we have one
 893 *    command that has timed out.  If the command simply failed, it makes
 894 *    no sense to try and abort the command, since as far as the shost
 895 *    adapter is concerned, it isn't running.
 896 */
 897static int scsi_eh_abort_cmds(struct list_head *work_q,
 898                              struct list_head *done_q)
 899{
 900        struct scsi_cmnd *scmd, *next;
 901        int rtn;
 902
 903        list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
 904                if (!(scmd->eh_eflags & SCSI_EH_CANCEL_CMD))
 905                        continue;
 906                SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting cmd:"
 907                                                  "0x%p\n", current->comm,
 908                                                  scmd));
 909                rtn = scsi_try_to_abort_cmd(scmd);
 910                if (rtn == SUCCESS) {
 911                        scmd->eh_eflags &= ~SCSI_EH_CANCEL_CMD;
 912                        if (!scsi_device_online(scmd->device) ||
 913                            !scsi_eh_tur(scmd)) {
 914                                scsi_eh_finish_cmd(scmd, done_q);
 915                        }
 916                                
 917                } else
 918                        SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting"
 919                                                          " cmd failed:"
 920                                                          "0x%p\n",
 921                                                          current->comm,
 922                                                          scmd));
 923        }
 924
 925        return list_empty(work_q);
 926}
 927
 928/**
 929 * scsi_eh_try_stu - Send START_UNIT to device.
 930 * @scmd:       &scsi_cmnd to send START_UNIT
 931 *
 932 * Return value:
 933 *    0 - Device is ready. 1 - Device NOT ready.
 934 */
 935static int scsi_eh_try_stu(struct scsi_cmnd *scmd)
 936{
 937        static unsigned char stu_command[6] = {START_STOP, 0, 0, 0, 1, 0};
 938
 939        if (scmd->device->allow_restart) {
 940                int i, rtn = NEEDS_RETRY;
 941
 942                for (i = 0; rtn == NEEDS_RETRY && i < 2; i++)
 943                        rtn = scsi_send_eh_cmnd(scmd, stu_command, 6,
 944                                                scmd->device->timeout, 0);
 945
 946                if (rtn == SUCCESS)
 947                        return 0;
 948        }
 949
 950        return 1;
 951}
 952
 953 /**
 954 * scsi_eh_stu - send START_UNIT if needed
 955 * @shost:      &scsi host being recovered.
 956 * @work_q:     &list_head for pending commands.
 957 * @done_q:     &list_head for processed commands.
 958 *
 959 * Notes:
 960 *    If commands are failing due to not ready, initializing command required,
 961 *      try revalidating the device, which will end up sending a start unit. 
 962 */
 963static int scsi_eh_stu(struct Scsi_Host *shost,
 964                              struct list_head *work_q,
 965                              struct list_head *done_q)
 966{
 967        struct scsi_cmnd *scmd, *stu_scmd, *next;
 968        struct scsi_device *sdev;
 969
 970        shost_for_each_device(sdev, shost) {
 971                stu_scmd = NULL;
 972                list_for_each_entry(scmd, work_q, eh_entry)
 973                        if (scmd->device == sdev && SCSI_SENSE_VALID(scmd) &&
 974                            scsi_check_sense(scmd) == FAILED ) {
 975                                stu_scmd = scmd;
 976                                break;
 977                        }
 978
 979                if (!stu_scmd)
 980                        continue;
 981
 982                SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending START_UNIT to sdev:"
 983                                                  " 0x%p\n", current->comm, sdev));
 984
 985                if (!scsi_eh_try_stu(stu_scmd)) {
 986                        if (!scsi_device_online(sdev) ||
 987                            !scsi_eh_tur(stu_scmd)) {
 988                                list_for_each_entry_safe(scmd, next,
 989                                                          work_q, eh_entry) {
 990                                        if (scmd->device == sdev)
 991                                                scsi_eh_finish_cmd(scmd, done_q);
 992                                }
 993                        }
 994                } else {
 995                        SCSI_LOG_ERROR_RECOVERY(3,
 996                                                printk("%s: START_UNIT failed to sdev:"
 997                                                       " 0x%p\n", current->comm, sdev));
 998                }
 999        }
1000
1001        return list_empty(work_q);
1002}
1003
1004
1005/**
1006 * scsi_eh_bus_device_reset - send bdr if needed
1007 * @shost:      scsi host being recovered.
1008 * @work_q:     &list_head for pending commands.
1009 * @done_q:     &list_head for processed commands.
1010 *
1011 * Notes:
1012 *    Try a bus device reset.  Still, look to see whether we have multiple
1013 *    devices that are jammed or not - if we have multiple devices, it
1014 *    makes no sense to try bus_device_reset - we really would need to try
1015 *    a bus_reset instead. 
1016 */
1017static int scsi_eh_bus_device_reset(struct Scsi_Host *shost,
1018                                    struct list_head *work_q,
1019                                    struct list_head *done_q)
1020{
1021        struct scsi_cmnd *scmd, *bdr_scmd, *next;
1022        struct scsi_device *sdev;
1023        int rtn;
1024
1025        shost_for_each_device(sdev, shost) {
1026                bdr_scmd = NULL;
1027                list_for_each_entry(scmd, work_q, eh_entry)
1028                        if (scmd->device == sdev) {
1029                                bdr_scmd = scmd;
1030                                break;
1031                        }
1032
1033                if (!bdr_scmd)
1034                        continue;
1035
1036                SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending BDR sdev:"
1037                                                  " 0x%p\n", current->comm,
1038                                                  sdev));
1039                rtn = scsi_try_bus_device_reset(bdr_scmd);
1040                if (rtn == SUCCESS) {
1041                        if (!scsi_device_online(sdev) ||
1042                            !scsi_eh_tur(bdr_scmd)) {
1043                                list_for_each_entry_safe(scmd, next,
1044                                                         work_q, eh_entry) {
1045                                        if (scmd->device == sdev)
1046                                                scsi_eh_finish_cmd(scmd,
1047                                                                   done_q);
1048                                }
1049                        }
1050                } else {
1051                        SCSI_LOG_ERROR_RECOVERY(3, printk("%s: BDR"
1052                                                          " failed sdev:"
1053                                                          "0x%p\n",
1054                                                          current->comm,
1055                                                           sdev));
1056                }
1057        }
1058
1059        return list_empty(work_q);
1060}
1061
1062/**
1063 * scsi_eh_bus_reset - send a bus reset 
1064 * @shost:      &scsi host being recovered.
1065 * @work_q:     &list_head for pending commands.
1066 * @done_q:     &list_head for processed commands.
1067 */
1068static int scsi_eh_bus_reset(struct Scsi_Host *shost,
1069                             struct list_head *work_q,
1070                             struct list_head *done_q)
1071{
1072        struct scsi_cmnd *scmd, *chan_scmd, *next;
1073        unsigned int channel;
1074        int rtn;
1075
1076        /*
1077         * we really want to loop over the various channels, and do this on
1078         * a channel by channel basis.  we should also check to see if any
1079         * of the failed commands are on soft_reset devices, and if so, skip
1080         * the reset.  
1081         */
1082
1083        for (channel = 0; channel <= shost->max_channel; channel++) {
1084                chan_scmd = NULL;
1085                list_for_each_entry(scmd, work_q, eh_entry) {
1086                        if (channel == scmd_channel(scmd)) {
1087                                chan_scmd = scmd;
1088                                break;
1089                                /*
1090                                 * FIXME add back in some support for
1091                                 * soft_reset devices.
1092                                 */
1093                        }
1094                }
1095
1096                if (!chan_scmd)
1097                        continue;
1098                SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending BRST chan:"
1099                                                  " %d\n", current->comm,
1100                                                  channel));
1101                rtn = scsi_try_bus_reset(chan_scmd);
1102                if (rtn == SUCCESS) {
1103                        list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
1104                                if (channel == scmd_channel(scmd))
1105                                        if (!scsi_device_online(scmd->device) ||
1106                                            !scsi_eh_tur(scmd))
1107                                                scsi_eh_finish_cmd(scmd,
1108                                                                   done_q);
1109                        }
1110                } else {
1111                        SCSI_LOG_ERROR_RECOVERY(3, printk("%s: BRST"
1112                                                          " failed chan: %d\n",
1113                                                          current->comm,
1114                                                          channel));
1115                }
1116        }
1117        return list_empty(work_q);
1118}
1119
1120/**
1121 * scsi_eh_host_reset - send a host reset 
1122 * @work_q:     list_head for processed commands.
1123 * @done_q:     list_head for processed commands.
1124 */
1125static int scsi_eh_host_reset(struct list_head *work_q,
1126                              struct list_head *done_q)
1127{
1128        struct scsi_cmnd *scmd, *next;
1129        int rtn;
1130
1131        if (!list_empty(work_q)) {
1132                scmd = list_entry(work_q->next,
1133                                  struct scsi_cmnd, eh_entry);
1134
1135                SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending HRST\n"
1136                                                  , current->comm));
1137
1138                rtn = scsi_try_host_reset(scmd);
1139                if (rtn == SUCCESS) {
1140                        list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
1141                                if (!scsi_device_online(scmd->device) ||
1142                                    (!scsi_eh_try_stu(scmd) && !scsi_eh_tur(scmd)) ||
1143                                    !scsi_eh_tur(scmd))
1144                                        scsi_eh_finish_cmd(scmd, done_q);
1145                        }
1146                } else {
1147                        SCSI_LOG_ERROR_RECOVERY(3, printk("%s: HRST"
1148                                                          " failed\n",
1149                                                          current->comm));
1150                }
1151        }
1152        return list_empty(work_q);
1153}
1154
1155/**
1156 * scsi_eh_offline_sdevs - offline scsi devices that fail to recover
1157 * @work_q:     list_head for processed commands.
1158 * @done_q:     list_head for processed commands.
1159 */
1160static void scsi_eh_offline_sdevs(struct list_head *work_q,
1161                                  struct list_head *done_q)
1162{
1163        struct scsi_cmnd *scmd, *next;
1164
1165        list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
1166                sdev_printk(KERN_INFO, scmd->device, "Device offlined - "
1167                            "not ready after error recovery\n");
1168                scsi_device_set_state(scmd->device, SDEV_OFFLINE);
1169                if (scmd->eh_eflags & SCSI_EH_CANCEL_CMD) {
1170                        /*
1171                         * FIXME: Handle lost cmds.
1172                         */
1173                }
1174                scsi_eh_finish_cmd(scmd, done_q);
1175        }
1176        return;
1177}
1178
1179/**
1180 * scsi_decide_disposition - Disposition a cmd on return from LLD.
1181 * @scmd:       SCSI cmd to examine.
1182 *
1183 * Notes:
1184 *    This is *only* called when we are examining the status after sending
1185 *    out the actual data command.  any commands that are queued for error
1186 *    recovery (e.g. test_unit_ready) do *not* come through here.
1187 *
1188 *    When this routine returns failed, it means the error handler thread
1189 *    is woken.  In cases where the error code indicates an error that
1190 *    doesn't require the error handler read (i.e. we don't need to
1191 *    abort/reset), this function should return SUCCESS.
1192 */
1193int scsi_decide_disposition(struct scsi_cmnd *scmd)
1194{
1195        int rtn;
1196
1197        /*
1198         * if the device is offline, then we clearly just pass the result back
1199         * up to the top level.
1200         */
1201        if (!scsi_device_online(scmd->device)) {
1202                SCSI_LOG_ERROR_RECOVERY(5, printk("%s: device offline - report"
1203                                                  " as SUCCESS\n",
1204                                                  __FUNCTION__));
1205                return SUCCESS;
1206        }
1207
1208        /*
1209         * first check the host byte, to see if there is anything in there
1210         * that would indicate what we need to do.
1211         */
1212        switch (host_byte(scmd->result)) {
1213        case DID_PASSTHROUGH:
1214                /*
1215                 * no matter what, pass this through to the upper layer.
1216                 * nuke this special code so that it looks like we are saying
1217                 * did_ok.
1218                 */
1219                scmd->result &= 0xff00ffff;
1220                return SUCCESS;
1221        case DID_OK:
1222                /*
1223                 * looks good.  drop through, and check the next byte.
1224                 */
1225                break;
1226        case DID_NO_CONNECT:
1227        case DID_BAD_TARGET:
1228        case DID_ABORT:
1229                /*
1230                 * note - this means that we just report the status back
1231                 * to the top level driver, not that we actually think
1232                 * that it indicates SUCCESS.
1233                 */
1234                return SUCCESS;
1235                /*
1236                 * when the low level driver returns did_soft_error,
1237                 * it is responsible for keeping an internal retry counter 
1238                 * in order to avoid endless loops (db)
1239                 *
1240                 * actually this is a bug in this function here.  we should
1241                 * be mindful of the maximum number of retries specified
1242                 * and not get stuck in a loop.
1243                 */
1244        case DID_SOFT_ERROR:
1245                goto maybe_retry;
1246        case DID_IMM_RETRY:
1247                return NEEDS_RETRY;
1248
1249        case DID_REQUEUE:
1250                return ADD_TO_MLQUEUE;
1251
1252        case DID_ERROR:
1253                if (msg_byte(scmd->result) == COMMAND_COMPLETE &&
1254                    status_byte(scmd->result) == RESERVATION_CONFLICT)
1255                        /*
1256                         * execute reservation conflict processing code
1257                         * lower down
1258                         */
1259                        break;
1260                /* fallthrough */
1261
1262        case DID_BUS_BUSY:
1263        case DID_PARITY:
1264                goto maybe_retry;
1265        case DID_TIME_OUT:
1266                /*
1267                 * when we scan the bus, we get timeout messages for
1268                 * these commands if there is no device available.
1269                 * other hosts report did_no_connect for the same thing.
1270                 */
1271                if ((scmd->cmnd[0] == TEST_UNIT_READY ||
1272                     scmd->cmnd[0] == INQUIRY)) {
1273                        return SUCCESS;
1274                } else {
1275                        return FAILED;
1276                }
1277        case DID_RESET:
1278                return SUCCESS;
1279        default:
1280                return FAILED;
1281        }
1282
1283        /*
1284         * next, check the message byte.
1285         */
1286        if (msg_byte(scmd->result) != COMMAND_COMPLETE)
1287                return FAILED;
1288
1289        /*
1290         * check the status byte to see if this indicates anything special.
1291         */
1292        switch (status_byte(scmd->result)) {
1293        case QUEUE_FULL:
1294                /*
1295                 * the case of trying to send too many commands to a
1296                 * tagged queueing device.
1297                 */
1298        case BUSY:
1299                /*
1300                 * device can't talk to us at the moment.  Should only
1301                 * occur (SAM-3) when the task queue is empty, so will cause
1302                 * the empty queue handling to trigger a stall in the
1303                 * device.
1304                 */
1305                return ADD_TO_MLQUEUE;
1306        case GOOD:
1307        case COMMAND_TERMINATED:
1308        case TASK_ABORTED:
1309                return SUCCESS;
1310        case CHECK_CONDITION:
1311                rtn = scsi_check_sense(scmd);
1312                if (rtn == NEEDS_RETRY)
1313                        goto maybe_retry;
1314                /* if rtn == FAILED, we have no sense information;
1315                 * returning FAILED will wake the error handler thread
1316                 * to collect the sense and redo the decide
1317                 * disposition */
1318                return rtn;
1319        case CONDITION_GOOD:
1320        case INTERMEDIATE_GOOD:
1321        case INTERMEDIATE_C_GOOD:
1322        case ACA_ACTIVE:
1323                /*
1324                 * who knows?  FIXME(eric)
1325                 */
1326                return SUCCESS;
1327
1328        case RESERVATION_CONFLICT:
1329                sdev_printk(KERN_INFO, scmd->device,
1330                            "reservation conflict\n");
1331                return SUCCESS; /* causes immediate i/o error */
1332        default:
1333                return FAILED;
1334        }
1335        return FAILED;
1336
1337      maybe_retry:
1338
1339        /* we requeue for retry because the error was retryable, and
1340         * the request was not marked fast fail.  Note that above,
1341         * even if the request is marked fast fail, we still requeue
1342         * for queue congestion conditions (QUEUE_FULL or BUSY) */
1343        if ((++scmd->retries) <= scmd->allowed
1344            && !blk_noretry_request(scmd->request)) {
1345                return NEEDS_RETRY;
1346        } else {
1347                /*
1348                 * no more retries - report this one back to upper level.
1349                 */
1350                return SUCCESS;
1351        }
1352}
1353
1354/**
1355 * scsi_eh_lock_door - Prevent medium removal for the specified device
1356 * @sdev:       SCSI device to prevent medium removal
1357 *
1358 * Locking:
1359 *      We must be called from process context; scsi_allocate_request()
1360 *      may sleep.
1361 *
1362 * Notes:
1363 *      We queue up an asynchronous "ALLOW MEDIUM REMOVAL" request on the
1364 *      head of the devices request queue, and continue.
1365 *
1366 * Bugs:
1367 *      scsi_allocate_request() may sleep waiting for existing requests to
1368 *      be processed.  However, since we haven't kicked off any request
1369 *      processing for this host, this may deadlock.
1370 *
1371 *      If scsi_allocate_request() fails for what ever reason, we
1372 *      completely forget to lock the door.
1373 */
1374static void scsi_eh_lock_door(struct scsi_device *sdev)
1375{
1376        unsigned char cmnd[MAX_COMMAND_SIZE];
1377
1378        cmnd[0] = ALLOW_MEDIUM_REMOVAL;
1379        cmnd[1] = 0;
1380        cmnd[2] = 0;
1381        cmnd[3] = 0;
1382        cmnd[4] = SCSI_REMOVAL_PREVENT;
1383        cmnd[5] = 0;
1384
1385        scsi_execute_async(sdev, cmnd, 6, DMA_NONE, NULL, 0, 0, 10 * HZ,
1386                           5, NULL, NULL, GFP_KERNEL);
1387}
1388
1389
1390/**
1391 * scsi_restart_operations - restart io operations to the specified host.
1392 * @shost:      Host we are restarting.
1393 *
1394 * Notes:
1395 *    When we entered the error handler, we blocked all further i/o to
1396 *    this device.  we need to 'reverse' this process.
1397 */
1398static void scsi_restart_operations(struct Scsi_Host *shost)
1399{
1400        struct scsi_device *sdev;
1401        unsigned long flags;
1402
1403        /*
1404         * If the door was locked, we need to insert a door lock request
1405         * onto the head of the SCSI request queue for the device.  There
1406         * is no point trying to lock the door of an off-line device.
1407         */
1408        shost_for_each_device(sdev, shost) {
1409                if (scsi_device_online(sdev) && sdev->locked)
1410                        scsi_eh_lock_door(sdev);
1411        }
1412
1413        /*
1414         * next free up anything directly waiting upon the host.  this
1415         * will be requests for character device operations, and also for
1416         * ioctls to queued block devices.
1417         */
1418        SCSI_LOG_ERROR_RECOVERY(3, printk("%s: waking up host to restart\n",
1419                                          __FUNCTION__));
1420
1421        spin_lock_irqsave(shost->host_lock, flags);
1422        if (scsi_host_set_state(shost, SHOST_RUNNING))
1423                if (scsi_host_set_state(shost, SHOST_CANCEL))
1424                        BUG_ON(scsi_host_set_state(shost, SHOST_DEL));
1425        spin_unlock_irqrestore(shost->host_lock, flags);
1426
1427        wake_up(&shost->host_wait);
1428
1429        /*
1430         * finally we need to re-initiate requests that may be pending.  we will
1431         * have had everything blocked while error handling is taking place, and
1432         * now that error recovery is done, we will need to ensure that these
1433         * requests are started.
1434         */
1435        scsi_run_host_queues(shost);
1436}
1437
/**
 * scsi_eh_ready_devs - Check device ready state and recover if not.
 * @shost:	host to be recovered.
 * @work_q:	&list_head for pending commands.
 * @done_q:	&list_head for processed commands.
 *
 * Description:
 *    Escalates through START_UNIT, bus device reset, bus reset and host
 *    reset, stopping as soon as one stage reports @work_q empty.  If all
 *    stages leave commands behind, the affected devices are offlined.
 */
void scsi_eh_ready_devs(struct Scsi_Host *shost,
			struct list_head *work_q,
			struct list_head *done_q)
{
	if (scsi_eh_stu(shost, work_q, done_q))
		return;
	if (scsi_eh_bus_device_reset(shost, work_q, done_q))
		return;
	if (scsi_eh_bus_reset(shost, work_q, done_q))
		return;
	if (scsi_eh_host_reset(work_q, done_q))
		return;

	/* Nothing helped: give up on whatever is still pending. */
	scsi_eh_offline_sdevs(work_q, done_q);
}
EXPORT_SYMBOL_GPL(scsi_eh_ready_devs);
1455
1456/**
1457 * scsi_eh_flush_done_q - finish processed commands or retry them.
1458 * @done_q:     list_head of processed commands.
1459 */
1460void scsi_eh_flush_done_q(struct list_head *done_q)
1461{
1462        struct scsi_cmnd *scmd, *next;
1463
1464        list_for_each_entry_safe(scmd, next, done_q, eh_entry) {
1465                list_del_init(&scmd->eh_entry);
1466                if (scsi_device_online(scmd->device) &&
1467                    !blk_noretry_request(scmd->request) &&
1468                    (++scmd->retries <= scmd->allowed)) {
1469                        SCSI_LOG_ERROR_RECOVERY(3, printk("%s: flush"
1470                                                          " retry cmd: %p\n",
1471                                                          current->comm,
1472                                                          scmd));
1473                                scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY);
1474                } else {
1475                        /*
1476                         * If just we got sense for the device (called
1477                         * scsi_eh_get_sense), scmd->result is already
1478                         * set, do not set DRIVER_TIMEOUT.
1479                         */
1480                        if (!scmd->result)
1481                                scmd->result |= (DRIVER_TIMEOUT << 24);
1482                        SCSI_LOG_ERROR_RECOVERY(3, printk("%s: flush finish"
1483                                                        " cmd: %p\n",
1484                                                        current->comm, scmd));
1485                        scsi_finish_command(scmd);
1486                }
1487        }
1488}
1489EXPORT_SYMBOL(scsi_eh_flush_done_q);
1490
1491/**
1492 * scsi_unjam_host - Attempt to fix a host which has a cmd that failed.
1493 * @shost:      Host to unjam.
1494 *
1495 * Notes:
1496 *    When we come in here, we *know* that all commands on the bus have
1497 *    either completed, failed or timed out.  we also know that no further
1498 *    commands are being sent to the host, so things are relatively quiet
1499 *    and we have freedom to fiddle with things as we wish.
1500 *
1501 *    This is only the *default* implementation.  it is possible for
1502 *    individual drivers to supply their own version of this function, and
1503 *    if the maintainer wishes to do this, it is strongly suggested that
1504 *    this function be taken as a template and modified.  this function
1505 *    was designed to correctly handle problems for about 95% of the
1506 *    different cases out there, and it should always provide at least a
1507 *    reasonable amount of error recovery.
1508 *
1509 *    Any command marked 'failed' or 'timeout' must eventually have
1510 *    scsi_finish_cmd() called for it.  we do all of the retry stuff
1511 *    here, so when we restart the host after we return it should have an
1512 *    empty queue.
1513 */
1514static void scsi_unjam_host(struct Scsi_Host *shost)
1515{
1516        unsigned long flags;
1517        LIST_HEAD(eh_work_q);
1518        LIST_HEAD(eh_done_q);
1519
1520        spin_lock_irqsave(shost->host_lock, flags);
1521        list_splice_init(&shost->eh_cmd_q, &eh_work_q);
1522        spin_unlock_irqrestore(shost->host_lock, flags);
1523
1524        SCSI_LOG_ERROR_RECOVERY(1, scsi_eh_prt_fail_stats(shost, &eh_work_q));
1525
1526        if (!scsi_eh_get_sense(&eh_work_q, &eh_done_q))
1527                if (!scsi_eh_abort_cmds(&eh_work_q, &eh_done_q))
1528                        scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q);
1529
1530        scsi_eh_flush_done_q(&eh_done_q);
1531}
1532
1533/**
1534 * scsi_error_handler - SCSI error handler thread
1535 * @data:       Host for which we are running.
1536 *
1537 * Notes:
1538 *    This is the main error handling loop.  This is run as a kernel thread
1539 *    for every SCSI host and handles all error handling activity.
1540 */
1541int scsi_error_handler(void *data)
1542{
1543        struct Scsi_Host *shost = data;
1544
1545        /*
1546         * We use TASK_INTERRUPTIBLE so that the thread is not
1547         * counted against the load average as a running process.
1548         * We never actually get interrupted because kthread_run
1549         * disables singal delivery for the created thread.
1550         */
1551        set_current_state(TASK_INTERRUPTIBLE);
1552        while (!kthread_should_stop()) {
1553                if ((shost->host_failed == 0 && shost->host_eh_scheduled == 0) ||
1554                    shost->host_failed != shost->host_busy) {
1555                        SCSI_LOG_ERROR_RECOVERY(1,
1556                                printk("Error handler scsi_eh_%d sleeping\n",
1557                                        shost->host_no));
1558                        schedule();
1559                        set_current_state(TASK_INTERRUPTIBLE);
1560                        continue;
1561                }
1562
1563                __set_current_state(TASK_RUNNING);
1564                SCSI_LOG_ERROR_RECOVERY(1,
1565                        printk("Error handler scsi_eh_%d waking up\n",
1566                                shost->host_no));
1567
1568                /*
1569                 * We have a host that is failing for some reason.  Figure out
1570                 * what we need to do to get it up and online again (if we can).
1571                 * If we fail, we end up taking the thing offline.
1572                 */
1573                if (shost->transportt->eh_strategy_handler)
1574                        shost->transportt->eh_strategy_handler(shost);
1575                else
1576                        scsi_unjam_host(shost);
1577
1578                /*
1579                 * Note - if the above fails completely, the action is to take
1580                 * individual devices offline and flush the queue of any
1581                 * outstanding requests that may have been pending.  When we
1582                 * restart, we restart any I/O to any other devices on the bus
1583                 * which are still online.
1584                 */
1585                scsi_restart_operations(shost);
1586                set_current_state(TASK_INTERRUPTIBLE);
1587        }
1588        __set_current_state(TASK_RUNNING);
1589
1590        SCSI_LOG_ERROR_RECOVERY(1,
1591                printk("Error handler scsi_eh_%d exiting\n", shost->host_no));
1592        shost->ehandler = NULL;
1593        return 0;
1594}
1595
1596/*
1597 * Function:    scsi_report_bus_reset()
1598 *
1599 * Purpose:     Utility function used by low-level drivers to report that
1600 *              they have observed a bus reset on the bus being handled.
1601 *
1602 * Arguments:   shost       - Host in question
1603 *              channel     - channel on which reset was observed.
1604 *
1605 * Returns:     Nothing
1606 *
1607 * Lock status: Host lock must be held.
1608 *
1609 * Notes:       This only needs to be called if the reset is one which
1610 *              originates from an unknown location.  Resets originated
1611 *              by the mid-level itself don't need to call this, but there
1612 *              should be no harm.
1613 *
1614 *              The main purpose of this is to make sure that a CHECK_CONDITION
1615 *              is properly treated.
1616 */
1617void scsi_report_bus_reset(struct Scsi_Host *shost, int channel)
1618{
1619        struct scsi_device *sdev;
1620
1621        __shost_for_each_device(sdev, shost) {
1622                if (channel == sdev_channel(sdev)) {
1623                        sdev->was_reset = 1;
1624                        sdev->expecting_cc_ua = 1;
1625                }
1626        }
1627}
1628EXPORT_SYMBOL(scsi_report_bus_reset);
1629
1630/*
1631 * Function:    scsi_report_device_reset()
1632 *
1633 * Purpose:     Utility function used by low-level drivers to report that
1634 *              they have observed a device reset on the device being handled.
1635 *
1636 * Arguments:   shost       - Host in question
1637 *              channel     - channel on which reset was observed
1638 *              target      - target on which reset was observed
1639 *
1640 * Returns:     Nothing
1641 *
1642 * Lock status: Host lock must be held
1643 *
1644 * Notes:       This only needs to be called if the reset is one which
1645 *              originates from an unknown location.  Resets originated
1646 *              by the mid-level itself don't need to call this, but there
1647 *              should be no harm.
1648 *
1649 *              The main purpose of this is to make sure that a CHECK_CONDITION
1650 *              is properly treated.
1651 */
1652void scsi_report_device_reset(struct Scsi_Host *shost, int channel, int target)
1653{
1654        struct scsi_device *sdev;
1655
1656        __shost_for_each_device(sdev, shost) {
1657                if (channel == sdev_channel(sdev) &&
1658                    target == sdev_id(sdev)) {
1659                        sdev->was_reset = 1;
1660                        sdev->expecting_cc_ua = 1;
1661                }
1662        }
1663}
1664EXPORT_SYMBOL(scsi_report_device_reset);
1665
/*
 * Completion callback installed as scmd->scsi_done by scsi_reset_provider().
 * It intentionally does nothing.
 */
static void scsi_reset_provider_done_command(struct scsi_cmnd *scmd)
{
        /* nothing to do */
}
1670
1671/*
1672 * Function:    scsi_reset_provider
1673 *
1674 * Purpose:     Send requested reset to a bus or device at any phase.
1675 *
1676 * Arguments:   device  - device to send reset to
1677 *              flag - reset type (see scsi.h)
1678 *
1679 * Returns:     SUCCESS/FAILURE.
1680 *
1681 * Notes:       This is used by the SCSI Generic driver to provide
1682 *              Bus/Device reset capability.
1683 */
1684int
1685scsi_reset_provider(struct scsi_device *dev, int flag)
1686{
1687        struct scsi_cmnd *scmd = scsi_get_command(dev, GFP_KERNEL);
1688        struct Scsi_Host *shost = dev->host;
1689        struct request req;
1690        unsigned long flags;
1691        int rtn;
1692
1693        scmd->request = &req;
1694        memset(&scmd->eh_timeout, 0, sizeof(scmd->eh_timeout));
1695
1696        memset(&scmd->cmnd, '\0', sizeof(scmd->cmnd));
1697    
1698        scmd->scsi_done         = scsi_reset_provider_done_command;
1699        memset(&scmd->sdb, 0, sizeof(scmd->sdb));
1700
1701        scmd->cmd_len                   = 0;
1702
1703        scmd->sc_data_direction         = DMA_BIDIRECTIONAL;
1704
1705        init_timer(&scmd->eh_timeout);
1706
1707        spin_lock_irqsave(shost->host_lock, flags);
1708        shost->tmf_in_progress = 1;
1709        spin_unlock_irqrestore(shost->host_lock, flags);
1710
1711        switch (flag) {
1712        case SCSI_TRY_RESET_DEVICE:
1713                rtn = scsi_try_bus_device_reset(scmd);
1714                if (rtn == SUCCESS)
1715                        break;
1716                /* FALLTHROUGH */
1717        case SCSI_TRY_RESET_BUS:
1718                rtn = scsi_try_bus_reset(scmd);
1719                if (rtn == SUCCESS)
1720                        break;
1721                /* FALLTHROUGH */
1722        case SCSI_TRY_RESET_HOST:
1723                rtn = scsi_try_host_reset(scmd);
1724                break;
1725        default:
1726                rtn = FAILED;
1727        }
1728
1729        spin_lock_irqsave(shost->host_lock, flags);
1730        shost->tmf_in_progress = 0;
1731        spin_unlock_irqrestore(shost->host_lock, flags);
1732
1733        /*
1734         * be sure to wake up anyone who was sleeping or had their queue
1735         * suspended while we performed the TMF.
1736         */
1737        SCSI_LOG_ERROR_RECOVERY(3,
1738                printk("%s: waking up host to restart after TMF\n",
1739                __FUNCTION__));
1740
1741        wake_up(&shost->host_wait);
1742
1743        scsi_run_host_queues(shost);
1744
1745        scsi_next_command(scmd);
1746        return rtn;
1747}
1748EXPORT_SYMBOL(scsi_reset_provider);
1749
1750/**
1751 * scsi_normalize_sense - normalize main elements from either fixed or
1752 *                      descriptor sense data format into a common format.
1753 *
1754 * @sense_buffer:       byte array containing sense data returned by device
1755 * @sb_len:             number of valid bytes in sense_buffer
1756 * @sshdr:              pointer to instance of structure that common
1757 *                      elements are written to.
1758 *
1759 * Notes:
1760 *      The "main elements" from sense data are: response_code, sense_key,
1761 *      asc, ascq and additional_length (only for descriptor format).
1762 *
1763 *      Typically this function can be called after a device has
1764 *      responded to a SCSI command with the CHECK_CONDITION status.
1765 *
1766 * Return value:
1767 *      1 if valid sense data information found, else 0;
1768 */
1769int scsi_normalize_sense(const u8 *sense_buffer, int sb_len,
1770                         struct scsi_sense_hdr *sshdr)
1771{
1772        if (!sense_buffer || !sb_len)
1773                return 0;
1774
1775        memset(sshdr, 0, sizeof(struct scsi_sense_hdr));
1776
1777        sshdr->response_code = (sense_buffer[0] & 0x7f);
1778
1779        if (!scsi_sense_valid(sshdr))
1780                return 0;
1781
1782        if (sshdr->response_code >= 0x72) {
1783                /*
1784                 * descriptor format
1785                 */
1786                if (sb_len > 1)
1787                        sshdr->sense_key = (sense_buffer[1] & 0xf);
1788                if (sb_len > 2)
1789                        sshdr->asc = sense_buffer[2];
1790                if (sb_len > 3)
1791                        sshdr->ascq = sense_buffer[3];
1792                if (sb_len > 7)
1793                        sshdr->additional_length = sense_buffer[7];
1794        } else {
1795                /* 
1796                 * fixed format
1797                 */
1798                if (sb_len > 2)
1799                        sshdr->sense_key = (sense_buffer[2] & 0xf);
1800                if (sb_len > 7) {
1801                        sb_len = (sb_len < (sense_buffer[7] + 8)) ?
1802                                         sb_len : (sense_buffer[7] + 8);
1803                        if (sb_len > 12)
1804                                sshdr->asc = sense_buffer[12];
1805                        if (sb_len > 13)
1806                                sshdr->ascq = sense_buffer[13];
1807                }
1808        }
1809
1810        return 1;
1811}
1812EXPORT_SYMBOL(scsi_normalize_sense);
1813
1814int scsi_command_normalize_sense(struct scsi_cmnd *cmd,
1815                                 struct scsi_sense_hdr *sshdr)
1816{
1817        return scsi_normalize_sense(cmd->sense_buffer,
1818                        SCSI_SENSE_BUFFERSIZE, sshdr);
1819}
1820EXPORT_SYMBOL(scsi_command_normalize_sense);
1821
1822/**
1823 * scsi_sense_desc_find - search for a given descriptor type in descriptor sense data format.
1824 * @sense_buffer:       byte array of descriptor format sense data
1825 * @sb_len:             number of valid bytes in sense_buffer
1826 * @desc_type:          value of descriptor type to find
1827 *                      (e.g. 0 -> information)
1828 *
1829 * Notes:
1830 *      only valid when sense data is in descriptor format
1831 *
1832 * Return value:
1833 *      pointer to start of (first) descriptor if found else NULL
1834 */
1835const u8 * scsi_sense_desc_find(const u8 * sense_buffer, int sb_len,
1836                                int desc_type)
1837{
1838        int add_sen_len, add_len, desc_len, k;
1839        const u8 * descp;
1840
1841        if ((sb_len < 8) || (0 == (add_sen_len = sense_buffer[7])))
1842                return NULL;
1843        if ((sense_buffer[0] < 0x72) || (sense_buffer[0] > 0x73))
1844                return NULL;
1845        add_sen_len = (add_sen_len < (sb_len - 8)) ?
1846                        add_sen_len : (sb_len - 8);
1847        descp = &sense_buffer[8];
1848        for (desc_len = 0, k = 0; k < add_sen_len; k += desc_len) {
1849                descp += desc_len;
1850                add_len = (k < (add_sen_len - 1)) ? descp[1]: -1;
1851                desc_len = add_len + 2;
1852                if (descp[0] == desc_type)
1853                        return descp;
1854                if (add_len < 0) // short descriptor ??
1855                        break;
1856        }
1857        return NULL;
1858}
1859EXPORT_SYMBOL(scsi_sense_desc_find);
1860
1861/**
1862 * scsi_get_sense_info_fld - get information field from sense data (either fixed or descriptor format)
1863 * @sense_buffer:       byte array of sense data
1864 * @sb_len:             number of valid bytes in sense_buffer
1865 * @info_out:           pointer to 64 integer where 8 or 4 byte information
1866 *                      field will be placed if found.
1867 *
1868 * Return value:
1869 *      1 if information field found, 0 if not found.
1870 */
1871int scsi_get_sense_info_fld(const u8 * sense_buffer, int sb_len,
1872                            u64 * info_out)
1873{
1874        int j;
1875        const u8 * ucp;
1876        u64 ull;
1877
1878        if (sb_len < 7)
1879                return 0;
1880        switch (sense_buffer[0] & 0x7f) {
1881        case 0x70:
1882        case 0x71:
1883                if (sense_buffer[0] & 0x80) {
1884                        *info_out = (sense_buffer[3] << 24) +
1885                                    (sense_buffer[4] << 16) +
1886                                    (sense_buffer[5] << 8) + sense_buffer[6];
1887                        return 1;
1888                } else
1889                        return 0;
1890        case 0x72:
1891        case 0x73:
1892                ucp = scsi_sense_desc_find(sense_buffer, sb_len,
1893                                           0 /* info desc */);
1894                if (ucp && (0xa == ucp[1])) {
1895                        ull = 0;
1896                        for (j = 0; j < 8; ++j) {
1897                                if (j > 0)
1898                                        ull <<= 8;
1899                                ull |= ucp[4 + j];
1900                        }
1901                        *info_out = ull;
1902                        return 1;
1903                } else
1904                        return 0;
1905        default:
1906                return 0;
1907        }
1908}
1909EXPORT_SYMBOL(scsi_get_sense_info_fld);
1910
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.