linux/include/linux/raid/md_k.h
<<
>>
Prefs
   1/*
   2   md_k.h : kernel internal structure of the Linux MD driver
   3          Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman
   4          
   5   This program is free software; you can redistribute it and/or modify
   6   it under the terms of the GNU General Public License as published by
   7   the Free Software Foundation; either version 2, or (at your option)
   8   any later version.
   9   
  10   You should have received a copy of the GNU General Public License
  11   (for example /usr/src/linux/COPYING); if not, write to the Free
  12   Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  
  13*/
  14
  15#ifndef _MD_K_H
  16#define _MD_K_H
  17
  18/* and dm-bio-list.h is not under include/linux because.... ??? */
  19#include "../../../drivers/md/dm-bio-list.h"
  20
  21#ifdef CONFIG_BLOCK
  22
  23#define LEVEL_MULTIPATH         (-4)
  24#define LEVEL_LINEAR            (-1)
  25#define LEVEL_FAULTY            (-5)
  26
  27/* we need a value for 'no level specified' and 0
  28 * means 'raid0', so we need something else.  This is
  29 * for internal use only
  30 */
  31#define LEVEL_NONE              (-1000000)
  32
  33#define MaxSector (~(sector_t)0)
  34
  35typedef struct mddev_s mddev_t;
  36typedef struct mdk_rdev_s mdk_rdev_t;
  37
  38/*
  39 * options passed in raidrun:
  40 */
  41
  42/* Currently this must fit in an 'int' */
  43#define MAX_CHUNK_SIZE (1<<30)
  44
  45/*
  46 * MD's 'extended' device
  47 */
  48struct mdk_rdev_s
  49{
  50        struct list_head same_set;      /* RAID devices within the same set */
  51
  52        sector_t size;                  /* Device size (in blocks) */
  53        mddev_t *mddev;                 /* RAID array if running */
  54        long last_events;               /* IO event timestamp */
  55
  56        struct block_device *bdev;      /* block device handle */
  57
  58        struct page     *sb_page;
  59        int             sb_loaded;
  60        __u64           sb_events;
  61        sector_t        data_offset;    /* start of data in array */
  62        sector_t        sb_offset;
  63        int             sb_size;        /* bytes in the superblock */
  64        int             preferred_minor;        /* autorun support */
  65
  66        struct kobject  kobj;
  67
  68        /* A device can be in one of three states based on two flags:
  69         * Not working:   faulty==1 in_sync==0
  70         * Fully working: faulty==0 in_sync==1
  71         * Working, but not
  72         * in sync with array
  73         *                faulty==0 in_sync==0
  74         *
  75         * It can never have faulty==1, in_sync==1
  76         * This reduces the burden of testing multiple flags in many cases
  77         */
  78
  79        unsigned long   flags;
  80#define Faulty          1               /* device is known to have a fault */
  81#define In_sync         2               /* device is in_sync with rest of array */
  82#define WriteMostly     4               /* Avoid reading if at all possible */
  83#define BarriersNotsupp 5               /* BIO_RW_BARRIER is not supported */
  84#define AllReserved     6               /* If whole device is reserved for
  85                                         * one array */
  86#define AutoDetected    7               /* added by auto-detect */
  87#define Blocked         8               /* An error occured on an externally
  88                                         * managed array, don't allow writes
  89                                         * until it is cleared */
  90        wait_queue_head_t blocked_wait;
  91
  92        int desc_nr;                    /* descriptor index in the superblock */
  93        int raid_disk;                  /* role of device in array */
  94        int saved_raid_disk;            /* role that device used to have in the
  95                                         * array and could again if we did a partial
  96                                         * resync from the bitmap
  97                                         */
  98        sector_t        recovery_offset;/* If this device has been partially
  99                                         * recovered, this is where we were
 100                                         * up to.
 101                                         */
 102
 103        atomic_t        nr_pending;     /* number of pending requests.
 104                                         * only maintained for arrays that
 105                                         * support hot removal
 106                                         */
 107        atomic_t        read_errors;    /* number of consecutive read errors that
 108                                         * we have tried to ignore.
 109                                         */
 110        atomic_t        corrected_errors; /* number of corrected read errors,
 111                                           * for reporting to userspace and storing
 112                                           * in superblock.
 113                                           */
 114        struct work_struct del_work;    /* used for delayed sysfs removal */
 115};
 116
 117struct mddev_s
 118{
 119        void                            *private;
 120        struct mdk_personality          *pers;
 121        dev_t                           unit;
 122        int                             md_minor;
 123        struct list_head                disks;
 124        unsigned long                   flags;
 125#define MD_CHANGE_DEVS  0       /* Some device status has changed */
 126#define MD_CHANGE_CLEAN 1       /* transition to or from 'clean' */
 127#define MD_CHANGE_PENDING 2     /* superblock update in progress */
 128
 129        int                             ro;
 130
 131        struct gendisk                  *gendisk;
 132
 133        struct kobject                  kobj;
 134
 135        /* Superblock information */
 136        int                             major_version,
 137                                        minor_version,
 138                                        patch_version;
 139        int                             persistent;
 140        int                             external;       /* metadata is
 141                                                         * managed externally */
 142        char                            metadata_type[17]; /* externally set*/
 143        int                             chunk_size;
 144        time_t                          ctime, utime;
 145        int                             level, layout;
 146        char                            clevel[16];
 147        int                             raid_disks;
 148        int                             max_disks;
 149        sector_t                        size; /* used size of component devices */
 150        sector_t                        array_size; /* exported array size */
 151        __u64                           events;
 152
 153        char                            uuid[16];
 154
 155        /* If the array is being reshaped, we need to record the
 156         * new shape and an indication of where we are up to.
 157         * This is written to the superblock.
 158         * If reshape_position is MaxSector, then no reshape is happening (yet).
 159         */
 160        sector_t                        reshape_position;
 161        int                             delta_disks, new_level, new_layout, new_chunk;
 162
 163        struct mdk_thread_s             *thread;        /* management thread */
 164        struct mdk_thread_s             *sync_thread;   /* doing resync or reconstruct */
 165        sector_t                        curr_resync;    /* last block scheduled */
 166        unsigned long                   resync_mark;    /* a recent timestamp */
 167        sector_t                        resync_mark_cnt;/* blocks written at resync_mark */
 168        sector_t                        curr_mark_cnt; /* blocks scheduled now */
 169
 170        sector_t                        resync_max_sectors; /* may be set by personality */
 171
 172        sector_t                        resync_mismatches; /* count of sectors where
 173                                                            * parity/replica mismatch found
 174                                                            */
 175
 176        /* allow user-space to request suspension of IO to regions of the array */
 177        sector_t                        suspend_lo;
 178        sector_t                        suspend_hi;
 179        /* if zero, use the system-wide default */
 180        int                             sync_speed_min;
 181        int                             sync_speed_max;
 182
 183        /* resync even though the same disks are shared among md-devices */
 184        int                             parallel_resync;
 185
 186        int                             ok_start_degraded;
 187        /* recovery/resync flags 
 188         * NEEDED:   we might need to start a resync/recover
 189         * RUNNING:  a thread is running, or about to be started
 190         * SYNC:     actually doing a resync, not a recovery
 191         * INTR:     resync needs to be aborted for some reason
 192         * DONE:     thread is done and is waiting to be reaped
 193         * REQUEST:  user-space has requested a sync (used with SYNC)
 194         * CHECK:    user-space request for for check-only, no repair
 195         * RESHAPE:  A reshape is happening
 196         *
 197         * If neither SYNC or RESHAPE are set, then it is a recovery.
 198         */
 199#define MD_RECOVERY_RUNNING     0
 200#define MD_RECOVERY_SYNC        1
 201#define MD_RECOVERY_INTR        3
 202#define MD_RECOVERY_DONE        4
 203#define MD_RECOVERY_NEEDED      5
 204#define MD_RECOVERY_REQUESTED   6
 205#define MD_RECOVERY_CHECK       7
 206#define MD_RECOVERY_RESHAPE     8
 207#define MD_RECOVERY_FROZEN      9
 208
 209        unsigned long                   recovery;
 210
 211        int                             in_sync;        /* know to not need resync */
 212        struct mutex                    reconfig_mutex;
 213        atomic_t                        active;
 214
 215        int                             changed;        /* true if we might need to reread partition info */
 216        int                             degraded;       /* whether md should consider
 217                                                         * adding a spare
 218                                                         */
 219        int                             barriers_work;  /* initialised to true, cleared as soon
 220                                                         * as a barrier request to slave
 221                                                         * fails.  Only supported
 222                                                         */
 223        struct bio                      *biolist;       /* bios that need to be retried
 224                                                         * because BIO_RW_BARRIER is not supported
 225                                                         */
 226
 227        atomic_t                        recovery_active; /* blocks scheduled, but not written */
 228        wait_queue_head_t               recovery_wait;
 229        sector_t                        recovery_cp;
 230        sector_t                        resync_max;     /* resync should pause
 231                                                         * when it gets here */
 232
 233        spinlock_t                      write_lock;
 234        wait_queue_head_t               sb_wait;        /* for waiting on superblock updates */
 235        atomic_t                        pending_writes; /* number of active superblock writes */
 236
 237        unsigned int                    safemode;       /* if set, update "clean" superblock
 238                                                         * when no writes pending.
 239                                                         */ 
 240        unsigned int                    safemode_delay;
 241        struct timer_list               safemode_timer;
 242        atomic_t                        writes_pending; 
 243        struct request_queue            *queue; /* for plugging ... */
 244
 245        atomic_t                        write_behind; /* outstanding async IO */
 246        unsigned int                    max_write_behind; /* 0 = sync */
 247
 248        struct bitmap                   *bitmap; /* the bitmap for the device */
 249        struct file                     *bitmap_file; /* the bitmap file */
 250        long                            bitmap_offset; /* offset from superblock of
 251                                                        * start of bitmap. May be
 252                                                        * negative, but not '0'
 253                                                        */
 254        long                            default_bitmap_offset; /* this is the offset to use when
 255                                                                * hot-adding a bitmap.  It should
 256                                                                * eventually be settable by sysfs.
 257                                                                */
 258
 259        struct list_head                all_mddevs;
 260};
 261
 262
 263static inline void rdev_dec_pending(mdk_rdev_t *rdev, mddev_t *mddev)
 264{
 265        int faulty = test_bit(Faulty, &rdev->flags);
 266        if (atomic_dec_and_test(&rdev->nr_pending) && faulty)
 267                set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 268}
 269
 270static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors)
 271{
 272        atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io);
 273}
 274
 275struct mdk_personality
 276{
 277        char *name;
 278        int level;
 279        struct list_head list;
 280        struct module *owner;
 281        int (*make_request)(struct request_queue *q, struct bio *bio);
 282        int (*run)(mddev_t *mddev);
 283        int (*stop)(mddev_t *mddev);
 284        void (*status)(struct seq_file *seq, mddev_t *mddev);
 285        /* error_handler must set ->faulty and clear ->in_sync
 286         * if appropriate, and should abort recovery if needed 
 287         */
 288        void (*error_handler)(mddev_t *mddev, mdk_rdev_t *rdev);
 289        int (*hot_add_disk) (mddev_t *mddev, mdk_rdev_t *rdev);
 290        int (*hot_remove_disk) (mddev_t *mddev, int number);
 291        int (*spare_active) (mddev_t *mddev);
 292        sector_t (*sync_request)(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster);
 293        int (*resize) (mddev_t *mddev, sector_t sectors);
 294        int (*check_reshape) (mddev_t *mddev);
 295        int (*start_reshape) (mddev_t *mddev);
 296        int (*reconfig) (mddev_t *mddev, int layout, int chunk_size);
 297        /* quiesce moves between quiescence states
 298         * 0 - fully active
 299         * 1 - no new requests allowed
 300         * others - reserved
 301         */
 302        void (*quiesce) (mddev_t *mddev, int state);
 303};
 304
 305
 306struct md_sysfs_entry {
 307        struct attribute attr;
 308        ssize_t (*show)(mddev_t *, char *);
 309        ssize_t (*store)(mddev_t *, const char *, size_t);
 310};
 311
 312
 313static inline char * mdname (mddev_t * mddev)
 314{
 315        return mddev->gendisk ? mddev->gendisk->disk_name : "mdX";
 316}
 317
 318/*
 319 * iterates through some rdev ringlist. It's safe to remove the
 320 * current 'rdev'. Dont touch 'tmp' though.
 321 */
 322#define rdev_for_each_list(rdev, tmp, list)                             \
 323                                                                        \
 324        for ((tmp) = (list).next;                                       \
 325                (rdev) = (list_entry((tmp), mdk_rdev_t, same_set)),     \
 326                        (tmp) = (tmp)->next, (tmp)->prev != &(list)     \
 327                ; )
 328/*
 329 * iterates through the 'same array disks' ringlist
 330 */
 331#define rdev_for_each(rdev, tmp, mddev)                         \
 332        rdev_for_each_list(rdev, tmp, (mddev)->disks)
 333
 334typedef struct mdk_thread_s {
 335        void                    (*run) (mddev_t *mddev);
 336        mddev_t                 *mddev;
 337        wait_queue_head_t       wqueue;
 338        unsigned long           flags;
 339        struct task_struct      *tsk;
 340        unsigned long           timeout;
 341} mdk_thread_t;
 342
 343#define THREAD_WAKEUP  0
 344
 345#define __wait_event_lock_irq(wq, condition, lock, cmd)                 \
 346do {                                                                    \
 347        wait_queue_t __wait;                                            \
 348        init_waitqueue_entry(&__wait, current);                         \
 349                                                                        \
 350        add_wait_queue(&wq, &__wait);                                   \
 351        for (;;) {                                                      \
 352                set_current_state(TASK_UNINTERRUPTIBLE);                \
 353                if (condition)                                          \
 354                        break;                                          \
 355                spin_unlock_irq(&lock);                                 \
 356                cmd;                                                    \
 357                schedule();                                             \
 358                spin_lock_irq(&lock);                                   \
 359        }                                                               \
 360        current->state = TASK_RUNNING;                                  \
 361        remove_wait_queue(&wq, &__wait);                                \
 362} while (0)
 363
 364#define wait_event_lock_irq(wq, condition, lock, cmd)                   \
 365do {                                                                    \
 366        if (condition)                                                  \
 367                break;                                                  \
 368        __wait_event_lock_irq(wq, condition, lock, cmd);                \
 369} while (0)
 370
 371static inline void safe_put_page(struct page *p)
 372{
 373        if (p) put_page(p);
 374}
 375
 376#endif /* CONFIG_BLOCK */
 377#endif
 378
 379
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.