linux-old/drivers/md/lvm.c
<<
>>
Prefs
   1/*
   2 * kernel/lvm.c
   3 *
   4 * Copyright (C) 1997 - 2002  Heinz Mauelshagen, Sistina Software
   5 *
   6 * February-November 1997
   7 * April-May,July-August,November 1998
   8 * January-March,May,July,September,October 1999
   9 * January,February,July,September-November 2000
  10 * January-May,June,October 2001
  11 * May-July 2002
  12 *
  13 *
  14 * LVM driver is free software; you can redistribute it and/or modify
  15 * it under the terms of the GNU General Public License as published by
  16 * the Free Software Foundation; either version 2, or (at your option)
  17 * any later version.
  18 *
  19 * LVM driver is distributed in the hope that it will be useful,
  20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22 * GNU General Public License for more details.
  23 *
  24 * You should have received a copy of the GNU General Public License
  25 * along with GNU CC; see the file COPYING.  If not, write to
  26 * the Free Software Foundation, 59 Temple Place - Suite 330,
  27 * Boston, MA 02111-1307, USA.
  28 *
  29 */
  30
  31/*
  32 * Changelog
  33 *
  34 *    09/11/1997 - added chr ioctls VG_STATUS_GET_COUNT
  35 *                 and VG_STATUS_GET_NAMELIST
  36 *    18/01/1998 - change lvm_chr_open/close lock handling
  37 *    30/04/1998 - changed LV_STATUS ioctl to LV_STATUS_BYNAME and
  38 *               - added   LV_STATUS_BYINDEX ioctl
  39 *               - used lvm_status_byname_req_t and
  40 *                      lvm_status_byindex_req_t vars
  41 *    04/05/1998 - added multiple device support
  42 *    08/05/1998 - added support to set/clear extendable flag in volume group
  43 *    09/05/1998 - changed output of lvm_proc_get_global_info() because of
  44 *                 support for free (eg. longer) logical volume names
  45 *    12/05/1998 - added spin_locks (thanks to Pascal van Dam
  46 *                 <pascal@ramoth.xs4all.nl>)
  47 *    25/05/1998 - fixed handling of locked PEs in lvm_map() and
  48 *                 lvm_chr_ioctl()
  49 *    26/05/1998 - reactivated verify_area by access_ok
  50 *    07/06/1998 - used vmalloc/vfree instead of kmalloc/kfree to go
  51 *                 beyond 128/256 KB max allocation limit per call
  52 *               - #ifdef blocked spin_lock calls to avoid compile errors
  53 *                 with 2.0.x
  54 *    11/06/1998 - another enhancement to spinlock code in lvm_chr_open()
  55 *                 and use of LVM_VERSION_CODE instead of my own macros
  56 *                 (thanks to  Michael Marxmeier <mike@msede.com>)
  57 *    07/07/1998 - added statistics in lvm_map()
  58 *    08/07/1998 - saved statistics in lvm_do_lv_extend_reduce()
  59 *    25/07/1998 - used __initfunc macro
  60 *    02/08/1998 - changes for official char/block major numbers
  61 *    07/08/1998 - avoided init_module() and cleanup_module() to be static
  62 *    30/08/1998 - changed VG lv_open counter from sum of LV lv_open counters
  63 *                 to sum of LVs open (no matter how often each is)
  64 *    01/09/1998 - fixed lvm_gendisk.part[] index error
  65 *    07/09/1998 - added copying of lv_current_pe-array
  66 *                 in LV_STATUS_BYINDEX ioctl
  67 *    17/11/1998 - added KERN_* levels to printk
  68 *    13/01/1999 - fixed LV index bug in lvm_do_lv_create() which hit lvrename
  69 *    07/02/1999 - fixed spinlock handling bug in case of LVM_RESET
  70 *                 by moving spinlock code from lvm_chr_open()
  71 *                 to lvm_chr_ioctl()
  72 *               - added LVM_LOCK_LVM ioctl to lvm_chr_ioctl()
  73 *               - allowed LVM_RESET and retrieval commands to go ahead;
  74 *                 only other update ioctls are blocked now
  75 *               - fixed pv->pe to NULL for pv_status
  76 *               - using lv_req structure in lvm_chr_ioctl() now
  77 *               - fixed NULL ptr reference bug in lvm_do_lv_extend_reduce()
  78 *                 caused by uncontiguous PV array in lvm_chr_ioctl(VG_REDUCE)
  79 *    09/02/1999 - changed BLKRASET and BLKRAGET in lvm_chr_ioctl() to
   80 *                 handle logical volume private read ahead sector
  81 *               - implemented LV read_ahead handling with lvm_blk_read()
  82 *                 and lvm_blk_write()
  83 *    10/02/1999 - implemented 2.[12].* support function lvm_hd_name()
  84 *                 to be used in drivers/block/genhd.c by disk_name()
  85 *    12/02/1999 - fixed index bug in lvm_blk_ioctl(), HDIO_GETGEO
  86 *               - enhanced gendisk insert/remove handling
  87 *    16/02/1999 - changed to dynamic block minor number allocation to
  88 *                 have as much as 99 volume groups with 256 logical volumes
  89 *                 as the grand total; this allows having 1 volume group with
  90 *                 up to 256 logical volumes in it
  91 *    21/02/1999 - added LV open count information to proc filesystem
  92 *               - substituted redundant LVM_RESET code by calls
  93 *                 to lvm_do_vg_remove()
  94 *    22/02/1999 - used schedule_timeout() to be more responsive
  95 *                 in case of lvm_do_vg_remove() with lots of logical volumes
  96 *    19/03/1999 - fixed NULL pointer bug in module_init/lvm_init
  97 *    17/05/1999 - used DECLARE_WAIT_QUEUE_HEAD macro (>2.3.0)
  98 *               - enhanced lvm_hd_name support
  99 *    03/07/1999 - avoided use of KERNEL_VERSION macro based ifdefs and
 100 *                 memcpy_tofs/memcpy_fromfs macro redefinitions
 101 *    06/07/1999 - corrected reads/writes statistic counter copy in case
 102 *                 of striped logical volume
 103 *    28/07/1999 - implemented snapshot logical volumes
 104 *                 - lvm_chr_ioctl
 105 *                   - LV_STATUS_BYINDEX
 106 *                   - LV_STATUS_BYNAME
 107 *                 - lvm_do_lv_create
 108 *                 - lvm_do_lv_remove
 109 *                 - lvm_map
 110 *                 - new lvm_snapshot_remap_block
 111 *                 - new lvm_snapshot_remap_new_block
 112 *    08/10/1999 - implemented support for multiple snapshots per
 113 *                 original logical volume
 114 *    12/10/1999 - support for 2.3.19
 115 *    11/11/1999 - support for 2.3.28
 116 *    21/11/1999 - changed lvm_map() interface to buffer_head based
 117 *    19/12/1999 - support for 2.3.33
 118 *    01/01/2000 - changed locking concept in lvm_map(),
 119 *                 lvm_do_vg_create() and lvm_do_lv_remove()
 120 *    15/01/2000 - fixed PV_FLUSH bug in lvm_chr_ioctl()
 121 *    24/01/2000 - ported to 2.3.40 including Alan Cox's pointer changes etc.
 122 *    29/01/2000 - used kmalloc/kfree again for all small structures
 123 *    20/01/2000 - cleaned up lvm_chr_ioctl by moving code
  124 *                 to separate functions
 125 *               - avoided "/dev/" in proc filesystem output
 126 *               - avoided inline strings functions lvm_strlen etc.
 127 *    14/02/2000 - support for 2.3.43
  128 *               - integrated Andrea Arcangeli's snapshot code
 129 *    25/06/2000 - james (chip) , IKKHAYD! roffl
 130 *    26/06/2000 - enhanced lv_extend_reduce for snapshot logical volume
 131 *                 support
 132 *    06/09/2000 - added devfs support
 133 *    07/09/2000 - changed IOP version to 9
 134 *               - started to add new char ioctl LV_STATUS_BYDEV_T to support
 135 *                 getting an lv_t based on the dev_t of the Logical Volume
 136 *    14/09/2000 - enhanced lvm_do_lv_create to upcall VFS functions
 137 *                 to sync and lock, activate snapshot and unlock the FS
 138 *                 (to support journaled filesystems)
 139 *    18/09/2000 - hardsector size support
 140 *    27/09/2000 - implemented lvm_do_lv_rename() and lvm_do_vg_rename()
 141 *    30/10/2000 - added Andi Kleen's LV_BMAP ioctl to support LILO
 142 *    01/11/2000 - added memory information on hash tables to
 143 *                 lvm_proc_get_global_info()
 144 *    02/11/2000 - implemented /proc/lvm/ hierarchy
 145 *    22/11/2000 - changed lvm_do_create_proc_entry_of_pv () to work
 146 *                 with devfs
 147 *    26/11/2000 - corrected #ifdef locations for PROC_FS
 148 *    28/11/2000 - fixed lvm_do_vg_extend() NULL pointer BUG
 149 *               - fixed lvm_do_create_proc_entry_of_pv() buffer tampering BUG
 150 *    08/01/2001 - Removed conditional compiles related to PROC_FS,
 151 *                 procfs is always supported now. (JT)
 152 *    12/01/2001 - avoided flushing logical volume in case of shrinking
  153 *                 because of unnecessary overhead in case of heavy updates
 154 *    25/01/2001 - Allow RO open of an inactive LV so it can be reactivated.
 155 *    31/01/2001 - removed blk_init_queue/blk_cleanup_queue queueing will be
 156 *                 handled by the proper devices.
 157 *               - If you try and BMAP a snapshot you now get an -EPERM
 158 *    01/01/2001 - lvm_map() now calls buffer_IO_error on error for 2.4
 159 *               - factored __remap_snapshot out of lvm_map
 160 *    12/02/2001 - move devfs code to create VG before LVs
 161 *    13/02/2001 - allow VG_CREATE on /dev/lvm
 162 *    14/02/2001 - removed modversions.h
 163 *               - tidied device defines for blk.h
 164 *               - tidied debug statements
 165 *               - bug: vg[] member not set back to NULL if activation fails
 166 *               - more lvm_map tidying
 167 *    15/02/2001 - register /dev/lvm with devfs correctly (major/minor
 168 *                 were swapped)
 169 *    19/02/2001 - preallocated buffer_heads for rawio when using
 170 *                 snapshots [JT]
  171 *    28/02/2001 - introduced the P_DEV macro and changed some internal
 172 *                 functions to be static [AD]
 173 *    28/02/2001 - factored lvm_get_snapshot_use_rate out of blk_ioctl [AD]
 174 *               - fixed user address accessing bug in lvm_do_lv_create()
 175 *                 where the check for an existing LV takes place right at
 176 *                 the beginning
 177 *    01/03/2001 - Add VG_CREATE_OLD for IOP 10 compatibility
  178 *    02/03/2001 - Don't destroy usermode pointers in lv_t structures during
 179 *                 LV_STATUS_BYxxx
 180 *                 and remove redundant lv_t variables from same.
 181 *               - avoid compilation of lvm_dummy_device_request in case of
 182 *                 Linux >= 2.3.0 to avoid a warning
 183 *               - added lvm_name argument to printk in buffer allocation
 184 *                 in order to avoid a warning
 185 *    04/03/2001 - moved linux/version.h above first use of KERNEL_VERSION
 186 *                 macros
 187 *    05/03/2001 - restore copying pe_t array in lvm_do_lv_status_byname. For
 188 *                 lvdisplay -v (PC)
 189 *               - restore copying pe_t array in lvm_do_lv_status_byindex (HM)
 190 *               - added copying pe_t array in lvm_do_lv_status_bydev (HM)
 191 *               - enhanced lvm_do_lv_status_by{name,index,dev} to be capable
 192 *                 to copy the lv_block_exception_t array to userspace (HM)
 193 *    08/03/2001 - initialize new lv_ptr->lv_COW_table_iobuf for snapshots;
 194 *                 removed obsolete lv_ptr->lv_COW_table_page initialization
 195 *               - factored lvm_do_pv_flush out of lvm_chr_ioctl (HM)
 196 *    09/03/2001 - Added _lock_open_count to ensure we only drop the lock
 197 *                 when the locking process closes.
 198 *    05/04/2001 - Defer writes to an extent that is being moved [JT]
 199 *    05/04/2001 - use b_rdev and b_rsector rather than b_dev and b_blocknr in
 200 *                 lvm_map() in order to make stacking devices more happy (HM)
 201 *    11/04/2001 - cleaned up the pvmove queue code. I no longer retain the
 202 *                 rw flag, instead WRITEA's are just dropped [JT]
 203 *    30/04/2001 - added KERNEL_VERSION > 2.4.3 get_hardsect_size() rather
 204 *                 than get_hardblocksize() call
 205 *    03/05/2001 - Use copy_to/from_user to preserve pointers in
 206 *                 lvm_do_status_by*
 207 *    11/05/2001 - avoid accesses to inactive snapshot data in
 208 *                 __update_hardsectsize() and lvm_do_lv_extend_reduce() (JW)
 209 *    28/05/2001 - implemented missing BLKSSZGET ioctl
 210 *    05/06/2001 - Move _pe_lock out of fast path for lvm_map when no PEs
 211 *                 locked.  Make buffer queue flush not need locking.
 212 *                 Fix lvm_user_bmap() to set b_rsector for new lvm_map(). [AED]
 213 *    30/06/2001 - Speed up __update_hardsectsize() by checking if PVs have
 214 *                 the same hardsectsize (very likely) before scanning all LEs
 215 *                 in the LV each time.  [AED]
 216 *    12/10/2001 - Use add/del_gendisk() routines in 2.4.10+
 217 *    01/11/2001 - Backport read_ahead change from Linus kernel [AED]
 218 *    24/05/2002 - fixed locking bug in lvm_do_le_remap() introduced with 1.0.4
 219 *    13/06/2002 - use blk_ioctl() to support various standard block ioctls
 220 *               - support HDIO_GETGEO_BIG ioctl
 221 *    05/07/2002 - fixed OBO error on vg array access [benh@kernel.crashing.org]
 222 *    22/07/2002 - streamlined blk_ioctl() call
 223 *
 224 */
 225
 226#include <linux/version.h>
 227
 228#define MAJOR_NR LVM_BLK_MAJOR
 229#define DEVICE_OFF(device)
 230#define LOCAL_END_REQUEST
 231
 232/* lvm_do_lv_create calls fsync_dev_lockfs()/unlockfs() */
 233/* #define      LVM_VFS_ENHANCEMENT */
 234
 235#include <linux/config.h>
 236#include <linux/module.h>
 237#include <linux/kernel.h>
 238#include <linux/vmalloc.h>
 239
 240#include <linux/slab.h>
 241#include <linux/init.h>
 242
 243#include <linux/hdreg.h>
 244#include <linux/stat.h>
 245#include <linux/fs.h>
 246#include <linux/proc_fs.h>
 247#include <linux/blkdev.h>
 248#include <linux/genhd.h>
 249#include <linux/locks.h>
 250
 251
 252#include <linux/devfs_fs_kernel.h>
 253#include <linux/smp_lock.h>
 254#include <asm/ioctl.h>
 255#include <asm/segment.h>
 256#include <asm/uaccess.h>
 257
 258#ifdef CONFIG_KERNELD
 259#include <linux/kerneld.h>
 260#endif
 261
 262#include <linux/blk.h>
 263#include <linux/blkpg.h>
 264
 265#include <linux/errno.h>
 266#include <linux/lvm.h>
 267
 268#include "lvm-internal.h"
 269
 270#define LVM_CORRECT_READ_AHEAD(a)               \
 271do {                                            \
 272        if ((a) < LVM_MIN_READ_AHEAD ||         \
 273            (a) > LVM_MAX_READ_AHEAD)           \
 274                (a) = LVM_DEFAULT_READ_AHEAD;   \
 275        read_ahead[MAJOR_NR] = (a);             \
 276} while(0)
 277
 278#ifndef WRITEA
 279#  define WRITEA WRITE
 280#endif
 281
 282
 283/*
 284 * External function prototypes
 285 */
 286static int lvm_make_request_fn(request_queue_t*, int, struct buffer_head*);
 287
 288static int lvm_blk_ioctl(struct inode *, struct file *, uint, ulong);
 289static int lvm_blk_open(struct inode *, struct file *);
 290
 291static int lvm_blk_close(struct inode *, struct file *);
 292static int lvm_get_snapshot_use_rate(lv_t *lv_ptr, void *arg);
 293static int lvm_user_bmap(struct inode *, struct lv_bmap *);
 294
 295static int lvm_chr_open(struct inode *, struct file *);
 296static int lvm_chr_close(struct inode *, struct file *);
 297static int lvm_chr_ioctl(struct inode *, struct file *, uint, ulong);
 298
 299
 300/* End external function prototypes */
 301
 302
 303/*
 304 * Internal function prototypes
 305 */
 306static void lvm_cleanup(void);
 307static void lvm_init_vars(void);
 308
 309#ifdef LVM_HD_NAME
 310extern void (*lvm_hd_name_ptr) (char *, int);
 311#endif
 312static int lvm_map(struct buffer_head *, int);
 313static int lvm_do_lock_lvm(void);
 314static int lvm_do_le_remap(vg_t *, void *);
 315
 316static int lvm_do_pv_create(pv_t *, vg_t *, ulong);
 317static int lvm_do_pv_remove(vg_t *, ulong);
 318static int lvm_do_lv_create(int, char *, lv_t *);
 319static int lvm_do_lv_extend_reduce(int, char *, lv_t *);
 320static int lvm_do_lv_remove(int, char *, int);
 321static int lvm_do_lv_rename(vg_t *, lv_req_t *, lv_t *);
 322static int lvm_do_lv_status_byname(vg_t *r, void *);
 323static int lvm_do_lv_status_byindex(vg_t *, void *);
 324static int lvm_do_lv_status_bydev(vg_t *, void *);
 325
 326static int lvm_do_pe_lock_unlock(vg_t *r, void *);
 327
 328static int lvm_do_pv_change(vg_t*, void*);
 329static int lvm_do_pv_status(vg_t *, void *);
 330static int lvm_do_pv_flush(void *);
 331
 332static int lvm_do_vg_create(void *, int minor);
 333static int lvm_do_vg_extend(vg_t *, void *);
 334static int lvm_do_vg_reduce(vg_t *, void *);
 335static int lvm_do_vg_rename(vg_t *, void *);
 336static int lvm_do_vg_remove(int);
 337static void lvm_geninit(struct gendisk *);
 338static void __update_hardsectsize(lv_t *lv);
 339
 340
 341static void _queue_io(struct buffer_head *bh, int rw);
 342static struct buffer_head *_dequeue_io(void);
 343static void _flush_io(struct buffer_head *bh);
 344
 345static int _open_pv(pv_t *pv);
 346static void _close_pv(pv_t *pv);
 347
 348static unsigned long _sectors_to_k(unsigned long sect);
 349
 350#ifdef LVM_HD_NAME
 351void lvm_hd_name(char *, int);
 352#endif
 353/* END Internal function prototypes */
 354
 355
 356/* variables */
 357char *lvm_version = "LVM version "LVM_RELEASE_NAME"("LVM_RELEASE_DATE")";
 358ushort lvm_iop_version = LVM_DRIVER_IOP_VERSION;
 359int loadtime = 0;
 360const char *const lvm_name = LVM_NAME;
 361
 362
 363/* volume group descriptor area pointers */
 364vg_t *vg[ABS_MAX_VG + 1];
 365
 366/* map from block minor number to VG and LV numbers */
 367static struct {
 368        int vg_number;
 369        int lv_number;
 370} vg_lv_map[ABS_MAX_LV];
 371
 372
 373/* Request structures (lvm_chr_ioctl()) */
 374static pv_change_req_t pv_change_req;
 375static pv_status_req_t pv_status_req;
 376volatile static pe_lock_req_t pe_lock_req;
 377static le_remap_req_t le_remap_req;
 378static lv_req_t lv_req;
 379
 380#ifdef LVM_TOTAL_RESET
 381static int lvm_reset_spindown = 0;
 382#endif
 383
 384static char pv_name[NAME_LEN];
 385/* static char rootvg[NAME_LEN] = { 0, }; */
 386static int lock = 0;
 387static int _lock_open_count = 0;
 388static uint vg_count = 0;
 389static long lvm_chr_open_count = 0;
 390static DECLARE_WAIT_QUEUE_HEAD(lvm_wait);
 391
 392static spinlock_t lvm_lock = SPIN_LOCK_UNLOCKED;
 393static spinlock_t lvm_snapshot_lock = SPIN_LOCK_UNLOCKED;
 394
 395static struct buffer_head *_pe_requests;
 396static DECLARE_RWSEM(_pe_lock);
 397
 398
 399struct file_operations lvm_chr_fops = {
 400        owner:          THIS_MODULE,
 401        open:           lvm_chr_open,
 402        release:        lvm_chr_close,
 403        ioctl:          lvm_chr_ioctl,
 404};
 405
 406/* block device operations structure needed for 2.3.38? and above */
 407struct block_device_operations lvm_blk_dops =
 408{
 409        owner:          THIS_MODULE,
 410        open:           lvm_blk_open,
 411        release:        lvm_blk_close,
 412        ioctl:          lvm_blk_ioctl,
 413};
 414
 415
 416/* gendisk structures */
 417static struct hd_struct lvm_hd_struct[MAX_LV];
 418static int lvm_blocksizes[MAX_LV];
 419static int lvm_hardsectsizes[MAX_LV];
 420static int lvm_size[MAX_LV];
 421
 422static struct gendisk lvm_gendisk =
 423{
 424        major:          MAJOR_NR,
 425        major_name:     LVM_NAME,
 426        minor_shift:    0,
 427        max_p:          1,
 428        part:           lvm_hd_struct,
 429        sizes:          lvm_size,
 430        nr_real:        MAX_LV,
 431};
 432
 433
 434/*
 435 * Driver initialization...
 436 */
 437int lvm_init(void)
 438{
 439        if (devfs_register_chrdev(LVM_CHAR_MAJOR,
 440                                  lvm_name, &lvm_chr_fops) < 0) {
 441                printk(KERN_ERR "%s -- devfs_register_chrdev failed\n",
 442                       lvm_name);
 443                return -EIO;
 444        }
 445        if (devfs_register_blkdev(MAJOR_NR, lvm_name, &lvm_blk_dops) < 0)
 446        {
 447                printk("%s -- devfs_register_blkdev failed\n", lvm_name);
 448                if (devfs_unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0)
 449                        printk(KERN_ERR
 450                               "%s -- devfs_unregister_chrdev failed\n",
 451                               lvm_name);
 452                return -EIO;
 453        }
 454
 455        lvm_init_fs();
 456        lvm_init_vars();
 457        lvm_geninit(&lvm_gendisk);
 458
 459        /* insert our gendisk at the corresponding major */
 460        add_gendisk(&lvm_gendisk);
 461
 462#ifdef LVM_HD_NAME
 463        /* reference from drivers/block/genhd.c */
 464        lvm_hd_name_ptr = lvm_hd_name;
 465#endif
 466
 467        blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), lvm_make_request_fn);
 468
 469
 470        /* initialise the pe lock */
 471        pe_lock_req.lock = UNLOCK_PE;
 472
 473        /* optional read root VGDA */
 474/*
 475   if ( *rootvg != 0) vg_read_with_pv_and_lv ( rootvg, &vg);
 476*/
 477
 478#ifdef MODULE
 479        printk(KERN_INFO "%s module loaded\n", lvm_version);
 480#else
 481        printk(KERN_INFO "%s\n", lvm_version);
 482#endif
 483
 484        return 0;
 485} /* lvm_init() */
 486
 487/*
 488 * cleanup...
 489 */
 490
 491static void lvm_cleanup(void)
 492{
 493        if (devfs_unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0)
 494                printk(KERN_ERR "%s -- devfs_unregister_chrdev failed\n",
 495                       lvm_name);
 496        if (devfs_unregister_blkdev(MAJOR_NR, lvm_name) < 0)
 497                printk(KERN_ERR "%s -- devfs_unregister_blkdev failed\n",
 498                       lvm_name);
 499
 500
 501
 502        /* delete our gendisk from chain */
 503        del_gendisk(&lvm_gendisk);
 504
 505        blk_size[MAJOR_NR] = NULL;
 506        blksize_size[MAJOR_NR] = NULL;
 507        hardsect_size[MAJOR_NR] = NULL;
 508
 509#ifdef LVM_HD_NAME
 510        /* reference from linux/drivers/block/genhd.c */
 511        lvm_hd_name_ptr = NULL;
 512#endif
 513
 514        /* unregister with procfs and devfs */
 515        lvm_fin_fs();
 516
 517#ifdef MODULE
 518        printk(KERN_INFO "%s -- Module successfully deactivated\n", lvm_name);
 519#endif
 520
 521        return;
 522}       /* lvm_cleanup() */
 523
 524/*
 525 * support function to initialize lvm variables
 526 */
 527static void __init lvm_init_vars(void)
 528{
 529        int v;
 530
 531        loadtime = CURRENT_TIME;
 532
 533        lvm_lock = lvm_snapshot_lock = SPIN_LOCK_UNLOCKED;
 534
 535        pe_lock_req.lock = UNLOCK_PE;
 536        pe_lock_req.data.lv_dev = 0;
 537        pe_lock_req.data.pv_dev = 0;
 538        pe_lock_req.data.pv_offset = 0;
 539
 540        /* Initialize VG pointers */
 541        for (v = 0; v < ABS_MAX_VG + 1; v++)
 542                vg[v] = NULL;
 543
 544        /* Initialize LV -> VG association */
 545        for (v = 0; v < ABS_MAX_LV; v++) {
 546                /* index ABS_MAX_VG never used for real VG */
 547                vg_lv_map[v].vg_number = ABS_MAX_VG;
 548                vg_lv_map[v].lv_number = -1;
 549        }
 550
 551        return;
 552} /* lvm_init_vars() */
 553
 554
 555/********************************************************************
 556 *
 557 * Character device functions
 558 *
 559 ********************************************************************/
 560
 561#define MODE_TO_STR(mode) (mode) & FMODE_READ ? "READ" : "", \
 562                          (mode) & FMODE_WRITE ? "WRITE" : ""
 563
 564/*
 565 * character device open routine
 566 */
 567static int lvm_chr_open(struct inode *inode, struct file *file)
 568{
 569        int minor = MINOR(inode->i_rdev);
 570
 571        P_DEV("chr_open MINOR: %d  VG#: %d  mode: %s%s  lock: %d\n",
 572              minor, VG_CHR(minor), MODE_TO_STR(file->f_mode), lock);
 573
 574        /* super user validation */
 575        if (!capable(CAP_SYS_ADMIN)) return -EACCES;
 576
 577        /* Group special file open */
 578        if (VG_CHR(minor) > MAX_VG) return -ENXIO;
 579
 580        spin_lock(&lvm_lock);
 581        if(lock == current->pid)
 582                _lock_open_count++;
 583        spin_unlock(&lvm_lock);
 584
 585        lvm_chr_open_count++;
 586
 587        MOD_INC_USE_COUNT;
 588
 589        return 0;
 590} /* lvm_chr_open() */
 591
 592
 593/*
 594 * character device i/o-control routine
 595 *
 596 * Only one changing process can do changing ioctl at one time,
 597 * others will block.
 598 *
 599 */
 600static int lvm_chr_ioctl(struct inode *inode, struct file *file,
 601                         uint command, ulong a)
 602{
 603        int minor = MINOR(inode->i_rdev);
 604        uint extendable, l, v;
 605        void *arg = (void *) a;
 606        lv_t lv;
 607        vg_t* vg_ptr = vg[VG_CHR(minor)];
 608
 609        /* otherwise cc will complain about unused variables */
 610        (void) lvm_lock;
 611
 612        P_IOCTL("chr MINOR: %d  command: 0x%X  arg: %p  VG#: %d  mode: %s%s\n",
 613                minor, command, arg, VG_CHR(minor), MODE_TO_STR(file->f_mode));
 614
 615#ifdef LVM_TOTAL_RESET
 616        if (lvm_reset_spindown > 0) return -EACCES;
 617#endif
 618
 619        /* Main command switch */
 620        switch (command) {
 621        case LVM_LOCK_LVM:
 622                /* lock the LVM */
 623                return lvm_do_lock_lvm();
 624
 625        case LVM_GET_IOP_VERSION:
 626                /* check lvm version to ensure driver/tools+lib
 627                   interoperability */
 628                if (copy_to_user(arg, &lvm_iop_version, sizeof(ushort)) != 0)
 629                        return -EFAULT;
 630                return 0;
 631
 632#ifdef LVM_TOTAL_RESET
 633        case LVM_RESET:
 634                /* lock reset function */
 635                lvm_reset_spindown = 1;
 636                for (v = 0; v < ABS_MAX_VG; v++) {
 637                        if (vg[v] != NULL) lvm_do_vg_remove(v);
 638                }
 639
 640#ifdef MODULE
 641                while (GET_USE_COUNT(&__this_module) < 1)
 642                        MOD_INC_USE_COUNT;
 643                while (GET_USE_COUNT(&__this_module) > 1)
 644                        MOD_DEC_USE_COUNT;
 645#endif /* MODULE */
 646                lock = 0;       /* release lock */
 647                wake_up_interruptible(&lvm_wait);
 648                return 0;
 649#endif /* LVM_TOTAL_RESET */
 650
 651
 652        case LE_REMAP:
 653                /* remap a logical extent (after moving the physical extent) */
 654                return lvm_do_le_remap(vg_ptr,arg);
 655
 656        case PE_LOCK_UNLOCK:
 657                /* lock/unlock i/o to a physical extent to move it to another
 658                   physical volume (move's done in user space's pvmove) */
 659                return lvm_do_pe_lock_unlock(vg_ptr,arg);
 660
 661        case VG_CREATE_OLD:
 662                /* create a VGDA */
 663                return lvm_do_vg_create(arg, minor);
 664
 665        case VG_CREATE:
 666                /* create a VGDA, assume VG number is filled in */
 667                return lvm_do_vg_create(arg, -1);
 668
 669        case VG_EXTEND:
 670                /* extend a volume group */
 671                return lvm_do_vg_extend(vg_ptr, arg);
 672
 673        case VG_REDUCE:
 674                /* reduce a volume group */
 675                return lvm_do_vg_reduce(vg_ptr, arg);
 676
 677        case VG_RENAME:
 678                /* rename a volume group */
 679                return lvm_do_vg_rename(vg_ptr, arg);
 680
 681        case VG_REMOVE:
 682                /* remove an inactive VGDA */
 683                return lvm_do_vg_remove(minor);
 684
 685
 686        case VG_SET_EXTENDABLE:
 687                /* set/clear extendability flag of volume group */
 688                if (vg_ptr == NULL) return -ENXIO;
 689                if (copy_from_user(&extendable, arg, sizeof(extendable)) != 0)
 690                        return -EFAULT;
 691
 692                if (extendable == VG_EXTENDABLE ||
 693                    extendable == ~VG_EXTENDABLE) {
 694                        if (extendable == VG_EXTENDABLE)
 695                                vg_ptr->vg_status |= VG_EXTENDABLE;
 696                        else
 697                                vg_ptr->vg_status &= ~VG_EXTENDABLE;
 698                } else return -EINVAL;
 699                return 0;
 700
 701
 702        case VG_STATUS:
 703                /* get volume group data (only the vg_t struct) */
 704                if (vg_ptr == NULL) return -ENXIO;
 705                if (copy_to_user(arg, vg_ptr, sizeof(vg_t)) != 0)
 706                        return -EFAULT;
 707                return 0;
 708
 709
 710        case VG_STATUS_GET_COUNT:
 711                /* get volume group count */
 712                if (copy_to_user(arg, &vg_count, sizeof(vg_count)) != 0)
 713                        return -EFAULT;
 714                return 0;
 715
 716
 717        case VG_STATUS_GET_NAMELIST:
 718                /* get volume group names */
 719                for (l = v = 0; v < ABS_MAX_VG; v++) {
 720                        if (vg[v] != NULL) {
 721                                if (copy_to_user(arg + l * NAME_LEN,
 722                                                 vg[v]->vg_name,
 723                                                 NAME_LEN) != 0)
 724                                        return -EFAULT;
 725                                l++;
 726                        }
 727                }
 728                return 0;
 729
 730
 731        case LV_CREATE:
 732        case LV_EXTEND:
 733        case LV_REDUCE:
 734        case LV_REMOVE:
 735        case LV_RENAME:
 736                /* create, extend, reduce, remove or rename a logical volume */
 737                if (vg_ptr == NULL) return -ENXIO;
 738                if (copy_from_user(&lv_req, arg, sizeof(lv_req)) != 0)
 739                        return -EFAULT;
 740
 741                if (command != LV_REMOVE) {
 742                        if (copy_from_user(&lv, lv_req.lv, sizeof(lv_t)) != 0)
 743                                return -EFAULT;
 744                }
 745                switch (command) {
 746                case LV_CREATE:
 747                        return lvm_do_lv_create(minor, lv_req.lv_name, &lv);
 748
 749                case LV_EXTEND:
 750                case LV_REDUCE:
 751                        return lvm_do_lv_extend_reduce(minor, lv_req.lv_name, &lv);
 752                case LV_REMOVE:
 753                        return lvm_do_lv_remove(minor, lv_req.lv_name, -1);
 754
 755                case LV_RENAME:
 756                        return lvm_do_lv_rename(vg_ptr, &lv_req, &lv);
 757                }
 758
 759
 760
 761
 762        case LV_STATUS_BYNAME:
 763                /* get status of a logical volume by name */
 764                return lvm_do_lv_status_byname(vg_ptr, arg);
 765
 766
 767        case LV_STATUS_BYINDEX:
 768                /* get status of a logical volume by index */
 769                return lvm_do_lv_status_byindex(vg_ptr, arg);
 770
 771
 772        case LV_STATUS_BYDEV:
 773                /* get status of a logical volume by device */
 774                return lvm_do_lv_status_bydev(vg_ptr, arg);
 775
 776
 777        case PV_CHANGE:
 778                /* change a physical volume */
 779                return lvm_do_pv_change(vg_ptr,arg);
 780
 781
 782        case PV_STATUS:
 783                /* get physical volume data (pv_t structure only) */
 784                return lvm_do_pv_status(vg_ptr,arg);
 785
 786
 787        case PV_FLUSH:
 788                /* physical volume buffer flush/invalidate */
 789                return lvm_do_pv_flush(arg);
 790
 791
 792        default:
 793                printk(KERN_WARNING
 794                       "%s -- lvm_chr_ioctl: unknown command 0x%x\n",
 795                       lvm_name, command);
 796                return -ENOTTY;
 797        }
 798
 799        return 0;
 800} /* lvm_chr_ioctl */
 801
 802
 803/*
 804 * character device close routine
 805 */
 806static int lvm_chr_close(struct inode *inode, struct file *file)
 807{
 808        P_DEV("chr_close MINOR: %d  VG#: %d\n",
 809              MINOR(inode->i_rdev), VG_CHR(MINOR(inode->i_rdev)));
 810
 811#ifdef LVM_TOTAL_RESET
 812        if (lvm_reset_spindown > 0) {
 813                lvm_reset_spindown = 0;
 814                lvm_chr_open_count = 0;
 815        }
 816#endif
 817
 818        if (lvm_chr_open_count > 0) lvm_chr_open_count--;
 819
 820        spin_lock(&lvm_lock);
 821        if(lock == current->pid) {
 822                if(!_lock_open_count) {
 823                        P_DEV("chr_close: unlocking LVM for pid %d\n", lock);
 824                        lock = 0;
 825                        wake_up_interruptible(&lvm_wait);
 826                } else
 827                        _lock_open_count--;
 828        }
 829        spin_unlock(&lvm_lock);
 830
 831        MOD_DEC_USE_COUNT;
 832
 833        return 0;
 834} /* lvm_chr_close() */
 835
 836
 837
 838/********************************************************************
 839 *
 840 * Block device functions
 841 *
 842 ********************************************************************/
 843
 844/*
 845 * block device open routine
 846 */
 847static int lvm_blk_open(struct inode *inode, struct file *file)
 848{
 849        int minor = MINOR(inode->i_rdev);
 850        lv_t *lv_ptr;
 851        vg_t *vg_ptr = vg[VG_BLK(minor)];
 852
 853        P_DEV("blk_open MINOR: %d  VG#: %d  LV#: %d  mode: %s%s\n",
 854              minor, VG_BLK(minor), LV_BLK(minor), MODE_TO_STR(file->f_mode));
 855
 856#ifdef LVM_TOTAL_RESET
 857        if (lvm_reset_spindown > 0)
 858                return -EPERM;
 859#endif
 860
 861        if (vg_ptr != NULL &&
 862            (vg_ptr->vg_status & VG_ACTIVE) &&
 863            (lv_ptr = vg_ptr->lv[LV_BLK(minor)]) != NULL &&
 864            LV_BLK(minor) >= 0 &&
 865            LV_BLK(minor) < vg_ptr->lv_max) {
 866
 867                /* Check parallel LV spindown (LV remove) */
 868                if (lv_ptr->lv_status & LV_SPINDOWN) return -EPERM;
 869
 870                /* Check inactive LV and open for read/write */
 871                /* We need to be able to "read" an inactive LV
 872                   to re-activate it again */
 873                if ((file->f_mode & FMODE_WRITE) &&
 874                    (!(lv_ptr->lv_status & LV_ACTIVE)))
 875                    return -EPERM;
 876
 877                if (!(lv_ptr->lv_access & LV_WRITE) &&
 878                    (file->f_mode & FMODE_WRITE))
 879                        return -EACCES;
 880
 881
 882                /* be sure to increment VG counter */
 883                if (lv_ptr->lv_open == 0) vg_ptr->lv_open++;
 884                lv_ptr->lv_open++;
 885
 886                MOD_INC_USE_COUNT;
 887
 888                P_DEV("blk_open OK, LV size %d\n", lv_ptr->lv_size);
 889
 890                return 0;
 891        }
 892        return -ENXIO;
 893} /* lvm_blk_open() */
 894
 895/* Deliver "hard disk geometry" */
 896static int _hdio_getgeo(ulong a, lv_t *lv_ptr, int what)
 897{
 898        int ret = 0;
 899        uchar heads = 128;
 900        uchar sectors = 128;
 901        ulong start = 0;
 902        uint cylinders;
 903
 904        while ( heads * sectors > lv_ptr->lv_size) {
 905                heads >>= 1;
 906                sectors >>= 1;
 907        }
 908        cylinders = lv_ptr->lv_size / heads / sectors;
 909
 910        switch (what) {
 911                case 0:
 912                {
 913                        struct hd_geometry *hd = (struct hd_geometry *) a;
 914
 915                        if (put_user(heads, &hd->heads) ||
 916                            put_user(sectors, &hd->sectors) ||
 917                            put_user((ushort) cylinders, &hd->cylinders) ||
 918                            put_user(start, &hd->start))
 919                                return -EFAULT;
 920                        break;
 921                }
 922
 923#ifdef HDIO_GETGEO_BIG
 924                case 1:
 925                {
 926                        struct hd_big_geometry *hd =
 927                                (struct hd_big_geometry *) a;
 928
 929                        if (put_user(heads, &hd->heads) ||
 930                            put_user(sectors, &hd->sectors) ||
 931                            put_user(cylinders, &hd->cylinders) ||
 932                            put_user(start, &hd->start))
 933                                return -EFAULT;
 934                        break;
 935                }
 936#endif
 937
 938        }
 939
 940        P_IOCTL("%s -- lvm_blk_ioctl -- cylinders: %d\n",
 941                lvm_name, cylinders);
 942        return ret;
 943}
 944
 945
 946/*
 947 * block device i/o-control routine
 948 */
 949static int lvm_blk_ioctl(struct inode *inode, struct file *file,
 950                         uint cmd, ulong a)
 951{
 952        kdev_t dev = inode->i_rdev;
 953        int minor = MINOR(dev), ret;
 954        vg_t *vg_ptr = vg[VG_BLK(minor)];
 955        lv_t *lv_ptr = vg_ptr->lv[LV_BLK(minor)];
 956        void *arg = (void *) a;
 957
 958        P_IOCTL("blk MINOR: %d  cmd: 0x%X  arg: %p  VG#: %d  LV#: %d  "
 959                "mode: %s%s\n", minor, cmd, arg, VG_BLK(minor),
 960                LV_BLK(minor), MODE_TO_STR(file->f_mode));
 961
 962        switch (cmd) {
 963                case BLKRASET:
 964                        /* set read ahead for block device */
 965                        ret = blk_ioctl(dev, cmd, a);
 966                        if (ret)
 967                                return ret;
 968                        lv_ptr->lv_read_ahead = (long) a;
 969                        LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead);
 970                        break;
 971        
 972                case HDIO_GETGEO:
 973#ifdef HDIO_GETGEO_BIG
 974                case HDIO_GETGEO_BIG:
 975#endif
 976                        /* get disk geometry */
 977                        P_IOCTL("%s -- lvm_blk_ioctl -- HDIO_GETGEO\n",
 978                                lvm_name);
 979                        if (!a)
 980                                return -EINVAL;
 981
 982                        switch (cmd) {
 983                                case HDIO_GETGEO:
 984                                        return _hdio_getgeo(a, lv_ptr, 0);
 985#ifdef HDIO_GETGEO_BIG
 986                                case HDIO_GETGEO_BIG:
 987                                        return _hdio_getgeo(a, lv_ptr, 1);
 988#endif
 989                        }
 990        
 991                case LV_BMAP:
 992                        /* turn logical block into (dev_t, block). non privileged. */
 993                        /* don't bmap a snapshot, since the mapping can change */
 994                        if (lv_ptr->lv_access & LV_SNAPSHOT)
 995                                return -EPERM;
 996        
 997                        return lvm_user_bmap(inode, (struct lv_bmap *) arg);
 998        
 999                case LV_SET_ACCESS:
1000                        /* set access flags of a logical volume */
1001                        if (!capable(CAP_SYS_ADMIN)) return -EACCES;
1002        
1003                        down_write(&lv_ptr->lv_lock);
1004                        lv_ptr->lv_access = (ulong) arg;
1005                        up_write(&lv_ptr->lv_lock);
1006        
1007                        if ( lv_ptr->lv_access & LV_WRITE)
1008                                set_device_ro(lv_ptr->lv_dev, 0);
1009                        else
1010                                set_device_ro(lv_ptr->lv_dev, 1);
1011                        break;
1012        
1013        
1014                case LV_SET_ALLOCATION:
1015                        /* set allocation flags of a logical volume */
1016                        if (!capable(CAP_SYS_ADMIN)) return -EACCES;
1017                        down_write(&lv_ptr->lv_lock);
1018                        lv_ptr->lv_allocation = (ulong) arg;
1019                        up_write(&lv_ptr->lv_lock);
1020                        break;
1021        
1022                case LV_SET_STATUS:
1023                        /* set status flags of a logical volume */
1024                        if (!capable(CAP_SYS_ADMIN)) return -EACCES;
1025                        if (!((ulong) arg & LV_ACTIVE) && lv_ptr->lv_open > 1)
1026                                return -EPERM;
1027                        down_write(&lv_ptr->lv_lock);
1028                        lv_ptr->lv_status = (ulong) arg;
1029                        up_write(&lv_ptr->lv_lock);
1030                        break;
1031        
1032                case LV_SNAPSHOT_USE_RATE:
1033                        return lvm_get_snapshot_use_rate(lv_ptr, arg);
1034        
1035                default:
1036                        /* Handle rest here */
1037                        ret = blk_ioctl(dev, cmd, a);
1038                        if (ret)
1039                                printk(KERN_WARNING
1040                                       "%s -- lvm_blk_ioctl: unknown "
1041                                       "cmd 0x%x\n",
1042                                       lvm_name, cmd);
1043                        return ret;
1044        }
1045
1046        return 0;
1047} /* lvm_blk_ioctl() */
1048
1049
1050/*
1051 * block device close routine
1052 */
1053static int lvm_blk_close(struct inode *inode, struct file *file)
1054{
1055        int minor = MINOR(inode->i_rdev);
1056        vg_t *vg_ptr = vg[VG_BLK(minor)];
1057        lv_t *lv_ptr = vg_ptr->lv[LV_BLK(minor)];
1058
1059        P_DEV("blk_close MINOR: %d  VG#: %d  LV#: %d\n",
1060              minor, VG_BLK(minor), LV_BLK(minor));
1061
1062        if (lv_ptr->lv_open == 1) vg_ptr->lv_open--;
1063        lv_ptr->lv_open--;
1064
1065        MOD_DEC_USE_COUNT;
1066
1067        return 0;
1068} /* lvm_blk_close() */
1069
1070static int lvm_get_snapshot_use_rate(lv_t *lv, void *arg)
1071{
1072        lv_snapshot_use_rate_req_t lv_rate_req;
1073
1074        down_read(&lv->lv_lock);
1075        if (!(lv->lv_access & LV_SNAPSHOT)) {
1076                up_read(&lv->lv_lock);
1077                return -EPERM;
1078        }
1079        up_read(&lv->lv_lock);
1080
1081        if (copy_from_user(&lv_rate_req, arg, sizeof(lv_rate_req)))
1082                return -EFAULT;
1083
1084        if (lv_rate_req.rate < 0 || lv_rate_req.rate > 100)
1085                return -EINVAL;
1086
1087        switch (lv_rate_req.block) {
1088        case 0:
1089                down_write(&lv->lv_lock);
1090                lv->lv_snapshot_use_rate = lv_rate_req.rate;
1091                up_write(&lv->lv_lock);
1092                down_read(&lv->lv_lock);
1093                if (lv->lv_remap_ptr * 100 / lv->lv_remap_end <
1094                    lv->lv_snapshot_use_rate) {
1095                        up_read(&lv->lv_lock);
1096                        interruptible_sleep_on(&lv->lv_snapshot_wait);
1097                        down_read(&lv->lv_lock);
1098                }
1099                up_read(&lv->lv_lock);
1100                break;
1101
1102        case O_NONBLOCK:
1103                break;
1104
1105        default:
1106                return -EINVAL;
1107        }
1108        down_read(&lv->lv_lock);
1109        lv_rate_req.rate = lv->lv_remap_ptr * 100 / lv->lv_remap_end;
1110        up_read(&lv->lv_lock);
1111
1112        return copy_to_user(arg, &lv_rate_req,
1113                            sizeof(lv_rate_req)) ? -EFAULT : 0;
1114}
1115
1116static int lvm_user_bmap(struct inode *inode, struct lv_bmap *user_result)
1117{
1118        struct buffer_head bh;
1119        unsigned long block;
1120        int err;
1121
1122        if (get_user(block, &user_result->lv_block))
1123                return -EFAULT;
1124
1125        memset(&bh,0,sizeof bh);
1126        bh.b_blocknr = block;
1127        bh.b_dev = bh.b_rdev = inode->i_rdev;
1128        bh.b_size = lvm_get_blksize(bh.b_dev);
1129        bh.b_rsector = block * (bh.b_size >> 9);
1130        bh.b_end_io = NULL;
1131        if ((err = lvm_map(&bh, READ)) < 0)  {
1132                printk("lvm map failed: %d\n", err);
1133                return -EINVAL;
1134        }
1135
1136        return put_user(kdev_t_to_nr(bh.b_rdev), &user_result->lv_dev) ||
1137               put_user(bh.b_rsector/(bh.b_size>>9), &user_result->lv_block) ?
1138                -EFAULT : 0;
1139}
1140
1141
1142/*
1143 * block device support function for /usr/src/linux/drivers/block/ll_rw_blk.c
1144 * (see init_module/lvm_init)
1145 */
1146static void __remap_snapshot(kdev_t rdev, ulong rsector,
1147                                    ulong pe_start, lv_t *lv, vg_t *vg) {
1148
1149        /* copy a chunk from the origin to a snapshot device */
1150        down_write(&lv->lv_lock);
1151
1152        /* we must redo lvm_snapshot_remap_block in order to avoid a
1153           race condition in the gap where no lock was held */
1154        if (!lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv) &&
1155            !lvm_snapshot_COW(rdev, rsector, pe_start, rsector, vg, lv))
1156                lvm_write_COW_table_block(vg, lv);
1157
1158        up_write(&lv->lv_lock);
1159}
1160
1161static inline void _remap_snapshot(kdev_t rdev, ulong rsector,
1162                                   ulong pe_start, lv_t *lv, vg_t *vg) {
1163        int r;
1164
1165        /* check to see if this chunk is already in the snapshot */
1166        down_read(&lv->lv_lock);
1167        r = lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv);
1168        up_read(&lv->lv_lock);
1169
1170        if (!r)
1171                /* we haven't yet copied this block to the snapshot */
1172                __remap_snapshot(rdev, rsector, pe_start, lv, vg);
1173}
1174
1175
1176/*
1177 * extents destined for a pe that is on the move should be deferred
1178 */
1179static inline int _should_defer(kdev_t pv, ulong sector, uint32_t pe_size) {
1180        return ((pe_lock_req.lock == LOCK_PE) &&
1181                (pv == pe_lock_req.data.pv_dev) &&
1182                (sector >= pe_lock_req.data.pv_offset) &&
1183                (sector < (pe_lock_req.data.pv_offset + pe_size)));
1184}
1185
1186static inline int _defer_extent(struct buffer_head *bh, int rw,
1187                                kdev_t pv, ulong sector, uint32_t pe_size)
1188{
1189        if (pe_lock_req.lock == LOCK_PE) {
1190                down_read(&_pe_lock);
1191                if (_should_defer(pv, sector, pe_size)) {
1192                        up_read(&_pe_lock);
1193                        down_write(&_pe_lock);
1194                        if (_should_defer(pv, sector, pe_size))
1195                                _queue_io(bh, rw);
1196                        up_write(&_pe_lock);
1197                        return 1;
1198                }
1199                up_read(&_pe_lock);
1200        }
1201        return 0;
1202}
1203
1204
1205static int lvm_map(struct buffer_head *bh, int rw)
1206{
1207        int minor = MINOR(bh->b_rdev);
1208        ulong index;
1209        ulong pe_start;
1210        ulong size = bh->b_size >> 9;
1211        ulong rsector_org = bh->b_rsector;
1212        ulong rsector_map;
1213        kdev_t rdev_map;
1214        vg_t *vg_this = vg[VG_BLK(minor)];
1215        lv_t *lv = vg_this->lv[LV_BLK(minor)];
1216
1217
1218        down_read(&lv->lv_lock);
1219        if (!(lv->lv_status & LV_ACTIVE)) {
1220                printk(KERN_ALERT
1221                       "%s - lvm_map: ll_rw_blk for inactive LV %s\n",
1222                       lvm_name, lv->lv_name);
1223                goto bad;
1224        }
1225
1226        if ((rw == WRITE || rw == WRITEA) &&
1227            !(lv->lv_access & LV_WRITE)) {
1228                printk(KERN_CRIT
1229                       "%s - lvm_map: ll_rw_blk write for readonly LV %s\n",
1230                       lvm_name, lv->lv_name);
1231                goto bad;
1232        }
1233
1234        P_MAP("%s - lvm_map minor: %d  *rdev: %s  *rsector: %lu  size:%lu\n",
1235              lvm_name, minor,
1236              kdevname(bh->b_rdev),
1237              rsector_org, size);
1238
1239        if (rsector_org + size > lv->lv_size) {
1240                printk(KERN_ALERT
1241                       "%s - lvm_map access beyond end of device; *rsector: "
1242                       "%lu or size: %lu wrong for minor: %2d\n",
1243                       lvm_name, rsector_org, size, minor);
1244                goto bad;
1245        }
1246
1247
1248        if (lv->lv_stripes < 2) { /* linear mapping */
1249                /* get the index */
1250                index = rsector_org / vg_this->pe_size;
1251                pe_start = lv->lv_current_pe[index].pe;
1252                rsector_map = lv->lv_current_pe[index].pe +
1253                        (rsector_org % vg_this->pe_size);
1254                rdev_map = lv->lv_current_pe[index].dev;
1255
1256                P_MAP("lv_current_pe[%ld].pe: %d  rdev: %s  rsector:%ld\n",
1257                      index, lv->lv_current_pe[index].pe,
1258                      kdevname(rdev_map), rsector_map);
1259
1260        } else {                /* striped mapping */
1261                ulong stripe_index;
1262                ulong stripe_length;
1263
1264                stripe_length = vg_this->pe_size * lv->lv_stripes;
1265                stripe_index = (rsector_org % stripe_length) /
1266                        lv->lv_stripesize;
1267                index = rsector_org / stripe_length +
1268                        (stripe_index % lv->lv_stripes) *
1269                        (lv->lv_allocated_le / lv->lv_stripes);
1270                pe_start = lv->lv_current_pe[index].pe;
1271                rsector_map = lv->lv_current_pe[index].pe +
1272                        (rsector_org % stripe_length) -
1273                        (stripe_index % lv->lv_stripes) * lv->lv_stripesize -
1274                        stripe_index / lv->lv_stripes *
1275                        (lv->lv_stripes - 1) * lv->lv_stripesize;
1276                rdev_map = lv->lv_current_pe[index].dev;
1277
1278                P_MAP("lv_current_pe[%ld].pe: %d  rdev: %s  rsector:%ld\n"
1279                      "stripe_length: %ld  stripe_index: %ld\n",
1280                      index, lv->lv_current_pe[index].pe, kdevname(rdev_map),
1281                      rsector_map, stripe_length, stripe_index);
1282        }
1283
1284        /*
1285         * Queue writes to physical extents on the move until move completes.
1286         * Don't get _pe_lock until there is a reasonable expectation that
1287         * we need to queue this request, because this is in the fast path.
1288         */
1289        if (rw == WRITE || rw == WRITEA) {
1290                if(_defer_extent(bh, rw, rdev_map,
1291                                 rsector_map, vg_this->pe_size)) {
1292
1293                        up_read(&lv->lv_lock);
1294                        return 0;
1295                }
1296
1297                lv->lv_current_pe[index].writes++;      /* statistic */
1298        } else
1299                lv->lv_current_pe[index].reads++;       /* statistic */
1300
1301        /* snapshot volume exception handling on physical device address base */
1302        if (!(lv->lv_access & (LV_SNAPSHOT|LV_SNAPSHOT_ORG)))
1303                goto out;
1304
1305        if (lv->lv_access & LV_SNAPSHOT) { /* remap snapshot */
1306                if (lvm_snapshot_remap_block(&rdev_map, &rsector_map,
1307                                             pe_start, lv) < 0)
1308                        goto bad;
1309
1310        } else if (rw == WRITE || rw == WRITEA) { /* snapshot origin */
1311                lv_t *snap;
1312
1313                /* start with first snapshot and loop through all of
1314                   them */
1315                for (snap = lv->lv_snapshot_next; snap;
1316                     snap = snap->lv_snapshot_next) {
1317                        /* Check for inactive snapshot */
1318                        if (!(snap->lv_status & LV_ACTIVE))
1319                                continue;
1320
1321                        /* Serializes the COW with the accesses to the
1322                           snapshot device */
1323                        _remap_snapshot(rdev_map, rsector_map,
1324                                         pe_start, snap, vg_this);
1325                }
1326        }
1327
1328 out:
1329        bh->b_rdev = rdev_map;
1330        bh->b_rsector = rsector_map;
1331        up_read(&lv->lv_lock);
1332        return 1;
1333
1334 bad:
1335        if (bh->b_end_io)
1336        buffer_IO_error(bh);
1337        up_read(&lv->lv_lock);
1338        return -1;
1339} /* lvm_map() */
1340
1341
1342/*
1343 * internal support functions
1344 */
1345
1346#ifdef LVM_HD_NAME
1347/*
1348 * generate "hard disk" name
1349 */
1350void lvm_hd_name(char *buf, int minor)
1351{
1352        int len = 0;
1353        lv_t *lv_ptr;
1354
1355        if (vg[VG_BLK(minor)] == NULL ||
1356            (lv_ptr = vg[VG_BLK(minor)]->lv[LV_BLK(minor)]) == NULL)
1357                return;
1358        len = strlen(lv_ptr->lv_name) - 5;
1359        memcpy(buf, &lv_ptr->lv_name[5], len);
1360        buf[len] = 0;
1361        return;
1362}
1363#endif
1364
1365
1366
1367
1368/*
1369 * make request function
1370 */
1371static int lvm_make_request_fn(request_queue_t *q,
1372                               int rw,
1373                               struct buffer_head *bh)
1374{
1375        return (lvm_map(bh, rw) <= 0) ? 0 : 1;
1376}
1377
1378
1379/********************************************************************
1380 *
1381 * Character device support functions
1382 *
1383 ********************************************************************/
1384/*
1385 * character device support function logical volume manager lock
1386 */
1387static int lvm_do_lock_lvm(void)
1388{
1389lock_try_again:
1390        spin_lock(&lvm_lock);
1391        if (lock != 0 && lock != current->pid) {
1392                P_DEV("lvm_do_lock_lvm: locked by pid %d ...\n", lock);
1393                spin_unlock(&lvm_lock);
1394                interruptible_sleep_on(&lvm_wait);
1395                if (current->sigpending != 0)
1396                        return -EINTR;
1397#ifdef LVM_TOTAL_RESET
1398                if (lvm_reset_spindown > 0)
1399                        return -EACCES;
1400#endif
1401                goto lock_try_again;
1402        }
1403        lock = current->pid;
1404        P_DEV("lvm_do_lock_lvm: locking LVM for pid %d\n", lock);
1405        spin_unlock(&lvm_lock);
1406        return 0;
1407} /* lvm_do_lock_lvm */
1408
1409
1410/*
1411 * character device support function lock/unlock physical extend
1412 */
1413static int lvm_do_pe_lock_unlock(vg_t *vg_ptr, void *arg)
1414{
1415        pe_lock_req_t new_lock;
1416        struct buffer_head *bh;
1417        uint p;
1418
1419        if (vg_ptr == NULL) return -ENXIO;
1420        if (copy_from_user(&new_lock, arg, sizeof(new_lock)) != 0)
1421                return -EFAULT;
1422
1423        switch (new_lock.lock) {
1424        case LOCK_PE:
1425                for (p = 0; p < vg_ptr->pv_max; p++) {
1426                        if (vg_ptr->pv[p] != NULL &&
1427                            new_lock.data.pv_dev == vg_ptr->pv[p]->pv_dev)
1428                                break;
1429                }
1430                if (p == vg_ptr->pv_max) return -ENXIO;
1431
1432                /*
1433                 * this sync releaves memory pressure to lessen the
1434                 * likelyhood of pvmove being paged out - resulting in
1435                 * deadlock.
1436                 *
1437                 * This method of doing a pvmove is broken
1438                 */
1439                fsync_dev(pe_lock_req.data.lv_dev);
1440
1441                down_write(&_pe_lock);
1442                if (pe_lock_req.lock == LOCK_PE) {
1443                        up_write(&_pe_lock);
1444                        return -EBUSY;
1445                }
1446
1447                /* Should we do to_kdev_t() on the pv_dev and lv_dev??? */
1448                pe_lock_req.lock = LOCK_PE;
1449                pe_lock_req.data.lv_dev = new_lock.data.lv_dev;
1450                pe_lock_req.data.pv_dev = new_lock.data.pv_dev;
1451                pe_lock_req.data.pv_offset = new_lock.data.pv_offset;
1452                up_write(&_pe_lock);
1453
1454                /* some requests may have got through since the fsync */
1455                fsync_dev(pe_lock_req.data.pv_dev);
1456                break;
1457
1458        case UNLOCK_PE:
1459                down_write(&_pe_lock);
1460                pe_lock_req.lock = UNLOCK_PE;
1461                pe_lock_req.data.lv_dev = 0;
1462                pe_lock_req.data.pv_dev = 0;
1463                pe_lock_req.data.pv_offset = 0;
1464                bh = _dequeue_io();
1465                up_write(&_pe_lock);
1466
1467                /* handle all deferred io for this PE */
1468                _flush_io(bh);
1469                break;
1470
1471        default:
1472                return -EINVAL;
1473        }
1474        return 0;
1475}
1476
1477
1478/*
1479 * character device support function logical extend remap
1480 */
1481static int lvm_do_le_remap(vg_t *vg_ptr, void *arg)
1482{
1483        uint l, le;
1484        lv_t *lv_ptr;
1485
1486        if (vg_ptr == NULL) return -ENXIO;
1487        if (copy_from_user(&le_remap_req, arg,
1488                           sizeof(le_remap_req_t)) != 0)
1489                return -EFAULT;
1490
1491        for (l = 0; l < vg_ptr->lv_max; l++) {
1492                lv_ptr = vg_ptr->lv[l];
1493
1494                if (!lv_ptr)
1495                        continue;
1496
1497                if (strcmp(lv_ptr->lv_name, le_remap_req.lv_name) == 0) {
1498                        down_write(&lv_ptr->lv_lock);
1499                        for (le = 0; le < lv_ptr->lv_allocated_le; le++) {
1500                                if (lv_ptr->lv_current_pe[le].dev ==
1501                                    le_remap_req.old_dev &&
1502                                    lv_ptr->lv_current_pe[le].pe ==
1503                                    le_remap_req.old_pe) {
1504                                        lv_ptr->lv_current_pe[le].dev =
1505                                            le_remap_req.new_dev;
1506                                        lv_ptr->lv_current_pe[le].pe =
1507                                            le_remap_req.new_pe;
1508                                        __update_hardsectsize(lv_ptr);
1509                                        up_write(&lv_ptr->lv_lock);
1510                                        return 0;
1511                                }
1512                        }
1513                        up_write(&lv_ptr->lv_lock);
1514                        return -EINVAL;
1515                }
1516        }
1517        return -ENXIO;
1518} /* lvm_do_le_remap() */
1519
1520
1521/*
1522 * character device support function VGDA create
1523 */
1524static int lvm_do_vg_create(void *arg, int minor)
1525{
1526        int ret = 0;
1527        ulong l, ls = 0, p, size;
1528        lv_t lv;
1529        vg_t *vg_ptr;
1530        lv_t **snap_lv_ptr;
1531
1532        if ((vg_ptr = kmalloc(sizeof(vg_t),GFP_KERNEL)) == NULL) {
1533                printk(KERN_CRIT
1534                       "%s -- VG_CREATE: kmalloc error VG at line %d\n",
1535                       lvm_name, __LINE__);
1536                return -ENOMEM;
1537        }
1538        /* get the volume group structure */
1539        if (copy_from_user(vg_ptr, arg, sizeof(vg_t)) != 0) {
1540                P_IOCTL("lvm_do_vg_create ERROR: copy VG ptr %p (%d bytes)\n",
1541                        arg, sizeof(vg_t));
1542                kfree(vg_ptr);
1543                return -EFAULT;
1544        }
1545
1546        /* VG_CREATE now uses minor number in VG structure */
1547        if (minor == -1)
1548                minor = vg_ptr->vg_number;
1549
1550        /* check limits */
1551        if (minor >= ABS_MAX_VG)
1552                return -EFAULT;
1553
1554        /* Validate it */
1555        if (vg[VG_CHR(minor)] != NULL) {
1556                P_IOCTL("lvm_do_vg_create ERROR: VG %d in use\n", minor);
1557                kfree(vg_ptr);
1558                return -EPERM;
1559        }
1560
1561        /* we are not that active so far... */
1562        vg_ptr->vg_status &= ~VG_ACTIVE;
1563        vg_ptr->pe_allocated = 0;
1564
1565        if (vg_ptr->pv_max > ABS_MAX_PV) {
1566                printk(KERN_WARNING
1567                       "%s -- Can't activate VG: ABS_MAX_PV too small\n",
1568                       lvm_name);
1569                kfree(vg_ptr);
1570                return -EPERM;
1571        }
1572
1573        if (vg_ptr->lv_max > ABS_MAX_LV) {
1574                printk(KERN_WARNING
1575                "%s -- Can't activate VG: ABS_MAX_LV too small for %u\n",
1576                       lvm_name, vg_ptr->lv_max);
1577                kfree(vg_ptr);
1578                return -EPERM;
1579        }
1580
1581        /* create devfs and procfs entries */
1582        lvm_fs_create_vg(vg_ptr);
1583
1584        vg[VG_CHR(minor)] = vg_ptr;
1585
1586        /* get the physical volume structures */
1587        vg_ptr->pv_act = vg_ptr->pv_cur = 0;
1588        for (p = 0; p < vg_ptr->pv_max; p++) {
1589                pv_t *pvp;
1590                /* user space address */
1591                if ((pvp = vg_ptr->pv[p]) != NULL) {
1592                        ret = lvm_do_pv_create(pvp, vg_ptr, p);
1593                        if ( ret != 0) {
1594                                lvm_do_vg_remove(minor);
1595                                return ret;
1596                        }
1597                }
1598        }
1599
1600        size = vg_ptr->lv_max * sizeof(lv_t *);
1601        if ((snap_lv_ptr = vmalloc ( size)) == NULL) {
1602                printk(KERN_CRIT
1603                       "%s -- VG_CREATE: vmalloc error snapshot LVs at line %d\n",
1604                       lvm_name, __LINE__);
1605                lvm_do_vg_remove(minor);
1606                return -EFAULT;
1607        }
1608        memset(snap_lv_ptr, 0, size);
1609
1610        /* get the logical volume structures */
1611        vg_ptr->lv_cur = 0;
1612        for (l = 0; l < vg_ptr->lv_max; l++) {
1613                lv_t *lvp;
1614                /* user space address */
1615                if ((lvp = vg_ptr->lv[l]) != NULL) {
1616                        if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) {
1617                                P_IOCTL("ERROR: copying LV ptr %p (%d bytes)\n",
1618                                        lvp, sizeof(lv_t));
1619                                lvm_do_vg_remove(minor);
1620                                return -EFAULT;
1621                        }
1622                        if ( lv.lv_access & LV_SNAPSHOT) {
1623                                snap_lv_ptr[ls] = lvp;
1624                                vg_ptr->lv[l] = NULL;
1625                                ls++;
1626                                continue;
1627                        }
1628                        vg_ptr->lv[l] = NULL;
1629                        /* only create original logical volumes for now */
1630                        if (lvm_do_lv_create(minor, lv.lv_name, &lv) != 0) {
1631                                lvm_do_vg_remove(minor);
1632                                return -EFAULT;
1633                        }
1634                }
1635        }
1636
1637        /* Second path to correct snapshot logical volumes which are not
1638           in place during first path above */
1639        for (l = 0; l < ls; l++) {
1640                lv_t *lvp = snap_lv_ptr[l];
1641                if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) {
1642                        lvm_do_vg_remove(minor);
1643                        return -EFAULT;
1644                }
1645                if (lvm_do_lv_create(minor, lv.lv_name, &lv) != 0) {
1646                        lvm_do_vg_remove(minor);
1647                        return -EFAULT;
1648                }
1649        }
1650
1651        vfree(snap_lv_ptr);
1652
1653        vg_count++;
1654
1655
1656        MOD_INC_USE_COUNT;
1657
1658        /* let's go active */
1659        vg_ptr->vg_status |= VG_ACTIVE;
1660
1661        return 0;
1662} /* lvm_do_vg_create() */
1663
1664
1665/*
1666 * character device support function VGDA extend
1667 */
1668static int lvm_do_vg_extend(vg_t *vg_ptr, void *arg)
1669{
1670        int ret = 0;
1671        uint p;
1672        pv_t *pv_ptr;
1673
1674        if (vg_ptr == NULL) return -ENXIO;
1675        if (vg_ptr->pv_cur < vg_ptr->pv_max) {
1676                for (p = 0; p < vg_ptr->pv_max; p++) {
1677                        if ( ( pv_ptr = vg_ptr->pv[p]) == NULL) {
1678                                ret = lvm_do_pv_create(arg, vg_ptr, p);
1679                                if ( ret != 0) return ret;
1680                                pv_ptr = vg_ptr->pv[p];
1681                                vg_ptr->pe_total += pv_ptr->pe_total;
1682                                return 0;
1683                        }
1684                }
1685        }
1686        return -EPERM;
1687} /* lvm_do_vg_extend() */
1688
1689
1690/*
1691 * character device support function VGDA reduce
1692 */
1693static int lvm_do_vg_reduce(vg_t *vg_ptr, void *arg) {
1694        uint p;
1695        pv_t *pv_ptr;
1696
1697        if (vg_ptr == NULL) return -ENXIO;
1698        if (copy_from_user(pv_name, arg, sizeof(pv_name)) != 0)
1699                return -EFAULT;
1700
1701        for (p = 0; p < vg_ptr->pv_max; p++) {
1702                pv_ptr = vg_ptr->pv[p];
1703                if (pv_ptr != NULL &&
1704                    strcmp(pv_ptr->pv_name,
1705                               pv_name) == 0) {
1706                        if (pv_ptr->lv_cur > 0) return -EPERM;
1707                        lvm_do_pv_remove(vg_ptr, p);
1708                        /* Make PV pointer array contiguous */
1709                        for (; p < vg_ptr->pv_max - 1; p++)
1710                                vg_ptr->pv[p] = vg_ptr->pv[p + 1];
1711                        vg_ptr->pv[p + 1] = NULL;
1712                        return 0;
1713                }
1714        }
1715        return -ENXIO;
1716} /* lvm_do_vg_reduce */
1717
1718
1719/*
1720 * character device support function VG rename
1721 */
1722static int lvm_do_vg_rename(vg_t *vg_ptr, void *arg)
1723{
1724        int l = 0, p = 0, len = 0;
1725        char vg_name[NAME_LEN] = { 0,};
1726        char lv_name[NAME_LEN] = { 0,};
1727        char *ptr = NULL;
1728        lv_t *lv_ptr = NULL;
1729        pv_t *pv_ptr = NULL;
1730
1731        /* If the VG doesn't exist in the kernel then just exit */
1732        if (!vg_ptr) return 0;
1733
1734        if (copy_from_user(vg_name, arg, sizeof(vg_name)) != 0)
1735                return -EFAULT;
1736
1737        lvm_fs_remove_vg(vg_ptr);
1738
1739        strncpy ( vg_ptr->vg_name, vg_name, sizeof ( vg_name)-1);
1740        for ( l = 0; l < vg_ptr->lv_max; l++)
1741        {
1742                if ((lv_ptr = vg_ptr->lv[l]) == NULL) continue;
1743                memset (lv_ptr->vg_name, 0, sizeof (*vg_name));
1744                strncpy(lv_ptr->vg_name, vg_name, sizeof ( vg_name));
1745                ptr = strrchr(lv_ptr->lv_name, '/');
1746                ptr = ptr ? ptr + 1 : lv_ptr->lv_name;
1747                strncpy(lv_name, ptr, sizeof ( lv_name));
1748                len = sizeof(LVM_DIR_PREFIX);
1749                strcpy(lv_ptr->lv_name, LVM_DIR_PREFIX);
1750                strncat(lv_ptr->lv_name, vg_name, NAME_LEN - len);
1751                strcat (lv_ptr->lv_name, "/");
1752                len += strlen(vg_name) + 1;
1753                strncat(lv_ptr->lv_name, lv_name, NAME_LEN - len);
1754        }
1755        for ( p = 0; p < vg_ptr->pv_max; p++)
1756        {
1757                if ( (pv_ptr = vg_ptr->pv[p]) == NULL) continue;
1758                strncpy(pv_ptr->vg_name, vg_name, NAME_LEN);
1759        }
1760
1761        lvm_fs_create_vg(vg_ptr);
1762
1763        /* Need to add PV entries */
1764        for ( p = 0; p < vg_ptr->pv_act; p++) {
1765                pv_t *pv_ptr = vg_ptr->pv[p];
1766
1767                if (pv_ptr)
1768                        lvm_fs_create_pv(vg_ptr, pv_ptr);
1769        }
1770
1771        /* Need to add LV entries */
1772        for ( l = 0; l < vg_ptr->lv_max; l++) {
1773                lv_t *lv_ptr = vg_ptr->lv[l];
1774
1775                if (!lv_ptr)
1776                        continue;
1777
1778                lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de =
1779                        lvm_fs_create_lv(vg_ptr, lv_ptr);
1780        }
1781
1782        return 0;
1783} /* lvm_do_vg_rename */
1784
1785
1786/*
1787 * character device support function VGDA remove
1788 */
1789static int lvm_do_vg_remove(int minor)
1790{
1791        int i;
1792        vg_t *vg_ptr = vg[VG_CHR(minor)];
1793        pv_t *pv_ptr;
1794
1795        if (vg_ptr == NULL) return -ENXIO;
1796
1797#ifdef LVM_TOTAL_RESET
1798        if (vg_ptr->lv_open > 0 && lvm_reset_spindown == 0)
1799#else
1800        if (vg_ptr->lv_open > 0)
1801#endif
1802                return -EPERM;
1803
1804        /* let's go inactive */
1805        vg_ptr->vg_status &= ~VG_ACTIVE;
1806
1807        /* remove from procfs and devfs */
1808        lvm_fs_remove_vg(vg_ptr);
1809
1810        /* free LVs */
1811        /* first free snapshot logical volumes */
1812        for (i = 0; i < vg_ptr->lv_max; i++) {
1813                if (vg_ptr->lv[i] != NULL &&
1814                    vg_ptr->lv[i]->lv_access & LV_SNAPSHOT) {
1815                        lvm_do_lv_remove(minor, NULL, i);
1816                        current->state = TASK_UNINTERRUPTIBLE;
1817                        schedule_timeout(1);
1818                }
1819        }
1820        /* then free the rest of the LVs */
1821        for (i = 0; i < vg_ptr->lv_max; i++) {
1822                if (vg_ptr->lv[i] != NULL) {
1823                        lvm_do_lv_remove(minor, NULL, i);
1824                        current->state = TASK_UNINTERRUPTIBLE;
1825                        schedule_timeout(1);
1826                }
1827        }
1828
1829        /* free PVs */
1830        for (i = 0; i < vg_ptr->pv_max; i++) {
1831                if ((pv_ptr = vg_ptr->pv[i]) != NULL) {
1832                        P_KFREE("%s -- kfree %d\n", lvm_name, __LINE__);
1833                        lvm_do_pv_remove(vg_ptr, i);
1834                }
1835        }
1836
1837        P_KFREE("%s -- kfree %d\n", lvm_name, __LINE__);
1838        kfree(vg_ptr);
1839        vg[VG_CHR(minor)] = NULL;
1840
1841        vg_count--;
1842
1843        MOD_DEC_USE_COUNT;
1844
1845        return 0;
1846} /* lvm_do_vg_remove() */
1847
1848
1849/*
1850 * character device support function physical volume create
1851 */
1852static int lvm_do_pv_create(pv_t *pvp, vg_t *vg_ptr, ulong p) {
1853        pv_t *pv;
1854        int err;
1855
1856        if (!vg_ptr)
1857                return -ENXIO;
1858
1859        pv = kmalloc(sizeof(pv_t),GFP_KERNEL);
1860        if (pv == NULL) {
1861                printk(KERN_CRIT
1862                       "%s -- PV_CREATE: kmalloc error PV at line %d\n",
1863                       lvm_name, __LINE__);
1864                return -ENOMEM;
1865        }
1866
1867        memset(pv, 0, sizeof(*pv));
1868
1869        if (copy_from_user(pv, pvp, sizeof(pv_t)) != 0) {
1870                P_IOCTL("lvm_do_pv_create ERROR: copy PV ptr %p (%d bytes)\n",
1871                        pvp, sizeof(pv_t));
1872                kfree(pv);
1873                return -EFAULT;
1874        }
1875
1876        if ((err = _open_pv(pv))) {
1877                kfree(pv);
1878                return err;
1879        }
1880
1881        /* We don't need the PE list
1882           in kernel space as with LVs pe_t list (see below) */
1883        pv->pe = NULL;
1884        pv->pe_allocated = 0;
1885        pv->pv_status = PV_ACTIVE;
1886        vg_ptr->pv_act++;
1887        vg_ptr->pv_cur++;
1888        lvm_fs_create_pv(vg_ptr, pv);
1889
1890        vg_ptr->pv[p] = pv;
1891        return 0;
1892} /* lvm_do_pv_create() */
1893
1894
1895/*
1896 * character device support function physical volume remove
1897 */
1898static int lvm_do_pv_remove(vg_t *vg_ptr, ulong p) {
1899        pv_t *pv = vg_ptr->pv[p];
1900
1901        lvm_fs_remove_pv(vg_ptr, pv);
1902
1903        vg_ptr->pe_total -= pv->pe_total;
1904        vg_ptr->pv_cur--;
1905        vg_ptr->pv_act--;
1906
1907        _close_pv(pv);
1908        kfree(pv);
1909
1910        vg_ptr->pv[p] = NULL;
1911
1912        return 0;
1913}
1914
1915
1916static void __update_hardsectsize(lv_t *lv)
1917{
1918        int max_hardsectsize = 0, hardsectsize = 0;
1919        int p;
1920
1921        /* Check PVs first to see if they all have same sector size */
1922        for (p = 0; p < lv->vg->pv_cur; p++) {
1923                pv_t *pv = lv->vg->pv[p];
1924                if (pv && (hardsectsize = lvm_sectsize(pv->pv_dev))) {
1925                        if (max_hardsectsize == 0)
1926                                max_hardsectsize = hardsectsize;
1927                        else if (hardsectsize != max_hardsectsize) {
1928                                P_DEV("%s PV[%d] (%s) sector size %d, not %d\n",
1929                                      lv->lv_name, p, kdevname(pv->pv_dev),
1930                                      hardsectsize, max_hardsectsize);
1931                                break;
1932                        }
1933                }
1934        }
1935
1936        /* PVs have different block size, need to check each LE sector size */
1937        if (hardsectsize != max_hardsectsize) {
1938                int le;
1939                for (le = 0; le < lv->lv_allocated_le; le++) {
1940                        hardsectsize = lvm_sectsize(lv->lv_current_pe[le].dev);
1941                        if (hardsectsize > max_hardsectsize) {
1942                                P_DEV("%s LE[%d] (%s) blocksize %d not %d\n",
1943                                      lv->lv_name, le,
1944                                      kdevname(lv->lv_current_pe[le].dev),
1945                                      hardsectsize, max_hardsectsize);
1946                                max_hardsectsize = hardsectsize;
1947                        }
1948                }
1949
1950                /* only perform this operation on active snapshots */
1951                if ((lv->lv_access & LV_SNAPSHOT) &&
1952                    (lv->lv_status & LV_ACTIVE)) {
1953                        int e;
1954                        for (e = 0; e < lv->lv_remap_end; e++) {
1955                                hardsectsize = lvm_sectsize(lv->lv_block_exception[e].rdev_new);
1956                                if (hardsectsize > max_hardsectsize)
1957                                        max_hardsectsize = hardsectsize;
1958                        }
1959                }
1960        }
1961
1962        if (max_hardsectsize == 0)
1963                max_hardsectsize = SECTOR_SIZE;
1964        P_DEV("hardblocksize for LV %s is %d\n",
1965              kdevname(lv->lv_dev), max_hardsectsize);
1966        lvm_hardsectsizes[MINOR(lv->lv_dev)] = max_hardsectsize;
1967}
1968
1969/*
1970 * character device support function logical volume create
1971 */
1972static int lvm_do_lv_create(int minor, char *lv_name, lv_t *lv)
1973{
1974        int e, ret, l, le, l_new, p, size, activate = 1;
1975        ulong lv_status_save;
1976        lv_block_exception_t *lvbe = lv->lv_block_exception;
1977        vg_t *vg_ptr = vg[VG_CHR(minor)];
1978        lv_t *lv_ptr = NULL;
1979        pe_t *pep;
1980
1981        if (!(pep = lv->lv_current_pe))
1982                return -EINVAL;
1983
1984        if (_sectors_to_k(lv->lv_chunk_size) > LVM_SNAPSHOT_MAX_CHUNK)
1985                return -EINVAL;
1986
1987        for (l = 0; l < vg_ptr->lv_cur; l++) {
1988                if (vg_ptr->lv[l] != NULL &&
1989                    strcmp(vg_ptr->lv[l]->lv_name, lv_name) == 0)
1990                        return -EEXIST;
1991        }
1992
1993        /* in case of lv_remove(), lv_create() pair */
1994        l_new = -1;
1995        if (vg_ptr->lv[lv->lv_number] == NULL)
1996                l_new = lv->lv_number;
1997        else {
1998                for (l = 0; l < vg_ptr->lv_max; l++) {
1999                        if (vg_ptr->lv[l] == NULL)
2000                                if (l_new == -1) l_new = l;
2001                }
2002        }
2003        if (l_new == -1) return -EPERM;
2004        else             l = l_new;
2005
2006        if ((lv_ptr = kmalloc(sizeof(lv_t),GFP_KERNEL)) == NULL) {;
2007                printk(KERN_CRIT "%s -- LV_CREATE: kmalloc error LV at line %d\n",
2008                       lvm_name, __LINE__);
2009                return -ENOMEM;
2010        }
2011        /* copy preloaded LV */
2012        memcpy((char *) lv_ptr, (char *) lv, sizeof(lv_t));
2013
2014        lv_status_save = lv_ptr->lv_status;
2015        lv_ptr->lv_status &= ~LV_ACTIVE;
2016        lv_ptr->lv_snapshot_org = NULL;
2017        lv_ptr->lv_snapshot_prev = NULL;
2018        lv_ptr->lv_snapshot_next = NULL;
2019        lv_ptr->lv_block_exception = NULL;
2020        lv_ptr->lv_iobuf = NULL;
2021        lv_ptr->lv_COW_table_iobuf = NULL;
2022        lv_ptr->lv_snapshot_hash_table = NULL;
2023        lv_ptr->lv_snapshot_hash_table_size = 0;
2024        lv_ptr->lv_snapshot_hash_mask = 0;
2025        init_rwsem(&lv_ptr->lv_lock);
2026
2027        lv_ptr->lv_snapshot_use_rate = 0;
2028
2029        vg_ptr->lv[l] = lv_ptr;
2030
2031        /* get the PE structures from user space if this
2032           is not a snapshot logical volume */
2033        if (!(lv_ptr->lv_access & LV_SNAPSHOT)) {
2034                size = lv_ptr->lv_allocated_le * sizeof(pe_t);
2035
2036                if ((lv_ptr->lv_current_pe = vmalloc(size)) == NULL) {
2037                        printk(KERN_CRIT
2038                               "%s -- LV_CREATE: vmalloc error LV_CURRENT_PE of %d Byte "
2039                               "at line %d\n",
2040                               lvm_name, size, __LINE__);
2041                        P_KFREE("%s -- kfree %d\n", lvm_name, __LINE__);
2042                        kfree(lv_ptr);
2043                        vg_ptr->lv[l] = NULL;
2044                        return -ENOMEM;
2045                }
2046                if (copy_from_user(lv_ptr->lv_current_pe, pep, size)) {
2047                        P_IOCTL("ERROR: copying PE ptr %p (%d bytes)\n",
2048                                pep, sizeof(size));
2049                        vfree(lv_ptr->lv_current_pe);
2050                        kfree(lv_ptr);
2051                        vg_ptr->lv[l] = NULL;
2052                        return -EFAULT;
2053                }
2054                /* correct the PE count in PVs */
2055                for (le = 0; le < lv_ptr->lv_allocated_le; le++) {
2056                        vg_ptr->pe_allocated++;
2057                        for (p = 0; p < vg_ptr->pv_cur; p++) {
2058                                if (vg_ptr->pv[p]->pv_dev ==
2059                                    lv_ptr->lv_current_pe[le].dev)
2060                                        vg_ptr->pv[p]->pe_allocated++;
2061                        }
2062                }
2063        } else {
2064                /* Get snapshot exception data and block list */
2065                if (lvbe != NULL) {
2066                        lv_ptr->lv_snapshot_org =
2067                            vg_ptr->lv[LV_BLK(lv_ptr->lv_snapshot_minor)];
2068                        if (lv_ptr->lv_snapshot_org != NULL) {
2069                                size = lv_ptr->lv_remap_end * sizeof(lv_block_exception_t);
2070
2071                                if (!size) {
2072                                        printk(KERN_WARNING
2073                                               "%s -- zero length exception table requested\n",
2074                                               lvm_name);
2075                                        kfree(lv_ptr);
2076                                        return -EINVAL;
2077                                }
2078
2079                                if ((lv_ptr->lv_block_exception = vmalloc(size)) == NULL) {
2080                                        printk(KERN_CRIT
2081                                               "%s -- lvm_do_lv_create: vmalloc error LV_BLOCK_EXCEPTION "
2082                                               "of %d byte at line %d\n",
2083                                               lvm_name, size, __LINE__);
2084                                        P_KFREE("%s -- kfree %d\n", lvm_name,
2085                                                __LINE__);
2086                                        kfree(lv_ptr);
2087                                        vg_ptr->lv[l] = NULL;
2088                                        return -ENOMEM;
2089                                }
2090                                if (copy_from_user(lv_ptr->lv_block_exception, lvbe, size)) {
2091                                        vfree(lv_ptr->lv_block_exception);
2092                                        kfree(lv_ptr);
2093                                        vg_ptr->lv[l] = NULL;
2094                                        return -EFAULT;
2095                                }
2096
2097                                if(lv_ptr->lv_block_exception[0].rsector_org ==
2098                                   LVM_SNAPSHOT_DROPPED_SECTOR)
2099                                {
2100                                        printk(KERN_WARNING
2101   "%s -- lvm_do_lv_create: snapshot has been dropped and will not be activated\n",
2102                                               lvm_name);
2103                                        activate = 0;
2104                                }
2105
2106                                /* point to the original logical volume */
2107                                lv_ptr = lv_ptr->lv_snapshot_org;
2108
2109                                lv_ptr->lv_snapshot_minor = 0;
2110                                lv_ptr->lv_snapshot_org = lv_ptr;
2111                                /* our new one now back points to the previous last in the chain
2112                                   which can be the original logical volume */
2113                                lv_ptr = vg_ptr->lv[l];
2114                                /* now lv_ptr points to our new last snapshot logical volume */
2115                                lv_ptr->lv_current_pe = lv_ptr->lv_snapshot_org->lv_current_pe;
2116                                lv_ptr->lv_allocated_snapshot_le = lv_ptr->lv_allocated_le;
2117                                lv_ptr->lv_allocated_le = lv_ptr->lv_snapshot_org->lv_allocated_le;
2118                                lv_ptr->lv_current_le = lv_ptr->lv_snapshot_org->lv_current_le;
2119                                lv_ptr->lv_size = lv_ptr->lv_snapshot_org->lv_size;
2120                                lv_ptr->lv_stripes = lv_ptr->lv_snapshot_org->lv_stripes;
2121                                lv_ptr->lv_stripesize = lv_ptr->lv_snapshot_org->lv_stripesize;
2122
2123                                /* Update the VG PE(s) used by snapshot reserve space. */
2124                                vg_ptr->pe_allocated += lv_ptr->lv_allocated_snapshot_le;
2125
2126                                if ((ret = lvm_snapshot_alloc(lv_ptr)) != 0)
2127                                {
2128                                        vfree(lv_ptr->lv_block_exception);
2129                                        kfree(lv_ptr);
2130                                        vg_ptr->lv[l] = NULL;
2131                                        return ret;
2132                                }
2133                                for ( e = 0; e < lv_ptr->lv_remap_ptr; e++)
2134                                        lvm_hash_link (lv_ptr->lv_block_exception + e,
2135                                                       lv_ptr->lv_block_exception[e].rdev_org,
2136                                                       lv_ptr->lv_block_exception[e].rsector_org, lv_ptr);
2137                                /* need to fill the COW exception table data
2138                                   into the page for disk i/o */
2139                                if(lvm_snapshot_fill_COW_page(vg_ptr, lv_ptr)) {
2140                                        kfree(lv_ptr);
2141                                        vg_ptr->lv[l] = NULL;
2142                                        return -EINVAL;
2143                                }
2144                                init_waitqueue_head(&lv_ptr->lv_snapshot_wait);
2145                        } else {
2146                                kfree(lv_ptr);
2147                                vg_ptr->lv[l] = NULL;
2148                                return -EFAULT;
2149                        }
2150                } else {
2151                        kfree(vg_ptr->lv[l]);
2152                        vg_ptr->lv[l] = NULL;
2153                        return -EINVAL;
2154                }
2155        } /* if ( vg[VG_CHR(minor)]->lv[l]->lv_access & LV_SNAPSHOT) */
2156
2157        lv_ptr = vg_ptr->lv[l];
2158        lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = 0;
2159        lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = lv_ptr->lv_size;
2160        lvm_size[MINOR(lv_ptr->lv_dev)] = lv_ptr->lv_size >> 1;
2161        vg_lv_map[MINOR(lv_ptr->lv_dev)].vg_number = vg_ptr->vg_number;
2162        vg_lv_map[MINOR(lv_ptr->lv_dev)].lv_number = lv_ptr->lv_number;
2163        LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead);
2164        vg_ptr->lv_cur++;
2165        lv_ptr->lv_status = lv_status_save;
2166        lv_ptr->vg = vg_ptr;
2167
2168        __update_hardsectsize(lv_ptr);
2169
2170        /* optionally add our new snapshot LV */
2171        if (lv_ptr->lv_access & LV_SNAPSHOT) {
2172                lv_t *org = lv_ptr->lv_snapshot_org, *last;
2173
2174                /* sync the original logical volume */
2175                fsync_dev(org->lv_dev);
2176#ifdef  LVM_VFS_ENHANCEMENT
2177                /* VFS function call to sync and lock the filesystem */
2178                fsync_dev_lockfs(org->lv_dev);
2179#endif
2180
2181                down_write(&org->lv_lock);
2182                org->lv_access |= LV_SNAPSHOT_ORG;
2183                lv_ptr->lv_access &= ~LV_SNAPSHOT_ORG; /* this can only hide an userspace bug */
2184
2185
2186                /* Link in the list of snapshot volumes */
2187                for (last = org; last->lv_snapshot_next; last = last->lv_snapshot_next);
2188                lv_ptr->lv_snapshot_prev = last;
2189                last->lv_snapshot_next = lv_ptr;
2190                up_write(&org->lv_lock);
2191        }
2192
2193        /* activate the logical volume */
2194        if(activate)
2195                lv_ptr->lv_status |= LV_ACTIVE;
2196        else
2197                lv_ptr->lv_status &= ~LV_ACTIVE;
2198
2199        if ( lv_ptr->lv_access & LV_WRITE)
2200                set_device_ro(lv_ptr->lv_dev, 0);
2201        else
2202                set_device_ro(lv_ptr->lv_dev, 1);
2203
2204#ifdef  LVM_VFS_ENHANCEMENT
2205/* VFS function call to unlock the filesystem */
2206        if (lv_ptr->lv_access & LV_SNAPSHOT)
2207                unlockfs(lv_ptr->lv_snapshot_org->lv_dev);
2208#endif
2209
2210        lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de =
2211            lvm_fs_create_lv(vg_ptr, lv_ptr);
2212        return 0;
2213} /* lvm_do_lv_create() */
2214
2215
2216/*
2217 * character device support function logical volume remove
2218 */
2219static int lvm_do_lv_remove(int minor, char *lv_name, int l)
2220{
2221        uint le, p;
2222        vg_t *vg_ptr = vg[VG_CHR(minor)];
2223        lv_t *lv_ptr;
2224
2225        if (!vg_ptr)
2226                return -ENXIO;
2227
2228        if (l == -1) {
2229                for (l = 0; l < vg_ptr->lv_max; l++) {
2230                        if (vg_ptr->lv[l] != NULL &&
2231                            strcmp(vg_ptr->lv[l]->lv_name, lv_name) == 0) {
2232                                break;
2233                        }
2234                }
2235        }
2236        if (l == vg_ptr->lv_max) return -ENXIO;
2237
2238        lv_ptr = vg_ptr->lv[l];
2239#ifdef LVM_TOTAL_RESET
2240        if (lv_ptr->lv_open > 0 && lvm_reset_spindown == 0)
2241#else
2242        if (lv_ptr->lv_open > 0)
2243#endif
2244                return -EBUSY;
2245
2246        /* check for deletion of snapshot source while
2247           snapshot volume still exists */
2248        if ((lv_ptr->lv_access & LV_SNAPSHOT_ORG) &&
2249            lv_ptr->lv_snapshot_next != NULL)
2250                return -EPERM;
2251
2252        lvm_fs_remove_lv(vg_ptr, lv_ptr);
2253
2254        if (lv_ptr->lv_access & LV_SNAPSHOT) {
2255                /*
2256                 * Atomically make the snapshot invisible
2257                 * to the original lv before playing with it.
2258                 */
2259                lv_t * org = lv_ptr->lv_snapshot_org;
2260                down_write(&org->lv_lock);
2261
2262                /* remove this snapshot logical volume from the chain */
2263                lv_ptr->lv_snapshot_prev->lv_snapshot_next = lv_ptr->lv_snapshot_next;
2264                if (lv_ptr->lv_snapshot_next != NULL) {
2265                        lv_ptr->lv_snapshot_next->lv_snapshot_prev =
2266                            lv_ptr->lv_snapshot_prev;
2267                }
2268
2269                /* no more snapshots? */
2270                if (!org->lv_snapshot_next) {
2271                        org->lv_access &= ~LV_SNAPSHOT_ORG;
2272                }
2273                up_write(&org->lv_lock);
2274
2275                lvm_snapshot_release(lv_ptr);
2276
2277                /* Update the VG PE(s) used by snapshot reserve space. */
2278                vg_ptr->pe_allocated -= lv_ptr->lv_allocated_snapshot_le;
2279        }
2280
2281        lv_ptr->lv_status |= LV_SPINDOWN;
2282
2283        /* sync the buffers */
2284        fsync_dev(lv_ptr->lv_dev);
2285
2286        lv_ptr->lv_status &= ~LV_ACTIVE;
2287
2288        /* invalidate the buffers */
2289        invalidate_buffers(lv_ptr->lv_dev);
2290
2291        /* reset generic hd */
2292        lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = -1;
2293        lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = 0;
2294        lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de = 0;
2295        lvm_size[MINOR(lv_ptr->lv_dev)] = 0;
2296
2297        /* reset VG/LV mapping */
2298        vg_lv_map[MINOR(lv_ptr->lv_dev)].vg_number = ABS_MAX_VG;
2299        vg_lv_map[MINOR(lv_ptr->lv_dev)].lv_number = -1;
2300
2301        /* correct the PE count in PVs if this is not a snapshot
2302           logical volume */
2303        if (!(lv_ptr->lv_access & LV_SNAPSHOT)) {
2304                /* only if this is no snapshot logical volume because
2305                   we share the lv_current_pe[] structs with the
2306                   original logical volume */
2307                for (le = 0; le < lv_ptr->lv_allocated_le; le++) {
2308                        vg_ptr->pe_allocated--;
2309                        for (p = 0; p < vg_ptr->pv_cur; p++) {
2310                                if (vg_ptr->pv[p]->pv_dev ==
2311                                    lv_ptr->lv_current_pe[le].dev)
2312                                        vg_ptr->pv[p]->pe_allocated--;
2313                        }
2314                }
2315                vfree(lv_ptr->lv_current_pe);
2316        }
2317
2318        P_KFREE("%s -- kfree %d\n", lvm_name, __LINE__);
2319        kfree(lv_ptr);
2320        vg_ptr->lv[l] = NULL;
2321        vg_ptr->lv_cur--;
2322        return 0;
2323} /* lvm_do_lv_remove() */
2324
2325
2326/*
2327 * logical volume extend / reduce
2328 */
2329static int __extend_reduce_snapshot(vg_t *vg_ptr, lv_t *old_lv, lv_t *new_lv) {
2330        ulong size;
2331        lv_block_exception_t *lvbe;
2332
2333        if (!new_lv->lv_block_exception)
2334                return -ENXIO;
2335
2336        size = new_lv->lv_remap_end * sizeof(lv_block_exception_t);
2337        if ((lvbe = vmalloc(size)) == NULL) {
2338                printk(KERN_CRIT
2339                       "%s -- lvm_do_lv_extend_reduce: vmalloc "
2340                       "error LV_BLOCK_EXCEPTION of %lu Byte at line %d\n",
2341                       lvm_name, size, __LINE__);
2342                return -ENOMEM;
2343        }
2344
2345        if ((new_lv->lv_remap_end > old_lv->lv_remap_end) &&
2346            (copy_from_user(lvbe, new_lv->lv_block_exception, size))) {
2347                vfree(lvbe);
2348                return -EFAULT;
2349        }
2350        new_lv->lv_block_exception = lvbe;
2351
2352        if (lvm_snapshot_alloc_hash_table(new_lv)) {
2353                vfree(new_lv->lv_block_exception);
2354                return -ENOMEM;
2355        }
2356
2357        return 0;
2358}
2359
2360static int __extend_reduce(vg_t *vg_ptr, lv_t *old_lv, lv_t *new_lv) {
2361        ulong size, l, p, end;
2362        pe_t *pe;
2363
2364        /* allocate space for new pe structures */
2365        size = new_lv->lv_current_le * sizeof(pe_t);
2366        if ((pe = vmalloc(size)) == NULL) {
2367                printk(KERN_CRIT
2368                       "%s -- lvm_do_lv_extend_reduce: "
2369                       "vmalloc error LV_CURRENT_PE of %lu Byte at line %d\n",
2370                       lvm_name, size, __LINE__);
2371                return -ENOMEM;
2372        }
2373
2374        /* get the PE structures from user space */
2375        if (copy_from_user(pe, new_lv->lv_current_pe, size)) {
2376                if(old_lv->lv_access & LV_SNAPSHOT)
2377                        vfree(new_lv->lv_snapshot_hash_table);
2378                vfree(pe);
2379                return -EFAULT;
2380        }
2381
2382        new_lv->lv_current_pe = pe;
2383
2384        /* reduce allocation counters on PV(s) */
2385        for (l = 0; l < old_lv->lv_allocated_le; l++) {
2386                vg_ptr->pe_allocated--;
2387                for (p = 0; p < vg_ptr->pv_cur; p++) {
2388                        if (vg_ptr->pv[p]->pv_dev ==
2389                            old_lv->lv_current_pe[l].dev) {
2390                                vg_ptr->pv[p]->pe_allocated--;
2391                                break;
2392                        }
2393                }
2394        }
2395
2396        /* extend the PE count in PVs */
2397        for (l = 0; l < new_lv->lv_allocated_le; l++) {
2398                vg_ptr->pe_allocated++;
2399                for (p = 0; p < vg_ptr->pv_cur; p++) {
2400                        if (vg_ptr->pv[p]->pv_dev ==
2401                            new_lv->lv_current_pe[l].dev) {
2402                                vg_ptr->pv[p]->pe_allocated++;
2403                                break;
2404                        }
2405                }
2406        }
2407
2408        /* save available i/o statistic data */
2409        if (old_lv->lv_stripes < 2) {   /* linear logical volume */
2410                end = min(old_lv->lv_current_le, new_lv->lv_current_le);
2411                for (l = 0; l < end; l++) {
2412                        new_lv->lv_current_pe[l].reads +=
2413                                old_lv->lv_current_pe[l].reads;
2414
2415                        new_lv->lv_current_pe[l].writes +=
2416                                old_lv->lv_current_pe[l].writes;
2417                }
2418
2419        } else {                /* striped logical volume */
2420                uint i, j, source, dest, end, old_stripe_size, new_stripe_size;
2421
2422                old_stripe_size = old_lv->lv_allocated_le / old_lv->lv_stripes;
2423                new_stripe_size = new_lv->lv_allocated_le / new_lv->lv_stripes;
2424                end = min(old_stripe_size, new_stripe_size);
2425
2426                for (i = source = dest = 0; i < new_lv->lv_stripes; i++) {
2427                        for (j = 0; j < end; j++) {
2428                                new_lv->lv_current_pe[dest + j].reads +=
2429                                    old_lv->lv_current_pe[source + j].reads;
2430                                new_lv->lv_current_pe[dest + j].writes +=
2431                                    old_lv->lv_current_pe[source + j].writes;
2432                        }
2433                        source += old_stripe_size;
2434                        dest += new_stripe_size;
2435                }
2436        }
2437
2438        return 0;
2439}
2440
2441static int lvm_do_lv_extend_reduce(int minor, char *lv_name, lv_t *new_lv)
2442{
2443        int r;
2444        ulong l, e, size;
2445        vg_t *vg_ptr = vg[VG_CHR(minor)];
2446        lv_t *old_lv;
2447        pe_t *pe;
2448
2449        if (!vg_ptr)
2450                return -ENXIO;
2451
2452        if ((pe = new_lv->lv_current_pe) == NULL)
2453                return -EINVAL;
2454
2455        for (l = 0; l < vg_ptr->lv_max; l++)
2456                if (vg_ptr->lv[l] && !strcmp(vg_ptr->lv[l]->lv_name, lv_name))
2457                        break;
2458
2459        if (l == vg_ptr->lv_max)
2460                return -ENXIO;
2461
2462        old_lv = vg_ptr->lv[l];
2463
2464        if (old_lv->lv_access & LV_SNAPSHOT) {
2465                /* only perform this operation on active snapshots */
2466                if (old_lv->lv_status & LV_ACTIVE)
2467                        r = __extend_reduce_snapshot(vg_ptr, old_lv, new_lv);
2468                else
2469                        r = -EPERM;
2470
2471        } else
2472                r = __extend_reduce(vg_ptr, old_lv, new_lv);
2473
2474        if(r)
2475                return r;
2476
2477        /* copy relevent fields */
2478        down_write(&old_lv->lv_lock);
2479
2480        if(new_lv->lv_access & LV_SNAPSHOT) {
2481                size = (new_lv->lv_remap_end > old_lv->lv_remap_end) ?
2482                        old_lv->lv_remap_ptr : new_lv->lv_remap_end;
2483                size *= sizeof(lv_block_exception_t);
2484                memcpy(new_lv->lv_block_exception,
2485                       old_lv->lv_block_exception, size);
2486
2487                old_lv->lv_remap_end = new_lv->lv_remap_end;
2488                old_lv->lv_block_exception = new_lv->lv_block_exception;
2489                old_lv->lv_snapshot_hash_table =
2490                        new_lv->lv_snapshot_hash_table;
2491                old_lv->lv_snapshot_hash_table_size =
2492                        new_lv->lv_snapshot_hash_table_size;
2493                old_lv->lv_snapshot_hash_mask =
2494                        new_lv->lv_snapshot_hash_mask;
2495
2496                for (e = 0; e < new_lv->lv_remap_ptr; e++)
2497                        lvm_hash_link(new_lv->lv_block_exception + e,
2498                                      new_lv->lv_block_exception[e].rdev_org,
2499                                      new_lv->lv_block_exception[e].rsector_org,
2500                                      new_lv);
2501
2502        } else {
2503
2504                vfree(old_lv->lv_current_pe);
2505                vfree(old_lv->lv_snapshot_hash_table);
2506
2507                old_lv->lv_size = new_lv->lv_size;
2508                old_lv->lv_allocated_le = new_lv->lv_allocated_le;
2509                old_lv->lv_current_le = new_lv->lv_current_le;
2510                old_lv->lv_current_pe = new_lv->lv_current_pe;
2511                lvm_gendisk.part[MINOR(old_lv->lv_dev)].nr_sects =
2512                        old_lv->lv_size;
2513                lvm_size[MINOR(old_lv->lv_dev)] = old_lv->lv_size >> 1;
2514
2515                if (old_lv->lv_access & LV_SNAPSHOT_ORG) {
2516                        lv_t *snap;
2517                        for(snap = old_lv->lv_snapshot_next; snap;
2518                            snap = snap->lv_snapshot_next) {
2519                                down_write(&snap->lv_lock);
2520                                snap->lv_current_pe = old_lv->lv_current_pe;
2521                                snap->lv_allocated_le =
2522                                        old_lv->lv_allocated_le;
2523                                snap->lv_current_le = old_lv->lv_current_le;
2524                                snap->lv_size = old_lv->lv_size;
2525
2526                                lvm_gendisk.part[MINOR(snap->lv_dev)].nr_sects
2527                                        = old_lv->lv_size;
2528                                lvm_size[MINOR(snap->lv_dev)] =
2529                                        old_lv->lv_size >> 1;
2530                                __update_hardsectsize(snap);
2531                                up_write(&snap->lv_lock);
2532                        }
2533                }
2534        }
2535
2536        __update_hardsectsize(old_lv);
2537        up_write(&old_lv->lv_lock);
2538
2539        return 0;
2540} /* lvm_do_lv_extend_reduce() */
2541
2542
2543/*
2544 * character device support function logical volume status by name
2545 */
2546static int lvm_do_lv_status_byname(vg_t *vg_ptr, void *arg)
2547{
2548        uint l;
2549        lv_status_byname_req_t lv_status_byname_req;
2550        void *saved_ptr1;
2551        void *saved_ptr2;
2552        lv_t *lv_ptr;
2553
2554        if (vg_ptr == NULL) return -ENXIO;
2555        if (copy_from_user(&lv_status_byname_req, arg,
2556                           sizeof(lv_status_byname_req_t)) != 0)
2557                return -EFAULT;
2558
2559        if (lv_status_byname_req.lv == NULL) return -EINVAL;
2560
2561        for (l = 0; l < vg_ptr->lv_max; l++) {
2562                if ((lv_ptr = vg_ptr->lv[l]) != NULL &&
2563                    strcmp(lv_ptr->lv_name,
2564                           lv_status_byname_req.lv_name) == 0) {
2565                        /* Save usermode pointers */
2566                        if (copy_from_user(&saved_ptr1, &lv_status_byname_req.lv->lv_current_pe, sizeof(void*)) != 0)
2567                                return -EFAULT;
2568                        if (copy_from_user(&saved_ptr2, &lv_status_byname_req.lv->lv_block_exception, sizeof(void*)) != 0)
2569                                return -EFAULT;
2570                        if (copy_to_user(lv_status_byname_req.lv,
2571                                         lv_ptr,
2572                                         sizeof(lv_t)) != 0)
2573                                return -EFAULT;
2574                        if (saved_ptr1 != NULL) {
2575                                if (copy_to_user(saved_ptr1,
2576                                                 lv_ptr->lv_current_pe,
2577                                                 lv_ptr->lv_allocated_le *
2578                                                 sizeof(pe_t)) != 0)
2579                                        return -EFAULT;
2580                        }
2581                        /* Restore usermode pointers */
2582                        if (copy_to_user(&lv_status_byname_req.lv->lv_current_pe, &saved_ptr1, sizeof(void*)) != 0)
2583                                return -EFAULT;
2584                        return 0;
2585                }
2586        }
2587        return -ENXIO;
2588} /* lvm_do_lv_status_byname() */
2589
2590
2591/*
2592 * character device support function logical volume status by index
2593 */
2594static int lvm_do_lv_status_byindex(vg_t *vg_ptr,void *arg)
2595{
2596        lv_status_byindex_req_t lv_status_byindex_req;
2597        void *saved_ptr1;
2598        void *saved_ptr2;
2599        lv_t *lv_ptr;
2600
2601        if (vg_ptr == NULL) return -ENXIO;
2602        if (copy_from_user(&lv_status_byindex_req, arg,
2603                           sizeof(lv_status_byindex_req)) != 0)
2604                return -EFAULT;
2605
2606        if (lv_status_byindex_req.lv == NULL)
2607                return -EINVAL;
2608        if ( ( lv_ptr = vg_ptr->lv[lv_status_byindex_req.lv_index]) == NULL)
2609                return -ENXIO;
2610
2611        /* Save usermode pointers */
2612        if (copy_from_user(&saved_ptr1, &lv_status_byindex_req.lv->lv_current_pe, sizeof(void*)) != 0)
2613                return -EFAULT;
2614        if (copy_from_user(&saved_ptr2, &lv_status_byindex_req.lv->lv_block_exception, sizeof(void*)) != 0)
2615                return -EFAULT;
2616
2617        if (copy_to_user(lv_status_byindex_req.lv, lv_ptr, sizeof(lv_t)) != 0)
2618                return -EFAULT;
2619        if (saved_ptr1 != NULL) {
2620                if (copy_to_user(saved_ptr1,
2621                                 lv_ptr->lv_current_pe,
2622                                 lv_ptr->lv_allocated_le *
2623                                 sizeof(pe_t)) != 0)
2624                        return -EFAULT;
2625        }
2626
2627        /* Restore usermode pointers */
2628        if (copy_to_user(&lv_status_byindex_req.lv->lv_current_pe, &saved_ptr1, sizeof(void *)) != 0)
2629                return -EFAULT;
2630
2631        return 0;
2632} /* lvm_do_lv_status_byindex() */
2633
2634
2635/*
2636 * character device support function logical volume status by device number
2637 */
2638static int lvm_do_lv_status_bydev(vg_t * vg_ptr, void * arg) {
2639        int l;
2640        lv_status_bydev_req_t lv_status_bydev_req;
2641        void *saved_ptr1;
2642        void *saved_ptr2;
2643        lv_t *lv_ptr;
2644
2645        if (vg_ptr == NULL) return -ENXIO;
2646        if (copy_from_user(&lv_status_bydev_req, arg,
2647                           sizeof(lv_status_bydev_req)) != 0)
2648                return -EFAULT;
2649
2650        for ( l = 0; l < vg_ptr->lv_max; l++) {
2651                if ( vg_ptr->lv[l] == NULL) continue;
2652                if ( vg_ptr->lv[l]->lv_dev == lv_status_bydev_req.dev) break;
2653        }
2654
2655        if ( l == vg_ptr->lv_max) return -ENXIO;
2656        lv_ptr = vg_ptr->lv[l];
2657
2658        /* Save usermode pointers */
2659        if (copy_from_user(&saved_ptr1, &lv_status_bydev_req.lv->lv_current_pe, sizeof(void*)) != 0)
2660                return -EFAULT;
2661        if (copy_from_user(&saved_ptr2, &lv_status_bydev_req.lv->lv_block_exception, sizeof(void*)) != 0)
2662                return -EFAULT;
2663
2664        if (copy_to_user(lv_status_bydev_req.lv, lv_ptr, sizeof(lv_t)) != 0)
2665                return -EFAULT;
2666        if (saved_ptr1 != NULL) {
2667                if (copy_to_user(saved_ptr1,
2668                                 lv_ptr->lv_current_pe,
2669                                 lv_ptr->lv_allocated_le *
2670                                 sizeof(pe_t)) != 0)
2671                        return -EFAULT;
2672        }
2673        /* Restore usermode pointers */
2674        if (copy_to_user(&lv_status_bydev_req.lv->lv_current_pe, &saved_ptr1, sizeof(void *)) != 0)
2675                return -EFAULT;
2676
2677        return 0;
2678} /* lvm_do_lv_status_bydev() */
2679
2680
2681/*
2682 * character device support function rename a logical volume
2683 */
2684static int lvm_do_lv_rename(vg_t *vg_ptr, lv_req_t *lv_req, lv_t *lv)
2685{
2686        int l = 0;
2687        int ret = 0;
2688        lv_t *lv_ptr = NULL;
2689
2690        if (!vg_ptr)
2691                return -ENXIO;
2692
2693        for (l = 0; l < vg_ptr->lv_max; l++)
2694        {
2695                if ( (lv_ptr = vg_ptr->lv[l]) == NULL) continue;
2696                if (lv_ptr->lv_dev == lv->lv_dev)
2697                {
2698                        lvm_fs_remove_lv(vg_ptr, lv_ptr);
2699                        strncpy(lv_ptr->lv_name, lv_req->lv_name, NAME_LEN);
2700                        lvm_fs_create_lv(vg_ptr, lv_ptr);
2701                        break;
2702                }
2703        }
2704        if (l == vg_ptr->lv_max) ret = -ENODEV;
2705
2706        return ret;
2707} /* lvm_do_lv_rename */
2708
2709
2710/*
2711 * character device support function physical volume change
2712 */
2713static int lvm_do_pv_change(vg_t *vg_ptr, void *arg)
2714{
2715        uint p;
2716        pv_t *pv_ptr;
2717        struct block_device *bd;
2718
2719        if (vg_ptr == NULL) return -ENXIO;
2720        if (copy_from_user(&pv_change_req, arg,
2721                           sizeof(pv_change_req)) != 0)
2722                return -EFAULT;
2723
2724        for (p = 0; p < vg_ptr->pv_max; p++) {
2725                pv_ptr = vg_ptr->pv[p];
2726                if (pv_ptr != NULL &&
2727                    strcmp(pv_ptr->pv_name,
2728                               pv_change_req.pv_name) == 0) {
2729
2730                        bd = pv_ptr->bd;
2731                        if (copy_from_user(pv_ptr,
2732                                           pv_change_req.pv,
2733                                           sizeof(pv_t)) != 0)
2734                                return -EFAULT;
2735                        pv_ptr->bd = bd;
2736
2737                        /* We don't need the PE list
2738                           in kernel space as with LVs pe_t list */
2739                        pv_ptr->pe = NULL;
2740                        return 0;
2741                }
2742        }
2743        return -ENXIO;
2744} /* lvm_do_pv_change() */
2745
2746/*
2747 * character device support function get physical volume status
2748 */
2749static int lvm_do_pv_status(vg_t *vg_ptr, void *arg)
2750{
2751        uint p;
2752        pv_t *pv_ptr;
2753
2754        if (vg_ptr == NULL) return -ENXIO;
2755        if (copy_from_user(&pv_status_req, arg,
2756                           sizeof(pv_status_req)) != 0)
2757                return -EFAULT;
2758
2759        for (p = 0; p < vg_ptr->pv_max; p++) {
2760                pv_ptr = vg_ptr->pv[p];
2761                if (pv_ptr != NULL &&
2762                    strcmp(pv_ptr->pv_name,
2763                               pv_status_req.pv_name) == 0) {
2764                        if (copy_to_user(pv_status_req.pv,
2765                                         pv_ptr,
2766                                         sizeof(pv_t)) != 0)
2767                                return -EFAULT;
2768                        return 0;
2769                }
2770        }
2771        return -ENXIO;
2772} /* lvm_do_pv_status() */
2773
2774
2775/*
2776 * character device support function flush and invalidate all buffers of a PV
2777 */
2778static int lvm_do_pv_flush(void *arg)
2779{
2780        pv_flush_req_t pv_flush_req;
2781
2782        if (copy_from_user(&pv_flush_req, arg, sizeof(pv_flush_req)) != 0)
2783                return -EFAULT;
2784
2785        fsync_dev(pv_flush_req.pv_dev);
2786        invalidate_buffers(pv_flush_req.pv_dev);
2787
2788        return 0;
2789}
2790
2791
2792/*
2793 * support function initialize gendisk variables
2794 */
2795static void __init lvm_geninit(struct gendisk *lvm_gdisk)
2796{
2797        int i = 0;
2798
2799#ifdef DEBUG_GENDISK
2800        printk(KERN_DEBUG "%s -- lvm_gendisk\n", lvm_name);
2801#endif
2802
2803        for (i = 0; i < MAX_LV; i++) {
2804                lvm_gendisk.part[i].start_sect = -1;    /* avoid partition check */
2805                lvm_size[i] = lvm_gendisk.part[i].nr_sects = 0;
2806                lvm_blocksizes[i] = BLOCK_SIZE;
2807        }
2808
2809        blk_size[MAJOR_NR] = lvm_size;
2810        blksize_size[MAJOR_NR] = lvm_blocksizes;
2811        hardsect_size[MAJOR_NR] = lvm_hardsectsizes;
2812
2813        return;
2814} /* lvm_gen_init() */
2815
2816
2817
2818/* Must have down_write(_pe_lock) when we enqueue buffers */
2819static void _queue_io(struct buffer_head *bh, int rw) {
2820        if (bh->b_reqnext) BUG();
2821        bh->b_reqnext = _pe_requests;
2822        _pe_requests = bh;
2823}
2824
2825/* Must have down_write(_pe_lock) when we dequeue buffers */
2826static struct buffer_head *_dequeue_io(void)
2827{
2828        struct buffer_head *bh = _pe_requests;
2829        _pe_requests = NULL;
2830        return bh;
2831}
2832
2833/*
2834 * We do not need to hold _pe_lock to flush buffers.  bh should be taken from
2835 * _pe_requests under down_write(_pe_lock), and then _pe_requests can be set
2836 * NULL and we drop _pe_lock.  Any new buffers defered at this time will be
2837 * added to a new list, and the old buffers can have their I/O restarted
2838 * asynchronously.
2839 *
2840 * If, for some reason, the same PE is locked again before all of these writes
2841 * have finished, then these buffers will just be re-queued (i.e. no danger).
2842 */
2843static void _flush_io(struct buffer_head *bh)
2844{
2845        while (bh) {
2846                struct buffer_head *next = bh->b_reqnext;
2847                bh->b_reqnext = NULL;
2848                /* resubmit this buffer head */
2849                generic_make_request(WRITE, bh);
2850                bh = next;
2851        }
2852}
2853
2854
2855/*
2856 * we must open the pv's before we use them
2857 */
2858static int _open_pv(pv_t *pv) {
2859        int err;
2860        struct block_device *bd;
2861
2862        if (!(bd = bdget(kdev_t_to_nr(pv->pv_dev))))
2863                return -ENOMEM;
2864
2865        err = blkdev_get(bd, FMODE_READ|FMODE_WRITE, 0, BDEV_FILE);
2866        if (err)
2867                return err;
2868
2869        pv->bd = bd;
2870        return 0;
2871}
2872
2873static void _close_pv(pv_t *pv) {
2874        if (pv) {
2875                struct block_device *bdev = pv->bd;
2876                pv->bd = NULL;
2877                if (bdev)
2878                        blkdev_put(bdev, BDEV_FILE);
2879        }
2880}
2881
2882
/*
 * Convert a sector count into kilobytes (1024-byte units).
 *
 * Sectors of at most 1024 bytes are divided by the number of sectors per
 * kilobyte (the quotient is truncated); larger sectors are multiplied by
 * the number of kilobytes per sector.
 */
static unsigned long _sectors_to_k(unsigned long sect)
{
        if (SECTOR_SIZE <= 1024)
                return sect / (1024 / SECTOR_SIZE);

        return sect * (SECTOR_SIZE / 1024);
}
2891
2892MODULE_AUTHOR("Heinz Mauelshagen, Sistina Software");
2893MODULE_DESCRIPTION("Logical Volume Manager");
2894#ifdef MODULE_LICENSE
2895MODULE_LICENSE("GPL");
2896#endif
2897
2898module_init(lvm_init);
2899module_exit(lvm_cleanup);
2900
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.