linux/fs/ocfs2/stack_user.c
<<
>>
Prefs
   1/* -*- mode: c; c-basic-offset: 8; -*-
   2 * vim: noexpandtab sw=8 ts=8 sts=0:
   3 *
   4 * stack_user.c
   5 *
   6 * Code which interfaces ocfs2 with fs/dlm and a userspace stack.
   7 *
   8 * Copyright (C) 2007 Oracle.  All rights reserved.
   9 *
  10 * This program is free software; you can redistribute it and/or
  11 * modify it under the terms of the GNU General Public
  12 * License as published by the Free Software Foundation, version 2.
  13 *
  14 * This program is distributed in the hope that it will be useful,
  15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 * General Public License for more details.
  18 */
  19
  20#include <linux/module.h>
  21#include <linux/fs.h>
  22#include <linux/miscdevice.h>
  23#include <linux/mutex.h>
  24#include <linux/smp_lock.h>
  25#include <linux/reboot.h>
  26#include <asm/uaccess.h>
  27
  28#include "ocfs2.h"  /* For struct ocfs2_lock_res */
  29#include "stackglue.h"
  30
  31#include <linux/dlm_plock.h>
  32
  33/*
  34 * The control protocol starts with a handshake.  Until the handshake
  35 * is complete, the control device will fail all write(2)s.
  36 *
  37 * The handshake is simple.  First, the client reads until EOF.  Each line
  38 * of output is a supported protocol tag.  All protocol tags are a single
  39 * character followed by a two hex digit version number.  Currently the
   40 * only thing supported is T01, for "Text-based version 0x01".  Next, the
  41 * client writes the version they would like to use, including the newline.
  42 * Thus, the protocol tag is 'T01\n'.  If the version tag written is
  43 * unknown, -EINVAL is returned.  Once the negotiation is complete, the
  44 * client can start sending messages.
  45 *
  46 * The T01 protocol has three messages.  First is the "SETN" message.
  47 * It has the following syntax:
  48 *
  49 *  SETN<space><8-char-hex-nodenum><newline>
  50 *
  51 * This is 14 characters.
  52 *
  53 * The "SETN" message must be the first message following the protocol.
  54 * It tells ocfs2_control the local node number.
  55 *
  56 * Next comes the "SETV" message.  It has the following syntax:
  57 *
  58 *  SETV<space><2-char-hex-major><space><2-char-hex-minor><newline>
  59 *
  60 * This is 11 characters.
  61 *
  62 * The "SETV" message sets the filesystem locking protocol version as
  63 * negotiated by the client.  The client negotiates based on the maximum
  64 * version advertised in /sys/fs/ocfs2/max_locking_protocol.  The major
  65 * number from the "SETV" message must match
  66 * ocfs2_user_plugin.sp_proto->lp_max_version.pv_major, and the minor number
  67 * must be less than or equal to ...->lp_max_version.pv_minor.
  68 *
  69 * Once this information has been set, mounts will be allowed.  From this
  70 * point on, the "DOWN" message can be sent for node down notification.
  71 * It has the following syntax:
  72 *
  73 *  DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline>
  74 *
  75 * eg:
  76 *
  77 *  DOWN 632A924FDD844190BDA93C0DF6B94899 00000001\n
  78 *
  79 * This is 47 characters.
  80 */
  81
  82/*
  83 * Whether or not the client has done the handshake.
  84 * For now, we have just one protocol version.
  85 */
  86#define OCFS2_CONTROL_PROTO                     "T01\n"
  87#define OCFS2_CONTROL_PROTO_LEN                 4
  88
  89/* Handshake states */
  90#define OCFS2_CONTROL_HANDSHAKE_INVALID         (0)
  91#define OCFS2_CONTROL_HANDSHAKE_READ            (1)
  92#define OCFS2_CONTROL_HANDSHAKE_PROTOCOL        (2)
  93#define OCFS2_CONTROL_HANDSHAKE_VALID           (3)
  94
  95/* Messages */
  96#define OCFS2_CONTROL_MESSAGE_OP_LEN            4
  97#define OCFS2_CONTROL_MESSAGE_SETNODE_OP        "SETN"
  98#define OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN 14
  99#define OCFS2_CONTROL_MESSAGE_SETVERSION_OP     "SETV"
 100#define OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN      11
 101#define OCFS2_CONTROL_MESSAGE_DOWN_OP           "DOWN"
 102#define OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN    47
 103#define OCFS2_TEXT_UUID_LEN                     32
 104#define OCFS2_CONTROL_MESSAGE_VERNUM_LEN        2
 105#define OCFS2_CONTROL_MESSAGE_NODENUM_LEN       8
 106
  107/*
  108 * ocfs2_live_connection is not refcounted.  The filesystem and miscdevice
  109 * sides can detach in different order; list updates hold ocfs2_control_lock.
  110 */
  111struct ocfs2_live_connection {
  112        struct list_head                oc_list;	/* entry on ocfs2_live_connection_list */
  113        struct ocfs2_cluster_connection *oc_conn;	/* connection this entry stands for; cleared on drop */
  114};
 115
  116struct ocfs2_control_private {
  117        struct list_head op_list;	/* entry on ocfs2_control_private_list */
  118        int op_state;	/* one of the OCFS2_CONTROL_HANDSHAKE_* states */
  119        int op_this_node;	/* node number from SETN; -1 until one is received */
  120        struct ocfs2_protocol_version op_proto;	/* version from SETV; pv_major 0 means not set yet */
  121};
 122
 123/* SETN<space><8-char-hex-nodenum><newline> */
  124struct ocfs2_control_message_setn {
  125        char    tag[OCFS2_CONTROL_MESSAGE_OP_LEN];	/* "SETN" */
  126        char    space;	/* must be ' '; overwritten with '\0' once validated */
  127        char    nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN];	/* node number, parsed as base 16 */
  128        char    newline;	/* must be '\n'; overwritten with '\0' to terminate nodestr */
  129};
 130
 131/* SETV<space><2-char-hex-major><space><2-char-hex-minor><newline> */
  132struct ocfs2_control_message_setv {
  133        char    tag[OCFS2_CONTROL_MESSAGE_OP_LEN];	/* "SETV" */
  134        char    space1;	/* must be ' '; overwritten with '\0' once validated */
  135        char    major[OCFS2_CONTROL_MESSAGE_VERNUM_LEN];	/* major version, parsed as base 16 */
  136        char    space2;	/* must be ' '; overwritten with '\0' to terminate major */
  137        char    minor[OCFS2_CONTROL_MESSAGE_VERNUM_LEN];	/* minor version, parsed as base 16 */
  138        char    newline;	/* must be '\n'; overwritten with '\0' to terminate minor */
  139};
 140
 141/* DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline> */
  142struct ocfs2_control_message_down {
  143        char    tag[OCFS2_CONTROL_MESSAGE_OP_LEN];	/* "DOWN" */
  144        char    space1;	/* must be ' '; overwritten with '\0' once validated */
  145        char    uuid[OCFS2_TEXT_UUID_LEN];	/* name used to look up the live connection */
  146        char    space2;	/* must be ' '; overwritten with '\0' to terminate uuid */
  147        char    nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN];	/* node number, parsed as base 16 */
  148        char    newline;	/* must be '\n'; overwritten with '\0' to terminate nodestr */
  149};
 150
  151union ocfs2_control_message {
  152        char                                    tag[OCFS2_CONTROL_MESSAGE_OP_LEN];	/* op tag, used to pick the message type */
  153        struct ocfs2_control_message_setn       u_setn;	/* view as a SETN message */
  154        struct ocfs2_control_message_setv       u_setv;	/* view as a SETV message */
  155        struct ocfs2_control_message_down       u_down;	/* view as a DOWN message */
  156};
 157
 158static struct ocfs2_stack_plugin ocfs2_user_plugin;
 159
  160static atomic_t ocfs2_control_opened;	/* bumped when an open file reaches HANDSHAKE_VALID, dropped on its release */
  161static int ocfs2_control_this_node = -1;	/* local node number; -1 while unset */
  162static struct ocfs2_protocol_version running_proto;	/* locking protocol version installed from SETV */
  163
  164static LIST_HEAD(ocfs2_live_connection_list);	/* ocfs2_live_connection entries, linked via oc_list */
  165static LIST_HEAD(ocfs2_control_private_list);	/* ocfs2_control_private entries, linked via op_list */
  166static DEFINE_MUTEX(ocfs2_control_lock);	/* held while the lists and globals above are read or changed */
 167
 168static inline void ocfs2_control_set_handshake_state(struct file *file,
 169                                                     int state)
 170{
 171        struct ocfs2_control_private *p = file->private_data;
 172        p->op_state = state;
 173}
 174
 175static inline int ocfs2_control_get_handshake_state(struct file *file)
 176{
 177        struct ocfs2_control_private *p = file->private_data;
 178        return p->op_state;
 179}
 180
 181static struct ocfs2_live_connection *ocfs2_connection_find(const char *name)
 182{
 183        size_t len = strlen(name);
 184        struct ocfs2_live_connection *c;
 185
 186        BUG_ON(!mutex_is_locked(&ocfs2_control_lock));
 187
 188        list_for_each_entry(c, &ocfs2_live_connection_list, oc_list) {
 189                if ((c->oc_conn->cc_namelen == len) &&
 190                    !strncmp(c->oc_conn->cc_name, name, len))
 191                        return c;
 192        }
 193
 194        return c;
 195}
 196
 197/*
 198 * ocfs2_live_connection structures are created underneath the ocfs2
 199 * mount path.  Since the VFS prevents multiple calls to
 200 * fill_super(), we can't get dupes here.
 201 */
 202static int ocfs2_live_connection_new(struct ocfs2_cluster_connection *conn,
 203                                     struct ocfs2_live_connection **c_ret)
 204{
 205        int rc = 0;
 206        struct ocfs2_live_connection *c;
 207
 208        c = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL);
 209        if (!c)
 210                return -ENOMEM;
 211
 212        mutex_lock(&ocfs2_control_lock);
 213        c->oc_conn = conn;
 214
 215        if (atomic_read(&ocfs2_control_opened))
 216                list_add(&c->oc_list, &ocfs2_live_connection_list);
 217        else {
 218                printk(KERN_ERR
 219                       "ocfs2: Userspace control daemon is not present\n");
 220                rc = -ESRCH;
 221        }
 222
 223        mutex_unlock(&ocfs2_control_lock);
 224
 225        if (!rc)
 226                *c_ret = c;
 227        else
 228                kfree(c);
 229
 230        return rc;
 231}
 232
 233/*
 234 * This function disconnects the cluster connection from ocfs2_control.
 235 * Afterwards, userspace can't affect the cluster connection.
 236 */
 237static void ocfs2_live_connection_drop(struct ocfs2_live_connection *c)
 238{
 239        mutex_lock(&ocfs2_control_lock);
 240        list_del_init(&c->oc_list);
 241        c->oc_conn = NULL;
 242        mutex_unlock(&ocfs2_control_lock);
 243
 244        kfree(c);
 245}
 246
 247static int ocfs2_control_cfu(void *target, size_t target_len,
 248                             const char __user *buf, size_t count)
 249{
 250        /* The T01 expects write(2) calls to have exactly one command */
 251        if ((count != target_len) ||
 252            (count > sizeof(union ocfs2_control_message)))
 253                return -EINVAL;
 254
 255        if (copy_from_user(target, buf, target_len))
 256                return -EFAULT;
 257
 258        return 0;
 259}
 260
 261static ssize_t ocfs2_control_validate_protocol(struct file *file,
 262                                               const char __user *buf,
 263                                               size_t count)
 264{
 265        ssize_t ret;
 266        char kbuf[OCFS2_CONTROL_PROTO_LEN];
 267
 268        ret = ocfs2_control_cfu(kbuf, OCFS2_CONTROL_PROTO_LEN,
 269                                buf, count);
 270        if (ret)
 271                return ret;
 272
 273        if (strncmp(kbuf, OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN))
 274                return -EINVAL;
 275
 276        ocfs2_control_set_handshake_state(file,
 277                                          OCFS2_CONTROL_HANDSHAKE_PROTOCOL);
 278
 279        return count;
 280}
 281
 282static void ocfs2_control_send_down(const char *uuid,
 283                                    int nodenum)
 284{
 285        struct ocfs2_live_connection *c;
 286
 287        mutex_lock(&ocfs2_control_lock);
 288
 289        c = ocfs2_connection_find(uuid);
 290        if (c) {
 291                BUG_ON(c->oc_conn == NULL);
 292                c->oc_conn->cc_recovery_handler(nodenum,
 293                                                c->oc_conn->cc_recovery_data);
 294        }
 295
 296        mutex_unlock(&ocfs2_control_lock);
 297}
 298
 299/*
 300 * Called whenever configuration elements are sent to /dev/ocfs2_control.
 301 * If all configuration elements are present, try to set the global
 302 * values.  If there is a problem, return an error.  Skip any missing
 303 * elements, and only bump ocfs2_control_opened when we have all elements
 304 * and are successful.
 305 */
 306static int ocfs2_control_install_private(struct file *file)
 307{
 308        int rc = 0;
 309        int set_p = 1;
 310        struct ocfs2_control_private *p = file->private_data;
 311
 312        BUG_ON(p->op_state != OCFS2_CONTROL_HANDSHAKE_PROTOCOL);
 313
 314        mutex_lock(&ocfs2_control_lock);
 315
 316        if (p->op_this_node < 0) {
 317                set_p = 0;
 318        } else if ((ocfs2_control_this_node >= 0) &&
 319                   (ocfs2_control_this_node != p->op_this_node)) {
 320                rc = -EINVAL;
 321                goto out_unlock;
 322        }
 323
 324        if (!p->op_proto.pv_major) {
 325                set_p = 0;
 326        } else if (!list_empty(&ocfs2_live_connection_list) &&
 327                   ((running_proto.pv_major != p->op_proto.pv_major) ||
 328                    (running_proto.pv_minor != p->op_proto.pv_minor))) {
 329                rc = -EINVAL;
 330                goto out_unlock;
 331        }
 332
 333        if (set_p) {
 334                ocfs2_control_this_node = p->op_this_node;
 335                running_proto.pv_major = p->op_proto.pv_major;
 336                running_proto.pv_minor = p->op_proto.pv_minor;
 337        }
 338
 339out_unlock:
 340        mutex_unlock(&ocfs2_control_lock);
 341
 342        if (!rc && set_p) {
 343                /* We set the global values successfully */
 344                atomic_inc(&ocfs2_control_opened);
 345                ocfs2_control_set_handshake_state(file,
 346                                        OCFS2_CONTROL_HANDSHAKE_VALID);
 347        }
 348
 349        return rc;
 350}
 351
 352static int ocfs2_control_get_this_node(void)
 353{
 354        int rc;
 355
 356        mutex_lock(&ocfs2_control_lock);
 357        if (ocfs2_control_this_node < 0)
 358                rc = -EINVAL;
 359        else
 360                rc = ocfs2_control_this_node;
 361        mutex_unlock(&ocfs2_control_lock);
 362
 363        return rc;
 364}
 365
 366static int ocfs2_control_do_setnode_msg(struct file *file,
 367                                        struct ocfs2_control_message_setn *msg)
 368{
 369        long nodenum;
 370        char *ptr = NULL;
 371        struct ocfs2_control_private *p = file->private_data;
 372
 373        if (ocfs2_control_get_handshake_state(file) !=
 374            OCFS2_CONTROL_HANDSHAKE_PROTOCOL)
 375                return -EINVAL;
 376
 377        if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP,
 378                    OCFS2_CONTROL_MESSAGE_OP_LEN))
 379                return -EINVAL;
 380
 381        if ((msg->space != ' ') || (msg->newline != '\n'))
 382                return -EINVAL;
 383        msg->space = msg->newline = '\0';
 384
 385        nodenum = simple_strtol(msg->nodestr, &ptr, 16);
 386        if (!ptr || *ptr)
 387                return -EINVAL;
 388
 389        if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) ||
 390            (nodenum > INT_MAX) || (nodenum < 0))
 391                return -ERANGE;
 392        p->op_this_node = nodenum;
 393
 394        return ocfs2_control_install_private(file);
 395}
 396
 397static int ocfs2_control_do_setversion_msg(struct file *file,
 398                                           struct ocfs2_control_message_setv *msg)
 399 {
 400        long major, minor;
 401        char *ptr = NULL;
 402        struct ocfs2_control_private *p = file->private_data;
 403        struct ocfs2_protocol_version *max =
 404                &ocfs2_user_plugin.sp_proto->lp_max_version;
 405
 406        if (ocfs2_control_get_handshake_state(file) !=
 407            OCFS2_CONTROL_HANDSHAKE_PROTOCOL)
 408                return -EINVAL;
 409
 410        if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP,
 411                    OCFS2_CONTROL_MESSAGE_OP_LEN))
 412                return -EINVAL;
 413
 414        if ((msg->space1 != ' ') || (msg->space2 != ' ') ||
 415            (msg->newline != '\n'))
 416                return -EINVAL;
 417        msg->space1 = msg->space2 = msg->newline = '\0';
 418
 419        major = simple_strtol(msg->major, &ptr, 16);
 420        if (!ptr || *ptr)
 421                return -EINVAL;
 422        minor = simple_strtol(msg->minor, &ptr, 16);
 423        if (!ptr || *ptr)
 424                return -EINVAL;
 425
 426        /*
 427         * The major must be between 1 and 255, inclusive.  The minor
 428         * must be between 0 and 255, inclusive.  The version passed in
 429         * must be within the maximum version supported by the filesystem.
 430         */
 431        if ((major == LONG_MIN) || (major == LONG_MAX) ||
 432            (major > (u8)-1) || (major < 1))
 433                return -ERANGE;
 434        if ((minor == LONG_MIN) || (minor == LONG_MAX) ||
 435            (minor > (u8)-1) || (minor < 0))
 436                return -ERANGE;
 437        if ((major != max->pv_major) ||
 438            (minor > max->pv_minor))
 439                return -EINVAL;
 440
 441        p->op_proto.pv_major = major;
 442        p->op_proto.pv_minor = minor;
 443
 444        return ocfs2_control_install_private(file);
 445}
 446
 447static int ocfs2_control_do_down_msg(struct file *file,
 448                                     struct ocfs2_control_message_down *msg)
 449{
 450        long nodenum;
 451        char *p = NULL;
 452
 453        if (ocfs2_control_get_handshake_state(file) !=
 454            OCFS2_CONTROL_HANDSHAKE_VALID)
 455                return -EINVAL;
 456
 457        if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_DOWN_OP,
 458                    OCFS2_CONTROL_MESSAGE_OP_LEN))
 459                return -EINVAL;
 460
 461        if ((msg->space1 != ' ') || (msg->space2 != ' ') ||
 462            (msg->newline != '\n'))
 463                return -EINVAL;
 464        msg->space1 = msg->space2 = msg->newline = '\0';
 465
 466        nodenum = simple_strtol(msg->nodestr, &p, 16);
 467        if (!p || *p)
 468                return -EINVAL;
 469
 470        if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) ||
 471            (nodenum > INT_MAX) || (nodenum < 0))
 472                return -ERANGE;
 473
 474        ocfs2_control_send_down(msg->uuid, nodenum);
 475
 476        return 0;
 477}
 478
 479static ssize_t ocfs2_control_message(struct file *file,
 480                                     const char __user *buf,
 481                                     size_t count)
 482{
 483        ssize_t ret;
 484        union ocfs2_control_message msg;
 485
 486        /* Try to catch padding issues */
 487        WARN_ON(offsetof(struct ocfs2_control_message_down, uuid) !=
 488                (sizeof(msg.u_down.tag) + sizeof(msg.u_down.space1)));
 489
 490        memset(&msg, 0, sizeof(union ocfs2_control_message));
 491        ret = ocfs2_control_cfu(&msg, count, buf, count);
 492        if (ret)
 493                goto out;
 494
 495        if ((count == OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN) &&
 496            !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP,
 497                     OCFS2_CONTROL_MESSAGE_OP_LEN))
 498                ret = ocfs2_control_do_setnode_msg(file, &msg.u_setn);
 499        else if ((count == OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN) &&
 500                 !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP,
 501                          OCFS2_CONTROL_MESSAGE_OP_LEN))
 502                ret = ocfs2_control_do_setversion_msg(file, &msg.u_setv);
 503        else if ((count == OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN) &&
 504                 !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_DOWN_OP,
 505                          OCFS2_CONTROL_MESSAGE_OP_LEN))
 506                ret = ocfs2_control_do_down_msg(file, &msg.u_down);
 507        else
 508                ret = -EINVAL;
 509
 510out:
 511        return ret ? ret : count;
 512}
 513
 514static ssize_t ocfs2_control_write(struct file *file,
 515                                   const char __user *buf,
 516                                   size_t count,
 517                                   loff_t *ppos)
 518{
 519        ssize_t ret;
 520
 521        switch (ocfs2_control_get_handshake_state(file)) {
 522                case OCFS2_CONTROL_HANDSHAKE_INVALID:
 523                        ret = -EINVAL;
 524                        break;
 525
 526                case OCFS2_CONTROL_HANDSHAKE_READ:
 527                        ret = ocfs2_control_validate_protocol(file, buf,
 528                                                              count);
 529                        break;
 530
 531                case OCFS2_CONTROL_HANDSHAKE_PROTOCOL:
 532                case OCFS2_CONTROL_HANDSHAKE_VALID:
 533                        ret = ocfs2_control_message(file, buf, count);
 534                        break;
 535
 536                default:
 537                        BUG();
 538                        ret = -EIO;
 539                        break;
 540        }
 541
 542        return ret;
 543}
 544
 545/*
 546 * This is a naive version.  If we ever have a new protocol, we'll expand
 547 * it.  Probably using seq_file.
 548 */
 549static ssize_t ocfs2_control_read(struct file *file,
 550                                  char __user *buf,
 551                                  size_t count,
 552                                  loff_t *ppos)
 553{
 554        ssize_t ret;
 555
 556        ret = simple_read_from_buffer(buf, count, ppos,
 557                        OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN);
 558
 559        /* Have we read the whole protocol list? */
 560        if (ret > 0 && *ppos >= OCFS2_CONTROL_PROTO_LEN)
 561                ocfs2_control_set_handshake_state(file,
 562                                                  OCFS2_CONTROL_HANDSHAKE_READ);
 563
 564        return ret;
 565}
 566
 567static int ocfs2_control_release(struct inode *inode, struct file *file)
 568{
 569        struct ocfs2_control_private *p = file->private_data;
 570
 571        mutex_lock(&ocfs2_control_lock);
 572
 573        if (ocfs2_control_get_handshake_state(file) !=
 574            OCFS2_CONTROL_HANDSHAKE_VALID)
 575                goto out;
 576
 577        if (atomic_dec_and_test(&ocfs2_control_opened)) {
 578                if (!list_empty(&ocfs2_live_connection_list)) {
 579                        /* XXX: Do bad things! */
 580                        printk(KERN_ERR
 581                               "ocfs2: Unexpected release of ocfs2_control!\n"
 582                               "       Loss of cluster connection requires "
 583                               "an emergency restart!\n");
 584                        emergency_restart();
 585                }
 586                /*
 587                 * Last valid close clears the node number and resets
 588                 * the locking protocol version
 589                 */
 590                ocfs2_control_this_node = -1;
 591                running_proto.pv_major = 0;
 592                running_proto.pv_major = 0;
 593        }
 594
 595out:
 596        list_del_init(&p->op_list);
 597        file->private_data = NULL;
 598
 599        mutex_unlock(&ocfs2_control_lock);
 600
 601        kfree(p);
 602
 603        return 0;
 604}
 605
 606static int ocfs2_control_open(struct inode *inode, struct file *file)
 607{
 608        struct ocfs2_control_private *p;
 609
 610        p = kzalloc(sizeof(struct ocfs2_control_private), GFP_KERNEL);
 611        if (!p)
 612                return -ENOMEM;
 613        p->op_this_node = -1;
 614
 615        lock_kernel();
 616        mutex_lock(&ocfs2_control_lock);
 617        file->private_data = p;
 618        list_add(&p->op_list, &ocfs2_control_private_list);
 619        mutex_unlock(&ocfs2_control_lock);
 620        unlock_kernel();
 621
 622        return 0;
 623}
 624
 625static const struct file_operations ocfs2_control_fops = {
 626        .open    = ocfs2_control_open,
 627        .release = ocfs2_control_release,
 628        .read    = ocfs2_control_read,
 629        .write   = ocfs2_control_write,
 630        .owner   = THIS_MODULE,
 631};
 632
 633static struct miscdevice ocfs2_control_device = {
 634        .minor          = MISC_DYNAMIC_MINOR,
 635        .name           = "ocfs2_control",
 636        .fops           = &ocfs2_control_fops,
 637};
 638
 639static int ocfs2_control_init(void)
 640{
 641        int rc;
 642
 643        atomic_set(&ocfs2_control_opened, 0);
 644
 645        rc = misc_register(&ocfs2_control_device);
 646        if (rc)
 647                printk(KERN_ERR
 648                       "ocfs2: Unable to register ocfs2_control device "
 649                       "(errno %d)\n",
 650                       -rc);
 651
 652        return rc;
 653}
 654
 655static void ocfs2_control_exit(void)
 656{
 657        int rc;
 658
 659        rc = misc_deregister(&ocfs2_control_device);
 660        if (rc)
 661                printk(KERN_ERR
 662                       "ocfs2: Unable to deregister ocfs2_control device "
 663                       "(errno %d)\n",
 664                       -rc);
 665}
 666
 667static struct dlm_lksb *fsdlm_astarg_to_lksb(void *astarg)
 668{
 669        struct ocfs2_lock_res *res = astarg;
 670        return &res->l_lksb.lksb_fsdlm;
 671}
 672
 673static void fsdlm_lock_ast_wrapper(void *astarg)
 674{
 675        struct dlm_lksb *lksb = fsdlm_astarg_to_lksb(astarg);
 676        int status = lksb->sb_status;
 677
 678        BUG_ON(ocfs2_user_plugin.sp_proto == NULL);
 679
 680        /*
 681         * For now we're punting on the issue of other non-standard errors
 682         * where we can't tell if the unlock_ast or lock_ast should be called.
 683         * The main "other error" that's possible is EINVAL which means the
 684         * function was called with invalid args, which shouldn't be possible
 685         * since the caller here is under our control.  Other non-standard
 686         * errors probably fall into the same category, or otherwise are fatal
 687         * which means we can't carry on anyway.
 688         */
 689
 690        if (status == -DLM_EUNLOCK || status == -DLM_ECANCEL)
 691                ocfs2_user_plugin.sp_proto->lp_unlock_ast(astarg, 0);
 692        else
 693                ocfs2_user_plugin.sp_proto->lp_lock_ast(astarg);
 694}
 695
 696static void fsdlm_blocking_ast_wrapper(void *astarg, int level)
 697{
 698        BUG_ON(ocfs2_user_plugin.sp_proto == NULL);
 699
 700        ocfs2_user_plugin.sp_proto->lp_blocking_ast(astarg, level);
 701}
 702
 703static int user_dlm_lock(struct ocfs2_cluster_connection *conn,
 704                         int mode,
 705                         union ocfs2_dlm_lksb *lksb,
 706                         u32 flags,
 707                         void *name,
 708                         unsigned int namelen,
 709                         void *astarg)
 710{
 711        int ret;
 712
 713        if (!lksb->lksb_fsdlm.sb_lvbptr)
 714                lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb +
 715                                             sizeof(struct dlm_lksb);
 716
 717        ret = dlm_lock(conn->cc_lockspace, mode, &lksb->lksb_fsdlm,
 718                       flags|DLM_LKF_NODLCKWT, name, namelen, 0,
 719                       fsdlm_lock_ast_wrapper, astarg,
 720                       fsdlm_blocking_ast_wrapper);
 721        return ret;
 722}
 723
 724static int user_dlm_unlock(struct ocfs2_cluster_connection *conn,
 725                           union ocfs2_dlm_lksb *lksb,
 726                           u32 flags,
 727                           void *astarg)
 728{
 729        int ret;
 730
 731        ret = dlm_unlock(conn->cc_lockspace, lksb->lksb_fsdlm.sb_lkid,
 732                         flags, &lksb->lksb_fsdlm, astarg);
 733        return ret;
 734}
 735
 736static int user_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
 737{
 738        return lksb->lksb_fsdlm.sb_status;
 739}
 740
 741static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb)
 742{
 743        if (!lksb->lksb_fsdlm.sb_lvbptr)
 744                lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb +
 745                                             sizeof(struct dlm_lksb);
 746        return (void *)(lksb->lksb_fsdlm.sb_lvbptr);
 747}
 748
 749static void user_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb)
 750{
 751}
 752
 753static int user_plock(struct ocfs2_cluster_connection *conn,
 754                      u64 ino,
 755                      struct file *file,
 756                      int cmd,
 757                      struct file_lock *fl)
 758{
 759        /*
 760         * This more or less just demuxes the plock request into any
 761         * one of three dlm calls.
 762         *
 763         * Internally, fs/dlm will pass these to a misc device, which
 764         * a userspace daemon will read and write to.
 765         *
 766         * For now, cancel requests (which happen internally only),
 767         * are turned into unlocks. Most of this function taken from
 768         * gfs2_lock.
 769         */
 770
 771        if (cmd == F_CANCELLK) {
 772                cmd = F_SETLK;
 773                fl->fl_type = F_UNLCK;
 774        }
 775
 776        if (IS_GETLK(cmd))
 777                return dlm_posix_get(conn->cc_lockspace, ino, file, fl);
 778        else if (fl->fl_type == F_UNLCK)
 779                return dlm_posix_unlock(conn->cc_lockspace, ino, file, fl);
 780        else
 781                return dlm_posix_lock(conn->cc_lockspace, ino, file, cmd, fl);
 782}
 783
 784/*
 785 * Compare a requested locking protocol version against the current one.
 786 *
 787 * If the major numbers are different, they are incompatible.
 788 * If the current minor is greater than the request, they are incompatible.
 789 * If the current minor is less than or equal to the request, they are
 790 * compatible, and the requester should run at the current minor version.
 791 */
 792static int fs_protocol_compare(struct ocfs2_protocol_version *existing,
 793                               struct ocfs2_protocol_version *request)
 794{
 795        if (existing->pv_major != request->pv_major)
 796                return 1;
 797
 798        if (existing->pv_minor > request->pv_minor)
 799                return 1;
 800
 801        if (existing->pv_minor < request->pv_minor)
 802                request->pv_minor = existing->pv_minor;
 803
 804        return 0;
 805}
 806
 807static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
 808{
 809        dlm_lockspace_t *fsdlm;
 810        struct ocfs2_live_connection *control;
 811        int rc = 0;
 812
 813        BUG_ON(conn == NULL);
 814
 815        rc = ocfs2_live_connection_new(conn, &control);
 816        if (rc)
 817                goto out;
 818
 819        /*
 820         * running_proto must have been set before we allowed any mounts
 821         * to proceed.
 822         */
 823        if (fs_protocol_compare(&running_proto, &conn->cc_version)) {
 824                printk(KERN_ERR
 825                       "Unable to mount with fs locking protocol version "
 826                       "%u.%u because the userspace control daemon has "
 827                       "negotiated %u.%u\n",
 828                       conn->cc_version.pv_major, conn->cc_version.pv_minor,
 829                       running_proto.pv_major, running_proto.pv_minor);
 830                rc = -EPROTO;
 831                ocfs2_live_connection_drop(control);
 832                goto out;
 833        }
 834
 835        rc = dlm_new_lockspace(conn->cc_name, strlen(conn->cc_name),
 836                               &fsdlm, DLM_LSFL_FS, DLM_LVB_LEN);
 837        if (rc) {
 838                ocfs2_live_connection_drop(control);
 839                goto out;
 840        }
 841
 842        conn->cc_private = control;
 843        conn->cc_lockspace = fsdlm;
 844out:
 845        return rc;
 846}
 847
 848static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn)
 849{
 850        dlm_release_lockspace(conn->cc_lockspace, 2);
 851        conn->cc_lockspace = NULL;
 852        ocfs2_live_connection_drop(conn->cc_private);
 853        conn->cc_private = NULL;
 854        return 0;
 855}
 856
 857static int user_cluster_this_node(unsigned int *this_node)
 858{
 859        int rc;
 860
 861        rc = ocfs2_control_get_this_node();
 862        if (rc < 0)
 863                return rc;
 864
 865        *this_node = rc;
 866        return 0;
 867}
 868
 869static struct ocfs2_stack_operations ocfs2_user_plugin_ops = {
 870        .connect        = user_cluster_connect,
 871        .disconnect     = user_cluster_disconnect,
 872        .this_node      = user_cluster_this_node,
 873        .dlm_lock       = user_dlm_lock,
 874        .dlm_unlock     = user_dlm_unlock,
 875        .lock_status    = user_dlm_lock_status,
 876        .lock_lvb       = user_dlm_lvb,
 877        .plock          = user_plock,
 878        .dump_lksb      = user_dlm_dump_lksb,
 879};
 880
 881static struct ocfs2_stack_plugin ocfs2_user_plugin = {
 882        .sp_name        = "user",
 883        .sp_ops         = &ocfs2_user_plugin_ops,
 884        .sp_owner       = THIS_MODULE,
 885};
 886
 887
 888static int __init ocfs2_user_plugin_init(void)
 889{
 890        int rc;
 891
 892        rc = ocfs2_control_init();
 893        if (!rc) {
 894                rc = ocfs2_stack_glue_register(&ocfs2_user_plugin);
 895                if (rc)
 896                        ocfs2_control_exit();
 897        }
 898
 899        return rc;
 900}
 901
 902static void __exit ocfs2_user_plugin_exit(void)
 903{
 904        ocfs2_stack_glue_unregister(&ocfs2_user_plugin);
 905        ocfs2_control_exit();
 906}
 907
 908MODULE_AUTHOR("Oracle");
 909MODULE_DESCRIPTION("ocfs2 driver for userspace cluster stacks");
 910MODULE_LICENSE("GPL");
 911module_init(ocfs2_user_plugin_init);
 912module_exit(ocfs2_user_plugin_exit);
 913
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.