// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/drivers/char/mem.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  Added devfs support.
 *    Jan-11-1998, C. Scott Ananian <cananian@alumni.princeton.edu>
 *  Shared /dev/zero mmapping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
 */

#include <linux/mm.h>
#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mman.h>
#include <linux/random.h>
#include <linux/init.h>
#include <linux/tty.h>
#include <linux/capability.h>
#include <linux/ptrace.h>
#include <linux/device.h>
#include <linux/highmem.h>
#include <linux/backing-dev.h>
#include <linux/shmem_fs.h>
#include <linux/splice.h>
#include <linux/pfn.h>
#include <linux/export.h>
#include <linux/io.h>
#include <linux/uio.h>
#include <linux/uaccess.h>
#include <linux/security.h>

#define DEVMEM_MINOR	1
#define DEVPORT_MINOR	4

static inline unsigned long size_inside_page(unsigned long start,
					     unsigned long size)
{
	unsigned long sz;

	sz = PAGE_SIZE - (start & (PAGE_SIZE - 1));

	return min(sz, size);
}
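
/*
 * For example, with PAGE_SIZE == 4096, size_inside_page(0x1ff8, 0x100)
 * returns 8 (the bytes left in the current page), while
 * size_inside_page(0x2000, 0x100) returns 0x100, since the whole request
 * fits in one page.  The transfer loops below use this to split work on
 * page boundaries.
 */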

#ifndef ARCH_HAS_VALID_PHYS_ADDR_RANGE
static inline int valid_phys_addr_range(phys_addr_t addr, size_t count)
{
	return addr + count <= __pa(high_memory);
}

static inline int valid_mmap_phys_addr_range(unsigned long pfn, size_t size)
{
	return 1;
}
#endif

#ifdef CONFIG_STRICT_DEVMEM
static inline int page_is_allowed(unsigned long pfn)
{
	return devmem_is_allowed(pfn);
}
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
	u64 from = ((u64)pfn) << PAGE_SHIFT;
	u64 to = from + size;
	u64 cursor = from;

	while (cursor < to) {
		if (!devmem_is_allowed(pfn))
			return 0;
		cursor += PAGE_SIZE;
		pfn++;
	}
	return 1;
}
#else
static inline int page_is_allowed(unsigned long pfn)
{
	return 1;
}
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
	return 1;
}
#endif
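
/*
 * Note that devmem_is_allowed() is not strictly boolean: on some
 * architectures (x86, for instance) it can return 2, meaning "pretend the
 * access is allowed, but read back zeros and drop writes".  read_mem()
 * and write_mem() below check for that value explicitly.
 */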

static inline bool should_stop_iteration(void)
{
	if (need_resched())
		cond_resched();
	return signal_pending(current);
}
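
/*
 * The transfer loops below call this once per page, so a large read or
 * write yields the CPU when needed and stops early when the caller has a
 * signal pending, returning a short count instead of blocking.
 */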

/*
 * This function reads the *physical* memory. The f_pos points directly to
 * the memory location.
 */
static ssize_t read_mem(struct file *file, char __user *buf,
			size_t count, loff_t *ppos)
{
	phys_addr_t p = *ppos;
	ssize_t read, sz;
	void *ptr;
	char *bounce;
	int err;

	if (p != *ppos)
		return 0;

	if (!valid_phys_addr_range(p, count))
		return -EFAULT;
	read = 0;
#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
	/* we don't have page 0 mapped on sparc and m68k.. */
	if (p < PAGE_SIZE) {
		sz = size_inside_page(p, count);
		if (sz > 0) {
			if (clear_user(buf, sz))
				return -EFAULT;
			buf += sz;
			p += sz;
			count -= sz;
			read += sz;
		}
	}
#endif

	bounce = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!bounce)
		return -ENOMEM;

	while (count > 0) {
		unsigned long remaining;
		int allowed, probe;

		sz = size_inside_page(p, count);

		err = -EPERM;
		allowed = page_is_allowed(p >> PAGE_SHIFT);
		if (!allowed)
			goto failed;

		err = -EFAULT;
		if (allowed == 2) {
			/* Show zeros for restricted memory. */
			remaining = clear_user(buf, sz);
		} else {
			/*
			 * On ia64 if a page has been mapped somewhere as
			 * uncached, then it must also be accessed uncached
			 * by the kernel or data corruption may occur.
			 */
			ptr = xlate_dev_mem_ptr(p);
			if (!ptr)
				goto failed;

			probe = copy_from_kernel_nofault(bounce, ptr, sz);
			unxlate_dev_mem_ptr(p, ptr);
			if (probe)
				goto failed;

			remaining = copy_to_user(buf, bounce, sz);
		}

		if (remaining)
			goto failed;

		buf += sz;
		p += sz;
		count -= sz;
		read += sz;
		if (should_stop_iteration())
			break;
	}
	kfree(bounce);

	*ppos += read;
	return read;

failed:
	kfree(bounce);
	return err;
}
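
/*
 * A minimal userspace sketch (assuming CONFIG_DEVMEM and a physical
 * address the kernel permits, e.g. a device register range; the address
 * below is purely illustrative):
 *
 *	int fd = open("/dev/mem", O_RDONLY);
 *	uint32_t val;
 *	if (pread(fd, &val, sizeof(val), 0xfed00000) == sizeof(val))
 *		printf("0x%08x\n", val);
 *
 * The offset handed to pread() is the physical address itself.
 */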

static ssize_t write_mem(struct file *file, const char __user *buf,
			 size_t count, loff_t *ppos)
{
	phys_addr_t p = *ppos;
	ssize_t written, sz;
	unsigned long copied;
	void *ptr;

	if (p != *ppos)
		return -EFBIG;

	if (!valid_phys_addr_range(p, count))
		return -EFAULT;

	written = 0;

#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
	/* we don't have page 0 mapped on sparc and m68k.. */
	if (p < PAGE_SIZE) {
		sz = size_inside_page(p, count);
		/* Hmm. Do something? */
		buf += sz;
		p += sz;
		count -= sz;
		written += sz;
	}
#endif

	while (count > 0) {
		int allowed;

		sz = size_inside_page(p, count);

		allowed = page_is_allowed(p >> PAGE_SHIFT);
		if (!allowed)
			return -EPERM;

		/* Skip actual writing when a page is marked as restricted. */
		if (allowed == 1) {
			/*
			 * On ia64 if a page has been mapped somewhere as
			 * uncached, then it must also be accessed uncached
			 * by the kernel or data corruption may occur.
			 */
			ptr = xlate_dev_mem_ptr(p);
			if (!ptr) {
				if (written)
					break;
				return -EFAULT;
			}

			copied = copy_from_user(ptr, buf, sz);
			unxlate_dev_mem_ptr(p, ptr);
			if (copied) {
				written += sz - copied;
				if (written)
					break;
				return -EFAULT;
			}
		}

		buf += sz;
		p += sz;
		count -= sz;
		written += sz;
		if (should_stop_iteration())
			break;
	}

	*ppos += written;
	return written;
}
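
/*
 * Like write(2) in general, write_mem() can return a short count: when a
 * fault or an unmappable page is hit after some bytes have gone out, the
 * bytes written so far are reported, and -EFAULT is returned only when
 * nothing was written at all.
 */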

int __weak phys_mem_access_prot_allowed(struct file *file,
	unsigned long pfn, unsigned long size, pgprot_t *vma_prot)
{
	return 1;
}

#ifndef __HAVE_PHYS_MEM_ACCESS_PROT

/*
 * Architectures vary in how they handle caching for addresses
 * outside of main memory.
 */
#ifdef pgprot_noncached
static int uncached_access(struct file *file, phys_addr_t addr)
{
	/*
	 * Accessing memory above the top the kernel knows about or through
	 * a file pointer that was marked O_DSYNC will be done non-cached.
	 */
	if (file->f_flags & O_DSYNC)
		return 1;
	return addr >= __pa(high_memory);
}
#endif

static pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
				     unsigned long size, pgprot_t vma_prot)
{
#ifdef pgprot_noncached
	phys_addr_t offset = pfn << PAGE_SHIFT;

	if (uncached_access(file, offset))
		return pgprot_noncached(vma_prot);
#endif
	return vma_prot;
}
#endif
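
/*
 * For example, opening the device with O_DSYNC:
 *
 *	int fd = open("/dev/mem", O_RDWR | O_DSYNC);
 *
 * makes subsequent mmap()s of that descriptor pick up the non-cached
 * protection chosen by uncached_access() above (on architectures that
 * define pgprot_noncached).
 */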

#ifndef CONFIG_MMU
static unsigned long get_unmapped_area_mem(struct file *file,
					   unsigned long addr,
					   unsigned long len,
					   unsigned long pgoff,
					   unsigned long flags)
{
	if (!valid_mmap_phys_addr_range(pgoff, len))
		return (unsigned long) -EINVAL;
	return pgoff << PAGE_SHIFT;
}

/* permit direct mmap, for read, write or exec */
static unsigned memory_mmap_capabilities(struct file *file)
{
	return NOMMU_MAP_DIRECT |
		NOMMU_MAP_READ | NOMMU_MAP_WRITE | NOMMU_MAP_EXEC;
}

static unsigned zero_mmap_capabilities(struct file *file)
{
	return NOMMU_MAP_COPY;
}

/* can't do an in-place private mapping if there's no MMU */
static inline int private_mapping_ok(struct vm_area_struct *vma)
{
	return is_nommu_shared_mapping(vma->vm_flags);
}
#else

static inline int private_mapping_ok(struct vm_area_struct *vma)
{
	return 1;
}
#endif

static const struct vm_operations_struct mmap_mem_ops = {
#ifdef CONFIG_HAVE_IOREMAP_PROT
	.access = generic_access_phys
#endif
};

static int mmap_mem(struct file *file, struct vm_area_struct *vma)
{
	size_t size = vma->vm_end - vma->vm_start;
	phys_addr_t offset = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;

	/* Does it even fit in phys_addr_t? */
	if (offset >> PAGE_SHIFT != vma->vm_pgoff)
		return -EINVAL;

	/* It's illegal to wrap around the end of the physical address space. */
	if (offset + (phys_addr_t)size - 1 < offset)
		return -EINVAL;

	if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))
		return -EINVAL;

	if (!private_mapping_ok(vma))
		return -ENOSYS;

	if (!range_is_allowed(vma->vm_pgoff, size))
		return -EPERM;

	if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size,
						&vma->vm_page_prot))
		return -EINVAL;

	vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
						 size,
						 vma->vm_page_prot);

	vma->vm_ops = &mmap_mem_ops;

	/* Remap-pfn-range will mark the range VM_IO */
	if (remap_pfn_range(vma,
			    vma->vm_start,
			    vma->vm_pgoff,
			    size,
			    vma->vm_page_prot)) {
		return -EAGAIN;
	}
	return 0;
}
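
/*
 * A minimal userspace sketch of mapping a physical range (the base
 * address is illustrative; it must be page-aligned and pass
 * range_is_allowed()):
 *
 *	off_t phys = 0xfed00000;
 *	int fd = open("/dev/mem", O_RDWR | O_SYNC);
 *	volatile uint32_t *regs = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 *				       MAP_SHARED, fd, phys);
 *
 * The mmap offset is interpreted as a physical byte address, so
 * vma->vm_pgoff above is phys >> PAGE_SHIFT.
 */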

static ssize_t read_port(struct file *file, char __user *buf,
			 size_t count, loff_t *ppos)
{
	unsigned long i = *ppos;
	char __user *tmp = buf;

	if (!access_ok(buf, count))
		return -EFAULT;
	while (count-- > 0 && i < 65536) {
		if (__put_user(inb(i), tmp) < 0)
			return -EFAULT;
		i++;
		tmp++;
	}
	*ppos = i;
	return tmp - buf;
}

static ssize_t write_port(struct file *file, const char __user *buf,
			  size_t count, loff_t *ppos)
{
	unsigned long i = *ppos;
	const char __user *tmp = buf;

	if (!access_ok(buf, count))
		return -EFAULT;
	while (count-- > 0 && i < 65536) {
		char c;

		if (__get_user(c, tmp)) {
			if (tmp > buf)
				break;
			return -EFAULT;
		}
		outb(c, i);
		i++;
		tmp++;
	}
	*ppos = i;
	return tmp - buf;
}
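
/*
 * For /dev/port the file offset selects the I/O port, one byte per port:
 * a single-byte pread() at offset 0x80 performs inb(0x80), and a
 * single-byte pwrite() there performs the matching outb().  Only the
 * legacy 16-bit port range (0..65535) is addressable.
 */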

static ssize_t read_null(struct file *file, char __user *buf,
			 size_t count, loff_t *ppos)
{
	return 0;
}

static ssize_t write_null(struct file *file, const char __user *buf,
			  size_t count, loff_t *ppos)
{
	return count;
}

static ssize_t read_iter_null(struct kiocb *iocb, struct iov_iter *to)
{
	return 0;
}

static ssize_t write_iter_null(struct kiocb *iocb, struct iov_iter *from)
{
	size_t count = iov_iter_count(from);

	iov_iter_advance(from, count);
	return count;
}

static int pipe_to_null(struct pipe_inode_info *info, struct pipe_buffer *buf,
			struct splice_desc *sd)
{
	return sd->len;
}

static ssize_t splice_write_null(struct pipe_inode_info *pipe, struct file *out,
				 loff_t *ppos, size_t len, unsigned int flags)
{
	return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_null);
}

static int uring_cmd_null(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
{
	return 0;
}
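
/*
 * Together these make /dev/null a universal sink: reads report EOF,
 * writes and splices consume and discard their input (pipe_to_null()
 * claims sd->len bytes without copying anything), and uring_cmd_null()
 * accepts any io_uring command as a successful no-op.
 */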

static ssize_t read_iter_zero(struct kiocb *iocb, struct iov_iter *iter)
{
	size_t written = 0;

	while (iov_iter_count(iter)) {
		size_t chunk = iov_iter_count(iter), n;

		if (chunk > PAGE_SIZE)
			chunk = PAGE_SIZE;	/* Just for latency reasons */
		n = iov_iter_zero(chunk, iter);
		if (!n && iov_iter_count(iter))
			return written ? written : -EFAULT;
		written += n;
		if (signal_pending(current))
			return written ? written : -ERESTARTSYS;
		if (!need_resched())
			continue;
		if (iocb->ki_flags & IOCB_NOWAIT)
			return written ? written : -EAGAIN;
		cond_resched();
	}
	return written;
}

static ssize_t read_zero(struct file *file, char __user *buf,
			 size_t count, loff_t *ppos)
{
	size_t cleared = 0;

	while (count) {
		size_t chunk = min_t(size_t, count, PAGE_SIZE);
		size_t left;

		left = clear_user(buf + cleared, chunk);
		if (unlikely(left)) {
			cleared += (chunk - left);
			if (!cleared)
				return -EFAULT;
			break;
		}
		cleared += chunk;
		count -= chunk;

		if (signal_pending(current))
			break;
		cond_resched();
	}

	return cleared;
}
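
/*
 * Note the partial-read convention: if the user buffer faults or a signal
 * arrives midway, read_zero() returns the bytes already cleared rather
 * than an error, and reports -EFAULT only when no progress was made at
 * all.
 */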

static int mmap_zero(struct file *file, struct vm_area_struct *vma)
{
#ifndef CONFIG_MMU
	return -ENOSYS;
#endif
	if (vma->vm_flags & VM_SHARED)
		return shmem_zero_setup(vma);
	vma_set_anonymous(vma);
	return 0;
}
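
/*
 * This is the shared /dev/zero mapping support noted in the header
 * comment: a MAP_SHARED mapping is backed by a shmem object, so it stays
 * coherent across fork().  A minimal sketch:
 *
 *	int fd = open("/dev/zero", O_RDWR);
 *	long *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 *		       MAP_SHARED, fd, 0);
 *	// stores through *p are visible to parent and child after fork()
 */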

static unsigned long get_unmapped_area_zero(struct file *file,
				unsigned long addr, unsigned long len,
				unsigned long pgoff, unsigned long flags)
{
#ifdef CONFIG_MMU
	if (flags & MAP_SHARED) {
		/*
		 * mmap_zero() will call shmem_zero_setup() to create a file,
		 * so use shmem's get_unmapped_area in case it can be huge;
		 * and pass NULL for file as in mmap.c's get_unmapped_area(),
		 * so as not to confuse shmem with our handle on "/dev/zero".
		 */
		return shmem_get_unmapped_area(NULL, addr, len, pgoff, flags);
	}

	/* Otherwise flags & MAP_PRIVATE: with no shmem object beneath it */
	return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
#else
	return -ENOSYS;
#endif
}

static ssize_t write_full(struct file *file, const char __user *buf,
			  size_t count, loff_t *ppos)
{
	return -ENOSPC;
}

/*
 * Special lseek() function for /dev/null and /dev/zero.  Most notably, you
 * can fopen() both devices with "a" now.  This was previously impossible.
 * -- SRB.
 */
static loff_t null_lseek(struct file *file, loff_t offset, int orig)
{
	return file->f_pos = 0;
}

/*
 * The memory devices use the full 32/64 bits of the offset, and so we cannot
 * check against negative addresses: they are ok. The return value is weird,
 * though, in that case (0).
 *
 * Also note that seeking relative to the "end of file" isn't supported:
 * it has no meaning, so it returns -EINVAL.
 */
static loff_t memory_lseek(struct file *file, loff_t offset, int orig)
{
	loff_t ret;

	inode_lock(file_inode(file));
	switch (orig) {
	case SEEK_CUR:
		offset += file->f_pos;
		fallthrough;
	case SEEK_SET:
		/* to avoid userland mistaking f_pos=-9 as -EBADF=-9 */
		if ((unsigned long long)offset >= -MAX_ERRNO) {
			ret = -EOVERFLOW;
			break;
		}
		file->f_pos = offset;
		ret = file->f_pos;
		force_successful_syscall_return();
		break;
	default:
		ret = -EINVAL;
	}
	inode_unlock(file_inode(file));
	return ret;
}

static int open_port(struct inode *inode, struct file *filp)
{
	int rc;

	if (!capable(CAP_SYS_RAWIO))
		return -EPERM;

	rc = security_locked_down(LOCKDOWN_DEV_MEM);
	if (rc)
		return rc;

	if (iminor(inode) != DEVMEM_MINOR)
		return 0;

	/*
	 * Use a unified address space to have a single point to manage
	 * revocations when drivers want to take over a /dev/mem mapped
	 * range.
	 */
	filp->f_mapping = iomem_get_mapping();

	return 0;
}

#define zero_lseek	null_lseek
#define full_lseek	null_lseek
#define write_zero	write_null
#define write_iter_zero	write_iter_null
#define splice_write_zero	splice_write_null
#define open_mem	open_port
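
/*
 * Note that /dev/mem shares open_port(): both nodes require
 * CAP_SYS_RAWIO and an unlocked-down kernel, and the iminor() check
 * inside open_port() is what limits the iomem_get_mapping() hookup to
 * DEVMEM_MINOR alone.
 */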

static const struct file_operations __maybe_unused mem_fops = {
	.llseek		= memory_lseek,
	.read		= read_mem,
	.write		= write_mem,
	.mmap		= mmap_mem,
	.open		= open_mem,
#ifndef CONFIG_MMU
	.get_unmapped_area = get_unmapped_area_mem,
	.mmap_capabilities = memory_mmap_capabilities,
#endif
};

static const struct file_operations null_fops = {
	.llseek		= null_lseek,
	.read		= read_null,
	.write		= write_null,
	.read_iter	= read_iter_null,
	.write_iter	= write_iter_null,
	.splice_write	= splice_write_null,
	.uring_cmd	= uring_cmd_null,
};

static const struct file_operations __maybe_unused port_fops = {
	.llseek		= memory_lseek,
	.read		= read_port,
	.write		= write_port,
	.open		= open_port,
};

static const struct file_operations zero_fops = {
	.llseek		= zero_lseek,
	.write		= write_zero,
	.read_iter	= read_iter_zero,
	.read		= read_zero,
	.write_iter	= write_iter_zero,
	.splice_read	= copy_splice_read,
	.splice_write	= splice_write_zero,
	.mmap		= mmap_zero,
	.get_unmapped_area = get_unmapped_area_zero,
#ifndef CONFIG_MMU
	.mmap_capabilities = zero_mmap_capabilities,
#endif
};

static const struct file_operations full_fops = {
	.llseek		= full_lseek,
	.read_iter	= read_iter_zero,
	.write		= write_full,
	.splice_read	= copy_splice_read,
};

static const struct memdev {
	const char *name;
	const struct file_operations *fops;
	fmode_t fmode;
	umode_t mode;
} devlist[] = {
#ifdef CONFIG_DEVMEM
	[DEVMEM_MINOR] = { "mem", &mem_fops, FMODE_UNSIGNED_OFFSET, 0 },
#endif
	[3] = { "null", &null_fops, FMODE_NOWAIT, 0666 },
#ifdef CONFIG_DEVPORT
	[4] = { "port", &port_fops, 0, 0 },
#endif
	[5] = { "zero", &zero_fops, FMODE_NOWAIT, 0666 },
	[7] = { "full", &full_fops, 0, 0666 },
	[8] = { "random", &random_fops, FMODE_NOWAIT, 0666 },
	[9] = { "urandom", &urandom_fops, FMODE_NOWAIT, 0666 },
#ifdef CONFIG_PRINTK
	[11] = { "kmsg", &kmsg_fops, 0, 0644 },
#endif
};
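
/*
 * The array index doubles as the minor number under MEM_MAJOR (1), so the
 * table above yields the classic device nodes: /dev/mem is (1,1),
 * /dev/null (1,3), /dev/port (1,4), /dev/zero (1,5), /dev/full (1,7),
 * /dev/random (1,8), /dev/urandom (1,9) and /dev/kmsg (1,11).
 */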

static int memory_open(struct inode *inode, struct file *filp)
{
	int minor;
	const struct memdev *dev;

	minor = iminor(inode);
	if (minor >= ARRAY_SIZE(devlist))
		return -ENXIO;

	dev = &devlist[minor];
	if (!dev->fops)
		return -ENXIO;

	filp->f_op = dev->fops;
	filp->f_mode |= dev->fmode;

	if (dev->fops->open)
		return dev->fops->open(inode, filp);

	return 0;
}

static const struct file_operations memory_fops = {
	.open = memory_open,
	.llseek = noop_llseek,
};

static char *mem_devnode(const struct device *dev, umode_t *mode)
{
	if (mode && devlist[MINOR(dev->devt)].mode)
		*mode = devlist[MINOR(dev->devt)].mode;
	return NULL;
}

static const struct class mem_class = {
	.name		= "mem",
	.devnode	= mem_devnode,
};

static int __init chr_dev_init(void)
{
	int retval;
	int minor;

	if (register_chrdev(MEM_MAJOR, "mem", &memory_fops))
		printk("unable to get major %d for memory devs\n", MEM_MAJOR);

	retval = class_register(&mem_class);
	if (retval)
		return retval;

	for (minor = 1; minor < ARRAY_SIZE(devlist); minor++) {
		if (!devlist[minor].name)
			continue;

		/*
		 * Create /dev/port?
		 */
		if ((minor == DEVPORT_MINOR) && !arch_has_dev_port())
			continue;

		device_create(&mem_class, NULL, MKDEV(MEM_MAJOR, minor),
			      NULL, devlist[minor].name);
	}

	return tty_init();
}

fs_initcall(chr_dev_init);