linux/mm/truncate.c
<<
>>
Prefs
   1/*
   2 * mm/truncate.c - code for taking down pages from address_spaces
   3 *
   4 * Copyright (C) 2002, Linus Torvalds
   5 *
   6 * 10Sep2002    akpm@zip.com.au
   7 *              Initial version.
   8 */
   9
  10#include <linux/kernel.h>
  11#include <linux/mm.h>
  12#include <linux/swap.h>
  13#include <linux/module.h>
  14#include <linux/pagemap.h>
  15#include <linux/highmem.h>
  16#include <linux/pagevec.h>
  17#include <linux/task_io_accounting_ops.h>
  18#include <linux/buffer_head.h>  /* grr. try_to_release_page,
  19                                   do_invalidatepage */
  20
  21
  22/**
   23 * do_invalidatepage - invalidate part or all of a page
  24 * @page: the page which is affected
  25 * @offset: the index of the truncation point
  26 *
  27 * do_invalidatepage() is called when all or part of the page has become
  28 * invalidated by a truncate operation.
  29 *
  30 * do_invalidatepage() does not have to release all buffers, but it must
  31 * ensure that no dirty buffer is left outside @offset and that no I/O
  32 * is underway against any of the blocks which are outside the truncation
   33 * point, because the caller is about to free (and possibly reuse) those
  34 * blocks on-disk.
  35 */
  36void do_invalidatepage(struct page *page, unsigned long offset)
  37{
  38        void (*invalidatepage)(struct page *, unsigned long);
  39        invalidatepage = page->mapping->a_ops->invalidatepage;
  40#ifdef CONFIG_BLOCK
  41        if (!invalidatepage)
  42                invalidatepage = block_invalidatepage;
  43#endif
  44        if (invalidatepage)
  45                (*invalidatepage)(page, offset);
  46}
  47
  48static inline void truncate_partial_page(struct page *page, unsigned partial)
  49{
  50        zero_user_page(page, partial, PAGE_CACHE_SIZE - partial, KM_USER0);
  51        if (PagePrivate(page))
  52                do_invalidatepage(page, partial);
  53}
  54
  55/*
  56 * This cancels just the dirty bit on the kernel page itself, it
  57 * does NOT actually remove dirty bits on any mmap's that may be
  58 * around. It also leaves the page tagged dirty, so any sync
  59 * activity will still find it on the dirty lists, and in particular,
  60 * clear_page_dirty_for_io() will still look at the dirty bits in
  61 * the VM.
  62 *
  63 * Doing this should *normally* only ever be done when a page
  64 * is truncated, and is not actually mapped anywhere at all. However,
  65 * fs/buffer.c does this when it notices that somebody has cleaned
  66 * out all the buffers on a page without actually doing it through
   67 * the VM. Can you say "ext3 is horribly ugly"? Thought you could.
  68 */
  69void cancel_dirty_page(struct page *page, unsigned int account_size)
  70{
  71        if (TestClearPageDirty(page)) {
  72                struct address_space *mapping = page->mapping;
  73                if (mapping && mapping_cap_account_dirty(mapping)) {
  74                        dec_zone_page_state(page, NR_FILE_DIRTY);
  75                        if (account_size)
  76                                task_io_account_cancelled_write(account_size);
  77                }
  78        }
  79}
  80EXPORT_SYMBOL(cancel_dirty_page);
  81
  82/*
  83 * If truncate cannot remove the fs-private metadata from the page, the page
  84 * becomes anonymous.  It will be left on the LRU and may even be mapped into
  85 * user pagetables if we're racing with filemap_nopage().
  86 *
  87 * We need to bale out if page->mapping is no longer equal to the original
  88 * mapping.  This happens a) when the VM reclaimed the page while we waited on
  89 * its lock, b) when a concurrent invalidate_mapping_pages got there first and
  90 * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
  91 */
  92static void
  93truncate_complete_page(struct address_space *mapping, struct page *page)
  94{
  95        if (page->mapping != mapping)
  96                return;
  97
  98        cancel_dirty_page(page, PAGE_CACHE_SIZE);
  99
 100        if (PagePrivate(page))
 101                do_invalidatepage(page, 0);
 102
 103        ClearPageUptodate(page);
 104        ClearPageMappedToDisk(page);
 105        remove_from_page_cache(page);
 106        page_cache_release(page);       /* pagecache ref */
 107}
 108
 109/*
 110 * This is for invalidate_mapping_pages().  That function can be called at
 111 * any time, and is not supposed to throw away dirty pages.  But pages can
 112 * be marked dirty at any time too, so use remove_mapping which safely
 113 * discards clean, unused pages.
 114 *
 115 * Returns non-zero if the page was successfully invalidated.
 116 */
 117static int
 118invalidate_complete_page(struct address_space *mapping, struct page *page)
 119{
 120        int ret;
 121
 122        if (page->mapping != mapping)
 123                return 0;
 124
 125        if (PagePrivate(page) && !try_to_release_page(page, 0))
 126                return 0;
 127
 128        ret = remove_mapping(mapping, page);
 129
 130        return ret;
 131}
 132
 133/**
  134 * truncate_inode_pages_range - truncate range of pages specified by start and
 135 * end byte offsets
 136 * @mapping: mapping to truncate
 137 * @lstart: offset from which to truncate
 138 * @lend: offset to which to truncate
 139 *
 140 * Truncate the page cache, removing the pages that are between
 141 * specified offsets (and zeroing out partial page
 142 * (if lstart is not page aligned)).
 143 *
 144 * Truncate takes two passes - the first pass is nonblocking.  It will not
 145 * block on page locks and it will not block on writeback.  The second pass
 146 * will wait.  This is to prevent as much IO as possible in the affected region.
 147 * The first pass will remove most pages, so the search cost of the second pass
 148 * is low.
 149 *
 150 * When looking at page->index outside the page lock we need to be careful to
 151 * copy it into a local to avoid races (it could change at any time).
 152 *
 153 * We pass down the cache-hot hint to the page freeing code.  Even if the
 154 * mapping is large, it is probably the case that the final pages are the most
 155 * recently touched, and freeing happens in ascending file offset order.
 156 */
 157void truncate_inode_pages_range(struct address_space *mapping,
 158                                loff_t lstart, loff_t lend)
 159{
 160        const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
 161        pgoff_t end;
 162        const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
 163        struct pagevec pvec;
 164        pgoff_t next;
 165        int i;
 166
 167        if (mapping->nrpages == 0)
 168                return;
 169
 170        BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1));
 171        end = (lend >> PAGE_CACHE_SHIFT);
 172
 173        pagevec_init(&pvec, 0);
 174        next = start;
 175        while (next <= end &&
 176               pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
 177                for (i = 0; i < pagevec_count(&pvec); i++) {
 178                        struct page *page = pvec.pages[i];
 179                        pgoff_t page_index = page->index;
 180
 181                        if (page_index > end) {
 182                                next = page_index;
 183                                break;
 184                        }
 185
 186                        if (page_index > next)
 187                                next = page_index;
 188                        next++;
 189                        if (TestSetPageLocked(page))
 190                                continue;
 191                        if (PageWriteback(page)) {
 192                                unlock_page(page);
 193                                continue;
 194                        }
 195                        truncate_complete_page(mapping, page);
 196                        unlock_page(page);
 197                }
 198                pagevec_release(&pvec);
 199                cond_resched();
 200        }
 201
 202        if (partial) {
 203                struct page *page = find_lock_page(mapping, start - 1);
 204                if (page) {
 205                        wait_on_page_writeback(page);
 206                        truncate_partial_page(page, partial);
 207                        unlock_page(page);
 208                        page_cache_release(page);
 209                }
 210        }
 211
 212        next = start;
 213        for ( ; ; ) {
 214                cond_resched();
 215                if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
 216                        if (next == start)
 217                                break;
 218                        next = start;
 219                        continue;
 220                }
 221                if (pvec.pages[0]->index > end) {
 222                        pagevec_release(&pvec);
 223                        break;
 224                }
 225                for (i = 0; i < pagevec_count(&pvec); i++) {
 226                        struct page *page = pvec.pages[i];
 227
 228                        if (page->index > end)
 229                                break;
 230                        lock_page(page);
 231                        wait_on_page_writeback(page);
 232                        if (page->index > next)
 233                                next = page->index;
 234                        next++;
 235                        truncate_complete_page(mapping, page);
 236                        unlock_page(page);
 237                }
 238                pagevec_release(&pvec);
 239        }
 240}
 241EXPORT_SYMBOL(truncate_inode_pages_range);
 242
 243/**
 244 * truncate_inode_pages - truncate *all* the pages from an offset
 245 * @mapping: mapping to truncate
 246 * @lstart: offset from which to truncate
 247 *
 248 * Called under (and serialised by) inode->i_mutex.
 249 */
 250void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
 251{
 252        truncate_inode_pages_range(mapping, lstart, (loff_t)-1);
 253}
 254EXPORT_SYMBOL(truncate_inode_pages);
 255
 256/**
 257 * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
 258 * @mapping: the address_space which holds the pages to invalidate
 259 * @start: the offset 'from' which to invalidate
 260 * @end: the offset 'to' which to invalidate (inclusive)
 261 *
 262 * This function only removes the unlocked pages, if you want to
 263 * remove all the pages of one inode, you must call truncate_inode_pages.
 264 *
 265 * invalidate_mapping_pages() will not block on IO activity. It will not
 266 * invalidate pages which are dirty, locked, under writeback or mapped into
 267 * pagetables.
 268 */
 269unsigned long invalidate_mapping_pages(struct address_space *mapping,
 270                                pgoff_t start, pgoff_t end)
 271{
 272        struct pagevec pvec;
 273        pgoff_t next = start;
 274        unsigned long ret = 0;
 275        int i;
 276
 277        pagevec_init(&pvec, 0);
 278        while (next <= end &&
 279                        pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
 280                for (i = 0; i < pagevec_count(&pvec); i++) {
 281                        struct page *page = pvec.pages[i];
 282                        pgoff_t index;
 283                        int lock_failed;
 284
 285                        lock_failed = TestSetPageLocked(page);
 286
 287                        /*
 288                         * We really shouldn't be looking at the ->index of an
 289                         * unlocked page.  But we're not allowed to lock these
 290                         * pages.  So we rely upon nobody altering the ->index
 291                         * of this (pinned-by-us) page.
 292                         */
 293                        index = page->index;
 294                        if (index > next)
 295                                next = index;
 296                        next++;
 297                        if (lock_failed)
 298                                continue;
 299
 300                        if (PageDirty(page) || PageWriteback(page))
 301                                goto unlock;
 302                        if (page_mapped(page))
 303                                goto unlock;
 304                        ret += invalidate_complete_page(mapping, page);
 305unlock:
 306                        unlock_page(page);
 307                        if (next > end)
 308                                break;
 309                }
 310                pagevec_release(&pvec);
 311        }
 312        return ret;
 313}
 314EXPORT_SYMBOL(invalidate_mapping_pages);
 315
 316/*
 317 * This is like invalidate_complete_page(), except it ignores the page's
 318 * refcount.  We do this because invalidate_inode_pages2() needs stronger
 319 * invalidation guarantees, and cannot afford to leave pages behind because
 320 * shrink_list() has a temp ref on them, or because they're transiently sitting
 321 * in the lru_cache_add() pagevecs.
 322 */
 323static int
 324invalidate_complete_page2(struct address_space *mapping, struct page *page)
 325{
 326        if (page->mapping != mapping)
 327                return 0;
 328
 329        if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL))
 330                return 0;
 331
 332        write_lock_irq(&mapping->tree_lock);
 333        if (PageDirty(page))
 334                goto failed;
 335
 336        BUG_ON(PagePrivate(page));
 337        __remove_from_page_cache(page);
 338        write_unlock_irq(&mapping->tree_lock);
 339        ClearPageUptodate(page);
 340        page_cache_release(page);       /* pagecache ref */
 341        return 1;
 342failed:
 343        write_unlock_irq(&mapping->tree_lock);
 344        return 0;
 345}
 346
 347static int do_launder_page(struct address_space *mapping, struct page *page)
 348{
 349        if (!PageDirty(page))
 350                return 0;
 351        if (page->mapping != mapping || mapping->a_ops->launder_page == NULL)
 352                return 0;
 353        return mapping->a_ops->launder_page(page);
 354}
 355
 356/**
 357 * invalidate_inode_pages2_range - remove range of pages from an address_space
 358 * @mapping: the address_space
 359 * @start: the page offset 'from' which to invalidate
 360 * @end: the page offset 'to' which to invalidate (inclusive)
 361 *
 362 * Any pages which are found to be mapped into pagetables are unmapped prior to
 363 * invalidation.
 364 *
 365 * Returns -EIO if any pages could not be invalidated.
 366 */
 367int invalidate_inode_pages2_range(struct address_space *mapping,
 368                                  pgoff_t start, pgoff_t end)
 369{
 370        struct pagevec pvec;
 371        pgoff_t next;
 372        int i;
 373        int ret = 0;
 374        int did_range_unmap = 0;
 375        int wrapped = 0;
 376
 377        pagevec_init(&pvec, 0);
 378        next = start;
 379        while (next <= end && !wrapped &&
 380                pagevec_lookup(&pvec, mapping, next,
 381                        min(end - next, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
 382                for (i = 0; i < pagevec_count(&pvec); i++) {
 383                        struct page *page = pvec.pages[i];
 384                        pgoff_t page_index;
 385
 386                        lock_page(page);
 387                        if (page->mapping != mapping) {
 388                                unlock_page(page);
 389                                continue;
 390                        }
 391                        page_index = page->index;
 392                        next = page_index + 1;
 393                        if (next == 0)
 394                                wrapped = 1;
 395                        if (page_index > end) {
 396                                unlock_page(page);
 397                                break;
 398                        }
 399                        wait_on_page_writeback(page);
 400                        while (page_mapped(page)) {
 401                                if (!did_range_unmap) {
 402                                        /*
 403                                         * Zap the rest of the file in one hit.
 404                                         */
 405                                        unmap_mapping_range(mapping,
 406                                           (loff_t)page_index<<PAGE_CACHE_SHIFT,
 407                                           (loff_t)(end - page_index + 1)
 408                                                        << PAGE_CACHE_SHIFT,
 409                                            0);
 410                                        did_range_unmap = 1;
 411                                } else {
 412                                        /*
 413                                         * Just zap this page
 414                                         */
 415                                        unmap_mapping_range(mapping,
 416                                          (loff_t)page_index<<PAGE_CACHE_SHIFT,
 417                                          PAGE_CACHE_SIZE, 0);
 418                                }
 419                        }
 420                        ret = do_launder_page(mapping, page);
 421                        if (ret == 0 && !invalidate_complete_page2(mapping, page))
 422                                ret = -EIO;
 423                        unlock_page(page);
 424                }
 425                pagevec_release(&pvec);
 426                cond_resched();
 427        }
 428        return ret;
 429}
 430EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);
 431
 432/**
 433 * invalidate_inode_pages2 - remove all pages from an address_space
 434 * @mapping: the address_space
 435 *
 436 * Any pages which are found to be mapped into pagetables are unmapped prior to
 437 * invalidation.
 438 *
 439 * Returns -EIO if any pages could not be invalidated.
 440 */
 441int invalidate_inode_pages2(struct address_space *mapping)
 442{
 443        return invalidate_inode_pages2_range(mapping, 0, -1);
 444}
 445EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
 446
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.