linux/arch/mips/mm/page.c
/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org)
 * Copyright (C) 2007  Maciej W. Rozycki
 * Copyright (C) 2008  Thiemo Seufer
 * Copyright (C) 2012  MIPS Technologies, Inc.
 */
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>

#include <asm/bugs.h>
#include <asm/cacheops.h>
#include <asm/cpu-type.h>
#include <asm/inst.h>
#include <asm/io.h>
#include <asm/page.h>
#include <asm/prefetch.h>
#include <asm/bootinfo.h>
#include <asm/mipsregs.h>
#include <asm/mmu_context.h>
#include <asm/cpu.h>
#include <asm/war.h>

#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
#include <asm/sibyte/sb1250.h>
#include <asm/sibyte/sb1250_regs.h>
#include <asm/sibyte/sb1250_dma.h>
#endif

#include <asm/uasm.h>

/* Registers used in the assembled routines. */
#define ZERO 0
#define AT 2
#define A0 4
#define A1 5
#define A2 6
#define T0 8
#define T1 9
#define T2 10
#define T3 11
#define T9 25
#define RA 31
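
/*
 * For reference: the numeric values above are MIPS register numbers,
 * i.e. $0 = zero, $4-$6 = a0-a2, $8-$11 = t0-t3, $25 = t9, $31 = ra.
 * Note that AT here is $2 (ABI name v0), not the assembler temporary
 * $1; the generated code just needs a call-clobbered scratch register.
 */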

/* Handle labels (which must be positive integers). */
enum label_id {
        label_clear_nopref = 1,
        label_clear_pref,
        label_copy_nopref,
        label_copy_pref_both,
        label_copy_pref_store,
};

UASM_L_LA(_clear_nopref)
UASM_L_LA(_clear_pref)
UASM_L_LA(_copy_nopref)
UASM_L_LA(_copy_pref_both)
UASM_L_LA(_copy_pref_store)

/* We need one branch and therefore one relocation per target label. */
static struct uasm_label labels[5];
static struct uasm_reloc relocs[5];

#define cpu_is_r4600_v1_x()     ((read_c0_prid() & 0xfffffff0) == 0x00002010)
#define cpu_is_r4600_v2_x()     ((read_c0_prid() & 0xfffffff0) == 0x00002020)

/*
 * The pref instruction on MIPS R6 has only a 9-bit signed offset.
 * Skip emitting the prefetch if the offset does not fit.
 */
#define _uasm_i_pref(a, b, c, d)                \
do {                                            \
        if (cpu_has_mips_r6) {                  \
                if (c <= 0xff && c >= -0x100)   \
                        uasm_i_pref(a, b, c, d);\
        } else {                                \
                uasm_i_pref(a, b, c, d);        \
        }                                       \
} while (0)
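
/*
 * Illustration: the R6 pref encoding carries a 9-bit signed offset,
 * i.e. the range -0x100 .. 0xff checked above, while pre-R6 pref
 * takes a 16-bit signed offset.  So, for example,
 * _uasm_i_pref(&buf, Pref_Load, 0x180, A1) emits a prefetch on
 * pre-R6 CPUs but is silently dropped on R6, where 0x180 > 0xff.
 */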

static int pref_bias_clear_store;
static int pref_bias_copy_load;
static int pref_bias_copy_store;

static u32 pref_src_mode;
static u32 pref_dst_mode;

static int clear_word_size;
static int copy_word_size;

static int half_clear_loop_size;
static int half_copy_loop_size;

static int cache_line_size;
#define cache_line_mask() (cache_line_size - 1)
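
/*
 * Example: with a 32-byte D-cache line, cache_line_mask() is 0x1f, so
 * "off & cache_line_mask()" is non-zero for all but line-aligned
 * offsets; the build_*_pref() helpers below use this to emit at most
 * one prefetch or cache op per cache line.
 */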

static inline void
pg_addiu(u32 **buf, unsigned int reg1, unsigned int reg2, unsigned int off)
{
        if (cpu_has_64bit_gp_regs && DADDI_WAR && r4k_daddiu_bug()) {
                if (off > 0x7fff) {
                        uasm_i_lui(buf, T9, uasm_rel_hi(off));
                        uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
                } else
                        uasm_i_addiu(buf, T9, ZERO, off);
                uasm_i_daddu(buf, reg1, reg2, T9);
        } else {
                if (off > 0x7fff) {
                        uasm_i_lui(buf, T9, uasm_rel_hi(off));
                        uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
                        UASM_i_ADDU(buf, reg1, reg2, T9);
                } else
                        UASM_i_ADDIU(buf, reg1, reg2, off);
        }
}
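
/*
 * For illustration, pg_addiu() with off = 0x9000 (too large for a
 * 16-bit immediate) emits, on the non-erratum path:
 *
 *      lui     t9, 1                   # t9 = 0x00010000
 *      addiu   t9, t9, -0x7000         # t9 = 0x00009000
 *      [d]addu reg1, reg2, t9
 *
 * while a small offset collapses to a single [d]addiu.  uasm_rel_hi()
 * and uasm_rel_lo() split the constant so that the sign-extending
 * addiu of the low half still produces the exact value.
 */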

static void set_prefetch_parameters(void)
{
        if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg)
                clear_word_size = 8;
        else
                clear_word_size = 4;

        if (cpu_has_64bit_gp_regs)
                copy_word_size = 8;
        else
                copy_word_size = 4;

        /*
         * The prefs used here use "streaming" hints, which cause the
         * copied data to be kicked out of the cache sooner.  A page copy
         * often ends up copying a lot more data than is commonly used, so
         * this seems to make sense in terms of reducing cache pollution,
         * but I've no real performance data to back this up.
         */
        if (cpu_has_prefetch) {
                /*
                 * XXX: Most prefetch bias values in here are based on
                 * guesswork.
                 */
                cache_line_size = cpu_dcache_line_size();
                switch (current_cpu_type()) {
                case CPU_R5500:
                case CPU_TX49XX:
                        /* These processors only support the Pref_Load hint. */
                        pref_bias_copy_load = 256;
                        break;

                case CPU_R10000:
                case CPU_R12000:
                case CPU_R14000:
                case CPU_R16000:
                        /*
                         * These values have been experimentally tuned for an
                         * Origin 200.
                         */
                        pref_bias_clear_store = 512;
                        pref_bias_copy_load = 256;
                        pref_bias_copy_store = 256;
                        pref_src_mode = Pref_LoadStreamed;
                        pref_dst_mode = Pref_StoreStreamed;
                        break;

                case CPU_SB1:
                case CPU_SB1A:
                        pref_bias_clear_store = 128;
                        pref_bias_copy_load = 128;
                        pref_bias_copy_store = 128;
                        /*
                         * SB1 pass1 Pref_LoadStreamed/Pref_StoreStreamed
                         * hints are broken.
                         */
                        if (current_cpu_type() == CPU_SB1 &&
                            (current_cpu_data.processor_id & 0xff) < 0x02) {
                                pref_src_mode = Pref_Load;
                                pref_dst_mode = Pref_Store;
                        } else {
                                pref_src_mode = Pref_LoadStreamed;
                                pref_dst_mode = Pref_StoreStreamed;
                        }
                        break;

                case CPU_LOONGSON64:
                        /* Loongson-3 only supports Pref_Load/Pref_Store. */
                        pref_bias_clear_store = 128;
                        pref_bias_copy_load = 128;
                        pref_bias_copy_store = 128;
                        pref_src_mode = Pref_Load;
                        pref_dst_mode = Pref_Store;
                        break;

                default:
                        pref_bias_clear_store = 128;
                        pref_bias_copy_load = 256;
                        pref_bias_copy_store = 128;
                        pref_src_mode = Pref_LoadStreamed;
                        if (cpu_has_mips_r6)
                                /*
                                 * Hint 30 (Pref_PrepareForStore) has been
                                 * removed from MIPS R6.  Use hint 5
                                 * (Pref_StoreStreamed) instead.
                                 */
                                pref_dst_mode = Pref_StoreStreamed;
                        else
                                pref_dst_mode = Pref_PrepareForStore;
                        break;
                }
        } else {
                if (cpu_has_cache_cdex_s)
                        cache_line_size = cpu_scache_line_size();
                else if (cpu_has_cache_cdex_p)
                        cache_line_size = cpu_dcache_line_size();
        }
        /*
         * Too much unrolling will overflow the space reserved for the
         * generated code between __clear_page_start/__clear_page_end and
         * __copy_page_start/__copy_page_end.
         */
        half_clear_loop_size = min(16 * clear_word_size,
                                   max(cache_line_size >> 1,
                                       4 * clear_word_size));
        half_copy_loop_size = min(16 * copy_word_size,
                                  max(cache_line_size >> 1,
                                      4 * copy_word_size));
}
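
/*
 * Worked example: on a 64-bit CPU (clear_word_size = 8) with a 32-byte
 * D-cache line, half_clear_loop_size = min(16 * 8, max(32 / 2, 4 * 8))
 * = min(128, 32) = 32 bytes, i.e. four sd instructions per half of the
 * unrolled clear loop.
 */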

static void build_clear_store(u32 **buf, int off)
{
        if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg) {
                uasm_i_sd(buf, ZERO, off, A0);
        } else {
                uasm_i_sw(buf, ZERO, off, A0);
        }
}

static inline void build_clear_pref(u32 **buf, int off)
{
        if (off & cache_line_mask())
                return;

        if (pref_bias_clear_store) {
                _uasm_i_pref(buf, pref_dst_mode, pref_bias_clear_store + off,
                             A0);
        } else if (cache_line_size == (half_clear_loop_size << 1)) {
                if (cpu_has_cache_cdex_s) {
                        uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
                } else if (cpu_has_cache_cdex_p) {
                        if (IS_ENABLED(CONFIG_WAR_R4600_V1_HIT_CACHEOP) &&
                            cpu_is_r4600_v1_x()) {
                                uasm_i_nop(buf);
                                uasm_i_nop(buf);
                                uasm_i_nop(buf);
                                uasm_i_nop(buf);
                        }

                        if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) &&
                            cpu_is_r4600_v2_x())
                                uasm_i_lw(buf, ZERO, ZERO, AT);

                        uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
                }
        }
}
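
/*
 * Note on the cacheop fallback above: Create_Dirty_Excl_D establishes
 * a dirty-exclusive line without reading it from memory, which is only
 * safe because the whole line is about to be overwritten.  The
 * cache_line_size == 2 * half_clear_loop_size test guarantees that one
 * full loop iteration covers exactly one cache line, so the single
 * line-aligned cacheop per iteration touches every line once.
 */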

extern u32 __clear_page_start;
extern u32 __clear_page_end;
extern u32 __copy_page_start;
extern u32 __copy_page_end;
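
/*
 * These symbols bracket space reserved for the generated code (in
 * upstream kernels they are defined in arch/mips/mm/page-funcs.S
 * around the clear_page/copy_page entry points); the BUG_ON()s below
 * catch the case where a synthesized routine outgrows that space.
 */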

void build_clear_page(void)
{
        int off;
        u32 *buf = &__clear_page_start;
        struct uasm_label *l = labels;
        struct uasm_reloc *r = relocs;
        int i;
        static atomic_t run_once = ATOMIC_INIT(0);

        if (atomic_xchg(&run_once, 1)) {
                return;
        }

        memset(labels, 0, sizeof(labels));
        memset(relocs, 0, sizeof(relocs));

        set_prefetch_parameters();

        /*
         * This algorithm makes the following assumptions:
         *   - The prefetch bias is a multiple of 2 words.
         *   - The prefetch bias is less than one page.
         */
        BUG_ON(pref_bias_clear_store % (2 * clear_word_size));
        BUG_ON(PAGE_SIZE < pref_bias_clear_store);
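        /*
         * For example, with the defaults from set_prefetch_parameters()
         * on a 64-bit CPU (pref_bias_clear_store = 128, clear_word_size
         * = 8) and PAGE_SIZE = 4096: 128 is a multiple of 16 and well
         * below one page, so both checks pass.
         */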

        off = PAGE_SIZE - pref_bias_clear_store;
        if (off > 0xffff || !pref_bias_clear_store)
                pg_addiu(&buf, A2, A0, off);
        else
                uasm_i_ori(&buf, A2, A0, off);

        if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) && cpu_is_r4600_v2_x())
                uasm_i_lui(&buf, AT, uasm_rel_hi(0xa0000000));

        off = cache_line_size ? min(8, pref_bias_clear_store / cache_line_size)
                                * cache_line_size : 0;
        while (off) {
                build_clear_pref(&buf, -off);
                off -= cache_line_size;
        }
        uasm_l_clear_pref(&l, buf);
        do {
                build_clear_pref(&buf, off);
                build_clear_store(&buf, off);
                off += clear_word_size;
        } while (off < half_clear_loop_size);
        pg_addiu(&buf, A0, A0, 2 * off);
        off = -off;
        do {
                build_clear_pref(&buf, off);
                if (off == -clear_word_size)
                        uasm_il_bne(&buf, &r, A0, A2, label_clear_pref);
                build_clear_store(&buf, off);
                off += clear_word_size;
        } while (off < 0);
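
        /*
         * Sketch of the loop emitted above for a 64-bit CPU with
         * half_clear_loop_size = 32 and a 32-byte line (BIAS is
         * pref_bias_clear_store); note the final sd sits in the bne
         * delay slot:
         *
         * 1:   pref    BIAS+0(a0)
         *      sd      zero, 0(a0);   sd zero, 8(a0)
         *      sd      zero, 16(a0);  sd zero, 24(a0)
         *      daddiu  a0, a0, 64
         *      pref    BIAS-32(a0)
         *      sd      zero, -32(a0); sd zero, -24(a0); sd zero, -16(a0)
         *      bne     a0, a2, 1b
         *       sd     zero, -8(a0)
         */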

        if (pref_bias_clear_store) {
                pg_addiu(&buf, A2, A0, pref_bias_clear_store);
                uasm_l_clear_nopref(&l, buf);
                off = 0;
                do {
                        build_clear_store(&buf, off);
                        off += clear_word_size;
                } while (off < half_clear_loop_size);
                pg_addiu(&buf, A0, A0, 2 * off);
                off = -off;
                do {
                        if (off == -clear_word_size)
                                uasm_il_bne(&buf, &r, A0, A2,
                                            label_clear_nopref);
                        build_clear_store(&buf, off);
                        off += clear_word_size;
                } while (off < 0);
        }

        uasm_i_jr(&buf, RA);
        uasm_i_nop(&buf);

        BUG_ON(buf > &__clear_page_end);

        uasm_resolve_relocs(relocs, labels);

        pr_debug("Synthesized clear page handler (%u instructions).\n",
                 (u32)(buf - &__clear_page_start));

        pr_debug("\t.set push\n");
        pr_debug("\t.set noreorder\n");
        for (i = 0; i < (buf - &__clear_page_start); i++)
                pr_debug("\t.word 0x%08x\n", (&__clear_page_start)[i]);
        pr_debug("\t.set pop\n");
}

static void build_copy_load(u32 **buf, int reg, int off)
{
        if (cpu_has_64bit_gp_regs) {
                uasm_i_ld(buf, reg, off, A1);
        } else {
                uasm_i_lw(buf, reg, off, A1);
        }
}

static void build_copy_store(u32 **buf, int reg, int off)
{
        if (cpu_has_64bit_gp_regs) {
                uasm_i_sd(buf, reg, off, A0);
        } else {
                uasm_i_sw(buf, reg, off, A0);
        }
}

static inline void build_copy_load_pref(u32 **buf, int off)
{
        if (off & cache_line_mask())
                return;

        if (pref_bias_copy_load)
                _uasm_i_pref(buf, pref_src_mode, pref_bias_copy_load + off, A1);
}

static inline void build_copy_store_pref(u32 **buf, int off)
{
        if (off & cache_line_mask())
                return;

        if (pref_bias_copy_store) {
                _uasm_i_pref(buf, pref_dst_mode, pref_bias_copy_store + off,
                             A0);
        } else if (cache_line_size == (half_copy_loop_size << 1)) {
                if (cpu_has_cache_cdex_s) {
                        uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
                } else if (cpu_has_cache_cdex_p) {
                        if (IS_ENABLED(CONFIG_WAR_R4600_V1_HIT_CACHEOP) &&
                            cpu_is_r4600_v1_x()) {
                                uasm_i_nop(buf);
                                uasm_i_nop(buf);
                                uasm_i_nop(buf);
                                uasm_i_nop(buf);
                        }

                        if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) &&
                            cpu_is_r4600_v2_x())
                                uasm_i_lw(buf, ZERO, ZERO, AT);

                        uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
                }
        }
}
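
/*
 * Note: Pref_PrepareForStore (the pre-R6 default destination hint)
 * may allocate the line in a dirty state without fetching its previous
 * contents, so it is only usable when the entire line will be
 * overwritten, as is the case for a full-page copy.
 */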

void build_copy_page(void)
{
        int off;
        u32 *buf = &__copy_page_start;
        struct uasm_label *l = labels;
        struct uasm_reloc *r = relocs;
        int i;
        static atomic_t run_once = ATOMIC_INIT(0);

        if (atomic_xchg(&run_once, 1)) {
                return;
        }

        memset(labels, 0, sizeof(labels));
        memset(relocs, 0, sizeof(relocs));

        set_prefetch_parameters();

        /*
         * This algorithm makes the following assumptions:
         *   - All prefetch biases are multiples of 8 words.
         *   - The prefetch biases are less than one page.
         *   - The store prefetch bias isn't greater than the load
         *     prefetch bias.
         */
        BUG_ON(pref_bias_copy_load % (8 * copy_word_size));
        BUG_ON(pref_bias_copy_store % (8 * copy_word_size));
        BUG_ON(PAGE_SIZE < pref_bias_copy_load);
        BUG_ON(pref_bias_copy_store > pref_bias_copy_load);
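        /*
         * Example: with copy_word_size = 8 the biases must be multiples
         * of 8 * 8 = 64 bytes; the defaults of 256 (load) and 128
         * (store) from set_prefetch_parameters() satisfy all four
         * checks on a 4K page.
         */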

        off = PAGE_SIZE - pref_bias_copy_load;
        if (off > 0xffff || !pref_bias_copy_load)
                pg_addiu(&buf, A2, A0, off);
        else
                uasm_i_ori(&buf, A2, A0, off);

        if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) && cpu_is_r4600_v2_x())
                uasm_i_lui(&buf, AT, uasm_rel_hi(0xa0000000));

        off = cache_line_size ? min(8, pref_bias_copy_load / cache_line_size) *
                                cache_line_size : 0;
        while (off) {
                build_copy_load_pref(&buf, -off);
                off -= cache_line_size;
        }
        off = cache_line_size ? min(8, pref_bias_copy_store / cache_line_size) *
                                cache_line_size : 0;
        while (off) {
                build_copy_store_pref(&buf, -off);
                off -= cache_line_size;
        }
        uasm_l_copy_pref_both(&l, buf);
        do {
                build_copy_load_pref(&buf, off);
                build_copy_load(&buf, T0, off);
                build_copy_load_pref(&buf, off + copy_word_size);
                build_copy_load(&buf, T1, off + copy_word_size);
                build_copy_load_pref(&buf, off + 2 * copy_word_size);
                build_copy_load(&buf, T2, off + 2 * copy_word_size);
                build_copy_load_pref(&buf, off + 3 * copy_word_size);
                build_copy_load(&buf, T3, off + 3 * copy_word_size);
                build_copy_store_pref(&buf, off);
                build_copy_store(&buf, T0, off);
                build_copy_store_pref(&buf, off + copy_word_size);
                build_copy_store(&buf, T1, off + copy_word_size);
                build_copy_store_pref(&buf, off + 2 * copy_word_size);
                build_copy_store(&buf, T2, off + 2 * copy_word_size);
                build_copy_store_pref(&buf, off + 3 * copy_word_size);
                build_copy_store(&buf, T3, off + 3 * copy_word_size);
                off += 4 * copy_word_size;
        } while (off < half_copy_loop_size);
        pg_addiu(&buf, A1, A1, 2 * off);
        pg_addiu(&buf, A0, A0, 2 * off);
        off = -off;
        do {
                build_copy_load_pref(&buf, off);
                build_copy_load(&buf, T0, off);
                build_copy_load_pref(&buf, off + copy_word_size);
                build_copy_load(&buf, T1, off + copy_word_size);
                build_copy_load_pref(&buf, off + 2 * copy_word_size);
                build_copy_load(&buf, T2, off + 2 * copy_word_size);
                build_copy_load_pref(&buf, off + 3 * copy_word_size);
                build_copy_load(&buf, T3, off + 3 * copy_word_size);
                build_copy_store_pref(&buf, off);
                build_copy_store(&buf, T0, off);
                build_copy_store_pref(&buf, off + copy_word_size);
                build_copy_store(&buf, T1, off + copy_word_size);
                build_copy_store_pref(&buf, off + 2 * copy_word_size);
                build_copy_store(&buf, T2, off + 2 * copy_word_size);
                build_copy_store_pref(&buf, off + 3 * copy_word_size);
                if (off == -(4 * copy_word_size))
                        uasm_il_bne(&buf, &r, A2, A0, label_copy_pref_both);
                build_copy_store(&buf, T3, off + 3 * copy_word_size);
                off += 4 * copy_word_size;
        } while (off < 0);
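
        /*
         * As in build_clear_page(), the bne is emitted just before the
         * last store so that the store occupies the branch delay slot.
         * Cycling the data through t0-t3 keeps four loads in flight
         * ahead of their stores, hiding some of the load-use latency.
         */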

        if (pref_bias_copy_load - pref_bias_copy_store) {
                pg_addiu(&buf, A2, A0,
                         pref_bias_copy_load - pref_bias_copy_store);
                uasm_l_copy_pref_store(&l, buf);
                off = 0;
                do {
                        build_copy_load(&buf, T0, off);
                        build_copy_load(&buf, T1, off + copy_word_size);
                        build_copy_load(&buf, T2, off + 2 * copy_word_size);
                        build_copy_load(&buf, T3, off + 3 * copy_word_size);
                        build_copy_store_pref(&buf, off);
                        build_copy_store(&buf, T0, off);
                        build_copy_store_pref(&buf, off + copy_word_size);
                        build_copy_store(&buf, T1, off + copy_word_size);
                        build_copy_store_pref(&buf, off + 2 * copy_word_size);
                        build_copy_store(&buf, T2, off + 2 * copy_word_size);
                        build_copy_store_pref(&buf, off + 3 * copy_word_size);
                        build_copy_store(&buf, T3, off + 3 * copy_word_size);
                        off += 4 * copy_word_size;
                } while (off < half_copy_loop_size);
                pg_addiu(&buf, A1, A1, 2 * off);
                pg_addiu(&buf, A0, A0, 2 * off);
                off = -off;
                do {
                        build_copy_load(&buf, T0, off);
                        build_copy_load(&buf, T1, off + copy_word_size);
                        build_copy_load(&buf, T2, off + 2 * copy_word_size);
                        build_copy_load(&buf, T3, off + 3 * copy_word_size);
                        build_copy_store_pref(&buf, off);
                        build_copy_store(&buf, T0, off);
                        build_copy_store_pref(&buf, off + copy_word_size);
                        build_copy_store(&buf, T1, off + copy_word_size);
                        build_copy_store_pref(&buf, off + 2 * copy_word_size);
                        build_copy_store(&buf, T2, off + 2 * copy_word_size);
                        build_copy_store_pref(&buf, off + 3 * copy_word_size);
                        if (off == -(4 * copy_word_size))
                                uasm_il_bne(&buf, &r, A2, A0,
                                            label_copy_pref_store);
                        build_copy_store(&buf, T3, off + 3 * copy_word_size);
                        off += 4 * copy_word_size;
                } while (off < 0);
        }

        if (pref_bias_copy_store) {
                pg_addiu(&buf, A2, A0, pref_bias_copy_store);
                uasm_l_copy_nopref(&l, buf);
                off = 0;
                do {
                        build_copy_load(&buf, T0, off);
                        build_copy_load(&buf, T1, off + copy_word_size);
                        build_copy_load(&buf, T2, off + 2 * copy_word_size);
                        build_copy_load(&buf, T3, off + 3 * copy_word_size);
                        build_copy_store(&buf, T0, off);
                        build_copy_store(&buf, T1, off + copy_word_size);
                        build_copy_store(&buf, T2, off + 2 * copy_word_size);
                        build_copy_store(&buf, T3, off + 3 * copy_word_size);
                        off += 4 * copy_word_size;
                } while (off < half_copy_loop_size);
                pg_addiu(&buf, A1, A1, 2 * off);
                pg_addiu(&buf, A0, A0, 2 * off);
                off = -off;
                do {
                        build_copy_load(&buf, T0, off);
                        build_copy_load(&buf, T1, off + copy_word_size);
                        build_copy_load(&buf, T2, off + 2 * copy_word_size);
                        build_copy_load(&buf, T3, off + 3 * copy_word_size);
                        build_copy_store(&buf, T0, off);
                        build_copy_store(&buf, T1, off + copy_word_size);
                        build_copy_store(&buf, T2, off + 2 * copy_word_size);
                        if (off == -(4 * copy_word_size))
                                uasm_il_bne(&buf, &r, A2, A0,
                                            label_copy_nopref);
                        build_copy_store(&buf, T3, off + 3 * copy_word_size);
                        off += 4 * copy_word_size;
                } while (off < 0);
        }

        uasm_i_jr(&buf, RA);
        uasm_i_nop(&buf);

        BUG_ON(buf > &__copy_page_end);

        uasm_resolve_relocs(relocs, labels);

        pr_debug("Synthesized copy page handler (%u instructions).\n",
                 (u32)(buf - &__copy_page_start));

        pr_debug("\t.set push\n");
        pr_debug("\t.set noreorder\n");
        for (i = 0; i < (buf - &__copy_page_start); i++)
                pr_debug("\t.word 0x%08x\n", (&__copy_page_start)[i]);
        pr_debug("\t.set pop\n");
}

#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
extern void clear_page_cpu(void *page);
extern void copy_page_cpu(void *to, void *from);

/*
 * Pad descriptors to cacheline, since each is exclusively owned by a
 * particular CPU.
 */
struct dmadscr {
        u64 dscr_a;
        u64 dscr_b;
        u64 pad_a;
        u64 pad_b;
} ____cacheline_aligned_in_smp page_descr[DM_NUM_CHANNELS];
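
/*
 * Sizing note: dscr_a/dscr_b form the 16-byte hardware descriptor,
 * while pad_a/pad_b grow each element to 32 bytes; together with the
 * ____cacheline_aligned_in_smp alignment of the array, this keeps
 * descriptors owned by different CPUs from sharing a cache line on
 * SB1's 32-byte lines.
 */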

void clear_page(void *page)
{
        u64 to_phys = CPHYSADDR((unsigned long)page);
        unsigned int cpu = smp_processor_id();

        /* If the page is not in KSEG0, fall back to the CPU routine. */
        if ((long)KSEGX((unsigned long)page) != (long)CKSEG0)
                return clear_page_cpu(page);

        page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_ZERO_MEM |
                                 M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT;
        page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
        __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));

        /*
         * Don't really want to do it this way, but there's no
         * reliable way to delay completion detection.
         */
        while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
                 & M_DM_DSCR_BASE_INTERRUPT))
                ;
        __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
}
EXPORT_SYMBOL(clear_page);
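
/*
 * Completion protocol used above and in copy_page() below: writing 1
 * to R_DM_DSCR_COUNT hands one descriptor to the data mover, the loop
 * polls the interrupt bit in R_DM_DSCR_BASE_DEBUG until the transfer
 * finishes, and the final read of R_DM_DSCR_BASE appears to serve as
 * the acknowledgement that clears the completion status.
 */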

void copy_page(void *to, void *from)
{
        u64 from_phys = CPHYSADDR((unsigned long)from);
        u64 to_phys = CPHYSADDR((unsigned long)to);
        unsigned int cpu = smp_processor_id();

        /* If either page is not in KSEG0, fall back to the CPU routine. */
        if ((long)KSEGX((unsigned long)to) != (long)CKSEG0
            || (long)KSEGX((unsigned long)from) != (long)CKSEG0)
                return copy_page_cpu(to, from);

        page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_L2C_DEST |
                                 M_DM_DSCRA_INTERRUPT;
        page_descr[cpu].dscr_b = from_phys | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
        __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));

        /*
         * Don't really want to do it this way, but there's no
         * reliable way to delay completion detection.
         */
        while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
                 & M_DM_DSCR_BASE_INTERRUPT))
                ;
        __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
}
EXPORT_SYMBOL(copy_page);

#endif /* CONFIG_SIBYTE_DMA_PAGEOPS */