linux/arch/x86/kernel/ds.c
<<
>>
Prefs
   1/*
   2 * Debug Store support
   3 *
   4 * This provides a low-level interface to the hardware's Debug Store
   5 * feature that is used for branch trace store (BTS) and
   6 * precise-event based sampling (PEBS).
   7 *
   8 * It manages:
   9 * - DS and BTS hardware configuration
  10 * - buffer overflow handling (to be done)
  11 * - buffer access
  12 *
  13 * It does not do:
  14 * - security checking (is the caller allowed to trace the task)
  15 * - buffer allocation (memory accounting)
  16 *
  17 *
  18 * Copyright (C) 2007-2009 Intel Corporation.
  19 * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
  20 */
  21
  22
  23#include <asm/ds.h>
  24
  25#include <linux/errno.h>
  26#include <linux/string.h>
  27#include <linux/slab.h>
  28#include <linux/sched.h>
  29#include <linux/mm.h>
  30#include <linux/kernel.h>
  31
  32
  33/*
  34 * The configuration for a particular DS hardware implementation.
  35 */
  36struct ds_configuration {
  37        /* the name of the configuration */
  38        const char *name;
  39        /* the size of one pointer-typed field in the DS structure and
  40           in the BTS and PEBS buffers in bytes;
  41           this covers the first 8 DS fields related to buffer management. */
  42        unsigned char  sizeof_field;
  43        /* the size of a BTS/PEBS record in bytes */
  44        unsigned char  sizeof_rec[2];
  45        /* a series of bit-masks to control various features indexed
  46         * by enum ds_feature */
  47        unsigned long ctl[dsf_ctl_max];
  48};
  49static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);
  50
  51#define ds_cfg per_cpu(ds_cfg_array, smp_processor_id())
  52
  53#define MAX_SIZEOF_DS (12 * 8)  /* maximal size of a DS configuration */
  54#define MAX_SIZEOF_BTS (3 * 8)  /* maximal size of a BTS record */
  55#define DS_ALIGNMENT (1 << 3)   /* BTS and PEBS buffer alignment */
  56
  57#define BTS_CONTROL \
  58 (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\
  59  ds_cfg.ctl[dsf_bts_overflow])
  60
  61
  62/*
  63 * A BTS or PEBS tracer.
  64 *
  65 * This holds the configuration of the tracer and serves as a handle
  66 * to identify tracers.
  67 */
  68struct ds_tracer {
  69        /* the DS context (partially) owned by this tracer */
  70        struct ds_context *context;
  71        /* the buffer provided on ds_request() and its size in bytes */
  72        void *buffer;
  73        size_t size;
  74};
  75
  76struct bts_tracer {
  77        /* the common DS part */
  78        struct ds_tracer ds;
  79        /* the trace including the DS configuration */
  80        struct bts_trace trace;
  81        /* buffer overflow notification function */
  82        bts_ovfl_callback_t ovfl;
  83};
  84
  85struct pebs_tracer {
  86        /* the common DS part */
  87        struct ds_tracer ds;
  88        /* the trace including the DS configuration */
  89        struct pebs_trace trace;
  90        /* buffer overflow notification function */
  91        pebs_ovfl_callback_t ovfl;
  92};
  93
  94/*
  95 * Debug Store (DS) save area configuration (see Intel64 and IA32
  96 * Architectures Software Developer's Manual, section 18.5)
  97 *
  98 * The DS configuration consists of the following fields; different
 * architectures vary in the size of those fields.
 100 * - double-word aligned base linear address of the BTS buffer
 101 * - write pointer into the BTS buffer
 102 * - end linear address of the BTS buffer (one byte beyond the end of
 103 *   the buffer)
 104 * - interrupt pointer into BTS buffer
 105 *   (interrupt occurs when write pointer passes interrupt pointer)
 106 * - double-word aligned base linear address of the PEBS buffer
 107 * - write pointer into the PEBS buffer
 108 * - end linear address of the PEBS buffer (one byte beyond the end of
 109 *   the buffer)
 110 * - interrupt pointer into PEBS buffer
 111 *   (interrupt occurs when write pointer passes interrupt pointer)
 112 * - value to which counter is reset following counter overflow
 113 *
 114 * Later architectures use 64bit pointers throughout, whereas earlier
 115 * architectures use 32bit pointers in 32bit mode.
 116 *
 117 *
 118 * We compute the base address for the first 8 fields based on:
 119 * - the field size stored in the DS configuration
 120 * - the relative field position
 121 * - an offset giving the start of the respective region
 122 *
 123 * This offset is further used to index various arrays holding
 124 * information for BTS and PEBS at the respective index.
 125 *
 126 * On later 32bit processors, we only access the lower 32bit of the
 127 * 64bit pointer fields. The upper halves will be zeroed out.
 128 */
 129
 130enum ds_field {
 131        ds_buffer_base = 0,
 132        ds_index,
 133        ds_absolute_maximum,
 134        ds_interrupt_threshold,
 135};
 136
 137enum ds_qualifier {
 138        ds_bts  = 0,
 139        ds_pebs
 140};
 141
 142static inline unsigned long ds_get(const unsigned char *base,
 143                                   enum ds_qualifier qual, enum ds_field field)
 144{
 145        base += (ds_cfg.sizeof_field * (field + (4 * qual)));
 146        return *(unsigned long *)base;
 147}
 148
 149static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
 150                          enum ds_field field, unsigned long value)
 151{
 152        base += (ds_cfg.sizeof_field * (field + (4 * qual)));
 153        (*(unsigned long *)base) = value;
 154}
 155
 156
 157/*
 158 * Locking is done only for allocating BTS or PEBS resources.
 159 */
 160static DEFINE_SPINLOCK(ds_lock);
 161
 162
 163/*
 164 * We either support (system-wide) per-cpu or per-thread allocation.
 165 * We distinguish the two based on the task_struct pointer, where a
 166 * NULL pointer indicates per-cpu allocation for the current cpu.
 167 *
 168 * Allocations are use-counted. As soon as resources are allocated,
 169 * further allocations must be of the same type (per-cpu or
 170 * per-thread). We model this by counting allocations (i.e. the number
 171 * of tracers of a certain type) for one type negatively:
 172 *   =0  no tracers
 173 *   >0  number of per-thread tracers
 174 *   <0  number of per-cpu tracers
 175 *
 * The tracers counter essentially gives the number of DS contexts for
 * a certain type of allocation.
 178 */
 179static atomic_t tracers = ATOMIC_INIT(0);
 180
 181static inline void get_tracer(struct task_struct *task)
 182{
 183        if (task)
 184                atomic_inc(&tracers);
 185        else
 186                atomic_dec(&tracers);
 187}
 188
 189static inline void put_tracer(struct task_struct *task)
 190{
 191        if (task)
 192                atomic_dec(&tracers);
 193        else
 194                atomic_inc(&tracers);
 195}
 196
 197static inline int check_tracer(struct task_struct *task)
 198{
 199        return task ?
 200                (atomic_read(&tracers) >= 0) :
 201                (atomic_read(&tracers) <= 0);
 202}
 203
 204
 205/*
 206 * The DS context is either attached to a thread or to a cpu:
 207 * - in the former case, the thread_struct contains a pointer to the
 208 *   attached context.
 209 * - in the latter case, we use a static array of per-cpu context
 210 *   pointers.
 211 *
 212 * Contexts are use-counted. They are allocated on first access and
 213 * deallocated when the last user puts the context.
 214 */
 215struct ds_context {
 216        /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
 217        unsigned char ds[MAX_SIZEOF_DS];
 218        /* the owner of the BTS and PEBS configuration, respectively */
 219        struct bts_tracer *bts_master;
 220        struct pebs_tracer *pebs_master;
 221        /* use count */
 222        unsigned long count;
 223        /* a pointer to the context location inside the thread_struct
 224         * or the per_cpu context array */
 225        struct ds_context **this;
 226        /* a pointer to the task owning this context, or NULL, if the
 227         * context is owned by a cpu */
 228        struct task_struct *task;
 229};
 230
 231static DEFINE_PER_CPU(struct ds_context *, system_context_array);
 232
 233#define system_context per_cpu(system_context_array, smp_processor_id())
 234
 235
 236static inline struct ds_context *ds_get_context(struct task_struct *task)
 237{
 238        struct ds_context **p_context =
 239                (task ? &task->thread.ds_ctx : &system_context);
 240        struct ds_context *context = NULL;
 241        struct ds_context *new_context = NULL;
 242        unsigned long irq;
 243
 244        /* Chances are small that we already have a context. */
 245        new_context = kzalloc(sizeof(*new_context), GFP_KERNEL);
 246        if (!new_context)
 247                return NULL;
 248
 249        spin_lock_irqsave(&ds_lock, irq);
 250
 251        context = *p_context;
 252        if (!context) {
 253                context = new_context;
 254
 255                context->this = p_context;
 256                context->task = task;
 257                context->count = 0;
 258
 259                if (task)
 260                        set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
 261
 262                if (!task || (task == current))
 263                        wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds);
 264
 265                *p_context = context;
 266        }
 267
 268        context->count++;
 269
 270        spin_unlock_irqrestore(&ds_lock, irq);
 271
 272        if (context != new_context)
 273                kfree(new_context);
 274
 275        return context;
 276}
 277
 278static inline void ds_put_context(struct ds_context *context)
 279{
 280        unsigned long irq;
 281
 282        if (!context)
 283                return;
 284
 285        spin_lock_irqsave(&ds_lock, irq);
 286
 287        if (--context->count) {
 288                spin_unlock_irqrestore(&ds_lock, irq);
 289                return;
 290        }
 291
 292        *(context->this) = NULL;
 293
 294        if (context->task)
 295                clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
 296
 297        if (!context->task || (context->task == current))
 298                wrmsrl(MSR_IA32_DS_AREA, 0);
 299
 300        spin_unlock_irqrestore(&ds_lock, irq);
 301
 302        kfree(context);
 303}
 304
 305
 306/*
 307 * Call the tracer's callback on a buffer overflow.
 308 *
 309 * context: the ds context
 310 * qual: the buffer type
 311 */
 312static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
 313{
 314        switch (qual) {
 315        case ds_bts:
 316                if (context->bts_master &&
 317                    context->bts_master->ovfl)
 318                        context->bts_master->ovfl(context->bts_master);
 319                break;
 320        case ds_pebs:
 321                if (context->pebs_master &&
 322                    context->pebs_master->ovfl)
 323                        context->pebs_master->ovfl(context->pebs_master);
 324                break;
 325        }
 326}
 327
 328
 329/*
 330 * Write raw data into the BTS or PEBS buffer.
 331 *
 332 * The remainder of any partially written record is zeroed out.
 333 *
 334 * context: the DS context
 335 * qual: the buffer type
 336 * record: the data to write
 337 * size: the size of the data
 338 */
 339static int ds_write(struct ds_context *context, enum ds_qualifier qual,
 340                    const void *record, size_t size)
 341{
 342        int bytes_written = 0;
 343
 344        if (!record)
 345                return -EINVAL;
 346
 347        while (size) {
 348                unsigned long base, index, end, write_end, int_th;
 349                unsigned long write_size, adj_write_size;
 350
 351                /*
 352                 * write as much as possible without producing an
 353                 * overflow interrupt.
 354                 *
 355                 * interrupt_threshold must either be
 356                 * - bigger than absolute_maximum or
 357                 * - point to a record between buffer_base and absolute_maximum
 358                 *
 359                 * index points to a valid record.
 360                 */
 361                base   = ds_get(context->ds, qual, ds_buffer_base);
 362                index  = ds_get(context->ds, qual, ds_index);
 363                end    = ds_get(context->ds, qual, ds_absolute_maximum);
 364                int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
 365
 366                write_end = min(end, int_th);
 367
 368                /* if we are already beyond the interrupt threshold,
 369                 * we fill the entire buffer */
 370                if (write_end <= index)
 371                        write_end = end;
 372
 373                if (write_end <= index)
 374                        break;
 375
 376                write_size = min((unsigned long) size, write_end - index);
 377                memcpy((void *)index, record, write_size);
 378
 379                record = (const char *)record + write_size;
 380                size -= write_size;
 381                bytes_written += write_size;
 382
 383                adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
 384                adj_write_size *= ds_cfg.sizeof_rec[qual];
 385
 386                /* zero out trailing bytes */
 387                memset((char *)index + write_size, 0,
 388                       adj_write_size - write_size);
 389                index += adj_write_size;
 390
 391                if (index >= end)
 392                        index = base;
 393                ds_set(context->ds, qual, ds_index, index);
 394
 395                if (index >= int_th)
 396                        ds_overflow(context, qual);
 397        }
 398
 399        return bytes_written;
 400}
 401
 402
 403/*
 404 * Branch Trace Store (BTS) uses the following format. Different
 405 * architectures vary in the size of those fields.
 406 * - source linear address
 407 * - destination linear address
 408 * - flags
 409 *
 410 * Later architectures use 64bit pointers throughout, whereas earlier
 411 * architectures use 32bit pointers in 32bit mode.
 412 *
 * We compute the address of a record field based on:
 414 * - the field size stored in the DS configuration
 415 * - the relative field position
 416 *
 417 * In order to store additional information in the BTS buffer, we use
 418 * a special source address to indicate that the record requires
 419 * special interpretation.
 420 *
 421 * Netburst indicated via a bit in the flags field whether the branch
 422 * was predicted; this is ignored.
 423 *
 424 * We use two levels of abstraction:
 425 * - the raw data level defined here
 426 * - an arch-independent level defined in ds.h
 427 */
 428
 429enum bts_field {
 430        bts_from,
 431        bts_to,
 432        bts_flags,
 433
 434        bts_qual = bts_from,
 435        bts_jiffies = bts_to,
 436        bts_pid = bts_flags,
 437
 438        bts_qual_mask = (bts_qual_max - 1),
 439        bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
 440};
 441
 442static inline unsigned long bts_get(const char *base, enum bts_field field)
 443{
 444        base += (ds_cfg.sizeof_field * field);
 445        return *(unsigned long *)base;
 446}
 447
 448static inline void bts_set(char *base, enum bts_field field, unsigned long val)
 449{
 450        base += (ds_cfg.sizeof_field * field);;
 451        (*(unsigned long *)base) = val;
 452}
 453
 454
 455/*
 456 * The raw BTS data is architecture dependent.
 457 *
 458 * For higher-level users, we give an arch-independent view.
 459 * - ds.h defines struct bts_struct
 460 * - bts_read translates one raw bts record into a bts_struct
 461 * - bts_write translates one bts_struct into the raw format and
 462 *   writes it into the top of the parameter tracer's buffer.
 463 *
 464 * return: bytes read/written on success; -Eerrno, otherwise
 465 */
 466static int bts_read(struct bts_tracer *tracer, const void *at,
 467                    struct bts_struct *out)
 468{
 469        if (!tracer)
 470                return -EINVAL;
 471
 472        if (at < tracer->trace.ds.begin)
 473                return -EINVAL;
 474
 475        if (tracer->trace.ds.end < (at + tracer->trace.ds.size))
 476                return -EINVAL;
 477
 478        memset(out, 0, sizeof(*out));
 479        if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
 480                out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
 481                out->variant.timestamp.jiffies = bts_get(at, bts_jiffies);
 482                out->variant.timestamp.pid = bts_get(at, bts_pid);
 483        } else {
 484                out->qualifier = bts_branch;
 485                out->variant.lbr.from = bts_get(at, bts_from);
 486                out->variant.lbr.to   = bts_get(at, bts_to);
 487
 488                if (!out->variant.lbr.from && !out->variant.lbr.to)
 489                        out->qualifier = bts_invalid;
 490        }
 491
 492        return ds_cfg.sizeof_rec[ds_bts];
 493}
 494
 495static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in)
 496{
 497        unsigned char raw[MAX_SIZEOF_BTS];
 498
 499        if (!tracer)
 500                return -EINVAL;
 501
 502        if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts])
 503                return -EOVERFLOW;
 504
 505        switch (in->qualifier) {
 506        case bts_invalid:
 507                bts_set(raw, bts_from, 0);
 508                bts_set(raw, bts_to, 0);
 509                bts_set(raw, bts_flags, 0);
 510                break;
 511        case bts_branch:
 512                bts_set(raw, bts_from, in->variant.lbr.from);
 513                bts_set(raw, bts_to,   in->variant.lbr.to);
 514                bts_set(raw, bts_flags, 0);
 515                break;
 516        case bts_task_arrives:
 517        case bts_task_departs:
 518                bts_set(raw, bts_qual, (bts_escape | in->qualifier));
 519                bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies);
 520                bts_set(raw, bts_pid, in->variant.timestamp.pid);
 521                break;
 522        default:
 523                return -EINVAL;
 524        }
 525
 526        return ds_write(tracer->ds.context, ds_bts, raw,
 527                        ds_cfg.sizeof_rec[ds_bts]);
 528}
 529
 530
 531static void ds_write_config(struct ds_context *context,
 532                            struct ds_trace *cfg, enum ds_qualifier qual)
 533{
 534        unsigned char *ds = context->ds;
 535
 536        ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin);
 537        ds_set(ds, qual, ds_index, (unsigned long)cfg->top);
 538        ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end);
 539        ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith);
 540}
 541
 542static void ds_read_config(struct ds_context *context,
 543                           struct ds_trace *cfg, enum ds_qualifier qual)
 544{
 545        unsigned char *ds = context->ds;
 546
 547        cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base);
 548        cfg->top = (void *)ds_get(ds, qual, ds_index);
 549        cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum);
 550        cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold);
 551}
 552
 553static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
 554                             void *base, size_t size, size_t ith,
 555                             unsigned int flags) {
 556        unsigned long buffer, adj;
 557
 558        /* adjust the buffer address and size to meet alignment
 559         * constraints:
 560         * - buffer is double-word aligned
 561         * - size is multiple of record size
 562         *
 563         * We checked the size at the very beginning; we have enough
 564         * space to do the adjustment.
 565         */
 566        buffer = (unsigned long)base;
 567
 568        adj = ALIGN(buffer, DS_ALIGNMENT) - buffer;
 569        buffer += adj;
 570        size   -= adj;
 571
 572        trace->n = size / ds_cfg.sizeof_rec[qual];
 573        trace->size = ds_cfg.sizeof_rec[qual];
 574
 575        size = (trace->n * trace->size);
 576
 577        trace->begin = (void *)buffer;
 578        trace->top = trace->begin;
 579        trace->end = (void *)(buffer + size);
 580        /* The value for 'no threshold' is -1, which will set the
 581         * threshold outside of the buffer, just like we want it.
 582         */
 583        trace->ith = (void *)(buffer + size - ith);
 584
 585        trace->flags = flags;
 586}
 587
 588
 589static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
 590                      enum ds_qualifier qual, struct task_struct *task,
 591                      void *base, size_t size, size_t th, unsigned int flags)
 592{
 593        struct ds_context *context;
 594        int error;
 595
 596        error = -EINVAL;
 597        if (!base)
 598                goto out;
 599
 600        /* we require some space to do alignment adjustments below */
 601        error = -EINVAL;
 602        if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
 603                goto out;
 604
 605        if (th != (size_t)-1) {
 606                th *= ds_cfg.sizeof_rec[qual];
 607
 608                error = -EINVAL;
 609                if (size <= th)
 610                        goto out;
 611        }
 612
 613        tracer->buffer = base;
 614        tracer->size = size;
 615
 616        error = -ENOMEM;
 617        context = ds_get_context(task);
 618        if (!context)
 619                goto out;
 620        tracer->context = context;
 621
 622        ds_init_ds_trace(trace, qual, base, size, th, flags);
 623
 624        error = 0;
 625 out:
 626        return error;
 627}
 628
 629struct bts_tracer *ds_request_bts(struct task_struct *task,
 630                                  void *base, size_t size,
 631                                  bts_ovfl_callback_t ovfl, size_t th,
 632                                  unsigned int flags)
 633{
 634        struct bts_tracer *tracer;
 635        unsigned long irq;
 636        int error;
 637
 638        error = -EOPNOTSUPP;
 639        if (!ds_cfg.ctl[dsf_bts])
 640                goto out;
 641
 642        /* buffer overflow notification is not yet implemented */
 643        error = -EOPNOTSUPP;
 644        if (ovfl)
 645                goto out;
 646
 647        error = -ENOMEM;
 648        tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
 649        if (!tracer)
 650                goto out;
 651        tracer->ovfl = ovfl;
 652
 653        error = ds_request(&tracer->ds, &tracer->trace.ds,
 654                           ds_bts, task, base, size, th, flags);
 655        if (error < 0)
 656                goto out_tracer;
 657
 658
 659        spin_lock_irqsave(&ds_lock, irq);
 660
 661        error = -EPERM;
 662        if (!check_tracer(task))
 663                goto out_unlock;
 664        get_tracer(task);
 665
 666        error = -EPERM;
 667        if (tracer->ds.context->bts_master)
 668                goto out_put_tracer;
 669        tracer->ds.context->bts_master = tracer;
 670
 671        spin_unlock_irqrestore(&ds_lock, irq);
 672
 673
 674        tracer->trace.read  = bts_read;
 675        tracer->trace.write = bts_write;
 676
 677        ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
 678        ds_resume_bts(tracer);
 679
 680        return tracer;
 681
 682 out_put_tracer:
 683        put_tracer(task);
 684 out_unlock:
 685        spin_unlock_irqrestore(&ds_lock, irq);
 686        ds_put_context(tracer->ds.context);
 687 out_tracer:
 688        kfree(tracer);
 689 out:
 690        return ERR_PTR(error);
 691}
 692
 693struct pebs_tracer *ds_request_pebs(struct task_struct *task,
 694                                    void *base, size_t size,
 695                                    pebs_ovfl_callback_t ovfl, size_t th,
 696                                    unsigned int flags)
 697{
 698        struct pebs_tracer *tracer;
 699        unsigned long irq;
 700        int error;
 701
 702        /* buffer overflow notification is not yet implemented */
 703        error = -EOPNOTSUPP;
 704        if (ovfl)
 705                goto out;
 706
 707        error = -ENOMEM;
 708        tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
 709        if (!tracer)
 710                goto out;
 711        tracer->ovfl = ovfl;
 712
 713        error = ds_request(&tracer->ds, &tracer->trace.ds,
 714                           ds_pebs, task, base, size, th, flags);
 715        if (error < 0)
 716                goto out_tracer;
 717
 718        spin_lock_irqsave(&ds_lock, irq);
 719
 720        error = -EPERM;
 721        if (!check_tracer(task))
 722                goto out_unlock;
 723        get_tracer(task);
 724
 725        error = -EPERM;
 726        if (tracer->ds.context->pebs_master)
 727                goto out_put_tracer;
 728        tracer->ds.context->pebs_master = tracer;
 729
 730        spin_unlock_irqrestore(&ds_lock, irq);
 731
 732        ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
 733        ds_resume_pebs(tracer);
 734
 735        return tracer;
 736
 737 out_put_tracer:
 738        put_tracer(task);
 739 out_unlock:
 740        spin_unlock_irqrestore(&ds_lock, irq);
 741        ds_put_context(tracer->ds.context);
 742 out_tracer:
 743        kfree(tracer);
 744 out:
 745        return ERR_PTR(error);
 746}
 747
 748void ds_release_bts(struct bts_tracer *tracer)
 749{
 750        if (!tracer)
 751                return;
 752
 753        ds_suspend_bts(tracer);
 754
 755        WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
 756        tracer->ds.context->bts_master = NULL;
 757
 758        put_tracer(tracer->ds.context->task);
 759        ds_put_context(tracer->ds.context);
 760
 761        kfree(tracer);
 762}
 763
 764void ds_suspend_bts(struct bts_tracer *tracer)
 765{
 766        struct task_struct *task;
 767
 768        if (!tracer)
 769                return;
 770
 771        task = tracer->ds.context->task;
 772
 773        if (!task || (task == current))
 774                update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL);
 775
 776        if (task) {
 777                task->thread.debugctlmsr &= ~BTS_CONTROL;
 778
 779                if (!task->thread.debugctlmsr)
 780                        clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
 781        }
 782}
 783
 784void ds_resume_bts(struct bts_tracer *tracer)
 785{
 786        struct task_struct *task;
 787        unsigned long control;
 788
 789        if (!tracer)
 790                return;
 791
 792        task = tracer->ds.context->task;
 793
 794        control = ds_cfg.ctl[dsf_bts];
 795        if (!(tracer->trace.ds.flags & BTS_KERNEL))
 796                control |= ds_cfg.ctl[dsf_bts_kernel];
 797        if (!(tracer->trace.ds.flags & BTS_USER))
 798                control |= ds_cfg.ctl[dsf_bts_user];
 799
 800        if (task) {
 801                task->thread.debugctlmsr |= control;
 802                set_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
 803        }
 804
 805        if (!task || (task == current))
 806                update_debugctlmsr(get_debugctlmsr() | control);
 807}
 808
 809void ds_release_pebs(struct pebs_tracer *tracer)
 810{
 811        if (!tracer)
 812                return;
 813
 814        ds_suspend_pebs(tracer);
 815
 816        WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
 817        tracer->ds.context->pebs_master = NULL;
 818
 819        put_tracer(tracer->ds.context->task);
 820        ds_put_context(tracer->ds.context);
 821
 822        kfree(tracer);
 823}
 824
 825void ds_suspend_pebs(struct pebs_tracer *tracer)
 826{
 827
 828}
 829
 830void ds_resume_pebs(struct pebs_tracer *tracer)
 831{
 832
 833}
 834
 835const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
 836{
 837        if (!tracer)
 838                return NULL;
 839
 840        ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
 841        return &tracer->trace;
 842}
 843
 844const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer)
 845{
 846        if (!tracer)
 847                return NULL;
 848
 849        ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
 850        tracer->trace.reset_value =
 851                *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8));
 852
 853        return &tracer->trace;
 854}
 855
 856int ds_reset_bts(struct bts_tracer *tracer)
 857{
 858        if (!tracer)
 859                return -EINVAL;
 860
 861        tracer->trace.ds.top = tracer->trace.ds.begin;
 862
 863        ds_set(tracer->ds.context->ds, ds_bts, ds_index,
 864               (unsigned long)tracer->trace.ds.top);
 865
 866        return 0;
 867}
 868
 869int ds_reset_pebs(struct pebs_tracer *tracer)
 870{
 871        if (!tracer)
 872                return -EINVAL;
 873
 874        tracer->trace.ds.top = tracer->trace.ds.begin;
 875
 876        ds_set(tracer->ds.context->ds, ds_bts, ds_index,
 877               (unsigned long)tracer->trace.ds.top);
 878
 879        return 0;
 880}
 881
 882int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
 883{
 884        if (!tracer)
 885                return -EINVAL;
 886
 887        *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value;
 888
 889        return 0;
 890}
 891
 892static const struct ds_configuration ds_cfg_netburst = {
 893        .name = "Netburst",
 894        .ctl[dsf_bts]           = (1 << 2) | (1 << 3),
 895        .ctl[dsf_bts_kernel]    = (1 << 5),
 896        .ctl[dsf_bts_user]      = (1 << 6),
 897
 898        .sizeof_field           = sizeof(long),
 899        .sizeof_rec[ds_bts]     = sizeof(long) * 3,
 900#ifdef __i386__
 901        .sizeof_rec[ds_pebs]    = sizeof(long) * 10,
 902#else
 903        .sizeof_rec[ds_pebs]    = sizeof(long) * 18,
 904#endif
 905};
 906static const struct ds_configuration ds_cfg_pentium_m = {
 907        .name = "Pentium M",
 908        .ctl[dsf_bts]           = (1 << 6) | (1 << 7),
 909
 910        .sizeof_field           = sizeof(long),
 911        .sizeof_rec[ds_bts]     = sizeof(long) * 3,
 912#ifdef __i386__
 913        .sizeof_rec[ds_pebs]    = sizeof(long) * 10,
 914#else
 915        .sizeof_rec[ds_pebs]    = sizeof(long) * 18,
 916#endif
 917};
 918static const struct ds_configuration ds_cfg_core2_atom = {
 919        .name = "Core 2/Atom",
 920        .ctl[dsf_bts]           = (1 << 6) | (1 << 7),
 921        .ctl[dsf_bts_kernel]    = (1 << 9),
 922        .ctl[dsf_bts_user]      = (1 << 10),
 923
 924        .sizeof_field           = 8,
 925        .sizeof_rec[ds_bts]     = 8 * 3,
 926        .sizeof_rec[ds_pebs]    = 8 * 18,
 927};
 928
 929static void
 930ds_configure(const struct ds_configuration *cfg)
 931{
 932        memset(&ds_cfg, 0, sizeof(ds_cfg));
 933        ds_cfg = *cfg;
 934
 935        printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name);
 936
 937        if (!cpu_has_bts) {
 938                ds_cfg.ctl[dsf_bts] = 0;
 939                printk(KERN_INFO "[ds] bts not available\n");
 940        }
 941        if (!cpu_has_pebs)
 942                printk(KERN_INFO "[ds] pebs not available\n");
 943
 944        WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field));
 945}
 946
 947void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
 948{
 949        switch (c->x86) {
 950        case 0x6:
 951                switch (c->x86_model) {
 952                case 0x9:
 953                case 0xd: /* Pentium M */
 954                        ds_configure(&ds_cfg_pentium_m);
 955                        break;
 956                case 0xf:
 957                case 0x17: /* Core2 */
 958                case 0x1c: /* Atom */
 959                        ds_configure(&ds_cfg_core2_atom);
 960                        break;
 961                case 0x1a: /* i7 */
 962                default:
 963                        /* sorry, don't know about them */
 964                        break;
 965                }
 966                break;
 967        case 0xf:
 968                switch (c->x86_model) {
 969                case 0x0:
 970                case 0x1:
 971                case 0x2: /* Netburst */
 972                        ds_configure(&ds_cfg_netburst);
 973                        break;
 974                default:
 975                        /* sorry, don't know about them */
 976                        break;
 977                }
 978                break;
 979        default:
 980                /* sorry, don't know about them */
 981                break;
 982        }
 983}
 984
 985/*
 986 * Change the DS configuration from tracing prev to tracing next.
 987 */
 988void ds_switch_to(struct task_struct *prev, struct task_struct *next)
 989{
 990        struct ds_context *prev_ctx = prev->thread.ds_ctx;
 991        struct ds_context *next_ctx = next->thread.ds_ctx;
 992
 993        if (prev_ctx) {
 994                update_debugctlmsr(0);
 995
 996                if (prev_ctx->bts_master &&
 997                    (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
 998                        struct bts_struct ts = {
 999                                .qualifier = bts_task_departs,
1000                                .variant.timestamp.jiffies = jiffies_64,
1001                                .variant.timestamp.pid = prev->pid
1002                        };
1003                        bts_write(prev_ctx->bts_master, &ts);
1004                }
1005        }
1006
1007        if (next_ctx) {
1008                if (next_ctx->bts_master &&
1009                    (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
1010                        struct bts_struct ts = {
1011                                .qualifier = bts_task_arrives,
1012                                .variant.timestamp.jiffies = jiffies_64,
1013                                .variant.timestamp.pid = next->pid
1014                        };
1015                        bts_write(next_ctx->bts_master, &ts);
1016                }
1017
1018                wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds);
1019        }
1020
1021        update_debugctlmsr(next->thread.debugctlmsr);
1022}
1023
1024void ds_copy_thread(struct task_struct *tsk, struct task_struct *father)
1025{
1026        clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR);
1027        tsk->thread.ds_ctx = NULL;
1028}
1029
/*
 * DS hook for an exiting thread.
 *
 * tsk: the exiting thread
 */
void ds_exit_thread(struct task_struct *tsk)
{
	/* intentionally empty: nothing is done here */
}
1033
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.