linux/Documentation/accounting/getdelays.c
<<
>>
Prefs
   1/* getdelays.c
   2 *
   3 * Utility to get per-pid and per-tgid delay accounting statistics
   4 * Also illustrates usage of the taskstats interface
   5 *
   6 * Copyright (C) Shailabh Nagar, IBM Corp. 2005
   7 * Copyright (C) Balbir Singh, IBM Corp. 2006
   8 * Copyright (c) Jay Lan, SGI. 2006
   9 *
  10 * Compile with
  11 *      gcc -I/usr/src/linux/include getdelays.c -o getdelays
  12 */
  13
  14#include <stdio.h>
  15#include <stdlib.h>
  16#include <errno.h>
  17#include <unistd.h>
  18#include <poll.h>
  19#include <string.h>
  20#include <fcntl.h>
  21#include <sys/types.h>
  22#include <sys/stat.h>
  23#include <sys/socket.h>
  24#include <signal.h>
  25
  26#include <linux/genetlink.h>
  27#include <linux/taskstats.h>
  28#include <linux/cgroupstats.h>
  29
  30/*
  31 * Generic macros for dealing with netlink sockets. Might be duplicated
  32 * elsewhere. It is recommended that commercial grade applications use
  33 * libnl or libnetlink and use the interfaces provided by the library
  34 */
  35#define GENLMSG_DATA(glh)       ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN))
  36#define GENLMSG_PAYLOAD(glh)    (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)
  37#define NLA_DATA(na)            ((void *)((char*)(na) + NLA_HDRLEN))
  38#define NLA_PAYLOAD(len)        (len - NLA_HDRLEN)
  39
  40#define err(code, fmt, arg...)                  \
  41        do {                                    \
  42                fprintf(stderr, fmt, ##arg);    \
  43                exit(code);                     \
  44        } while (0)
  45
  46int done;
  47int rcvbufsz;
  48char name[100];
  49int dbg;
  50int print_delays;
  51int print_io_accounting;
  52int print_task_context_switch_counts;
  53__u64 stime, utime;
  54
  55#define PRINTF(fmt, arg...) {                   \
  56            if (dbg) {                          \
  57                printf(fmt, ##arg);             \
  58            }                                   \
  59        }
  60
  61/* Maximum size of response requested or message sent */
  62#define MAX_MSG_SIZE    1024
  63/* Maximum number of cpus expected to be specified in a cpumask */
  64#define MAX_CPUS        32
  65
  66struct msgtemplate {
  67        struct nlmsghdr n;
  68        struct genlmsghdr g;
  69        char buf[MAX_MSG_SIZE];
  70};
  71
  72char cpumask[100+6*MAX_CPUS];
  73
  74static void usage(void)
  75{
  76        fprintf(stderr, "getdelays [-dilv] [-w logfile] [-r bufsize] "
  77                        "[-m cpumask] [-t tgid] [-p pid]\n");
  78        fprintf(stderr, "  -d: print delayacct stats\n");
  79        fprintf(stderr, "  -i: print IO accounting (works only with -p)\n");
  80        fprintf(stderr, "  -l: listen forever\n");
  81        fprintf(stderr, "  -v: debug on\n");
  82        fprintf(stderr, "  -C: container path\n");
  83}
  84
  85/*
  86 * Create a raw netlink socket and bind
  87 */
  88static int create_nl_socket(int protocol)
  89{
  90        int fd;
  91        struct sockaddr_nl local;
  92
  93        fd = socket(AF_NETLINK, SOCK_RAW, protocol);
  94        if (fd < 0)
  95                return -1;
  96
  97        if (rcvbufsz)
  98                if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
  99                                &rcvbufsz, sizeof(rcvbufsz)) < 0) {
 100                        fprintf(stderr, "Unable to set socket rcv buf size "
 101                                        "to %d\n",
 102                                rcvbufsz);
 103                        return -1;
 104                }
 105
 106        memset(&local, 0, sizeof(local));
 107        local.nl_family = AF_NETLINK;
 108
 109        if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0)
 110                goto error;
 111
 112        return fd;
 113error:
 114        close(fd);
 115        return -1;
 116}
 117
 118
 119int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
 120             __u8 genl_cmd, __u16 nla_type,
 121             void *nla_data, int nla_len)
 122{
 123        struct nlattr *na;
 124        struct sockaddr_nl nladdr;
 125        int r, buflen;
 126        char *buf;
 127
 128        struct msgtemplate msg;
 129
 130        msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
 131        msg.n.nlmsg_type = nlmsg_type;
 132        msg.n.nlmsg_flags = NLM_F_REQUEST;
 133        msg.n.nlmsg_seq = 0;
 134        msg.n.nlmsg_pid = nlmsg_pid;
 135        msg.g.cmd = genl_cmd;
 136        msg.g.version = 0x1;
 137        na = (struct nlattr *) GENLMSG_DATA(&msg);
 138        na->nla_type = nla_type;
 139        na->nla_len = nla_len + 1 + NLA_HDRLEN;
 140        memcpy(NLA_DATA(na), nla_data, nla_len);
 141        msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
 142
 143        buf = (char *) &msg;
 144        buflen = msg.n.nlmsg_len ;
 145        memset(&nladdr, 0, sizeof(nladdr));
 146        nladdr.nl_family = AF_NETLINK;
 147        while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr,
 148                           sizeof(nladdr))) < buflen) {
 149                if (r > 0) {
 150                        buf += r;
 151                        buflen -= r;
 152                } else if (errno != EAGAIN)
 153                        return -1;
 154        }
 155        return 0;
 156}
 157
 158
 159/*
 160 * Probe the controller in genetlink to find the family id
 161 * for the TASKSTATS family
 162 */
 163int get_family_id(int sd)
 164{
 165        struct {
 166                struct nlmsghdr n;
 167                struct genlmsghdr g;
 168                char buf[256];
 169        } ans;
 170
 171        int id = 0, rc;
 172        struct nlattr *na;
 173        int rep_len;
 174
 175        strcpy(name, TASKSTATS_GENL_NAME);
 176        rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY,
 177                        CTRL_ATTR_FAMILY_NAME, (void *)name,
 178                        strlen(TASKSTATS_GENL_NAME)+1);
 179
 180        rep_len = recv(sd, &ans, sizeof(ans), 0);
 181        if (ans.n.nlmsg_type == NLMSG_ERROR ||
 182            (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len))
 183                return 0;
 184
 185        na = (struct nlattr *) GENLMSG_DATA(&ans);
 186        na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len));
 187        if (na->nla_type == CTRL_ATTR_FAMILY_ID) {
 188                id = *(__u16 *) NLA_DATA(na);
 189        }
 190        return id;
 191}
 192
 193void print_delayacct(struct taskstats *t)
 194{
 195        printf("\n\nCPU   %15s%15s%15s%15s\n"
 196               "      %15llu%15llu%15llu%15llu\n"
 197               "IO    %15s%15s\n"
 198               "      %15llu%15llu\n"
 199               "SWAP  %15s%15s\n"
 200               "      %15llu%15llu\n"
 201               "RECLAIM  %12s%15s\n"
 202               "      %15llu%15llu\n",
 203               "count", "real total", "virtual total", "delay total",
 204               (unsigned long long)t->cpu_count,
 205               (unsigned long long)t->cpu_run_real_total,
 206               (unsigned long long)t->cpu_run_virtual_total,
 207               (unsigned long long)t->cpu_delay_total,
 208               "count", "delay total",
 209               (unsigned long long)t->blkio_count,
 210               (unsigned long long)t->blkio_delay_total,
 211               "count", "delay total",
 212               (unsigned long long)t->swapin_count,
 213               (unsigned long long)t->swapin_delay_total,
 214               "count", "delay total",
 215               (unsigned long long)t->freepages_count,
 216               (unsigned long long)t->freepages_delay_total);
 217}
 218
 219void task_context_switch_counts(struct taskstats *t)
 220{
 221        printf("\n\nTask   %15s%15s\n"
 222               "       %15llu%15llu\n",
 223               "voluntary", "nonvoluntary",
 224               (unsigned long long)t->nvcsw, (unsigned long long)t->nivcsw);
 225}
 226
 227void print_cgroupstats(struct cgroupstats *c)
 228{
 229        printf("sleeping %llu, blocked %llu, running %llu, stopped %llu, "
 230                "uninterruptible %llu\n", (unsigned long long)c->nr_sleeping,
 231                (unsigned long long)c->nr_io_wait,
 232                (unsigned long long)c->nr_running,
 233                (unsigned long long)c->nr_stopped,
 234                (unsigned long long)c->nr_uninterruptible);
 235}
 236
 237
 238void print_ioacct(struct taskstats *t)
 239{
 240        printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n",
 241                t->ac_comm,
 242                (unsigned long long)t->read_bytes,
 243                (unsigned long long)t->write_bytes,
 244                (unsigned long long)t->cancelled_write_bytes);
 245}
 246
 247int main(int argc, char *argv[])
 248{
 249        int c, rc, rep_len, aggr_len, len2, cmd_type;
 250        __u16 id;
 251        __u32 mypid;
 252
 253        struct nlattr *na;
 254        int nl_sd = -1;
 255        int len = 0;
 256        pid_t tid = 0;
 257        pid_t rtid = 0;
 258
 259        int fd = 0;
 260        int count = 0;
 261        int write_file = 0;
 262        int maskset = 0;
 263        char *logfile = NULL;
 264        int loop = 0;
 265        int containerset = 0;
 266        char containerpath[1024];
 267        int cfd = 0;
 268
 269        struct msgtemplate msg;
 270
 271        while (1) {
 272                c = getopt(argc, argv, "qdiw:r:m:t:p:vlC:");
 273                if (c < 0)
 274                        break;
 275
 276                switch (c) {
 277                case 'd':
 278                        printf("print delayacct stats ON\n");
 279                        print_delays = 1;
 280                        break;
 281                case 'i':
 282                        printf("printing IO accounting\n");
 283                        print_io_accounting = 1;
 284                        break;
 285                case 'q':
 286                        printf("printing task/process context switch rates\n");
 287                        print_task_context_switch_counts = 1;
 288                        break;
 289                case 'C':
 290                        containerset = 1;
 291                        strncpy(containerpath, optarg, strlen(optarg) + 1);
 292                        break;
 293                case 'w':
 294                        logfile = strdup(optarg);
 295                        printf("write to file %s\n", logfile);
 296                        write_file = 1;
 297                        break;
 298                case 'r':
 299                        rcvbufsz = atoi(optarg);
 300                        printf("receive buf size %d\n", rcvbufsz);
 301                        if (rcvbufsz < 0)
 302                                err(1, "Invalid rcv buf size\n");
 303                        break;
 304                case 'm':
 305                        strncpy(cpumask, optarg, sizeof(cpumask));
 306                        maskset = 1;
 307                        printf("cpumask %s maskset %d\n", cpumask, maskset);
 308                        break;
 309                case 't':
 310                        tid = atoi(optarg);
 311                        if (!tid)
 312                                err(1, "Invalid tgid\n");
 313                        cmd_type = TASKSTATS_CMD_ATTR_TGID;
 314                        break;
 315                case 'p':
 316                        tid = atoi(optarg);
 317                        if (!tid)
 318                                err(1, "Invalid pid\n");
 319                        cmd_type = TASKSTATS_CMD_ATTR_PID;
 320                        break;
 321                case 'v':
 322                        printf("debug on\n");
 323                        dbg = 1;
 324                        break;
 325                case 'l':
 326                        printf("listen forever\n");
 327                        loop = 1;
 328                        break;
 329                default:
 330                        usage();
 331                        exit(-1);
 332                }
 333        }
 334
 335        if (write_file) {
 336                fd = open(logfile, O_WRONLY | O_CREAT | O_TRUNC,
 337                          S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
 338                if (fd == -1) {
 339                        perror("Cannot open output file\n");
 340                        exit(1);
 341                }
 342        }
 343
 344        if ((nl_sd = create_nl_socket(NETLINK_GENERIC)) < 0)
 345                err(1, "error creating Netlink socket\n");
 346
 347
 348        mypid = getpid();
 349        id = get_family_id(nl_sd);
 350        if (!id) {
 351                fprintf(stderr, "Error getting family id, errno %d\n", errno);
 352                goto err;
 353        }
 354        PRINTF("family id %d\n", id);
 355
 356        if (maskset) {
 357                rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
 358                              TASKSTATS_CMD_ATTR_REGISTER_CPUMASK,
 359                              &cpumask, strlen(cpumask) + 1);
 360                PRINTF("Sent register cpumask, retval %d\n", rc);
 361                if (rc < 0) {
 362                        fprintf(stderr, "error sending register cpumask\n");
 363                        goto err;
 364                }
 365        }
 366
 367        if (tid && containerset) {
 368                fprintf(stderr, "Select either -t or -C, not both\n");
 369                goto err;
 370        }
 371
 372        if (tid) {
 373                rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
 374                              cmd_type, &tid, sizeof(__u32));
 375                PRINTF("Sent pid/tgid, retval %d\n", rc);
 376                if (rc < 0) {
 377                        fprintf(stderr, "error sending tid/tgid cmd\n");
 378                        goto done;
 379                }
 380        }
 381
 382        if (containerset) {
 383                cfd = open(containerpath, O_RDONLY);
 384                if (cfd < 0) {
 385                        perror("error opening container file");
 386                        goto err;
 387                }
 388                rc = send_cmd(nl_sd, id, mypid, CGROUPSTATS_CMD_GET,
 389                              CGROUPSTATS_CMD_ATTR_FD, &cfd, sizeof(__u32));
 390                if (rc < 0) {
 391                        perror("error sending cgroupstats command");
 392                        goto err;
 393                }
 394        }
 395        if (!maskset && !tid && !containerset) {
 396                usage();
 397                goto err;
 398        }
 399
 400        do {
 401                int i;
 402
 403                rep_len = recv(nl_sd, &msg, sizeof(msg), 0);
 404                PRINTF("received %d bytes\n", rep_len);
 405
 406                if (rep_len < 0) {
 407                        fprintf(stderr, "nonfatal reply error: errno %d\n",
 408                                errno);
 409                        continue;
 410                }
 411                if (msg.n.nlmsg_type == NLMSG_ERROR ||
 412                    !NLMSG_OK((&msg.n), rep_len)) {
 413                        struct nlmsgerr *err = NLMSG_DATA(&msg);
 414                        fprintf(stderr, "fatal reply error,  errno %d\n",
 415                                err->error);
 416                        goto done;
 417                }
 418
 419                PRINTF("nlmsghdr size=%zu, nlmsg_len=%d, rep_len=%d\n",
 420                       sizeof(struct nlmsghdr), msg.n.nlmsg_len, rep_len);
 421
 422
 423                rep_len = GENLMSG_PAYLOAD(&msg.n);
 424
 425                na = (struct nlattr *) GENLMSG_DATA(&msg);
 426                len = 0;
 427                i = 0;
 428                while (len < rep_len) {
 429                        len += NLA_ALIGN(na->nla_len);
 430                        switch (na->nla_type) {
 431                        case TASKSTATS_TYPE_AGGR_TGID:
 432                                /* Fall through */
 433                        case TASKSTATS_TYPE_AGGR_PID:
 434                                aggr_len = NLA_PAYLOAD(na->nla_len);
 435                                len2 = 0;
 436                                /* For nested attributes, na follows */
 437                                na = (struct nlattr *) NLA_DATA(na);
 438                                done = 0;
 439                                while (len2 < aggr_len) {
 440                                        switch (na->nla_type) {
 441                                        case TASKSTATS_TYPE_PID:
 442                                                rtid = *(int *) NLA_DATA(na);
 443                                                if (print_delays)
 444                                                        printf("PID\t%d\n", rtid);
 445                                                break;
 446                                        case TASKSTATS_TYPE_TGID:
 447                                                rtid = *(int *) NLA_DATA(na);
 448                                                if (print_delays)
 449                                                        printf("TGID\t%d\n", rtid);
 450                                                break;
 451                                        case TASKSTATS_TYPE_STATS:
 452                                                count++;
 453                                                if (print_delays)
 454                                                        print_delayacct((struct taskstats *) NLA_DATA(na));
 455                                                if (print_io_accounting)
 456                                                        print_ioacct((struct taskstats *) NLA_DATA(na));
 457                                                if (print_task_context_switch_counts)
 458                                                        task_context_switch_counts((struct taskstats *) NLA_DATA(na));
 459                                                if (fd) {
 460                                                        if (write(fd, NLA_DATA(na), na->nla_len) < 0) {
 461                                                                err(1,"write error\n");
 462                                                        }
 463                                                }
 464                                                if (!loop)
 465                                                        goto done;
 466                                                break;
 467                                        default:
 468                                                fprintf(stderr, "Unknown nested"
 469                                                        " nla_type %d\n",
 470                                                        na->nla_type);
 471                                                break;
 472                                        }
 473                                        len2 += NLA_ALIGN(na->nla_len);
 474                                        na = (struct nlattr *) ((char *) na + len2);
 475                                }
 476                                break;
 477
 478                        case CGROUPSTATS_TYPE_CGROUP_STATS:
 479                                print_cgroupstats(NLA_DATA(na));
 480                                break;
 481                        default:
 482                                fprintf(stderr, "Unknown nla_type %d\n",
 483                                        na->nla_type);
 484                                break;
 485                        }
 486                        na = (struct nlattr *) (GENLMSG_DATA(&msg) + len);
 487                }
 488        } while (loop);
 489done:
 490        if (maskset) {
 491                rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
 492                              TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK,
 493                              &cpumask, strlen(cpumask) + 1);
 494                printf("Sent deregister mask, retval %d\n", rc);
 495                if (rc < 0)
 496                        err(rc, "error sending deregister cpumask\n");
 497        }
 498err:
 499        close(nl_sd);
 500        if (fd)
 501                close(fd);
 502        if (cfd)
 503                close(cfd);
 504        return 0;
 505}
 506