|  | /* Copyright (C) 2021-2023 Free Software Foundation, Inc. | 
|  | Contributed by Oracle. | 
|  |  | 
|  | This file is part of GNU Binutils. | 
|  |  | 
|  | This program is free software; you can redistribute it and/or modify | 
|  | it under the terms of the GNU General Public License as published by | 
|  | the Free Software Foundation; either version 3, or (at your option) | 
|  | any later version. | 
|  |  | 
|  | This program is distributed in the hope that it will be useful, | 
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|  | GNU General Public License for more details. | 
|  |  | 
|  | You should have received a copy of the GNU General Public License | 
|  | along with this program; if not, write to the Free Software | 
|  | Foundation, 51 Franklin Street - Fifth Floor, Boston, | 
|  | MA 02110-1301, USA.  */ | 
|  |  | 
|  | #include <errno.h> | 
|  | #include <unistd.h> | 
|  | #include <fcntl.h> | 
|  | #include <sys/mman.h> | 
|  | #include <sys/ioctl.h> | 
|  | #include <sys/syscall.h> | 
|  | #include <linux/perf_event.h> | 
|  |  | 
|  | #include "hwcdrv.h" | 
|  |  | 
|  | /*---------------------------------------------------------------------------*/ | 
|  | /* macros */ | 
|  | #define IS_GLOBAL /* Mark global symbols */ | 
|  |  | 
|  | #include "cpuid.c" /* ftns for identifying a chip */ | 
|  |  | 
|  | static hdrv_pcbe_api_t hdrv_pcbe_core_api; | 
|  | static hdrv_pcbe_api_t hdrv_pcbe_opteron_api; | 
|  | static hdrv_pcbe_api_t *hdrv_pcbe_drivers[] = { | 
|  | &hdrv_pcbe_core_api, | 
|  | &hdrv_pcbe_opteron_api, | 
|  | NULL | 
|  | }; | 
|  | #include "opteron_pcbe.c" /* CPU-specific code */ | 
|  | #include "core_pcbe.c" /* CPU-specific code  */ | 
|  |  | 
|  | extern hwcdrv_api_t hwcdrv_pcl_api; | 
|  | IS_GLOBAL hwcdrv_api_t *hwcdrv_drivers[] = { | 
|  | &hwcdrv_pcl_api, | 
|  | NULL | 
|  | }; | 
|  |  | 
|  | /*---------------------------------------------------------------------------*/ | 
|  |  | 
|  | /* utils for drivers */ | 
|  | IS_GLOBAL int | 
|  | hwcdrv_assign_all_regnos (Hwcentry* entries[], unsigned numctrs) | 
|  | { | 
|  | unsigned int pmc_assigned[MAX_PICS]; | 
|  | unsigned idx; | 
|  | for (int ii = 0; ii < MAX_PICS; ii++) | 
|  | pmc_assigned[ii] = 0; | 
|  |  | 
|  | /* assign the HWCs that we already know about */ | 
|  | for (idx = 0; idx < numctrs; idx++) | 
|  | { | 
|  | regno_t regno = entries[idx]->reg_num; | 
|  | if (regno == REGNO_ANY) | 
|  | { | 
|  | /* check to see if list of possible registers only contains one entry */ | 
|  | regno = REG_LIST_SINGLE_VALID_ENTRY (entries[idx]->reg_list); | 
|  | } | 
|  | if (regno != REGNO_ANY) | 
|  | { | 
|  | if (regno < 0 || regno >= MAX_PICS || !regno_is_valid (entries[idx], regno)) | 
|  | { | 
|  | logerr (GTXT ("For counter #%d, register %d is out of range\n"), idx + 1, regno); /*!*/ | 
|  | return HWCFUNCS_ERROR_HWCARGS; | 
|  | } | 
|  | TprintfT (DBG_LT2, "hwcfuncs_assign_regnos(): preselected: idx=%d, regno=%d\n", idx, regno); | 
|  | entries[idx]->reg_num = regno; /* assigning back to entries */ | 
|  | pmc_assigned[regno] = 1; | 
|  | } | 
|  | } | 
|  |  | 
|  | /* assign HWCs that are currently REGNO_ANY */ | 
|  | for (idx = 0; idx < numctrs; idx++) | 
|  | { | 
|  | if (entries[idx]->reg_num == REGNO_ANY) | 
|  | { | 
|  | int assigned = 0; | 
|  | regno_t *reg_list = entries[idx]->reg_list; | 
|  | for (; reg_list && *reg_list != REGNO_ANY; reg_list++) | 
|  | { | 
|  | regno_t regno = *reg_list; | 
|  | if (regno < 0 || regno >= MAX_PICS) | 
|  | { | 
|  | logerr (GTXT ("For counter #%d, register %d is out of range\n"), idx + 1, regno); /*!*/ | 
|  | return HWCFUNCS_ERROR_HWCARGS; | 
|  | } | 
|  | if (pmc_assigned[regno] == 0) | 
|  | { | 
|  | TprintfT (DBG_LT2, "hwcfuncs_assign_regnos(): assigned:   idx=%d, regno=%d\n", idx, regno); | 
|  | entries[idx]->reg_num = regno; /* assigning back to entries */ | 
|  | pmc_assigned[regno] = 1; | 
|  | assigned = 1; | 
|  | break; | 
|  | } | 
|  | } | 
|  | if (!assigned) | 
|  | { | 
|  | logerr (GTXT ("Counter '%s' could not be bound to a register\n"), | 
|  | entries[idx]->name ? entries[idx]->name : "<NULL>"); | 
|  | return HWCFUNCS_ERROR_HWCARGS; | 
|  | } | 
|  | } | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | IS_GLOBAL int | 
|  | hwcdrv_lookup_cpuver (const char * cpcN_cciname) | 
|  | { | 
|  | libcpc2_cpu_lookup_t *plookup; | 
|  | static libcpc2_cpu_lookup_t cpu_table[] = { | 
|  | LIBCPC2_CPU_LOOKUP_LIST | 
|  | }; | 
|  | if (cpcN_cciname == NULL) | 
|  | return CPUVER_UNDEFINED; | 
|  |  | 
|  | /* search table for name */ | 
|  | for (plookup = cpu_table; plookup->cpc2_cciname; plookup++) | 
|  | { | 
|  | int n = strlen (plookup->cpc2_cciname); | 
|  | if (!strncmp (plookup->cpc2_cciname, cpcN_cciname, n)) | 
|  | return plookup->cpc2_cpuver; | 
|  | } | 
|  | /* unknown, but does have a descriptive string */ | 
|  | TprintfT (DBG_LT0, "hwcfuncs: CPC2: WARNING: Id of processor '%s' " | 
|  | "could not be determined\n", | 
|  | cpcN_cciname); | 
|  | return CPUVER_GENERIC; | 
|  | } | 
|  |  | 
|  | /*---------------------------------------------------------------------------*/ | 
|  | /* utils to generate x86 register definitions on Linux */ | 
|  |  | 
|  | /* | 
|  | *  This code is structured as though we're going to initialize the | 
|  | *  HWC by writing the Intel MSR register directly.  That is, we | 
|  | *  assume the lowest 16 bits of the event number will have the event | 
|  | *  and that higher bits will set attributes. | 
|  | * | 
|  | *  While SPARC is different, we can nonetheless use basically the | 
|  | *  same "x86"-named functions: | 
|  | * | 
|  | *  - The event code will still be 16 bits.  It will still | 
|  | *    be in the lowest 16 bits of the event number.  Though | 
|  | *    perf_event_code() on SPARC will expect those bits to be | 
|  | *    shifted, hwcdrv_pcl.c can easily perform that shift. | 
|  | * | 
|  | *  - On SPARC we support only two attributes, "user" and "system", | 
|  | *    which hwcdrv_pcl.c already converts to the "exclude_user" | 
|  | *    and "exclude_kernel" fields expected by perf_event_open(). | 
|  | *    "user" and "system" are stored in event bits 16 and 17. | 
|  | *    For M8, a 4-bit mask of supported PICs is stored in bits [23:20]. | 
|  | */ | 
|  |  | 
|  | IS_GLOBAL hwcdrv_get_eventnum_fn_t *hwcdrv_get_x86_eventnum = 0; | 
|  |  | 
|  | static const attr_info_t perfctr_sparc_attrs[] = { | 
|  | {NTXT ("user"),   0, 0x01, 16}, //usr | 
|  | {NTXT ("system"), 0, 0x01, 17}, //os | 
|  | {NULL, 0, 0x00, 0}, | 
|  | }; | 
|  | static const attr_info_t perfctr_x64_attrs[] = {/* ok for Core2 & later */ | 
|  | {NTXT ("umask"),  0, 0xff, 8}, | 
|  | {NTXT ("user"),   0, 0x01, 16}, //usr | 
|  | //{NTXT("nouser"),  1, 0x01, 16}, //usr (inverted) | 
|  | {NTXT ("system"), 0, 0x01, 17}, //os | 
|  | {NTXT ("edge"),   0, 0x01, 18}, | 
|  | {NTXT ("pc"),     0, 0x01, 19}, | 
|  | {NTXT ("inv"),    0, 0x01, 23}, | 
|  | {NTXT ("cmask"),  0, 0xff, 24}, | 
|  | {NULL, 0, 0x00, 0}, | 
|  | }; | 
|  | const attr_info_t *perfctr_attrs_table = perfctr_x64_attrs; | 
|  |  | 
|  | static const eventsel_t perfctr_evntsel_enable_bits = (0x01 << 16) | /* usr */ | 
|  | // (0xff <<  0) |   /* event*/ | 
|  | // (0xff <<  8) |   /* umask */ | 
|  | // (0x01 << 17) |   /* os */ | 
|  | // (0x01 << 18) |   /* edge */ | 
|  | // (0x01 << 19) |   /* pc */ | 
|  | (0x01 << 20) |      /* int */ | 
|  | // (0x01 << 21) |   /* reserved */ | 
|  | (0x01 << 22) |      /* enable */ | 
|  | // (0x01 << 23) |   /* inv */ | 
|  | // (0xff << 24) |   /* cmask */ | 
|  | 0; | 
|  |  | 
|  | static int | 
|  | myperfctr_get_x86_eventnum (const char *eventname, uint_t pmc, | 
|  | eventsel_t *eventsel, eventsel_t *valid_umask, | 
|  | uint_t *pmc_sel) | 
|  | { | 
|  | if (hwcdrv_get_x86_eventnum && | 
|  | !hwcdrv_get_x86_eventnum (eventname, pmc, eventsel, valid_umask, pmc_sel)) | 
|  | return 0; | 
|  |  | 
|  | /* check for numerically-specified counters */ | 
|  | char * endptr; | 
|  | uint64_t num = strtoull (eventname, &endptr, 0); | 
|  | if (*eventname && !*endptr) | 
|  | { | 
|  | *eventsel = EXTENDED_EVNUM_2_EVSEL (num); | 
|  | *valid_umask = 0xff; /* allow any umask (unused for SPARC?) */ | 
|  | *pmc_sel = pmc; | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | /* name does not specify a numeric value */ | 
|  | *eventsel = (eventsel_t) - 1; | 
|  | *valid_umask = 0x0; | 
|  | *pmc_sel = pmc; | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | static int | 
|  | mask_shift_set (eventsel_t *presult, eventsel_t invalue, | 
|  | eventsel_t mask, eventsel_t shift) | 
|  | { | 
|  | if (invalue & ~mask) | 
|  | return -1; /* invalue attempts to set bits outside of mask */ | 
|  | *presult &= ~(mask << shift); /* clear all the mask bits */ | 
|  | *presult |= (invalue << shift); /* set bits according to invalue */ | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static int | 
|  | set_x86_attr_bits (eventsel_t *result_mask, eventsel_t evnt_valid_umask, | 
|  | hwcfuncs_attr_t attrs[], int nattrs, const char*nameOnly) | 
|  | { | 
|  | eventsel_t evntsel = *result_mask; | 
|  | for (int ii = 0; ii < (int) nattrs; ii++) | 
|  | { | 
|  | const char *attrname = attrs[ii].ca_name; | 
|  | eventsel_t attrval = (eventsel_t) attrs[ii].ca_val; | 
|  | const char *tmpname; | 
|  | int attr_found = 0; | 
|  | for (int jj = 0; (tmpname = perfctr_attrs_table[jj].attrname); jj++) | 
|  | { | 
|  | if (strcmp (attrname, tmpname) == 0) | 
|  | { | 
|  | if (strcmp (attrname, "umask") == 0) | 
|  | { | 
|  | if (attrval & ~evnt_valid_umask) | 
|  | { | 
|  | logerr (GTXT ("for `%s', allowable umask bits are: 0x%llx\n"), | 
|  | nameOnly, (long long) evnt_valid_umask); | 
|  | return -1; | 
|  | } | 
|  | } | 
|  | if (mask_shift_set (&evntsel, | 
|  | perfctr_attrs_table[jj].is_inverted ? (attrval^1) : attrval, | 
|  | perfctr_attrs_table[jj].mask, | 
|  | perfctr_attrs_table[jj].shift)) | 
|  | { | 
|  | logerr (GTXT ("`%s' attribute `%s' could not be set to 0x%llx\n"), | 
|  | nameOnly, attrname, (long long) attrval); | 
|  | return -1; | 
|  | } | 
|  | TprintfT (DBG_LT2, "hwcfuncs: Counter %s, attribute %s set to 0x%llx\n", | 
|  | nameOnly, attrname, (long long) attrval); | 
|  | attr_found = 1; | 
|  | break; | 
|  | } | 
|  | } | 
|  | if (!attr_found) | 
|  | { | 
|  | logerr (GTXT ("attribute `%s' is invalid\n"), attrname); | 
|  | return -1; | 
|  | } | 
|  | } | 
|  | *result_mask = evntsel; | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | IS_GLOBAL int | 
|  | hwcfuncs_get_x86_eventsel (unsigned int regno, const char *int_name, | 
|  | eventsel_t *return_event, uint_t *return_pmc_sel) | 
|  | { | 
|  | hwcfuncs_attr_t attrs[HWCFUNCS_MAX_ATTRS + 1]; | 
|  | unsigned nattrs = 0; | 
|  | char *nameOnly = NULL; | 
|  | eventsel_t evntsel = 0; // event number | 
|  | eventsel_t evnt_valid_umask = 0; | 
|  | uint_t pmc_sel = 0; | 
|  | int rc = -1; | 
|  | *return_event = 0; | 
|  | *return_pmc_sel = 0; | 
|  | void *attr_mem = hwcfuncs_parse_attrs (int_name, attrs, HWCFUNCS_MAX_ATTRS, | 
|  | &nattrs, NULL); | 
|  | if (!attr_mem) | 
|  | { | 
|  | logerr (GTXT ("out of memory, could not parse attributes\n")); | 
|  | return -1; | 
|  | } | 
|  | hwcfuncs_parse_ctr (int_name, NULL, &nameOnly, NULL, NULL, NULL); | 
|  | if (regno == REGNO_ANY) | 
|  | { | 
|  | logerr (GTXT ("reg# could not be determined for `%s'\n"), nameOnly); | 
|  | goto attr_wrapup; | 
|  | } | 
|  |  | 
|  | /* look up evntsel */ | 
|  | if (myperfctr_get_x86_eventnum (nameOnly, regno, | 
|  | &evntsel, &evnt_valid_umask, &pmc_sel)) | 
|  | { | 
|  | logerr (GTXT ("counter `%s' is not valid\n"), nameOnly); | 
|  | goto attr_wrapup; | 
|  | } | 
|  | TprintfT (DBG_LT1, "hwcfuncs: event=0x%llx pmc=0x%x '%s' nattrs = %u\n", | 
|  | (long long) evntsel, pmc_sel, nameOnly, nattrs); | 
|  |  | 
|  | /* determine event attributes */ | 
|  | eventsel_t evnt_attrs = perfctr_evntsel_enable_bits; | 
|  | if (set_x86_attr_bits (&evnt_attrs, evnt_valid_umask, attrs, nattrs, nameOnly)) | 
|  | goto attr_wrapup; | 
|  | if (evntsel & evnt_attrs) | 
|  | TprintfT (DBG_LT0, "hwcfuncs: ERROR - evntsel & enable bits overlap: 0x%llx 0x%llx 0x%llx\n", | 
|  | (long long) evntsel, (long long) evnt_attrs, | 
|  | (long long) (evntsel & evnt_attrs)); | 
|  | *return_event = evntsel | evnt_attrs; | 
|  | *return_pmc_sel = pmc_sel; | 
|  | rc = 0; | 
|  |  | 
|  | attr_wrapup: | 
|  | free (attr_mem); | 
|  | free (nameOnly); | 
|  | return rc; | 
|  | } | 
|  |  | 
/* Instruction and clobber list for issuing a raw system call from
   inline assembly (used by hwcdrv_gettid).  */
#if defined (__x86_64__)
# define syscall_instr          "syscall"
# define syscall_clobber        "rcx", "r11", "memory"
#endif
#if defined (__i386__)
# define syscall_instr          "int $0x80"
# define syscall_clobber        "memory"
#endif
|  |  | 
/* Wrapper around the perf_event_open system call.

   The call has been seen to fail spuriously while an immediate retry
   succeeds, so it is attempted up to five times.  Returns the first
   result that is not -1, or the result of the last attempt.  */
static inline int
perf_event_open (struct perf_event_attr *hw_event_uptr, pid_t pid,
                 int cpu, int group_fd, unsigned long flags)
{
  int attempts_left = 5;
  int fd;
  do
    {
      fd = syscall (__NR_perf_event_open, hw_event_uptr, pid, cpu,
                    group_fd, flags);
    }
  while (fd == -1 && --attempts_left > 0);
  return fd;
}
|  |  | 
|  | /*---------------------------------------------------------------------------*/ | 
|  | /* macros & fwd prototypes */ | 
|  |  | 
/* Marker for the functions that implement the hwcdrv API; they have
   internal linkage.  */
#define HWCDRV_API      static

/* Forward declarations for functions used before their definitions.  */
HWCDRV_API int hwcdrv_start (void);
HWCDRV_API int hwcdrv_free_counters ();
|  |  | 
/* Return the kernel thread id of the calling thread.

   When built with LIBCOLLECTOR_SRC on intel the gettid system call is
   issued directly with inline assembly; every other configuration goes
   through syscall ().  */
static pid_t
hwcdrv_gettid (void)
{
#if defined(LIBCOLLECTOR_SRC) && defined(intel)
  pid_t tid;
  __asm__ __volatile__(syscall_instr
                       : "=a" (tid) : "0" (__NR_gettid)
                       : syscall_clobber);
  return tid;
#else
  /* FIXUP_XXX_SPARC_LINUX: write gettid in asm for the
     LIBCOLLECTOR_SRC non-intel build.  */
  return syscall (__NR_gettid);
#endif
}
|  |  | 
|  | /*---------------------------------------------------------------------------*/ | 
|  | /* types */ | 
|  |  | 
/* Number of data pages used for perf_event samples in each buffer (one
   more page is mapped in front of them for metadata).
   Must be a power of 2.  */
#define NPAGES_PER_BUF  1
|  |  | 
|  | /*---------------------------------------------------------------------------*/ | 
|  |  | 
|  | /* typedefs */ | 
|  |  | 
|  | typedef struct | 
|  | { // event (hwc) definition | 
|  | unsigned int reg_num; // PMC assignment, potentially for detecting conflicts | 
|  | eventsel_t eventsel;          // raw event bits (Intel/AMD) | 
|  | uint64_t counter_preload;     // number of HWC events before signal | 
|  | struct perf_event_attr hw;    // perf_event definition | 
|  | hrtime_t min_time;            // minimum time we're targeting between events | 
|  | char *name; | 
|  | } perf_event_def_t; | 
|  |  | 
/* Runtime state of one perf_event sample buffer.  */
typedef struct
{
  void *buf;            /* mmapped buffer (metadata page first); NULL when unmapped */
  size_t pagesz;        /* size of one page, in bytes */
} buffer_state_t;
|  |  | 
/* Values from the previous sample of a counter, kept so that the next
   sample can be turned into deltas.  */
typedef struct
{
  uint64_t prev_ena_ts;         /* previous perf_event "enabled" time */
  uint64_t prev_run_ts;         /* previous perf_event "running" time */
  uint64_t prev_value;          /* previous HWC value */
} counter_value_state_t;
|  |  | 
|  | typedef struct | 
|  | { // per-counter information | 
|  | perf_event_def_t *ev_def;     // global HWC definition for one counter | 
|  | int fd;                       // perf_event fd | 
|  | buffer_state_t buf_state;     // perf_event buffer's state | 
|  | counter_value_state_t value_state; // counter state | 
|  | int needs_restart;            // workaround for dbx failure to preserve si_fd | 
|  | uint64_t last_overflow_period; | 
|  | hrtime_t last_overflow_time; | 
|  | } counter_state_t; | 
|  |  | 
|  | typedef struct | 
|  | { // per-thread context | 
|  | counter_state_t *ctr_list; | 
|  | int signal_fd;                // fd that caused the most recent signal | 
|  | pid_t tid;			// for debugging signal delivery problems | 
|  | } hdrv_pcl_ctx_t; | 
|  |  | 
|  | /*---------------------------------------------------------------------------*/ | 
|  |  | 
|  | /* static variables */ | 
|  | static struct | 
|  | { | 
|  | int library_ok; | 
|  | int internal_open_called; | 
|  | hwcfuncs_tsd_get_fn_t find_vpc_ctx; | 
|  | unsigned hwcdef_cnt;      /* number of *active* hardware counters */ | 
|  | hwcdrv_get_events_fn_t *get_events; | 
|  | } hdrv_pcl_state; | 
|  |  | 
|  | static hwcdrv_about_t hdrv_pcl_about = {.cpcN_cpuver = CPUVER_UNDEFINED}; | 
|  | static perf_event_def_t global_perf_event_def[MAX_PICS]; | 
|  |  | 
|  | #define COUNTERS_ENABLED()      (hdrv_pcl_state.hwcdef_cnt) | 
|  |  | 
|  |  | 
|  | /* perf_event buffer formatting and handling */ | 
|  | static void | 
|  | reset_buf (buffer_state_t *bufstate) | 
|  | { | 
|  | TprintfT (0, "hwcdrv: ERROR: perf_event reset_buf() called!\n"); | 
|  | struct perf_event_mmap_page *metadata = bufstate->buf; | 
|  | if (metadata) | 
|  | metadata->data_tail = metadata->data_head; | 
|  | } | 
|  |  | 
|  | static int | 
|  | skip_buf (buffer_state_t *bufstate, size_t sz) | 
|  | { | 
|  | TprintfT (DBG_LT1, "hwcdrv: WARNING: perf_event skip_buf called!\n"); | 
|  | struct perf_event_mmap_page *metadata = bufstate->buf; | 
|  | if (metadata == NULL) | 
|  | return -1; | 
|  | size_t pgsz = bufstate->pagesz; | 
|  | size_t bufsz = NPAGES_PER_BUF*pgsz; | 
|  | uint64_t d_tail = metadata->data_tail; | 
|  | uint64_t d_head = metadata->data_head; | 
|  |  | 
|  | // validate request size | 
|  | if (sz > d_head - d_tail || sz >= bufsz) | 
|  | { | 
|  | reset_buf (bufstate); | 
|  | return -1; | 
|  | } | 
|  | metadata->data_tail = d_tail + sz; // advance tail | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static int | 
|  | read_buf (buffer_state_t *bufstate, void *buf, size_t sz) | 
|  | { | 
|  | struct perf_event_mmap_page *metadata = bufstate->buf; | 
|  | if (metadata == NULL) | 
|  | return -1; | 
|  | size_t pgsz = bufstate->pagesz; | 
|  | size_t bufsz = NPAGES_PER_BUF*pgsz; | 
|  | uint64_t d_tail = metadata->data_tail; | 
|  | uint64_t d_head = metadata->data_head; | 
|  |  | 
|  | // validate request size | 
|  | if (sz > d_head - d_tail || sz >= bufsz) | 
|  | { | 
|  | reset_buf (bufstate); | 
|  | return -1; | 
|  | } | 
|  | char *buf_base = ((char *) metadata) + pgsz; // start of data buffer | 
|  | uint64_t start_pos = d_tail & (bufsz - 1); // char offset into data buffer | 
|  | size_t nbytes = sz; | 
|  | if (start_pos + sz > bufsz) | 
|  | { | 
|  | // will wrap past end of buffer | 
|  | nbytes = bufsz - start_pos; | 
|  | memcpy (buf, buf_base + start_pos, nbytes); | 
|  | start_pos = 0; // wrap to start | 
|  | buf = (void *) (((char *) buf) + nbytes); | 
|  | nbytes = sz - nbytes; | 
|  | } | 
|  | memcpy (buf, buf_base + start_pos, nbytes); | 
|  | metadata->data_tail += sz; | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static int | 
|  | read_u64 (buffer_state_t *bufstate, uint64_t *value) | 
|  | { | 
|  | return read_buf (bufstate, value, sizeof (uint64_t)); | 
|  | } | 
|  |  | 
|  | static int | 
|  | read_sample (counter_state_t *ctr_state, int msgsz, uint64_t *rvalue, | 
|  | uint64_t *rlost) | 
|  | { | 
|  | // returns count of bytes read | 
|  | buffer_state_t *bufstate = &ctr_state->buf_state; | 
|  | counter_value_state_t *cntstate = &ctr_state->value_state; | 
|  | int readsz = 0; | 
|  |  | 
|  | // PERF_SAMPLE_IP | 
|  | uint64_t ipc = 0; | 
|  | int rc = read_u64 (bufstate, &ipc); | 
|  | if (rc) | 
|  | return -1; | 
|  | readsz += sizeof (uint64_t); | 
|  |  | 
|  | // PERF_SAMPLE_READ: value | 
|  | uint64_t value = 0; | 
|  | rc = read_u64 (bufstate, &value); | 
|  | if (rc) | 
|  | return -2; | 
|  | readsz += sizeof (uint64_t); | 
|  |  | 
|  | /* Bug 20806896 | 
|  | * Old Linux kernels (e.g. 2.6.32) on certain systems return enabled and | 
|  | * running times in the sample data that correspond to the metadata times | 
|  | *     metadata->time_enabled | 
|  | *     metadata->time_running | 
|  | * from the PREVIOUS (not current) sample.  Probably just ignore this bug | 
|  | * since it's on old kernels and we only use the enabled and running times | 
|  | * to construct loss_estimate. | 
|  | */ | 
|  | // PERF_SAMPLE_READ: PERF_FORMAT_ENABLED | 
|  | uint64_t enabled_time = 0; | 
|  | rc = read_u64 (bufstate, &enabled_time); | 
|  | if (rc) | 
|  | return -3; | 
|  | readsz += sizeof (uint64_t); | 
|  |  | 
|  | // PERF_SAMPLE_READ: PERF_FORMAT_RUNNING | 
|  | uint64_t running_time = 0; | 
|  | rc = read_u64 (bufstate, &running_time); | 
|  | if (rc) | 
|  | return -4; | 
|  | readsz += sizeof (uint64_t); | 
|  |  | 
|  | uint64_t value_delta = value - cntstate->prev_value; | 
|  | uint64_t enabled_delta = enabled_time - cntstate->prev_ena_ts; | 
|  | uint64_t running_delta = running_time - cntstate->prev_run_ts; | 
|  | cntstate->prev_value = value; | 
|  | cntstate->prev_ena_ts = enabled_time; | 
|  | cntstate->prev_run_ts = running_time; | 
|  |  | 
|  | // 24830461 need workaround for Linux anomalous HWC skid overrun | 
|  | int set_error_flag = 0; | 
|  | if (value_delta > 2 * ctr_state->last_overflow_period + 2000 /* HWC_SKID_TOLERANCE */) | 
|  | set_error_flag = 1; | 
|  |  | 
|  | uint64_t loss_estimate = 0; // estimate loss of events caused by multiplexing | 
|  | if (running_delta == enabled_delta) | 
|  | { | 
|  | // counter was running 100% of time, no multiplexing | 
|  | } | 
|  | else if (running_delta == 0) | 
|  | loss_estimate = 1; // token amount to aid in debugging perfctr oddities | 
|  | else if ((running_delta > enabled_delta) || (enabled_delta & 0x1000000000000000ll)) | 
|  | { | 
|  | // running should be smaller than enabled, can't estimate | 
|  | /* | 
|  | * 21418391 HWC can have a negative count | 
|  | * | 
|  | * We've also seen enabled not only be smaller than running | 
|  | * but in fact go negative.  Guard against this. | 
|  | */ | 
|  | loss_estimate = 2; // token amount to aid in debugging perfctr oddities | 
|  | } | 
|  | else | 
|  | { | 
|  | // counter was running less than 100% of time | 
|  | // Example: ena=7772268 run=6775669 raw_value=316004 scaled_value=362483 loss_est=46479 | 
|  | uint64_t scaled_delta = (double) value_delta * enabled_delta / running_delta; | 
|  | value_delta = scaled_delta; | 
|  | #if 0 | 
|  | // We should perhaps warn the user that multiplexing is going on, | 
|  | // but hwcdrv_pcl.c doesn't know about the collector_interface, SP_JCMD_COMMENT, or COL_COMMENT_* values. | 
|  | // For now we simply don't report. | 
|  | // Perhaps we should address the issue not here but in the caller collector_sigemt_handler(), | 
|  | // but at that level "lost" has a meaning that's considerably broader than just multiplexing. | 
|  | collector_interface->writeLog ("<event kind=\"%s\" id=\"%d\">%s %d -> %d</event>\n", | 
|  | SP_JCMD_COMMENT, COL_COMMENT_HWCADJ, global_perf_event_def[idx].name, | 
|  | ctr_list[idx].last_overflow_period, new_period); | 
|  | #endif | 
|  | } | 
|  | TprintfT ((loss_estimate || set_error_flag) ? DBG_LT1 : DBG_LT3, | 
|  | "hwcdrv: '%s' ipc=0x%llx ena=%llu run=%llu " | 
|  | "value_delta=%lld(0x%llx) loss_est=%llu %s error_flag='%s'\n", | 
|  | ctr_state->ev_def->name, (long long) ipc, | 
|  | (long long) enabled_delta, (long long) running_delta, | 
|  | (long long) value_delta, (long long) value_delta, | 
|  | (unsigned long long) loss_estimate, | 
|  | loss_estimate ? ", WARNING - SCALED" : "", | 
|  | set_error_flag ? ", ERRORFLAG" : ""); | 
|  | if (set_error_flag == 1) | 
|  | value_delta |= (1ULL << 63)     /* HWCVAL_ERR_FLAG */; | 
|  | *rvalue = value_delta; | 
|  | *rlost = loss_estimate; | 
|  | if (readsz != msgsz) | 
|  | { | 
|  | TprintfT (0, "hwcdrv: ERROR: perf_event sample not fully parsed\n"); | 
|  | return -5; | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static void | 
|  | dump_perf_event_attr (struct perf_event_attr *at) | 
|  | { | 
|  | TprintfT (DBG_LT2, "dump_perf_event_attr:  size=%d  type=%d  sample_period=%lld\n" | 
|  | "  config=0x%llx  config1=0x%llx  config2=0x%llx  wakeup_events=%lld __reserved_1=%lld\n", | 
|  | (int) at->size, (int) at->type, (unsigned long long) at->sample_period, | 
|  | (unsigned long long) at->config, (unsigned long long) at->config1, | 
|  | (unsigned long long) at->config2, (unsigned long long) at->wakeup_events, | 
|  | (unsigned long long) at->__reserved_1); | 
|  | #define DUMP_F(fld) if (at->fld) TprintfT(DBG_LT2, "  %-10s : %lld\n", #fld, (long long) at->fld) | 
|  | DUMP_F (disabled); | 
|  | DUMP_F (inherit); | 
|  | DUMP_F (pinned); | 
|  | DUMP_F (exclusive); | 
|  | DUMP_F (exclude_user); | 
|  | DUMP_F (exclude_kernel); | 
|  | DUMP_F (exclude_hv); | 
|  | DUMP_F (exclude_idle); | 
|  | //    DUMP_F(xmmap); | 
|  | DUMP_F (comm); | 
|  | DUMP_F (freq); | 
|  | DUMP_F (inherit_stat); | 
|  | DUMP_F (enable_on_exec); | 
|  | DUMP_F (task); | 
|  | DUMP_F (watermark); | 
|  | } | 
|  |  | 
|  | static void | 
|  | init_perf_event (struct perf_event_attr *hw, uint64_t event, uint64_t period) | 
|  | { | 
|  | memset (hw, 0, sizeof (struct perf_event_attr)); | 
|  | hw->size = sizeof (struct perf_event_attr); // fwd/bwd compat | 
|  |  | 
|  | #if defined(__i386__) || defined(__x86_64) | 
|  | //note: Nehalem/Westmere OFFCORE_RESPONSE in upper 32 bits | 
|  | hw->config = event; | 
|  | hw->type = PERF_TYPE_RAW;     // hw/sw/trace/raw... | 
|  | #elif defined(__aarch64__) | 
|  | hw->type = (event >> 24) & 7; | 
|  | hw->config = event & 0xff; | 
|  | #elif defined(sparc) | 
|  | //SPARC needs to be shifted up 16 bits | 
|  | hw->config = (event & 0xFFFF) << 16;  // uint64_t event | 
|  | uint64_t regs = (event >> 20) & 0xf;  // see sparc_pcbe.c | 
|  | hw->config |= regs << 4;  // for M8, supported PICs need to be placed at bits [7:4] | 
|  | hw->type = PERF_TYPE_RAW; // hw/sw/trace/raw... | 
|  | #endif | 
|  |  | 
|  | hw->sample_period = period; | 
|  | hw->sample_type = PERF_SAMPLE_IP | | 
|  | // PERF_SAMPLE_TID		| | 
|  | // PERF_SAMPLE_TIME		| // possibly interesting | 
|  | // PERF_SAMPLE_ADDR		| | 
|  | PERF_SAMPLE_READ | // HWC value | 
|  | // PERF_SAMPLE_CALLCHAIN	| // interesting | 
|  | // PERF_SAMPLE_ID		| | 
|  | // PERF_SAMPLE_CPU		| // possibly interesting | 
|  | // PERF_SAMPLE_PERIOD		| | 
|  | // PERF_SAMPLE_STREAM_ID	| | 
|  | // PERF_SAMPLE_RAW		| | 
|  | 0; | 
|  | hw->read_format = | 
|  | PERF_FORMAT_TOTAL_TIME_ENABLED | // detect when hwc not scheduled | 
|  | PERF_FORMAT_TOTAL_TIME_RUNNING | // detect when hwc not scheduled | 
|  | // PERF_FORMAT_ID		| | 
|  | // PERF_FORMAT_GROUP		| | 
|  | 0; | 
|  | hw->disabled = 1; /* off by default */ | 
|  |  | 
|  | // Note: the following override config.priv bits! | 
|  | hw->exclude_user = (event & (1 << 16)) == 0;      /* don't count user */ | 
|  | hw->exclude_kernel = (event & (1 << 17)) == 0;    /* ditto kernel */ | 
|  | hw->exclude_hv = 1;       /* ditto hypervisor */ | 
|  | hw->wakeup_events = 1;    /* wakeup every n events */ | 
|  | dump_perf_event_attr (hw); | 
|  | } | 
|  |  | 
|  | static int | 
|  | start_one_ctr (int ii, size_t pgsz, hdrv_pcl_ctx_t * pctx, char *error_string) | 
|  | { | 
|  | // pe_attr should have been initialized in hwcdrv_create_counters() | 
|  | struct perf_event_attr pe_attr; | 
|  | memcpy (&pe_attr, &global_perf_event_def[ii].hw, sizeof (pe_attr)); | 
|  |  | 
|  | // but we adjust the period, so make sure that pctx->ctr_list[ii].last_overflow_period has been set | 
|  | pe_attr.sample_period = pctx->ctr_list[ii].last_overflow_period; | 
|  |  | 
|  | int hwc_fd = perf_event_open (&pe_attr, pctx->tid, -1, -1, 0); | 
|  | if (hwc_fd == -1) | 
|  | { | 
|  | TprintfT (DBG_LT1, "%s idx=%d perf_event_open failed, errno=%d\n", | 
|  | error_string, ii, errno); | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | size_t buffer_area_sz = (NPAGES_PER_BUF + 1) * pgsz; // add a page for metadata | 
|  | void * buf = mmap (NULL, buffer_area_sz, //YXXX is this a safe call? | 
|  | PROT_READ | PROT_WRITE, MAP_SHARED, hwc_fd, 0); | 
|  | if (buf == MAP_FAILED) | 
|  | { | 
|  | TprintfT (0, "sz = %ld, pgsz = %ld\n  err=%s idx=%d mmap failed: %s\n", | 
|  | (long) buffer_area_sz, (long) pgsz, error_string, ii, strerror (errno)); | 
|  | return 1; | 
|  | } | 
|  | pctx->ctr_list[ii].ev_def = &global_perf_event_def[ii]; // why do we set ev_def?  we never seem to use it | 
|  | pctx->ctr_list[ii].fd = hwc_fd; | 
|  | pctx->ctr_list[ii].buf_state.buf = buf; | 
|  | pctx->ctr_list[ii].buf_state.pagesz = pgsz; | 
|  | pctx->ctr_list[ii].value_state.prev_ena_ts = 0; | 
|  | pctx->ctr_list[ii].value_state.prev_run_ts = 0; | 
|  | pctx->ctr_list[ii].value_state.prev_value = 0; | 
|  | pctx->ctr_list[ii].last_overflow_time = gethrtime (); | 
|  |  | 
|  | /* set async mode */ | 
|  | long flags = fcntl (hwc_fd, F_GETFL, 0) | O_ASYNC; | 
|  | int rc = fcntl (hwc_fd, F_SETFL, flags); | 
|  | if (rc == -1) | 
|  | { | 
|  | TprintfT (0, "%s idx=%d O_ASYNC failed\n", error_string, ii); | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * set lwp ownership of the fd | 
|  | * See BUGS section of "man perf_event_open": | 
|  | *     The F_SETOWN_EX option to fcntl(2) is needed to properly get | 
|  | *     overflow signals in threads.  This was introduced in Linux 2.6.32. | 
|  | * Legacy references: | 
|  | *     see http://lkml.org/lkml/2009/8/4/128 | 
|  | *     google man fcntl F_SETOWN_EX -conflict | 
|  | *       "From Linux 2.6.32 onward, use F_SETOWN_EX to target | 
|  | *       SIGIO and SIGURG signals at a particular thread." | 
|  | *     http://icl.cs.utk.edu/papi/docs/da/d2a/examples__v2_8x_2self__smpl__multi_8c.html | 
|  | *     See 2010 CSCADS presentation by Eranian | 
|  | */ | 
|  | struct f_owner_ex fowner_ex; | 
|  | fowner_ex.type = F_OWNER_TID; | 
|  | fowner_ex.pid = pctx->tid; | 
|  | rc = fcntl (hwc_fd, F_SETOWN_EX, (unsigned long) &fowner_ex); | 
|  | if (rc == -1) | 
|  | { | 
|  | TprintfT (0, "%s idx=%d F_SETOWN failed\n", error_string, ii); | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | /* Use sigio so handler can determine FD via siginfo->si_fd. */ | 
|  | rc = fcntl (hwc_fd, F_SETSIG, SIGIO); | 
|  | if (rc == -1) | 
|  | { | 
|  | TprintfT (0, "%s idx=%d F_SETSIG failed\n", error_string, ii); | 
|  | return 1; | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static int | 
|  | stop_one_ctr (int ii, counter_state_t *ctr_list) | 
|  | { | 
|  | int hwc_rc = 0; | 
|  | if (-1 == ioctl (ctr_list[ii].fd, PERF_EVENT_IOC_DISABLE, 1)) | 
|  | { | 
|  | TprintfT (0, "hwcdrv: ERROR: PERF_EVENT_IOC_DISABLE #%d failed: errno=%d\n", ii, errno); | 
|  | hwc_rc = HWCFUNCS_ERROR_GENERIC; | 
|  | } | 
|  | void *buf = ctr_list[ii].buf_state.buf; | 
|  | if (buf) | 
|  | { | 
|  | size_t bufsz = (NPAGES_PER_BUF + 1) * ctr_list[ii].buf_state.pagesz; | 
|  | ctr_list[ii].buf_state.buf = NULL; | 
|  | int tmprc = munmap (buf, bufsz); | 
|  | if (tmprc) | 
|  | { | 
|  | TprintfT (0, "hwcdrv: ERROR: munmap() #%d failed: errno=%d\n", ii, errno); | 
|  | hwc_rc = HWCFUNCS_ERROR_GENERIC; | 
|  | } | 
|  | } | 
|  | if (-1 == close (ctr_list[ii].fd)) | 
|  | { | 
|  | TprintfT (0, "hwcdrv: ERROR: close(fd) #%d failed: errno=%d\n", ii, errno); | 
|  | hwc_rc = HWCFUNCS_ERROR_GENERIC; | 
|  | } | 
|  | return hwc_rc; | 
|  | } | 
|  |  | 
|  | /* HWCDRV_API for thread-specific actions */ | 
|  | HWCDRV_API int | 
|  | hwcdrv_lwp_init (void) | 
|  | { | 
|  | return hwcdrv_start (); | 
|  | } | 
|  |  | 
|  | HWCDRV_API void | 
|  | hwcdrv_lwp_fini (void) | 
|  | { | 
|  | hwcdrv_free_counters ();  /* also sets pctx->ctr_list=NULL; */ | 
|  | } | 
|  |  | 
|  | /* open */ | 
|  | static int | 
|  | hdrv_pcl_internal_open () | 
|  | { | 
|  | if (hdrv_pcl_state.internal_open_called) | 
|  | { | 
|  | TprintfT (0, "hwcdrv: WARNING: hdrv_pcl_internal_open: already called\n"); | 
|  | return HWCFUNCS_ERROR_ALREADY_CALLED; | 
|  | } | 
|  |  | 
|  | // determine if PCL is available | 
|  | perf_event_def_t tmp_event_def; | 
|  | memset (&tmp_event_def, 0, sizeof (tmp_event_def)); | 
|  | struct perf_event_attr *pe_attr = &tmp_event_def.hw; | 
|  | init_perf_event (pe_attr, 0, 0); | 
|  | pe_attr->type = PERF_TYPE_HARDWARE; // specify abstracted HW event | 
|  | pe_attr->config = PERF_COUNT_HW_INSTRUCTIONS; // specify abstracted insts | 
|  | int hwc_fd = perf_event_open (pe_attr, | 
|  | 0, // pid/tid, 0 is self | 
|  | -1, // cpu, -1 is per-thread mode | 
|  | -1, // group_fd, -1 is root | 
|  | 0); // flags | 
|  | if (hwc_fd == -1) | 
|  | { | 
|  | TprintfT (DBG_LT1, "hwcdrv: WARNING: hdrv_pcl_internal_open:" | 
|  | " perf_event_open() failed, errno=%d\n", errno); | 
|  | goto internal_open_error; | 
|  | } | 
|  |  | 
|  | /* see if the PCL is new enough to know about F_SETOWN_EX */ | 
|  | struct f_owner_ex fowner_ex; | 
|  | fowner_ex.type = F_OWNER_TID; | 
|  | fowner_ex.pid = hwcdrv_gettid (); // "pid=tid" is correct w/F_OWNER_TID | 
|  | if (fcntl (hwc_fd, F_SETOWN_EX, (unsigned long) &fowner_ex) == -1) | 
|  | { | 
|  | TprintfT (DBG_LT1, "hwcdrv: WARNING: hdrv_pcl_internal_open: " | 
|  | "F_SETOWN failed, errno=%d\n", errno); | 
|  | close (hwc_fd); | 
|  | goto internal_open_error; | 
|  | } | 
|  | close (hwc_fd); | 
|  |  | 
|  | hdrv_pcl_state.internal_open_called = 1; | 
|  | hdrv_pcl_state.library_ok = 1; // set to non-zero to show it's initted | 
|  | hdrv_pcl_about.cpcN_cpuver = CPUVER_UNDEFINED; | 
|  | TprintfT (DBG_LT2, "hwcdrv: hdrv_pcl_internal_open()\n"); | 
|  | for (int ii = 0; hdrv_pcbe_drivers[ii]; ii++) | 
|  | { | 
|  | hdrv_pcbe_api_t *ppcbe = hdrv_pcbe_drivers[ii]; | 
|  | if (!ppcbe->hdrv_pcbe_init ()) | 
|  | { | 
|  | hdrv_pcl_about.cpcN_cciname = ppcbe->hdrv_pcbe_impl_name (); | 
|  | hdrv_pcl_about.cpcN_cpuver = hwcdrv_lookup_cpuver (hdrv_pcl_about.cpcN_cciname); | 
|  | if (hdrv_pcl_about.cpcN_cpuver == CPUVER_UNDEFINED) | 
|  | goto internal_open_error; | 
|  | hdrv_pcl_about.cpcN_npics = ppcbe->hdrv_pcbe_ncounters (); | 
|  | hdrv_pcl_about.cpcN_docref = ppcbe->hdrv_pcbe_cpuref (); | 
|  | hdrv_pcl_state.get_events = ppcbe->hdrv_pcbe_get_events; | 
|  | hwcdrv_get_x86_eventnum = ppcbe->hdrv_pcbe_get_eventnum; | 
|  | break; | 
|  | } | 
|  | } | 
|  | if (hdrv_pcl_about.cpcN_npics > MAX_PICS) | 
|  | { | 
|  | TprintfT (0, "hwcdrv: WARNING: hdrv_pcl_internal_open:" | 
|  | " reducing number of HWCs from %u to %u on processor '%s'\n", | 
|  | hdrv_pcl_about.cpcN_npics, MAX_PICS, hdrv_pcl_about.cpcN_cciname); | 
|  | hdrv_pcl_about.cpcN_npics = MAX_PICS; | 
|  | } | 
|  | TprintfT (DBG_LT1, "hwcdrv: hdrv_pcl_internal_open:" | 
|  | " perf_event cpuver=%d, name='%s'\n", | 
|  | hdrv_pcl_about.cpcN_cpuver, hdrv_pcl_about.cpcN_cciname); | 
|  | return 0; | 
|  |  | 
|  | internal_open_error: | 
|  | hdrv_pcl_about.cpcN_cpuver = CPUVER_UNDEFINED; | 
|  | hdrv_pcl_about.cpcN_npics = 0; | 
|  | hdrv_pcl_about.cpcN_docref = NULL; | 
|  | hdrv_pcl_about.cpcN_cciname = NULL; | 
|  | return HWCFUNCS_ERROR_NOT_SUPPORTED; | 
|  | } | 
|  |  | 
|  | static void * | 
|  | single_thread_tsd_ftn () | 
|  | { | 
|  | static hdrv_pcl_ctx_t tsd_context; | 
|  | return &tsd_context; | 
|  | } | 
|  |  | 
|  | /* HWCDRV_API */ | 
|  | HWCDRV_API int | 
|  | hwcdrv_init (hwcfuncs_abort_fn_t abort_ftn, int *tsd_sz) | 
|  | { | 
|  | hdrv_pcl_state.find_vpc_ctx = single_thread_tsd_ftn; | 
|  | if (tsd_sz) | 
|  | *tsd_sz = sizeof (hdrv_pcl_ctx_t); | 
|  |  | 
|  | if (hdrv_pcl_state.internal_open_called) | 
|  | return HWCFUNCS_ERROR_ALREADY_CALLED; | 
|  | return hdrv_pcl_internal_open (); | 
|  | } | 
|  |  | 
|  | HWCDRV_API void | 
|  | hwcdrv_get_info (int *cpuver, const char **cciname, uint_t *npics, | 
|  | const char **docref, uint64_t *support) | 
|  | { | 
|  | if (cpuver) | 
|  | *cpuver = hdrv_pcl_about.cpcN_cpuver; | 
|  | if (cciname) | 
|  | *cciname = hdrv_pcl_about.cpcN_cciname; | 
|  | if (npics) | 
|  | *npics = hdrv_pcl_about.cpcN_npics; | 
|  | if (docref) | 
|  | *docref = hdrv_pcl_about.cpcN_docref; | 
|  | if (support) | 
|  | *support = HWCFUNCS_SUPPORT_OVERFLOW_PROFILING | HWCFUNCS_SUPPORT_OVERFLOW_CTR_ID; | 
|  | } | 
|  |  | 
|  | HWCDRV_API int | 
|  | hwcdrv_enable_mt (hwcfuncs_tsd_get_fn_t tsd_ftn) | 
|  | { | 
|  | if (tsd_ftn) | 
|  | hdrv_pcl_state.find_vpc_ctx = tsd_ftn; | 
|  | else | 
|  | { | 
|  | TprintfT (0, "hwcdrv: ERROR: enable_mt(): tsd_ftn==NULL\n"); | 
|  | return HWCFUNCS_ERROR_UNAVAIL; | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | HWCDRV_API int | 
|  | hwcdrv_get_descriptions (hwcf_hwc_cb_t *hwc_cb, hwcf_attr_cb_t *attr_cb) | 
|  | { | 
|  | int count = 0; | 
|  | if (hwc_cb && hdrv_pcl_state.get_events) | 
|  | count = hdrv_pcl_state.get_events (hwc_cb); | 
|  | if (attr_cb) | 
|  | for (int ii = 0; perfctr_attrs_table && perfctr_attrs_table[ii].attrname; ii++) | 
|  | attr_cb (perfctr_attrs_table[ii].attrname); | 
|  | if (!count) | 
|  | return -1; | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | HWCDRV_API int | 
|  | hwcdrv_assign_regnos (Hwcentry* entries[], unsigned numctrs) | 
|  | { | 
|  | return hwcdrv_assign_all_regnos (entries, numctrs); | 
|  | } | 
|  |  | 
|  | static int | 
|  | internal_hwc_start (int fd) | 
|  | { | 
|  | int rc = ioctl (fd, PERF_EVENT_IOC_REFRESH, 1); | 
|  | if (rc == -1) | 
|  | { | 
|  | TprintfT (DBG_LT0, "hwcdrv: ERROR: internal_hwc_start:" | 
|  | " PERF_EVENT_IOC_REFRESH(fd=%d) failed: errno=%d\n", fd, errno); | 
|  | return HWCFUNCS_ERROR_UNAVAIL; | 
|  | } | 
|  | TprintfT (DBG_LT3, "hwcdrv: internal_hwc_start(fd=%d)\n", fd); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | HWCDRV_API int | 
|  | hwcdrv_overflow (siginfo_t *si, hwc_event_t *eventp, hwc_event_t *lost_events) | 
|  | { | 
|  | /* set expired counters to overflow value and all others to 0 */ | 
|  | /* return 0: OK, counters should be restarted */ | 
|  | /* return non-zero: eventp not set, counters should not be restarted */ | 
|  | /* clear return values */ | 
|  | int ii; | 
|  | for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) | 
|  | { | 
|  | eventp->ce_pic[ii] = 0; | 
|  | lost_events->ce_pic[ii] = 0; | 
|  | } | 
|  | hrtime_t sig_ts = gethrtime (); //YXXX get this from HWC event? | 
|  | eventp->ce_hrt = sig_ts; | 
|  | lost_events->ce_hrt = sig_ts; | 
|  |  | 
|  | /* determine source signal */ | 
|  | int signal_fd = -1; | 
|  | switch (si->si_code) | 
|  | { | 
|  | case POLL_HUP: /* expected value from pcl */ | 
|  | /* According to Stephane Eranian: | 
|  | * "expect POLL_HUP instead of POLL_IN because we are | 
|  | * in one-shot mode (IOC_REFRESH)" | 
|  | */ | 
|  | signal_fd = si->si_fd; | 
|  | break; | 
|  | case SI_TKILL: /* event forwarded by tkill */ | 
|  | /* DBX can only forward SI_TKILL when it detects POLL_HUP | 
|  | * unfortunately, this means that si->si_fd has been lost... | 
|  | * We need to process the buffers, but we don't know the fd! | 
|  | */ | 
|  | TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:" | 
|  | " SI_TKILL detected\n", sig_ts); | 
|  | break; | 
|  | default: | 
|  | // "sometimes we see a POLL_IN (1) with very high event rates," | 
|  | // according to eranian(?) | 
|  | TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:" | 
|  | " unexpected si_code 0x%x\n", sig_ts, si->si_code); | 
|  | return HWCFUNCS_ERROR_GENERIC; | 
|  | } | 
|  |  | 
|  | hdrv_pcl_ctx_t * pctx = hdrv_pcl_state.find_vpc_ctx (); | 
|  | if (!pctx) | 
|  | { | 
|  | TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:" | 
|  | " tsd context is NULL\n", sig_ts); | 
|  | return HWCFUNCS_ERROR_UNEXPECTED; | 
|  | } | 
|  | counter_state_t * ctr_list = (counter_state_t *) pctx->ctr_list; | 
|  | if (!ctr_list) | 
|  | { | 
|  | TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:" | 
|  | " ctr_list is NULL\n", sig_ts); | 
|  | return HWCFUNCS_ERROR_UNEXPECTED; | 
|  | } | 
|  |  | 
|  | /* clear needs_restart flag */ | 
|  | for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) | 
|  | ctr_list[ii].needs_restart = 0; | 
|  |  | 
|  | /* attempt to identify the counter to read */ | 
|  | int signal_idx = -1; | 
|  | pctx->signal_fd = signal_fd; // save the signal provided by siginfo_t | 
|  | if (signal_fd != -1) | 
|  | { | 
|  | for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) | 
|  | { | 
|  | if (ctr_list[ii].fd == signal_fd) | 
|  | { | 
|  | signal_idx = ii; | 
|  | break; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | if (signal_idx < 0) | 
|  | { | 
|  | TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:" | 
|  | " pmc not determined!\n", sig_ts); | 
|  | lost_events->ce_pic[0] = 1; /* record a bogus value into experiment */ | 
|  | // note: bogus value may get overwritten in loop below | 
|  | } | 
|  |  | 
|  | /* capture sample(s).  In addition to signal_idx, check other counters. */ | 
|  | struct perf_event_header sheader; | 
|  | int idx; | 
|  | for (idx = 0; idx < hdrv_pcl_state.hwcdef_cnt; idx++) | 
|  | { | 
|  | int num_recs = 0; | 
|  | while (1) | 
|  | { | 
|  | /* check for samples */ | 
|  | struct perf_event_mmap_page *metadata = ctr_list[idx].buf_state.buf; | 
|  | if (metadata == NULL) | 
|  | break; // empty | 
|  | if (metadata->data_tail == metadata->data_head) | 
|  | break; // empty | 
|  |  | 
|  | /* read header */ | 
|  | if (read_buf (&ctr_list[idx].buf_state, &sheader, sizeof (sheader))) | 
|  | break; | 
|  | num_recs++; | 
|  |  | 
|  | /* check for PERF_RECORD_SAMPLE */ | 
|  | size_t datasz = sheader.size - sizeof (struct perf_event_header); | 
|  | if (sheader.type != PERF_RECORD_SAMPLE) | 
|  | { | 
|  | TprintfT (DBG_LT2, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:" | 
|  | " unexpected recd type=%d\n", | 
|  | sig_ts, sheader.type); | 
|  | if (skip_buf (&ctr_list[idx].buf_state, datasz)) | 
|  | { | 
|  | TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:" | 
|  | " skip recd type=%d failed\n", sig_ts, sheader.type); | 
|  | lost_events->ce_pic[idx] = 4; /* record a bogus value */ | 
|  | break; // failed to skip buffer?? | 
|  | } | 
|  | lost_events->ce_pic[idx] = 2; /* record a bogus value */ | 
|  | continue; // advance to next record | 
|  | } | 
|  |  | 
|  | /* type is PERF_RECORD_SAMPLE */ | 
|  | uint64_t value, lostv; | 
|  | if (read_sample (&ctr_list[idx], datasz, &value, &lostv)) | 
|  | { | 
|  | TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:" | 
|  | " read_sample() failed\n", sig_ts); | 
|  | lost_events->ce_pic[idx] = 3; // record a bogus value | 
|  | break;                        // failed to read sample data?? | 
|  | } | 
|  | TprintfT (DBG_LT3, "hwcdrv: sig_ts=%llu: hwcdrv_overflow:" | 
|  | " idx=%d value=%llu lost=%llu\n", (unsigned long long) sig_ts, | 
|  | idx, (unsigned long long) value, (unsigned long long) lostv); | 
|  | if (eventp->ce_pic[idx]) | 
|  | { | 
|  | TprintfT (DBG_LT2, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:" | 
|  | " idx=%d previous sample recorded as lost_event\n", sig_ts, idx); | 
|  | lost_events->ce_pic[idx] += eventp->ce_pic[idx]; | 
|  | } | 
|  | eventp->ce_pic[idx] = value; | 
|  | lost_events->ce_pic[idx] += lostv; | 
|  | } | 
|  |  | 
|  | /* debug output for unexpected (but common) cases */ | 
|  | if (idx == signal_idx) | 
|  | { | 
|  | if (num_recs != 1) | 
|  | TprintfT (DBG_LT2, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:" | 
|  | " %d records for signal_idx=%d\n", sig_ts, num_recs, signal_idx); | 
|  | } | 
|  | else if (num_recs) | 
|  | TprintfT (DBG_LT2, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:" | 
|  | " %d unexpected record(s) for idx=%d (signal_idx=%d)\n", | 
|  | sig_ts, num_recs, idx, signal_idx); | 
|  |  | 
|  | /* trigger counter restart whenever records were found */ | 
|  | if (num_recs) | 
|  | { | 
|  | /* check whether to adapt the overflow interval */ | 
|  | /* This is the Linux version. | 
|  | * The Solaris version is in hwprofile.c collector_update_overflow_counters(). | 
|  | */ | 
|  | hrtime_t min_time = global_perf_event_def[idx].min_time; | 
|  | if (min_time > 0 // overflow interval is adaptive | 
|  | && sig_ts - ctr_list[idx].last_overflow_time < min_time) // last interval below min | 
|  | { | 
|  | /* pick a new overflow interval */ | 
|  | /* roughly doubled, but add funny numbers */ | 
|  | /* hopefully the result is prime or not a multiple of some # of ops/loop */ | 
|  | uint64_t new_period = 2 * ctr_list[idx].last_overflow_period + 37; | 
|  | #if 0 | 
|  | // On Solaris, we report the adjustment to the log file. | 
|  | // On Linux it's hard for us to do so since hwcdrv_pcl.c doesn't know about collector_interface, SP_JCMD_COMMENT, or COL_COMMENT_HWCADJ. | 
|  | // For now we simply don't report. | 
|  | collector_interface->writeLog ("<event kind=\"%s\" id=\"%d\">%s %d -> %d</event>\n", | 
|  | SP_JCMD_COMMENT, COL_COMMENT_HWCADJ, global_perf_event_def[idx].name, | 
|  | ctr_list[idx].last_overflow_period, new_period); | 
|  | #endif | 
|  | /* There are a variety of ways of resetting the period on Linux. | 
|  | * The most elegant is | 
|  | *     ioctl(fd,PERF_EVENT_IOC_PERIOD,&period) | 
|  | * but check the perf_event_open man page for PERF_EVENT_IOC_PERIOD: | 
|  | *     > Prior to Linux 2.6.36 this ioctl always failed due to a bug in the kernel. | 
|  | *     > Prior to Linux 3.14 (or 3.7 on ARM), the new period did not take effect | 
|  | *         until after the next overflow. | 
|  | * So we're kind of stuck shutting the fd down and restarting it with the new period. | 
|  | */ | 
|  | if (stop_one_ctr (idx, ctr_list)) | 
|  | { | 
|  | // EUGENE figure out what to do on error | 
|  | } | 
|  | ctr_list[idx].last_overflow_period = new_period; | 
|  | if (start_one_ctr (idx, ctr_list[idx].buf_state.pagesz, pctx, "hwcdrv: ERROR: hwcdrv_overflow (readjust overflow):")) | 
|  | { | 
|  | // EUGENE figure out what to do on error | 
|  | } | 
|  | } | 
|  | ctr_list[idx].last_overflow_time = sig_ts; | 
|  | #if 0 | 
|  | ctr_list[idx].needs_restart = 1; | 
|  | #else // seems to be more reliable to restart here instead of hwcdrv_sighlr_restart() | 
|  | internal_hwc_start (ctr_list[idx].fd); | 
|  | #endif | 
|  | } | 
|  | } | 
|  | return 0; // OK to restart counters | 
|  | } | 
|  |  | 
|  | HWCDRV_API int | 
|  | hwcdrv_sighlr_restart (const hwc_event_t *pp) | 
|  | { | 
|  | #if 0 // restarting here doesn't seem to work as well as restarting in hwcdrv_overflow() | 
|  | hdrv_pcl_ctx_t * pctx = hdrv_pcl_state.find_vpc_ctx (); | 
|  | if (!pctx) | 
|  | { | 
|  | TprintfT (DBG_LT0, "hwcdrv: ERROR: hwcdrv_sighlr_restart: find_vpc_ctx()==NULL\n"); | 
|  | return -1; | 
|  | } | 
|  | counter_state_t * ctr_list = (counter_state_t *) pctx->ctr_list; | 
|  | if (!ctr_list) | 
|  | { | 
|  | TprintfT (DBG_LT0, "hwcdrv: WARNING: hwcdrv_sighlr_restart: ctr_list is NULL\n"); | 
|  | return -1; | 
|  | } | 
|  | int errors = 0; | 
|  | for (int ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) | 
|  | { | 
|  | if (ctr_list[ii].needs_restart) | 
|  | errors |= internal_hwc_start (ctr_list[ii].fd); | 
|  | ctr_list[ii].needs_restart = 0; | 
|  | } | 
|  | return errors; | 
|  | #else | 
|  | return 0; | 
|  | #endif | 
|  | } | 
|  |  | 
|  | /* create counters based on hwcdef[] */ | 
|  | HWCDRV_API int | 
|  | hwcdrv_create_counters (unsigned hwcdef_cnt, Hwcentry *hwcdef) | 
|  | { | 
|  | if (hwcdef_cnt > hdrv_pcl_about.cpcN_npics) | 
|  | { | 
|  | logerr (GTXT ("More than %d counters were specified\n"), hdrv_pcl_about.cpcN_npics); /*!*/ | 
|  | return HWCFUNCS_ERROR_HWCARGS; | 
|  | } | 
|  | if (hdrv_pcl_about.cpcN_cpuver == CPUVER_UNDEFINED) | 
|  | { | 
|  | logerr (GTXT ("Processor not supported\n")); | 
|  | return HWCFUNCS_ERROR_HWCARGS; | 
|  | } | 
|  |  | 
|  | /* add counters */ | 
|  | for (unsigned idx = 0; idx < hwcdef_cnt; idx++) | 
|  | { | 
|  | perf_event_def_t *glb_event_def = &global_perf_event_def[idx]; | 
|  | memset (glb_event_def, 0, sizeof (perf_event_def_t)); | 
|  | unsigned int pmc_sel; | 
|  | eventsel_t evntsel; | 
|  | if (hwcfuncs_get_x86_eventsel (hwcdef[idx].reg_num, | 
|  | hwcdef[idx].int_name, &evntsel, &pmc_sel)) | 
|  | { | 
|  | TprintfT (0, "hwcdrv: ERROR: hwcfuncs_get_x86_eventsel() failed\n"); | 
|  | return HWCFUNCS_ERROR_HWCARGS; | 
|  | } | 
|  | glb_event_def->reg_num = pmc_sel; | 
|  | glb_event_def->eventsel = evntsel; | 
|  | glb_event_def->counter_preload = hwcdef[idx].val; | 
|  | glb_event_def->min_time = hwcdef[idx].min_time; | 
|  | glb_event_def->name = strdup (hwcdef[idx].name); // memory leak??? very minor | 
|  | init_perf_event (&glb_event_def->hw, glb_event_def->eventsel, | 
|  | glb_event_def->counter_preload); | 
|  | TprintfT (DBG_LT1, "hwcdrv: create_counters: pic=%u name='%s' interval=%lld" | 
|  | "(min_time=%lld): reg_num=0x%x eventsel=0x%llx ireset=%lld usr=%lld sys=%lld\n", | 
|  | idx, hwcdef[idx].int_name, (long long) glb_event_def->counter_preload, | 
|  | (long long) glb_event_def->min_time, (int) glb_event_def->reg_num, | 
|  | (long long) glb_event_def->eventsel, | 
|  | (long long) HW_INTERVAL_PRESET (hwcdef[idx].val), | 
|  | (long long) glb_event_def->hw.exclude_user, | 
|  | (long long) glb_event_def->hw.exclude_kernel); | 
|  | } | 
|  |  | 
|  | hdrv_pcl_state.hwcdef_cnt = hwcdef_cnt; | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | HWCDRV_API int | 
|  | hwcdrv_free_counters () // note: only performs shutdown for this thread | 
|  | { | 
|  | hdrv_pcl_ctx_t * pctx; | 
|  | if (!COUNTERS_ENABLED ()) | 
|  | return 0; | 
|  | pctx = hdrv_pcl_state.find_vpc_ctx (); | 
|  | if (!pctx) | 
|  | { | 
|  | TprintfT (0, "hwcdrv: WARNING: hwcdrv_free_counters: tsd context is NULL\n"); | 
|  | return HWCFUNCS_ERROR_GENERIC; | 
|  | } | 
|  | counter_state_t *ctr_list = pctx->ctr_list; | 
|  | if (!ctr_list) | 
|  | { | 
|  | // fork child: prolog suspends hwcs, then epilog frees them | 
|  | TprintfT (DBG_LT1, "hwcdrv: WARNING: hwcdrv_free_counters: ctr_list is already NULL\n"); | 
|  | return 0; | 
|  | } | 
|  | int hwc_rc = 0; | 
|  | for (int ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) | 
|  | if (stop_one_ctr (ii, ctr_list)) | 
|  | hwc_rc = HWCFUNCS_ERROR_GENERIC; | 
|  | TprintfT (DBG_LT1, "hwcdrv: hwcdrv_free_counters(tid=0x%lx).\n", (long) pctx->tid); | 
|  | pctx->ctr_list = NULL; | 
|  | return hwc_rc; | 
|  | } | 
|  |  | 
|  | HWCDRV_API int | 
|  | hwcdrv_start (void) /* must be called from each thread ? */ | 
|  | { | 
|  | hdrv_pcl_ctx_t *pctx = NULL; | 
|  | if (!COUNTERS_ENABLED ()) | 
|  | { | 
|  | TprintfT (DBG_LT1, "hwcdrv: WARNING: hwcdrv_start: no counters to start \n"); | 
|  | return 0; | 
|  | } | 
|  | if (!hdrv_pcl_state.library_ok) | 
|  | { | 
|  | TprintfT (0, "hwcdrv: ERROR: hwcdrv_start: library is not open\n"); | 
|  | return HWCFUNCS_ERROR_NOT_SUPPORTED; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * set up per-thread context | 
|  | */ | 
|  | pctx = hdrv_pcl_state.find_vpc_ctx (); | 
|  | if (!pctx) | 
|  | { | 
|  | TprintfT (0, "hwcdrv: ERROR: hwcdrv_start: tsd context is NULL\n"); | 
|  | return HWCFUNCS_ERROR_UNEXPECTED; | 
|  | } | 
|  | pctx->tid = hwcdrv_gettid (); | 
|  | TprintfT (DBG_LT1, "hwcdrv: hwcdrv_start(tid=0x%lx)\n", (long) pctx->tid); | 
|  |  | 
|  | /* | 
|  | * create per-thread counter list | 
|  | */ | 
|  | counter_state_t *ctr_list = (counter_state_t *) calloc (hdrv_pcl_state.hwcdef_cnt, | 
|  | sizeof (counter_state_t)); | 
|  | if (!ctr_list) | 
|  | { | 
|  | TprintfT (0, "hwcdrv: ERROR: hwcdrv_start: calloc(ctr_list) failed\n"); | 
|  | return HWCFUNCS_ERROR_MEMORY; | 
|  | } | 
|  | int ii; | 
|  | for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) | 
|  | ctr_list[ii].fd = -1; // invalidate fds in case we have to close prematurely | 
|  | pctx->ctr_list = ctr_list; | 
|  |  | 
|  | /* | 
|  | * bind the counters | 
|  | */ | 
|  | size_t pgsz = sysconf (_SC_PAGESIZE); | 
|  | for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) | 
|  | { | 
|  | ctr_list[ii].last_overflow_period = global_perf_event_def[ii].hw.sample_period; | 
|  | if (start_one_ctr (ii, pgsz, pctx, "hwcdrv: ERROR: hwcdrv_start:")) goto hwcdrv_start_cleanup; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * start the counters | 
|  | */ | 
|  | for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) | 
|  | { | 
|  | int rc = internal_hwc_start (ctr_list[ii].fd); | 
|  | if (rc < 0) | 
|  | goto hwcdrv_start_cleanup; | 
|  | } | 
|  | return 0; | 
|  |  | 
|  | hwcdrv_start_cleanup: | 
|  | hwcdrv_free_counters (); // PERF_EVENT_IOC_DISABLE and close() for all fds | 
|  | return HWCFUNCS_ERROR_UNAVAIL; | 
|  | } | 
|  |  | 
|  | HWCDRV_API int | 
|  | hwcdrv_lwp_suspend (void) /* must be called from each thread */ | 
|  | { | 
|  | if (!COUNTERS_ENABLED ()) | 
|  | { | 
|  | TprintfT (DBG_LT1, "hwcdrv: WARNING: hwcdrv_lwp_suspend: no counters\n"); | 
|  | return 0; | 
|  | } | 
|  | TprintfT (DBG_LT1, "hwcdrv: hwcdrv_lwp_suspend()\n"); | 
|  | return hwcdrv_free_counters (); | 
|  | } | 
|  |  | 
|  | HWCDRV_API int | 
|  | hwcdrv_lwp_resume (void) /* must be called from each thread */ | 
|  | { | 
|  | if (!COUNTERS_ENABLED ()) | 
|  | { | 
|  | TprintfT (DBG_LT1, "hwcdrv: WARNING: hwcdrv_lwp_resume: no counters\n"); | 
|  | return 0; | 
|  | } | 
|  | TprintfT (DBG_LT1, "hwcdrv: hwcdrv_lwp_resume()\n"); | 
|  | return hwcdrv_start (); | 
|  | } | 
|  |  | 
|  | HWCDRV_API int | 
|  | hwcdrv_read_events (hwc_event_t *overflow_data, hwc_event_samples_t *sampled_data) | 
|  | { | 
|  | overflow_data->ce_hrt = 0; | 
|  | for (int i = 0; i < MAX_PICS; i++) | 
|  | { | 
|  | overflow_data->ce_pic[i] = 0; | 
|  | if (sampled_data) | 
|  | HWCFUNCS_SAMPLE_RESET (&sampled_data->sample[i]); | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | /*---------------------------------------------------------------------------*/ | 
|  | /* HWCDRV_API */ | 
|  |  | 
|  | hwcdrv_api_t hwcdrv_pcl_api = { | 
|  | hwcdrv_init, | 
|  | hwcdrv_get_info, | 
|  | hwcdrv_enable_mt, | 
|  | hwcdrv_get_descriptions, | 
|  | hwcdrv_assign_regnos, | 
|  | hwcdrv_create_counters, | 
|  | hwcdrv_start, | 
|  | hwcdrv_overflow, | 
|  | hwcdrv_read_events, | 
|  | hwcdrv_sighlr_restart, | 
|  | hwcdrv_lwp_suspend, | 
|  | hwcdrv_lwp_resume, | 
|  | hwcdrv_free_counters, | 
|  | hwcdrv_lwp_init, | 
|  | hwcdrv_lwp_fini, | 
|  | -1                      // hwcdrv_init_status | 
|  | }; |