| /* Copyright (C) 2021-2024 Free Software Foundation, Inc. |
| Contributed by Oracle. |
| |
| This file is part of GNU Binutils. |
| |
| This program is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3, or (at your option) |
| any later version. |
| |
| This program is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with this program; if not, write to the Free Software |
| Foundation, 51 Franklin Street - Fifth Floor, Boston, |
| MA 02110-1301, USA. */ |
| |
| #include <errno.h> |
| #include <unistd.h> |
| #include <fcntl.h> |
| #include <sys/mman.h> |
| #include <sys/ioctl.h> |
| #include <sys/syscall.h> |
| #include <linux/perf_event.h> |
| |
| #include "hwcdrv.h" |
| |
| /*---------------------------------------------------------------------------*/ |
| /* macros */ |
| #define IS_GLOBAL /* Mark global symbols */ |
| |
| #include "cpuid.c" /* ftns for identifying a chip */ |
| |
| static hdrv_pcbe_api_t *pcbe_driver = NULL; |
| static hdrv_pcbe_api_t hdrv_pcbe_core_api; |
| static hdrv_pcbe_api_t hdrv_pcbe_opteron_api; |
| static hdrv_pcbe_api_t *hdrv_pcbe_drivers[] = { |
| &hdrv_pcbe_core_api, |
| &hdrv_pcbe_opteron_api, |
| NULL |
| }; |
| #include "opteron_pcbe.c" /* CPU-specific code */ |
| #include "core_pcbe.c" /* CPU-specific code */ |
| |
| /*---------------------------------------------------------------------------*/ |
| static int |
| hwcdrv_lookup_cpuver (const char * cpcN_cciname) |
| { |
| /* returns hwc_cpus.h ID for a given string. */ |
| libcpc2_cpu_lookup_t *plookup; |
| static libcpc2_cpu_lookup_t cpu_table[] = { |
| LIBCPC2_CPU_LOOKUP_LIST |
| }; |
| if (cpcN_cciname == NULL) |
| return CPUVER_UNDEFINED; |
| |
| /* search table for name */ |
| for (plookup = cpu_table; plookup->cpc2_cciname; plookup++) |
| { |
| int n = strlen (plookup->cpc2_cciname); |
| if (!strncmp (plookup->cpc2_cciname, cpcN_cciname, n)) |
| return plookup->cpc2_cpuver; |
| } |
| /* unknown, but does have a descriptive string */ |
| TprintfT (DBG_LT0, "hwcfuncs: CPC2: WARNING: Id of processor '%s' " |
| "could not be determined\n", |
| cpcN_cciname); |
| return CPUVER_GENERIC; |
| } |
| |
| /*---------------------------------------------------------------------------*/ |
| /* utils to generate x86 register definitions on Linux */ |
| |
| /* |
| * This code is structured as though we're going to initialize the |
| * HWC by writing the Intel MSR register directly. That is, we |
| * assume the lowest 16 bits of the event number will have the event |
| * and that higher bits will set attributes. |
| * |
| * While SPARC is different, we can nonetheless use basically the |
| * same "x86"-named functions: |
| * |
| * - The event code will still be 16 bits. It will still |
| * be in the lowest 16 bits of the event number. Though |
| * perf_event_code() on SPARC will expect those bits to be |
| * shifted, hwcdrv_pcl.c can easily perform that shift. |
| * |
| * - On SPARC we support only two attributes, "user" and "system", |
| * which hwcdrv_pcl.c already converts to the "exclude_user" |
| * and "exclude_kernel" fields expected by perf_event_open(). |
| * "user" and "system" are stored in event bits 16 and 17. |
| * For M8, a 4-bit mask of supported PICs is stored in bits [23:20]. |
| */ |
| |
| static const attr_info_t perfctr_sparc_attrs[] = { |
| {NTXT ("user"), 0, 0x01, 16}, //usr |
| {NTXT ("system"), 0, 0x01, 17}, //os |
| {NULL, 0, 0x00, 0}, |
| }; |
| static const attr_info_t perfctr_x64_attrs[] = {/* ok for Core2 & later */ |
| {NTXT ("umask"), 0, 0xff, 8}, |
| {NTXT ("user"), 0, 0x01, 16}, //usr |
| //{NTXT("nouser"), 1, 0x01, 16}, //usr (inverted) |
| {NTXT ("system"), 0, 0x01, 17}, //os |
| {NTXT ("edge"), 0, 0x01, 18}, |
| {NTXT ("pc"), 0, 0x01, 19}, |
| {NTXT ("inv"), 0, 0x01, 23}, |
| {NTXT ("cmask"), 0, 0xff, 24}, |
| {NULL, 0, 0x00, 0}, |
| }; |
| const attr_info_t *perfctr_attrs_table = perfctr_x64_attrs; |
| |
| static const eventsel_t perfctr_evntsel_enable_bits = (0x01 << 16) | /* usr */ |
| // (0xff << 0) | /* event*/ |
| // (0xff << 8) | /* umask */ |
| // (0x01 << 17) | /* os */ |
| // (0x01 << 18) | /* edge */ |
| // (0x01 << 19) | /* pc */ |
| (0x01 << 20) | /* int */ |
| // (0x01 << 21) | /* reserved */ |
| (0x01 << 22) | /* enable */ |
| // (0x01 << 23) | /* inv */ |
| // (0xff << 24) | /* cmask */ |
| 0; |
| |
| static int |
| myperfctr_get_x86_eventnum (const char *eventname, uint_t pmc, |
| eventsel_t *eventsel, eventsel_t *valid_umask, |
| uint_t *pmc_sel) |
| { |
| if (pcbe_driver && pcbe_driver->hdrv_pcbe_get_eventnum && |
| !pcbe_driver->hdrv_pcbe_get_eventnum (eventname, pmc, eventsel, |
| valid_umask, pmc_sel)) |
| return 0; |
| |
| /* check for numerically-specified counters */ |
| char * endptr; |
| uint64_t num = strtoull (eventname, &endptr, 0); |
| if (*eventname && !*endptr) |
| { |
| *eventsel = EXTENDED_EVNUM_2_EVSEL (num); |
| *valid_umask = 0xff; /* allow any umask (unused for SPARC?) */ |
| *pmc_sel = pmc; |
| return 0; |
| } |
| |
| /* name does not specify a numeric value */ |
| *eventsel = (eventsel_t) - 1; |
| *valid_umask = 0x0; |
| *pmc_sel = pmc; |
| return -1; |
| } |
| |
| static int |
| mask_shift_set (eventsel_t *presult, eventsel_t invalue, |
| eventsel_t mask, eventsel_t shift) |
| { |
| if (invalue & ~mask) |
| return -1; /* invalue attempts to set bits outside of mask */ |
| *presult &= ~(mask << shift); /* clear all the mask bits */ |
| *presult |= (invalue << shift); /* set bits according to invalue */ |
| return 0; |
| } |
| |
| static int |
| set_x86_attr_bits (eventsel_t *result_mask, eventsel_t evnt_valid_umask, |
| hwcfuncs_attr_t attrs[], int nattrs, const char*nameOnly) |
| { |
| eventsel_t evntsel = *result_mask; |
| for (int ii = 0; ii < (int) nattrs; ii++) |
| { |
| const char *attrname = attrs[ii].ca_name; |
| eventsel_t attrval = (eventsel_t) attrs[ii].ca_val; |
| const char *tmpname; |
| int attr_found = 0; |
| for (int jj = 0; (tmpname = perfctr_attrs_table[jj].attrname); jj++) |
| { |
| if (strcmp (attrname, tmpname) == 0) |
| { |
| if (strcmp (attrname, "umask") == 0) |
| { |
| if (attrval & ~evnt_valid_umask) |
| { |
| logerr (GTXT ("for `%s', allowable umask bits are: 0x%llx\n"), |
| nameOnly, (long long) evnt_valid_umask); |
| return -1; |
| } |
| } |
| if (mask_shift_set (&evntsel, |
| perfctr_attrs_table[jj].is_inverted ? (attrval^1) : attrval, |
| perfctr_attrs_table[jj].mask, |
| perfctr_attrs_table[jj].shift)) |
| { |
| logerr (GTXT ("`%s' attribute `%s' could not be set to 0x%llx\n"), |
| nameOnly, attrname, (long long) attrval); |
| return -1; |
| } |
| TprintfT (DBG_LT2, "hwcfuncs: Counter %s, attribute %s set to 0x%llx\n", |
| nameOnly, attrname, (long long) attrval); |
| attr_found = 1; |
| break; |
| } |
| } |
| if (!attr_found) |
| { |
| logerr (GTXT ("attribute `%s' is invalid\n"), attrname); |
| return -1; |
| } |
| } |
| *result_mask = evntsel; |
| return 0; |
| } |
| |
| static int |
| hwcfuncs_get_x86_eventsel (Hwcentry *h, |
| eventsel_t *return_event, uint_t *return_pmc_sel) |
| { |
| hwcfuncs_attr_t attrs[HWCFUNCS_MAX_ATTRS + 1]; |
| unsigned nattrs = 0; |
| char *nameOnly = NULL; |
| eventsel_t evntsel = h->config; |
| eventsel_t evnt_valid_umask = 0; |
| uint_t pmc_sel = 0; |
| int rc = -1; |
| *return_event = 0; |
| *return_pmc_sel = 0; |
| void *attr_mem = hwcfuncs_parse_attrs (h->int_name, attrs, HWCFUNCS_MAX_ATTRS, |
| &nattrs, NULL); |
| if (!attr_mem) |
| { |
| logerr (GTXT ("out of memory, could not parse attributes\n")); |
| return -1; |
| } |
| hwcfuncs_parse_ctr (h->int_name, NULL, &nameOnly, NULL, NULL, NULL); |
| |
| /* look up evntsel */ |
| if (myperfctr_get_x86_eventnum (nameOnly, h->reg_num, |
| &evntsel, &evnt_valid_umask, &pmc_sel)) |
| { |
| logerr (GTXT ("counter `%s' is not valid\n"), nameOnly); |
| goto attr_wrapup; |
| } |
| TprintfT (DBG_LT1, "hwcfuncs: event=0x%llx pmc=0x%x '%s' nattrs = %u\n", |
| (long long) evntsel, pmc_sel, nameOnly, nattrs); |
| |
| /* determine event attributes */ |
| eventsel_t evnt_attrs = perfctr_evntsel_enable_bits; |
| if (set_x86_attr_bits (&evnt_attrs, evnt_valid_umask, attrs, nattrs, nameOnly)) |
| goto attr_wrapup; |
| if (evntsel & evnt_attrs) |
| TprintfT (DBG_LT0, "hwcfuncs: ERROR - evntsel & enable bits overlap: 0x%llx 0x%llx 0x%llx\n", |
| (long long) evntsel, (long long) evnt_attrs, |
| (long long) (evntsel & evnt_attrs)); |
| *return_event = evntsel | evnt_attrs; |
| *return_pmc_sel = pmc_sel; |
| rc = 0; |
| |
| attr_wrapup: |
| free (attr_mem); |
| free (nameOnly); |
| return rc; |
| } |
| |
/* Instruction and clobber list used by the inline-asm system call in
   hwcdrv_gettid ().  Neither macro is defined on other architectures.  */
#if defined(__x86_64__)
#define syscall_instr   "syscall"
#define syscall_clobber "rcx", "r11", "memory"
#elif defined(__i386__)
#define syscall_instr   "int $0x80"
#define syscall_clobber "memory"
#endif
| |
/* Thin wrapper around the perf_event_open system call.

   The call has been observed to fail spuriously while an immediate retry
   succeeds, so up to five attempts are made.  Returns the new descriptor,
   or -1 (the result of the last attempt) when every attempt failed.  */
static inline int
perf_event_open (struct perf_event_attr *hw_event_uptr, pid_t pid,
                 int cpu, int group_fd, unsigned long flags)
{
  int fd;
  int attempt = 0;
  do
    {
      fd = syscall (__NR_perf_event_open, hw_event_uptr, pid, cpu, group_fd, flags);
      if (fd != -1)
        break;
      TprintfT (0, "perf_event_open %d: errno=%d %s\n", attempt, errno, strerror(errno));
    }
  while (++attempt < 5);
  return fd;
}
| |
| /*---------------------------------------------------------------------------*/ |
| /* macros & fwd prototypes */ |
| |
/* Tag for the functions that make up the hwcdrv API; they have internal
   linkage in this file.  */
#define HWCDRV_API static

/* Needed by the per-thread init/fini hooks before their definitions.  */
HWCDRV_API int hwcdrv_free_counters ();
HWCDRV_API int hwcdrv_start (void);
| |
/* Return the kernel thread id of the calling thread.

   When built with both LIBCOLLECTOR_SRC and `intel' defined, the system
   call is issued directly with inline asm; every other configuration goes
   through syscall ().  */
static pid_t
hwcdrv_gettid (void)
{
#if defined(LIBCOLLECTOR_SRC) && defined(intel)
  pid_t tid;
  __asm__ __volatile__(syscall_instr
                       : "=a" (tid) : "0" (__NR_gettid)
                       : syscall_clobber);
  return tid;
#else
  /* FIXUP_XXX_SPARC_LINUX: inside libcollector on non-Intel targets this
     should eventually be written in asm too.  */
  return syscall (__NR_gettid);
#endif
}
| |
| /*---------------------------------------------------------------------------*/ |
| /* types */ |
| |
/* Number of data pages mapped per perf_event sample buffer (the metadata
   page is extra).  Must be a power of 2: read_buf () derives the ring
   offset by masking with the buffer size minus one.  */
#define NPAGES_PER_BUF 1
| |
| /*---------------------------------------------------------------------------*/ |
| |
| /* typedefs */ |
| |
| typedef struct |
| { // event (hwc) definition |
| unsigned int reg_num; // PMC assignment, potentially for detecting conflicts |
| eventsel_t eventsel; // raw event bits (Intel/AMD) |
| uint64_t counter_preload; // number of HWC events before signal |
| struct perf_event_attr hw; // perf_event definition |
| hrtime_t min_time; // minimum time we're targeting between events |
| char *name; |
| } perf_event_def_t; |
| static perf_event_def_t event_def_0; |
| |
| typedef struct |
| { // runtime state of perf_event buffer |
| void *buf; // pointer to mmapped buffer |
| size_t pagesz; // size of pages |
| } buffer_state_t; |
| |
| typedef struct |
| { // runtime state of counter values |
| uint64_t prev_ena_ts; // previous perf_event "enabled" time |
| uint64_t prev_run_ts; // previous perf_event "running" time |
| uint64_t prev_value; // previous HWC value |
| } counter_value_state_t; |
| |
| typedef struct |
| { // per-counter information |
| perf_event_def_t *ev_def; // global HWC definition for one counter |
| int fd; // perf_event fd |
| buffer_state_t buf_state; // perf_event buffer's state |
| counter_value_state_t value_state; // counter state |
| int needs_restart; // workaround for dbx failure to preserve si_fd |
| uint64_t last_overflow_period; |
| hrtime_t last_overflow_time; |
| } counter_state_t; |
| |
| typedef struct |
| { // per-thread context |
| counter_state_t *ctr_list; |
| int signal_fd; // fd that caused the most recent signal |
| pid_t tid; // for debugging signal delivery problems |
| } hdrv_pcl_ctx_t; |
| |
| /*---------------------------------------------------------------------------*/ |
| |
| /* static variables */ |
| static struct |
| { |
| int library_ok; |
| int internal_open_called; |
| hwcfuncs_tsd_get_fn_t find_vpc_ctx; |
| unsigned hwcdef_cnt; /* number of *active* hardware counters */ |
| } hdrv_pcl_state; |
| |
| static hwcdrv_about_t hdrv_pcl_about = {.cpcN_cpuver = CPUVER_UNDEFINED}; |
| static perf_event_def_t global_perf_event_def[MAX_PICS]; |
| |
| #define COUNTERS_ENABLED() (hdrv_pcl_state.hwcdef_cnt) |
| |
| |
| /* perf_event buffer formatting and handling */ |
| static void |
| reset_buf (buffer_state_t *bufstate) |
| { |
| TprintfT (0, "hwcdrv: ERROR: perf_event reset_buf() called!\n"); |
| struct perf_event_mmap_page *metadata = bufstate->buf; |
| if (metadata) |
| metadata->data_tail = metadata->data_head; |
| } |
| |
| static int |
| skip_buf (buffer_state_t *bufstate, size_t sz) |
| { |
| TprintfT (DBG_LT1, "hwcdrv: WARNING: perf_event skip_buf called!\n"); |
| struct perf_event_mmap_page *metadata = bufstate->buf; |
| if (metadata == NULL) |
| return -1; |
| size_t pgsz = bufstate->pagesz; |
| size_t bufsz = NPAGES_PER_BUF*pgsz; |
| uint64_t d_tail = metadata->data_tail; |
| uint64_t d_head = metadata->data_head; |
| |
| // validate request size |
| if (sz > d_head - d_tail || sz >= bufsz) |
| { |
| reset_buf (bufstate); |
| return -1; |
| } |
| metadata->data_tail = d_tail + sz; // advance tail |
| return 0; |
| } |
| |
| static int |
| read_buf (buffer_state_t *bufstate, void *buf, size_t sz) |
| { |
| struct perf_event_mmap_page *metadata = bufstate->buf; |
| if (metadata == NULL) |
| return -1; |
| size_t pgsz = bufstate->pagesz; |
| size_t bufsz = NPAGES_PER_BUF*pgsz; |
| uint64_t d_tail = metadata->data_tail; |
| uint64_t d_head = metadata->data_head; |
| |
| // validate request size |
| if (sz > d_head - d_tail || sz >= bufsz) |
| { |
| reset_buf (bufstate); |
| return -1; |
| } |
| char *buf_base = ((char *) metadata) + pgsz; // start of data buffer |
| uint64_t start_pos = d_tail & (bufsz - 1); // char offset into data buffer |
| size_t nbytes = sz; |
| if (start_pos + sz > bufsz) |
| { |
| // will wrap past end of buffer |
| nbytes = bufsz - start_pos; |
| memcpy (buf, buf_base + start_pos, nbytes); |
| start_pos = 0; // wrap to start |
| buf = (void *) (((char *) buf) + nbytes); |
| nbytes = sz - nbytes; |
| } |
| memcpy (buf, buf_base + start_pos, nbytes); |
| metadata->data_tail += sz; |
| return 0; |
| } |
| |
| static int |
| read_u64 (buffer_state_t *bufstate, uint64_t *value) |
| { |
| return read_buf (bufstate, value, sizeof (uint64_t)); |
| } |
| |
| static int |
| read_sample (counter_state_t *ctr_state, int msgsz, uint64_t *rvalue, |
| uint64_t *rlost) |
| { |
| // returns count of bytes read |
| buffer_state_t *bufstate = &ctr_state->buf_state; |
| counter_value_state_t *cntstate = &ctr_state->value_state; |
| int readsz = 0; |
| |
| // PERF_SAMPLE_IP |
| uint64_t ipc = 0; |
| int rc = read_u64 (bufstate, &ipc); |
| if (rc) |
| return -1; |
| readsz += sizeof (uint64_t); |
| |
| // PERF_SAMPLE_READ: value |
| uint64_t value = 0; |
| rc = read_u64 (bufstate, &value); |
| if (rc) |
| return -2; |
| readsz += sizeof (uint64_t); |
| |
| /* Bug 20806896 |
| * Old Linux kernels (e.g. 2.6.32) on certain systems return enabled and |
| * running times in the sample data that correspond to the metadata times |
| * metadata->time_enabled |
| * metadata->time_running |
| * from the PREVIOUS (not current) sample. Probably just ignore this bug |
| * since it's on old kernels and we only use the enabled and running times |
| * to construct loss_estimate. |
| */ |
| // PERF_SAMPLE_READ: PERF_FORMAT_ENABLED |
| uint64_t enabled_time = 0; |
| rc = read_u64 (bufstate, &enabled_time); |
| if (rc) |
| return -3; |
| readsz += sizeof (uint64_t); |
| |
| // PERF_SAMPLE_READ: PERF_FORMAT_RUNNING |
| uint64_t running_time = 0; |
| rc = read_u64 (bufstate, &running_time); |
| if (rc) |
| return -4; |
| readsz += sizeof (uint64_t); |
| |
| uint64_t value_delta = value - cntstate->prev_value; |
| uint64_t enabled_delta = enabled_time - cntstate->prev_ena_ts; |
| uint64_t running_delta = running_time - cntstate->prev_run_ts; |
| cntstate->prev_value = value; |
| cntstate->prev_ena_ts = enabled_time; |
| cntstate->prev_run_ts = running_time; |
| |
| // 24830461 need workaround for Linux anomalous HWC skid overrun |
| int set_error_flag = 0; |
| if (value_delta > 2 * ctr_state->last_overflow_period + 2000 /* HWC_SKID_TOLERANCE */) |
| set_error_flag = 1; |
| |
| uint64_t loss_estimate = 0; // estimate loss of events caused by multiplexing |
| if (running_delta == enabled_delta) |
| { |
| // counter was running 100% of time, no multiplexing |
| } |
| else if (running_delta == 0) |
| loss_estimate = 1; // token amount to aid in debugging perfctr oddities |
| else if ((running_delta > enabled_delta) || (enabled_delta & 0x1000000000000000ll)) |
| { |
| // running should be smaller than enabled, can't estimate |
| /* |
| * 21418391 HWC can have a negative count |
| * |
| * We've also seen enabled not only be smaller than running |
| * but in fact go negative. Guard against this. |
| */ |
| loss_estimate = 2; // token amount to aid in debugging perfctr oddities |
| } |
| else |
| { |
| // counter was running less than 100% of time |
| // Example: ena=7772268 run=6775669 raw_value=316004 scaled_value=362483 loss_est=46479 |
| uint64_t scaled_delta = (double) value_delta * enabled_delta / running_delta; |
| value_delta = scaled_delta; |
| #if 0 |
| // We should perhaps warn the user that multiplexing is going on, |
| // but hwcdrv_pcl.c doesn't know about the collector_interface, SP_JCMD_COMMENT, or COL_COMMENT_* values. |
| // For now we simply don't report. |
| // Perhaps we should address the issue not here but in the caller collector_sigemt_handler(), |
| // but at that level "lost" has a meaning that's considerably broader than just multiplexing. |
| collector_interface->writeLog ("<event kind=\"%s\" id=\"%d\">%s %d -> %d</event>\n", |
| SP_JCMD_COMMENT, COL_COMMENT_HWCADJ, global_perf_event_def[idx].name, |
| ctr_list[idx].last_overflow_period, new_period); |
| #endif |
| } |
| TprintfT ((loss_estimate || set_error_flag) ? DBG_LT1 : DBG_LT3, |
| "hwcdrv: '%s' ipc=0x%llx ena=%llu run=%llu " |
| "value_delta=%lld(0x%llx) loss_est=%llu %s error_flag='%s'\n", |
| ctr_state->ev_def->name, (long long) ipc, |
| (long long) enabled_delta, (long long) running_delta, |
| (long long) value_delta, (long long) value_delta, |
| (unsigned long long) loss_estimate, |
| loss_estimate ? ", WARNING - SCALED" : "", |
| set_error_flag ? ", ERRORFLAG" : ""); |
| if (set_error_flag == 1) |
| value_delta |= (1ULL << 63) /* HWCVAL_ERR_FLAG */; |
| *rvalue = value_delta; |
| *rlost = loss_estimate; |
| if (readsz != msgsz) |
| { |
| TprintfT (0, "hwcdrv: ERROR: perf_event sample not fully parsed\n"); |
| return -5; |
| } |
| return 0; |
| } |
| |
/* Debug aid: trace the main fields of AT, followed by one line for each
   listed flag that is non-zero.  Compiles to an empty function unless
   DEBUG is defined.  */
static void
dump_perf_event_attr (struct perf_event_attr *at)
{
#ifdef DEBUG
  TprintfT (DBG_LT2, "dump_perf_event_attr: size=%d type=%d sample_period=%lld\n"
            " config=0x%llx config1=0x%llx config2=0x%llx wakeup_events=%lld __reserved_1=%lld\n",
            (int) at->size, (int) at->type, (unsigned long long) at->sample_period,
            (unsigned long long) at->config, (unsigned long long) at->config1,
            (unsigned long long) at->config2, (unsigned long long) at->wakeup_events,
            (unsigned long long) at->__reserved_1);

  /* Trace a single field, but only when it is set.  */
#define DUMP_F(fld) if (at->fld) TprintfT(DBG_LT2, " %-10s : %lld\n", #fld, (long long) at->fld)
  DUMP_F (disabled);
  DUMP_F (inherit);
  DUMP_F (pinned);
  DUMP_F (exclusive);
  DUMP_F (exclude_user);
  DUMP_F (exclude_kernel);
  DUMP_F (exclude_hv);
  DUMP_F (exclude_idle);
  DUMP_F (comm);
  DUMP_F (freq);
  DUMP_F (inherit_stat);
  DUMP_F (enable_on_exec);
  DUMP_F (task);
  DUMP_F (watermark);
#endif /* DEBUG */
}
| |
| static void |
| init_perf_event (struct perf_event_attr *hw, uint64_t event, uint64_t period, |
| Hwcentry *hwce) |
| { |
| static struct perf_event_attr perf_event_attr_0 = { |
| .size = sizeof (struct perf_event_attr), |
| .disabled = 1, /* off by default */ |
| .exclude_hv = 1, |
| .wakeup_events = 1 /* wakeup every n events */ |
| }; |
| *hw = perf_event_attr_0; |
| if (hwce && hwce->use_perf_event_type) |
| { |
| hw->config = hwce->config; |
| hw->config1 = hwce->config1; |
| hw->type = hwce->type; |
| } |
| else |
| { // backward compatibility. The old interface had no 'hwce' argument. |
| hw->config = event; |
| hw->type = PERF_TYPE_RAW; // hw/sw/trace/raw... |
| } |
| hw->sample_period = period; |
| hw->sample_type = PERF_SAMPLE_IP | |
| // PERF_SAMPLE_TID | |
| // PERF_SAMPLE_TIME | // possibly interesting |
| // PERF_SAMPLE_ADDR | |
| PERF_SAMPLE_READ | // HWC value |
| // PERF_SAMPLE_CALLCHAIN | // interesting |
| // PERF_SAMPLE_ID | |
| // PERF_SAMPLE_CPU | // possibly interesting |
| // PERF_SAMPLE_PERIOD | |
| // PERF_SAMPLE_STREAM_ID | |
| // PERF_SAMPLE_RAW | |
| 0; |
| hw->read_format = |
| PERF_FORMAT_TOTAL_TIME_ENABLED | // detect when hwc not scheduled |
| PERF_FORMAT_TOTAL_TIME_RUNNING | // detect when hwc not scheduled |
| // PERF_FORMAT_ID | |
| // PERF_FORMAT_GROUP | |
| 0; |
| |
| // Note: the following override config.priv bits! |
| hw->exclude_user = (event & (1 << 16)) == 0; /* don't count user */ |
| hw->exclude_kernel = (event & (1 << 17)) == 0; /* ditto kernel */ |
| dump_perf_event_attr (hw); |
| } |
| |
| static int |
| start_one_ctr (int ii, size_t pgsz, hdrv_pcl_ctx_t * pctx, char *error_string) |
| { |
| // pe_attr should have been initialized in hwcdrv_create_counters() |
| struct perf_event_attr pe_attr; |
| memcpy (&pe_attr, &global_perf_event_def[ii].hw, sizeof (pe_attr)); |
| |
| // but we adjust the period, so make sure that pctx->ctr_list[ii].last_overflow_period has been set |
| pe_attr.sample_period = pctx->ctr_list[ii].last_overflow_period; |
| |
| int hwc_fd = perf_event_open (&pe_attr, pctx->tid, -1, -1, 0); |
| if (hwc_fd == -1) |
| { |
| TprintfT (DBG_LT1, "%s idx=%d perf_event_open failed, errno=%d\n", |
| error_string, ii, errno); |
| return 1; |
| } |
| |
| size_t buffer_area_sz = (NPAGES_PER_BUF + 1) * pgsz; // add a page for metadata |
| void * buf = mmap (NULL, buffer_area_sz, //YXXX is this a safe call? |
| PROT_READ | PROT_WRITE, MAP_SHARED, hwc_fd, 0); |
| if (buf == MAP_FAILED) |
| { |
| TprintfT (0, "sz = %ld, pgsz = %ld\n err=%s idx=%d mmap failed: %s\n", |
| (long) buffer_area_sz, (long) pgsz, error_string, ii, strerror (errno)); |
| return 1; |
| } |
| pctx->ctr_list[ii].ev_def = &global_perf_event_def[ii]; // why do we set ev_def? we never seem to use it |
| pctx->ctr_list[ii].fd = hwc_fd; |
| pctx->ctr_list[ii].buf_state.buf = buf; |
| pctx->ctr_list[ii].buf_state.pagesz = pgsz; |
| pctx->ctr_list[ii].value_state.prev_ena_ts = 0; |
| pctx->ctr_list[ii].value_state.prev_run_ts = 0; |
| pctx->ctr_list[ii].value_state.prev_value = 0; |
| pctx->ctr_list[ii].last_overflow_time = gethrtime (); |
| |
| /* set async mode */ |
| long flags = fcntl (hwc_fd, F_GETFL, 0) | O_ASYNC; |
| int rc = fcntl (hwc_fd, F_SETFL, flags); |
| if (rc == -1) |
| { |
| TprintfT (0, "%s idx=%d O_ASYNC failed\n", error_string, ii); |
| return 1; |
| } |
| |
| /* |
| * set lwp ownership of the fd |
| * See BUGS section of "man perf_event_open": |
| * The F_SETOWN_EX option to fcntl(2) is needed to properly get |
| * overflow signals in threads. This was introduced in Linux 2.6.32. |
| * Legacy references: |
| * see http://lkml.org/lkml/2009/8/4/128 |
| * google man fcntl F_SETOWN_EX -conflict |
| * "From Linux 2.6.32 onward, use F_SETOWN_EX to target |
| * SIGIO and SIGURG signals at a particular thread." |
| * http://icl.cs.utk.edu/papi/docs/da/d2a/examples__v2_8x_2self__smpl__multi_8c.html |
| * See 2010 CSCADS presentation by Eranian |
| */ |
| struct f_owner_ex fowner_ex; |
| fowner_ex.type = F_OWNER_TID; |
| fowner_ex.pid = pctx->tid; |
| rc = fcntl (hwc_fd, F_SETOWN_EX, (unsigned long) &fowner_ex); |
| if (rc == -1) |
| { |
| TprintfT (0, "%s idx=%d F_SETOWN failed\n", error_string, ii); |
| return 1; |
| } |
| |
| /* Use sigio so handler can determine FD via siginfo->si_fd. */ |
| rc = fcntl (hwc_fd, F_SETSIG, SIGIO); |
| if (rc == -1) |
| { |
| TprintfT (0, "%s idx=%d F_SETSIG failed\n", error_string, ii); |
| return 1; |
| } |
| return 0; |
| } |
| |
| static int |
| stop_one_ctr (int ii, counter_state_t *ctr_list) |
| { |
| int hwc_rc = 0; |
| if (-1 == ioctl (ctr_list[ii].fd, PERF_EVENT_IOC_DISABLE, 1)) |
| { |
| TprintfT (0, "hwcdrv: ERROR: PERF_EVENT_IOC_DISABLE #%d failed: errno=%d\n", ii, errno); |
| hwc_rc = HWCFUNCS_ERROR_GENERIC; |
| } |
| void *buf = ctr_list[ii].buf_state.buf; |
| if (buf) |
| { |
| size_t bufsz = (NPAGES_PER_BUF + 1) * ctr_list[ii].buf_state.pagesz; |
| ctr_list[ii].buf_state.buf = NULL; |
| int tmprc = munmap (buf, bufsz); |
| if (tmprc) |
| { |
| TprintfT (0, "hwcdrv: ERROR: munmap() #%d failed: errno=%d\n", ii, errno); |
| hwc_rc = HWCFUNCS_ERROR_GENERIC; |
| } |
| } |
| if (-1 == close (ctr_list[ii].fd)) |
| { |
| TprintfT (0, "hwcdrv: ERROR: close(fd) #%d failed: errno=%d\n", ii, errno); |
| hwc_rc = HWCFUNCS_ERROR_GENERIC; |
| } |
| return hwc_rc; |
| } |
| |
| /* HWCDRV_API for thread-specific actions */ |
| HWCDRV_API int |
| hwcdrv_lwp_init (void) |
| { |
| return hwcdrv_start (); |
| } |
| |
| HWCDRV_API void |
| hwcdrv_lwp_fini (void) |
| { |
| hwcdrv_free_counters (); /* also sets pctx->ctr_list=NULL; */ |
| } |
| |
| /* open */ |
| static int |
| hdrv_pcl_internal_open () |
| { |
| if (hdrv_pcl_state.internal_open_called) |
| { |
| TprintfT (0, "hwcdrv: WARNING: hdrv_pcl_internal_open: already called\n"); |
| return HWCFUNCS_ERROR_ALREADY_CALLED; |
| } |
| |
| // determine if PCL is available |
| perf_event_def_t tmp_event_def = event_def_0; |
| struct perf_event_attr *pe_attr = &tmp_event_def.hw; |
| init_perf_event (pe_attr, 0, 0, NULL); |
| pe_attr->type = PERF_TYPE_HARDWARE; // specify abstracted HW event |
| pe_attr->config = PERF_COUNT_HW_INSTRUCTIONS; // specify abstracted insts |
| int hwc_fd = perf_event_open (pe_attr, |
| 0, // pid/tid, 0 is self |
| -1, // cpu, -1 is per-thread mode |
| -1, // group_fd, -1 is root |
| 0); // flags |
| if (hwc_fd == -1) |
| { |
| TprintfT (DBG_LT1, "hwcdrv: WARNING: hdrv_pcl_internal_open:" |
| " perf_event_open() failed, errno=%d\n", errno); |
| goto internal_open_error; |
| } |
| |
| /* see if the PCL is new enough to know about F_SETOWN_EX */ |
| struct f_owner_ex fowner_ex; |
| fowner_ex.type = F_OWNER_TID; |
| fowner_ex.pid = hwcdrv_gettid (); // "pid=tid" is correct w/F_OWNER_TID |
| if (fcntl (hwc_fd, F_SETOWN_EX, (unsigned long) &fowner_ex) == -1) |
| { |
| TprintfT (DBG_LT1, "hwcdrv: WARNING: hdrv_pcl_internal_open: " |
| "F_SETOWN failed, errno=%d\n", errno); |
| close (hwc_fd); |
| goto internal_open_error; |
| } |
| close (hwc_fd); |
| |
| hdrv_pcl_state.internal_open_called = 1; |
| hdrv_pcl_state.library_ok = 1; // set to non-zero to show it's initted |
| hdrv_pcl_about.cpcN_cpuver = CPUVER_UNDEFINED; |
| TprintfT (DBG_LT2, "hwcdrv: hdrv_pcl_internal_open()\n"); |
| for (int ii = 0; hdrv_pcbe_drivers[ii]; ii++) |
| { |
| hdrv_pcbe_api_t *ppcbe = hdrv_pcbe_drivers[ii]; |
| if (!ppcbe->hdrv_pcbe_init ()) |
| { |
| pcbe_driver = ppcbe; |
| hdrv_pcl_about.cpcN_cciname = ppcbe->hdrv_pcbe_impl_name (); |
| hdrv_pcl_about.cpcN_cpuver = hwcdrv_lookup_cpuver (hdrv_pcl_about.cpcN_cciname); |
| if (hdrv_pcl_about.cpcN_cpuver == CPUVER_UNDEFINED) |
| goto internal_open_error; |
| hdrv_pcl_about.cpcN_npics = ppcbe->hdrv_pcbe_ncounters (); |
| hdrv_pcl_about.cpcN_docref = ppcbe->hdrv_pcbe_cpuref (); |
| break; |
| } |
| } |
| if (hdrv_pcl_about.cpcN_npics > MAX_PICS) |
| { |
| TprintfT (0, "hwcdrv: WARNING: hdrv_pcl_internal_open:" |
| " reducing number of HWCs from %u to %u on processor '%s'\n", |
| hdrv_pcl_about.cpcN_npics, MAX_PICS, hdrv_pcl_about.cpcN_cciname); |
| hdrv_pcl_about.cpcN_npics = MAX_PICS; |
| } |
| TprintfT (DBG_LT1, "hwcdrv: hdrv_pcl_internal_open:" |
| " perf_event cpuver=%d, name='%s'\n", |
| hdrv_pcl_about.cpcN_cpuver, hdrv_pcl_about.cpcN_cciname); |
| return 0; |
| |
| internal_open_error: |
| hdrv_pcl_about.cpcN_cpuver = CPUVER_UNDEFINED; |
| hdrv_pcl_about.cpcN_npics = 0; |
| hdrv_pcl_about.cpcN_docref = NULL; |
| hdrv_pcl_about.cpcN_cciname = NULL; |
| return HWCFUNCS_ERROR_NOT_SUPPORTED; |
| } |
| |
| static void * |
| single_thread_tsd_ftn () |
| { |
| static hdrv_pcl_ctx_t tsd_context; |
| return &tsd_context; |
| } |
| |
| /* HWCDRV_API */ |
| HWCDRV_API int |
| hwcdrv_init (hwcfuncs_abort_fn_t abort_ftn, int *tsd_sz) |
| { |
| hdrv_pcl_state.find_vpc_ctx = single_thread_tsd_ftn; |
| if (tsd_sz) |
| *tsd_sz = sizeof (hdrv_pcl_ctx_t); |
| |
| if (hdrv_pcl_state.internal_open_called) |
| return HWCFUNCS_ERROR_ALREADY_CALLED; |
| return hdrv_pcl_internal_open (); |
| } |
| |
| HWCDRV_API void |
| hwcdrv_get_info (int *cpuver, const char **cciname, uint_t *npics, |
| const char **docref, uint64_t *support) |
| { |
| if (cpuver) |
| *cpuver = hdrv_pcl_about.cpcN_cpuver; |
| if (cciname) |
| *cciname = hdrv_pcl_about.cpcN_cciname; |
| if (npics) |
| *npics = hdrv_pcl_about.cpcN_npics; |
| if (docref) |
| *docref = hdrv_pcl_about.cpcN_docref; |
| if (support) |
| *support = HWCFUNCS_SUPPORT_OVERFLOW_PROFILING | HWCFUNCS_SUPPORT_OVERFLOW_CTR_ID; |
| } |
| |
| HWCDRV_API int |
| hwcdrv_enable_mt (hwcfuncs_tsd_get_fn_t tsd_ftn) |
| { |
| if (tsd_ftn) |
| hdrv_pcl_state.find_vpc_ctx = tsd_ftn; |
| else |
| { |
| TprintfT (0, "hwcdrv: ERROR: enable_mt(): tsd_ftn==NULL\n"); |
| return HWCFUNCS_ERROR_UNAVAIL; |
| } |
| return 0; |
| } |
| |
| HWCDRV_API int |
| hwcdrv_get_descriptions (hwcf_hwc_cb_t *hwc_cb, hwcf_attr_cb_t *attr_cb, |
| Hwcentry *raw_hwc_tbl) |
| { |
| int count = 0; |
| if (hwc_cb && pcbe_driver && pcbe_driver->hdrv_pcbe_get_events) |
| count = pcbe_driver->hdrv_pcbe_get_events (hwc_cb, raw_hwc_tbl); |
| if (attr_cb) |
| for (int ii = 0; perfctr_attrs_table && perfctr_attrs_table[ii].attrname; ii++) |
| attr_cb (perfctr_attrs_table[ii].attrname); |
| if (!count) |
| return -1; |
| return 0; |
| } |
| |
| HWCDRV_API int |
| hwcdrv_assign_regnos (Hwcentry* entries[], unsigned numctrs) |
| { |
| return 0; |
| } |
| |
| static int |
| internal_hwc_start (int fd) |
| { |
| int rc = ioctl (fd, PERF_EVENT_IOC_REFRESH, 1); |
| if (rc == -1) |
| { |
| TprintfT (DBG_LT0, "hwcdrv: ERROR: internal_hwc_start:" |
| " PERF_EVENT_IOC_REFRESH(fd=%d) failed: errno=%d\n", fd, errno); |
| return HWCFUNCS_ERROR_UNAVAIL; |
| } |
| TprintfT (DBG_LT3, "hwcdrv: internal_hwc_start(fd=%d)\n", fd); |
| return 0; |
| } |
| |
| HWCDRV_API int |
| hwcdrv_overflow (siginfo_t *si, hwc_event_t *eventp, hwc_event_t *lost_events) |
| { |
| /* set expired counters to overflow value and all others to 0 */ |
| /* return 0: OK, counters should be restarted */ |
| /* return non-zero: eventp not set, counters should not be restarted */ |
| /* clear return values */ |
| int ii; |
| for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) |
| { |
| eventp->ce_pic[ii] = 0; |
| lost_events->ce_pic[ii] = 0; |
| } |
| hrtime_t sig_ts = gethrtime (); //YXXX get this from HWC event? |
| eventp->ce_hrt = sig_ts; |
| lost_events->ce_hrt = sig_ts; |
| |
| /* determine source signal */ |
| int signal_fd = -1; |
| switch (si->si_code) |
| { |
| case POLL_HUP: /* expected value from pcl */ |
| /* According to Stephane Eranian: |
| * "expect POLL_HUP instead of POLL_IN because we are |
| * in one-shot mode (IOC_REFRESH)" |
| */ |
| signal_fd = si->si_fd; |
| break; |
| case SI_TKILL: /* event forwarded by tkill */ |
| /* DBX can only forward SI_TKILL when it detects POLL_HUP |
| * unfortunately, this means that si->si_fd has been lost... |
| * We need to process the buffers, but we don't know the fd! |
| */ |
| TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:" |
| " SI_TKILL detected\n", sig_ts); |
| break; |
| default: |
| // "sometimes we see a POLL_IN (1) with very high event rates," |
| // according to eranian(?) |
| TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:" |
| " unexpected si_code 0x%x\n", sig_ts, si->si_code); |
| return HWCFUNCS_ERROR_GENERIC; |
| } |
| |
| hdrv_pcl_ctx_t * pctx = hdrv_pcl_state.find_vpc_ctx (); |
| if (!pctx) |
| { |
| TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:" |
| " tsd context is NULL\n", sig_ts); |
| return HWCFUNCS_ERROR_UNEXPECTED; |
| } |
| counter_state_t * ctr_list = (counter_state_t *) pctx->ctr_list; |
| if (!ctr_list) |
| { |
| TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:" |
| " ctr_list is NULL\n", sig_ts); |
| return HWCFUNCS_ERROR_UNEXPECTED; |
| } |
| |
| /* clear needs_restart flag */ |
| for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) |
| ctr_list[ii].needs_restart = 0; |
| |
| /* attempt to identify the counter to read */ |
| int signal_idx = -1; |
| pctx->signal_fd = signal_fd; // save the signal provided by siginfo_t |
| if (signal_fd != -1) |
| { |
| for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) |
| { |
| if (ctr_list[ii].fd == signal_fd) |
| { |
| signal_idx = ii; |
| break; |
| } |
| } |
| } |
| |
| if (signal_idx < 0) |
| { |
| TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:" |
| " pmc not determined!\n", sig_ts); |
| lost_events->ce_pic[0] = 1; /* record a bogus value into experiment */ |
| // note: bogus value may get overwritten in loop below |
| } |
| |
| /* capture sample(s). In addition to signal_idx, check other counters. */ |
| struct perf_event_header sheader; |
| int idx; |
| for (idx = 0; idx < hdrv_pcl_state.hwcdef_cnt; idx++) |
| { |
| int num_recs = 0; |
| while (1) |
| { |
| /* check for samples */ |
| struct perf_event_mmap_page *metadata = ctr_list[idx].buf_state.buf; |
| if (metadata == NULL) |
| break; // empty |
| if (metadata->data_tail == metadata->data_head) |
| break; // empty |
| |
| /* read header */ |
| if (read_buf (&ctr_list[idx].buf_state, &sheader, sizeof (sheader))) |
| break; |
| num_recs++; |
| |
| /* check for PERF_RECORD_SAMPLE */ |
| size_t datasz = sheader.size - sizeof (struct perf_event_header); |
| if (sheader.type != PERF_RECORD_SAMPLE) |
| { |
| TprintfT (DBG_LT2, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:" |
| " unexpected recd type=%d\n", |
| sig_ts, sheader.type); |
| if (skip_buf (&ctr_list[idx].buf_state, datasz)) |
| { |
| TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:" |
| " skip recd type=%d failed\n", sig_ts, sheader.type); |
| lost_events->ce_pic[idx] = 4; /* record a bogus value */ |
| break; // failed to skip buffer?? |
| } |
| lost_events->ce_pic[idx] = 2; /* record a bogus value */ |
| continue; // advance to next record |
| } |
| |
| /* type is PERF_RECORD_SAMPLE */ |
| uint64_t value, lostv; |
| if (read_sample (&ctr_list[idx], datasz, &value, &lostv)) |
| { |
| TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:" |
| " read_sample() failed\n", sig_ts); |
| lost_events->ce_pic[idx] = 3; // record a bogus value |
| break; // failed to read sample data?? |
| } |
| TprintfT (DBG_LT3, "hwcdrv: sig_ts=%llu: hwcdrv_overflow:" |
| " idx=%d value=%llu lost=%llu\n", (unsigned long long) sig_ts, |
| idx, (unsigned long long) value, (unsigned long long) lostv); |
| if (eventp->ce_pic[idx]) |
| { |
| TprintfT (DBG_LT2, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:" |
| " idx=%d previous sample recorded as lost_event\n", sig_ts, idx); |
| lost_events->ce_pic[idx] += eventp->ce_pic[idx]; |
| } |
| eventp->ce_pic[idx] = value; |
| lost_events->ce_pic[idx] += lostv; |
| } |
| |
| /* debug output for unexpected (but common) cases */ |
| if (idx == signal_idx) |
| { |
| if (num_recs != 1) |
| TprintfT (DBG_LT2, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:" |
| " %d records for signal_idx=%d\n", sig_ts, num_recs, signal_idx); |
| } |
| else if (num_recs) |
| TprintfT (DBG_LT2, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:" |
| " %d unexpected record(s) for idx=%d (signal_idx=%d)\n", |
| sig_ts, num_recs, idx, signal_idx); |
| |
| /* trigger counter restart whenever records were found */ |
| if (num_recs) |
| { |
| /* check whether to adapt the overflow interval */ |
| /* This is the Linux version. |
| * The Solaris version is in hwprofile.c collector_update_overflow_counters(). |
| */ |
| hrtime_t min_time = global_perf_event_def[idx].min_time; |
| if (min_time > 0 // overflow interval is adaptive |
| && sig_ts - ctr_list[idx].last_overflow_time < min_time) // last interval below min |
| { |
| /* pick a new overflow interval */ |
| /* roughly doubled, but add funny numbers */ |
| /* hopefully the result is prime or not a multiple of some # of ops/loop */ |
| uint64_t new_period = 2 * ctr_list[idx].last_overflow_period + 37; |
| #if 0 |
| // On Solaris, we report the adjustment to the log file. |
| // On Linux it's hard for us to do so since hwcdrv_pcl.c doesn't know about collector_interface, SP_JCMD_COMMENT, or COL_COMMENT_HWCADJ. |
| // For now we simply don't report. |
| collector_interface->writeLog ("<event kind=\"%s\" id=\"%d\">%s %d -> %d</event>\n", |
| SP_JCMD_COMMENT, COL_COMMENT_HWCADJ, global_perf_event_def[idx].name, |
| ctr_list[idx].last_overflow_period, new_period); |
| #endif |
| /* There are a variety of ways of resetting the period on Linux. |
| * The most elegant is |
| * ioctl(fd,PERF_EVENT_IOC_PERIOD,&period) |
| * but check the perf_event_open man page for PERF_EVENT_IOC_PERIOD: |
| * > Prior to Linux 2.6.36 this ioctl always failed due to a bug in the kernel. |
| * > Prior to Linux 3.14 (or 3.7 on ARM), the new period did not take effect |
| * until after the next overflow. |
| * So we're kind of stuck shutting the fd down and restarting it with the new period. |
| */ |
| if (stop_one_ctr (idx, ctr_list)) |
| { |
| // EUGENE figure out what to do on error |
| } |
| ctr_list[idx].last_overflow_period = new_period; |
| if (start_one_ctr (idx, ctr_list[idx].buf_state.pagesz, pctx, "hwcdrv: ERROR: hwcdrv_overflow (readjust overflow):")) |
| { |
| // EUGENE figure out what to do on error |
| } |
| } |
| ctr_list[idx].last_overflow_time = sig_ts; |
| #if 0 |
| ctr_list[idx].needs_restart = 1; |
| #else // seems to be more reliable to restart here instead of hwcdrv_sighlr_restart() |
| internal_hwc_start (ctr_list[idx].fd); |
| #endif |
| } |
| } |
| return 0; // OK to restart counters |
| } |
| |
| HWCDRV_API int |
| hwcdrv_sighlr_restart (const hwc_event_t *pp) |
| { |
| #if 0 // restarting here doesn't seem to work as well as restarting in hwcdrv_overflow() |
| hdrv_pcl_ctx_t * pctx = hdrv_pcl_state.find_vpc_ctx (); |
| if (!pctx) |
| { |
| TprintfT (DBG_LT0, "hwcdrv: ERROR: hwcdrv_sighlr_restart: find_vpc_ctx()==NULL\n"); |
| return -1; |
| } |
| counter_state_t * ctr_list = (counter_state_t *) pctx->ctr_list; |
| if (!ctr_list) |
| { |
| TprintfT (DBG_LT0, "hwcdrv: WARNING: hwcdrv_sighlr_restart: ctr_list is NULL\n"); |
| return -1; |
| } |
| int errors = 0; |
| for (int ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) |
| { |
| if (ctr_list[ii].needs_restart) |
| errors |= internal_hwc_start (ctr_list[ii].fd); |
| ctr_list[ii].needs_restart = 0; |
| } |
| return errors; |
| #else |
| return 0; |
| #endif |
| } |
| |
| /* create counters based on hwcdef[] */ |
| HWCDRV_API int |
| hwcdrv_create_counters (unsigned hwcdef_cnt, Hwcentry *hwcdef) |
| { |
| if (hwcdef_cnt > hdrv_pcl_about.cpcN_npics) |
| { |
| logerr (GTXT ("More than %d counters were specified\n"), hdrv_pcl_about.cpcN_npics); /*!*/ |
| return HWCFUNCS_ERROR_HWCARGS; |
| } |
| if (hdrv_pcl_about.cpcN_cpuver == CPUVER_UNDEFINED) |
| { |
| logerr (GTXT ("Processor not supported\n")); |
| return HWCFUNCS_ERROR_HWCARGS; |
| } |
| |
| /* add counters */ |
| for (unsigned idx = 0; idx < hwcdef_cnt; idx++) |
| { |
| perf_event_def_t *glb_event_def = &global_perf_event_def[idx]; |
| *glb_event_def = event_def_0; |
| unsigned int pmc_sel; |
| eventsel_t evntsel; |
| if (hwcfuncs_get_x86_eventsel (hwcdef + idx, &evntsel, &pmc_sel)) |
| { |
| TprintfT (0, "hwcdrv: ERROR: hwcfuncs_get_x86_eventsel() failed\n"); |
| return HWCFUNCS_ERROR_HWCARGS; |
| } |
| glb_event_def->reg_num = pmc_sel; |
| glb_event_def->eventsel = evntsel; |
| glb_event_def->counter_preload = hwcdef[idx].val; |
| glb_event_def->min_time = hwcdef[idx].min_time; |
| glb_event_def->name = strdup (hwcdef[idx].name); // memory leak??? very minor |
| init_perf_event (&glb_event_def->hw, glb_event_def->eventsel, |
| glb_event_def->counter_preload, hwcdef + idx); |
| TprintfT (DBG_LT1, "hwcdrv: create_counters: pic=%u name='%s' interval=%lld" |
| "(min_time=%lld): reg_num=0x%x eventsel=0x%llx ireset=%lld usr=%lld sys=%lld\n", |
| idx, hwcdef[idx].int_name, (long long) glb_event_def->counter_preload, |
| (long long) glb_event_def->min_time, (int) glb_event_def->reg_num, |
| (long long) glb_event_def->eventsel, |
| (long long) HW_INTERVAL_PRESET (hwcdef[idx].val), |
| (long long) glb_event_def->hw.exclude_user, |
| (long long) glb_event_def->hw.exclude_kernel); |
| } |
| |
| hdrv_pcl_state.hwcdef_cnt = hwcdef_cnt; |
| return 0; |
| } |
| |
| HWCDRV_API int |
| hwcdrv_free_counters () // note: only performs shutdown for this thread |
| { |
| hdrv_pcl_ctx_t * pctx; |
| if (!COUNTERS_ENABLED ()) |
| return 0; |
| pctx = hdrv_pcl_state.find_vpc_ctx (); |
| if (!pctx) |
| { |
| TprintfT (0, "hwcdrv: WARNING: hwcdrv_free_counters: tsd context is NULL\n"); |
| return HWCFUNCS_ERROR_GENERIC; |
| } |
| counter_state_t *ctr_list = pctx->ctr_list; |
| if (!ctr_list) |
| { |
| // fork child: prolog suspends hwcs, then epilog frees them |
| TprintfT (DBG_LT1, "hwcdrv: WARNING: hwcdrv_free_counters: ctr_list is already NULL\n"); |
| return 0; |
| } |
| int hwc_rc = 0; |
| for (int ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) |
| if (stop_one_ctr (ii, ctr_list)) |
| hwc_rc = HWCFUNCS_ERROR_GENERIC; |
| TprintfT (DBG_LT1, "hwcdrv: hwcdrv_free_counters(tid=0x%lx).\n", (long) pctx->tid); |
| pctx->ctr_list = NULL; |
| return hwc_rc; |
| } |
| |
| HWCDRV_API int |
| hwcdrv_start (void) /* must be called from each thread ? */ |
| { |
| hdrv_pcl_ctx_t *pctx = NULL; |
| if (!COUNTERS_ENABLED ()) |
| { |
| TprintfT (DBG_LT1, "hwcdrv: WARNING: hwcdrv_start: no counters to start \n"); |
| return 0; |
| } |
| if (!hdrv_pcl_state.library_ok) |
| { |
| TprintfT (0, "hwcdrv: ERROR: hwcdrv_start: library is not open\n"); |
| return HWCFUNCS_ERROR_NOT_SUPPORTED; |
| } |
| |
| /* |
| * set up per-thread context |
| */ |
| pctx = hdrv_pcl_state.find_vpc_ctx (); |
| if (!pctx) |
| { |
| TprintfT (0, "hwcdrv: ERROR: hwcdrv_start: tsd context is NULL\n"); |
| return HWCFUNCS_ERROR_UNEXPECTED; |
| } |
| pctx->tid = hwcdrv_gettid (); |
| TprintfT (DBG_LT1, "hwcdrv: hwcdrv_start(tid=0x%lx)\n", (long) pctx->tid); |
| |
| /* |
| * create per-thread counter list |
| */ |
| counter_state_t *ctr_list = (counter_state_t *) calloc (hdrv_pcl_state.hwcdef_cnt, |
| sizeof (counter_state_t)); |
| if (!ctr_list) |
| { |
| TprintfT (0, "hwcdrv: ERROR: hwcdrv_start: calloc(ctr_list) failed\n"); |
| return HWCFUNCS_ERROR_MEMORY; |
| } |
| int ii; |
| for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) |
| ctr_list[ii].fd = -1; // invalidate fds in case we have to close prematurely |
| pctx->ctr_list = ctr_list; |
| |
| /* |
| * bind the counters |
| */ |
| size_t pgsz = sysconf (_SC_PAGESIZE); |
| for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) |
| { |
| ctr_list[ii].last_overflow_period = global_perf_event_def[ii].hw.sample_period; |
| if (start_one_ctr (ii, pgsz, pctx, "hwcdrv: ERROR: hwcdrv_start:")) goto hwcdrv_start_cleanup; |
| } |
| |
| /* |
| * start the counters |
| */ |
| for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) |
| { |
| int rc = internal_hwc_start (ctr_list[ii].fd); |
| if (rc < 0) |
| goto hwcdrv_start_cleanup; |
| } |
| return 0; |
| |
| hwcdrv_start_cleanup: |
| hwcdrv_free_counters (); // PERF_EVENT_IOC_DISABLE and close() for all fds |
| return HWCFUNCS_ERROR_UNAVAIL; |
| } |
| |
| HWCDRV_API int |
| hwcdrv_lwp_suspend (void) /* must be called from each thread */ |
| { |
| if (!COUNTERS_ENABLED ()) |
| { |
| TprintfT (DBG_LT1, "hwcdrv: WARNING: hwcdrv_lwp_suspend: no counters\n"); |
| return 0; |
| } |
| TprintfT (DBG_LT1, "hwcdrv: hwcdrv_lwp_suspend()\n"); |
| return hwcdrv_free_counters (); |
| } |
| |
| HWCDRV_API int |
| hwcdrv_lwp_resume (void) /* must be called from each thread */ |
| { |
| if (!COUNTERS_ENABLED ()) |
| { |
| TprintfT (DBG_LT1, "hwcdrv: WARNING: hwcdrv_lwp_resume: no counters\n"); |
| return 0; |
| } |
| TprintfT (DBG_LT1, "hwcdrv: hwcdrv_lwp_resume()\n"); |
| return hwcdrv_start (); |
| } |
| |
| HWCDRV_API int |
| hwcdrv_read_events (hwc_event_t *overflow_data, hwc_event_samples_t *sampled_data) |
| { |
| overflow_data->ce_hrt = 0; |
| for (int i = 0; i < MAX_PICS; i++) |
| { |
| overflow_data->ce_pic[i] = 0; |
| if (sampled_data) |
| HWCFUNCS_SAMPLE_RESET (&sampled_data->sample[i]); |
| } |
| return 0; |
| } |
| |
| /*---------------------------------------------------------------------------*/ |
| /* HWCDRV_API */ |
| |
/* Table of this driver's entry points, followed by the initial value of
   the init-status field.  The initializers are positional, so their order
   has to follow the member order of hwcdrv_api_t (declared outside this
   part of the file -- keep the two in sync when either changes).  */
hwcdrv_api_t hwcdrv_pcl_api = {
  hwcdrv_init,
  hwcdrv_get_info,
  hwcdrv_enable_mt,
  hwcdrv_get_descriptions,
  hwcdrv_assign_regnos,
  hwcdrv_create_counters,
  hwcdrv_start,
  hwcdrv_overflow,
  hwcdrv_read_events,
  hwcdrv_sighlr_restart,
  hwcdrv_lwp_suspend,
  hwcdrv_lwp_resume,
  hwcdrv_free_counters,
  hwcdrv_lwp_init,
  hwcdrv_lwp_fini,
  -1 // hwcdrv_init_status
};