| /* |
| Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved. |
| |
| Redistribution and use in source and binary forms, with or without |
| modification, are permitted provided that the following conditions |
| are met: |
| |
| * Redistributions of source code must retain the above copyright |
| notice, this list of conditions and the following disclaimer. |
| * Redistributions in binary form must reproduce the above copyright |
| notice, this list of conditions and the following disclaimer in the |
| documentation and/or other materials provided with the distribution. |
| * Neither the name of Intel Corporation nor the names of its |
| contributors may be used to endorse or promote products derived |
| from this software without specific prior written permission. |
| |
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| |
| #include "offload_target.h" |
| #include <stdlib.h> |
| #include <unistd.h> |
| #ifdef SEP_SUPPORT |
| #include <fcntl.h> |
| #include <sys/ioctl.h> |
| #endif // SEP_SUPPORT |
| #include <omp.h> |
| #include <map> |
| |
// typedef offload_func_with_parms.
// Pointer to function that represents an offloaded entry point.
// OffloadDescriptor::offload() casts the address returned by
// __offload_entries.find_addr() to this type and invokes it with a pointer
// to its local OffloadDescriptor as the single argument.
// The parameters are a temporary fix for parameters on the stack.
typedef void (*offload_func_with_parms)(void *);
| |
// Target console and file logging

// Message prefix; assigned in __offload_target_init() from
// report_get_message_str(c_report_mic).
const char *prefix;
// Console logging setting; overwritten at the start of every offload from the
// FunctionDescriptor sent with the request.
int console_enabled = 0;
// Offload report level; overwritten at the start of every offload from the
// FunctionDescriptor sent with the request.
int offload_report_level = 0;
| |
// Trace information

// Printable names for a variable's transfer direction. Indexed with
// VarDesc::direction.bits when merge_var_descs() traces a descriptor.
static const char* vardesc_direction_as_string[] = {
    "NOCOPY",
    "IN",
    "OUT",
    "INOUT"
};
// Printable names for a variable's type. Indexed with the type_src / type_dst
// values stored in m_vars_extra when merge_var_descs() traces a descriptor.
// NOTE(review): the order presumably mirrors the variable type enumeration
// declared elsewhere - keep the two in sync.
static const char* vardesc_type_as_string[] = {
    "unknown",
    "data",
    "data_ptr",
    "func_ptr",
    "void_ptr",
    "string_ptr",
    "dv",
    "dv_data",
    "dv_data_slice",
    "dv_ptr",
    "dv_ptr_data",
    "dv_ptr_data_slice",
    "cean_var",
    "cean_var_ptr",
    "c_data_ptr_array",
    "c_extended_type",
    "c_func_ptr_array",
    "c_void_ptr_array",
    "c_string_ptr_array",
    "c_data_ptr_ptr",
    "c_func_ptr_ptr",
    "c_void_ptr_ptr",
    "c_string_ptr_ptr",
    "c_cean_var_ptr_ptr",
};
| |
// Value returned by _Offload_get_device_number(); -1 until assigned
// (the assignment is not in this part of the file).
int mic_index = -1;
// Value returned by _Offload_number_of_devices(); -1 until assigned
// (the assignment is not in this part of the file).
int mic_engines_total = -1;
// Set in __offload_target_init() from COIPerfGetCycleFrequency().
uint64_t mic_frequency = 0;
// Number of the offload most recently started by OffloadDescriptor::offload().
int offload_number = 0;
// Per-buffer reference bookkeeping, keyed by buffer address. Updated by
// add_ref_count() and BufReleaseRef() while holding add_ref_lock.
static std::map<void*, RefInfo*> ref_data;
// Lock taken by add_ref_count() and BufReleaseRef() around ref_data accesses.
static mutex_t add_ref_lock;
| |
| #ifdef SEP_SUPPORT |
| static const char* sep_monitor_env = "SEP_MONITOR"; |
| static bool sep_monitor = false; |
| static const char* sep_device_env = "SEP_DEVICE"; |
| static const char* sep_device = "/dev/sep3.8/c"; |
| static int sep_counter = 0; |
| |
| #define SEP_API_IOC_MAGIC 99 |
| #define SEP_IOCTL_PAUSE _IO (SEP_API_IOC_MAGIC, 31) |
| #define SEP_IOCTL_RESUME _IO (SEP_API_IOC_MAGIC, 32) |
| |
| static void add_ref_count(void * buf, bool created) |
| { |
| mutex_locker_t locker(add_ref_lock); |
| RefInfo * info = ref_data[buf]; |
| |
| if (info) { |
| info->count++; |
| } |
| else { |
| info = new RefInfo((int)created,(long)1); |
| } |
| info->is_added |= created; |
| ref_data[buf] = info; |
| } |
| |
| static void BufReleaseRef(void * buf) |
| { |
| mutex_locker_t locker(add_ref_lock); |
| RefInfo * info = ref_data[buf]; |
| |
| if (info) { |
| --info->count; |
| if (info->count == 0 && info->is_added) { |
| OFFLOAD_TRACE(1, "Calling COIBufferReleaseRef AddRef count = %d\n", |
| ((RefInfo *) ref_data[buf])->count); |
| BufferReleaseRef(buf); |
| info->is_added = 0; |
| } |
| } |
| } |
| |
| static int VTPauseSampling(void) |
| { |
| int ret = -1; |
| int handle = open(sep_device, O_RDWR); |
| if (handle > 0) { |
| ret = ioctl(handle, SEP_IOCTL_PAUSE); |
| close(handle); |
| } |
| return ret; |
| } |
| |
| static int VTResumeSampling(void) |
| { |
| int ret = -1; |
| int handle = open(sep_device, O_RDWR); |
| if (handle > 0) { |
| ret = ioctl(handle, SEP_IOCTL_RESUME); |
| close(handle); |
| } |
| return ret; |
| } |
| #endif // SEP_SUPPORT |
| |
| void OffloadDescriptor::offload( |
| uint32_t buffer_count, |
| void** buffers, |
| void* misc_data, |
| uint16_t misc_data_len, |
| void* return_data, |
| uint16_t return_data_len |
| ) |
| { |
| FunctionDescriptor *func = (FunctionDescriptor*) misc_data; |
| const char *name = func->data; |
| OffloadDescriptor ofld; |
| char *in_data = 0; |
| char *out_data = 0; |
| char *timer_data = 0; |
| |
| console_enabled = func->console_enabled; |
| timer_enabled = func->timer_enabled; |
| offload_report_level = func->offload_report_level; |
| offload_number = func->offload_number; |
| ofld.set_offload_number(func->offload_number); |
| |
| #ifdef SEP_SUPPORT |
| if (sep_monitor) { |
| if (__sync_fetch_and_add(&sep_counter, 1) == 0) { |
| OFFLOAD_DEBUG_TRACE(2, "VTResumeSampling\n"); |
| VTResumeSampling(); |
| } |
| } |
| #endif // SEP_SUPPORT |
| |
| OFFLOAD_DEBUG_TRACE_1(2, ofld.get_offload_number(), |
| c_offload_start_target_func, |
| "Offload \"%s\" started\n", name); |
| |
| // initialize timer data |
| OFFLOAD_TIMER_INIT(); |
| |
| OFFLOAD_TIMER_START(c_offload_target_total_time); |
| |
| OFFLOAD_TIMER_START(c_offload_target_descriptor_setup); |
| |
| // get input/output buffer addresses |
| if (func->in_datalen > 0 || func->out_datalen > 0) { |
| if (func->data_offset != 0) { |
| in_data = (char*) misc_data + func->data_offset; |
| out_data = (char*) return_data; |
| } |
| else { |
| char *inout_buf = (char*) buffers[--buffer_count]; |
| in_data = inout_buf; |
| out_data = inout_buf; |
| } |
| } |
| |
| // assign variable descriptors |
| ofld.m_vars_total = func->vars_num; |
| if (ofld.m_vars_total > 0) { |
| uint64_t var_data_len = ofld.m_vars_total * sizeof(VarDesc); |
| |
| ofld.m_vars = (VarDesc*) malloc(var_data_len); |
| if (ofld.m_vars == NULL) |
| LIBOFFLOAD_ERROR(c_malloc); |
| memcpy(ofld.m_vars, in_data, var_data_len); |
| |
| ofld.m_vars_extra = |
| (VarExtra*) malloc(ofld.m_vars_total * sizeof(VarExtra)); |
| if (ofld.m_vars == NULL) |
| LIBOFFLOAD_ERROR(c_malloc); |
| |
| in_data += var_data_len; |
| func->in_datalen -= var_data_len; |
| } |
| |
| // timer data |
| if (func->timer_enabled) { |
| uint64_t timer_data_len = OFFLOAD_TIMER_DATALEN(); |
| |
| timer_data = out_data; |
| out_data += timer_data_len; |
| func->out_datalen -= timer_data_len; |
| } |
| |
| // init Marshallers |
| ofld.m_in.init_buffer(in_data, func->in_datalen); |
| ofld.m_out.init_buffer(out_data, func->out_datalen); |
| |
| // copy buffers to offload descriptor |
| std::copy(buffers, buffers + buffer_count, |
| std::back_inserter(ofld.m_buffers)); |
| |
| OFFLOAD_TIMER_STOP(c_offload_target_descriptor_setup); |
| |
| // find offload entry address |
| OFFLOAD_TIMER_START(c_offload_target_func_lookup); |
| |
| offload_func_with_parms entry = (offload_func_with_parms) |
| __offload_entries.find_addr(name); |
| |
| if (entry == NULL) { |
| #if OFFLOAD_DEBUG > 0 |
| if (console_enabled > 2) { |
| __offload_entries.dump(); |
| } |
| #endif |
| LIBOFFLOAD_ERROR(c_offload_descriptor_offload, name); |
| exit(1); |
| } |
| |
| OFFLOAD_TIMER_STOP(c_offload_target_func_lookup); |
| |
| OFFLOAD_TIMER_START(c_offload_target_func_time); |
| |
| // execute offload entry |
| entry(&ofld); |
| |
| OFFLOAD_TIMER_STOP(c_offload_target_func_time); |
| |
| OFFLOAD_TIMER_STOP(c_offload_target_total_time); |
| |
| // copy timer data to the buffer |
| OFFLOAD_TIMER_TARGET_DATA(timer_data); |
| |
| OFFLOAD_DEBUG_TRACE(2, "Offload \"%s\" finished\n", name); |
| |
| #ifdef SEP_SUPPORT |
| if (sep_monitor) { |
| if (__sync_sub_and_fetch(&sep_counter, 1) == 0) { |
| OFFLOAD_DEBUG_TRACE(2, "VTPauseSampling\n"); |
| VTPauseSampling(); |
| } |
| } |
| #endif // SEP_SUPPORT |
| } |
| |
| void OffloadDescriptor::merge_var_descs( |
| VarDesc *vars, |
| VarDesc2 *vars2, |
| int vars_total |
| ) |
| { |
| // number of variable descriptors received from host and generated |
| // locally should match |
| if (m_vars_total < vars_total) { |
| LIBOFFLOAD_ERROR(c_merge_var_descs1); |
| exit(1); |
| } |
| |
| for (int i = 0; i < m_vars_total; i++) { |
| // instead of m_vars[i].type.src we will use m_vars_extra[i].type_src |
| |
| if (i < vars_total) { |
| // variable type must match |
| if (m_vars[i].type.bits != vars[i].type.bits) { |
| OFFLOAD_TRACE(2, |
| "m_vars[%d].type.bits=%08x, vars[%d].type.bits=%08x\n", |
| i, m_vars[i].type.bits, i, vars[i].type.bits); |
| LIBOFFLOAD_ERROR(c_merge_var_descs2); |
| exit(1); |
| } |
| |
| if (m_vars[i].type.src == c_extended_type) { |
| VarDescExtendedType *etype = |
| reinterpret_cast<VarDescExtendedType*>(vars[i].ptr); |
| m_vars_extra[i].type_src = etype->extended_type; |
| m_vars[i].ptr = etype->ptr; |
| } |
| else { |
| m_vars_extra[i].type_src = m_vars[i].type.src; |
| if (!(m_vars[i].flags.use_device_ptr && |
| m_vars[i].type.src == c_dv)) { |
| m_vars[i].ptr = vars[i].ptr; |
| } |
| } |
| // instead of m_vars[i].type.dst we will use m_vars_extra[i].type_dst |
| if (i < vars_total && m_vars[i].type.dst == c_extended_type) { |
| VarDescExtendedType *etype = |
| reinterpret_cast<VarDescExtendedType*>(vars[i].into); |
| m_vars_extra[i].type_dst = etype->extended_type; |
| m_vars[i].into = etype->ptr; |
| } |
| else { |
| m_vars_extra[i].type_dst = m_vars[i].type.dst; |
| m_vars[i].into = vars[i].into; |
| } |
| |
| const char *var_sname = ""; |
| if (vars2 != NULL) { |
| if (vars2[i].sname != NULL) { |
| var_sname = vars2[i].sname; |
| } |
| } |
| OFFLOAD_DEBUG_TRACE_1(2, get_offload_number(), c_offload_var, |
| " VarDesc %d, var=%s, %s, %s\n", |
| i, var_sname, |
| vardesc_direction_as_string[m_vars[i].direction.bits], |
| vardesc_type_as_string[m_vars_extra[i].type_src]); |
| if (vars2 != NULL && vars2[i].dname != NULL) { |
| OFFLOAD_TRACE(2, " into=%s, %s\n", vars2[i].dname, |
| vardesc_type_as_string[m_vars_extra[i].type_dst]); |
| } |
| } |
| else { |
| m_vars_extra[i].type_src = m_vars[i].type.src; |
| m_vars_extra[i].type_dst = m_vars[i].type.dst; |
| } |
| |
| OFFLOAD_TRACE(2, |
| " type_src=%d, type_dstn=%d, direction=%d, " |
| "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, " |
| "offset=%lld, size=%lld, count/disp=%lld, ptr=%p into=%p\n", |
| m_vars_extra[i].type_src, |
| m_vars_extra[i].type_dst, |
| m_vars[i].direction.bits, |
| m_vars[i].alloc_if, |
| m_vars[i].free_if, |
| m_vars[i].align, |
| m_vars[i].mic_offset, |
| m_vars[i].flags.bits, |
| m_vars[i].offset, |
| m_vars[i].size, |
| m_vars[i].count, |
| m_vars[i].ptr, |
| m_vars[i].into); |
| } |
| } |
| |
// Consume the input marshaller (m_in) for every variable descriptor.
//
// For each descriptor, in index order, the function
//   1. sets the target-side pointer value for pointer-like variables,
//      taking the buffer either from m_buffers or from an address read
//      from m_in, and records buffer references;
//   2. for descriptor 0 of a stack buffer, releases buffers listed in m_in;
//   3. copies by-value data (and function pointers) out of m_in.
// The order of the m_in.receive_data() calls is presumably dictated by the
// host's wire format (not visible here), so statements must not be reordered.
// On return the c_offload_target_compute timer has been started.
void OffloadDescriptor::scatter_copyin_data()
{
    OFFLOAD_TIMER_START(c_offload_target_scatter_inputs);

    OFFLOAD_DEBUG_TRACE(2, "IN buffer @ %p size %lld\n",
                        m_in.get_buffer_start(),
                        m_in.get_buffer_size());
    OFFLOAD_DEBUG_DUMP_BYTES(2, m_in.get_buffer_start(),
                             m_in.get_buffer_size());

    // receive data
    for (int i = 0; i < m_vars_total; i++) {
        // The `ptr` side is the one to set up when the variable is an OUT
        // or has no `into`; otherwise the `into` side is used.
        bool src_is_for_mic = (m_vars[i].direction.out ||
                               m_vars[i].into == NULL);
        void** ptr_addr = src_is_for_mic ?
                          static_cast<void**>(m_vars[i].ptr) :
                          static_cast<void**>(m_vars[i].into);
        int type = src_is_for_mic ? m_vars_extra[i].type_src :
                                    m_vars_extra[i].type_dst;
        bool is_static = src_is_for_mic ?
                         m_vars[i].flags.is_static :
                         m_vars[i].flags.is_static_dstn;
        void *ptr = NULL;

        // An offset value precedes the variable's data in the stream; it is
        // read to keep the stream position right and otherwise unused here.
        if (m_vars[i].flags.alloc_disp) {
            int64_t offset = 0;
            m_in.receive_data(&offset, sizeof(offset));
        }
        // For dope-vector data the pointer to set is the descriptor's Base
        // field; the descriptor is reached directly or through one more
        // indirection depending on the type.
        if (VAR_TYPE_IS_DV_DATA_SLICE(type) ||
            VAR_TYPE_IS_DV_DATA(type)) {
            ArrDesc *dvp = (type == c_dv_data_slice || type == c_dv_data)?
                  reinterpret_cast<ArrDesc*>(ptr_addr) :
                  *reinterpret_cast<ArrDesc**>(ptr_addr);
            ptr_addr = reinterpret_cast<void**>(&dvp->Base);
        }
        // Set pointer values
        switch (type) {
            case c_data_ptr_array:
                {
                    // Descriptors [ptr_arr_offset, ptr_arr_offset + count)
                    // describe the elements of this pointer array.
                    int j = m_vars[i].ptr_arr_offset;
                    int max_el = j + m_vars[i].count;
                    char *dst_arr_ptr = (src_is_for_mic)?
                        *(reinterpret_cast<char**>(m_vars[i].ptr)) :
                        reinterpret_cast<char*>(m_vars[i].into);

                    // if is_pointer is 1 it means that pointer array itself
                    // is defined either via pointer or as class member.
                    // i.e. arr_ptr[0:5] or this->ARR[0:5]
                    if (m_vars[i].flags.is_pointer) {
                        int64_t offset = 0;
                        m_in.receive_data(&offset, sizeof(offset));
                        dst_arr_ptr = *((char**)dst_arr_ptr) + offset;
                    }
                    // Point every element descriptor at its slot in the array.
                    for (; j < max_el; j++) {
                        if (src_is_for_mic) {
                            m_vars[j].ptr =
                                dst_arr_ptr + m_vars[j].ptr_arr_offset;
                        }
                        else {
                            m_vars[j].into =
                                dst_arr_ptr + m_vars[j].ptr_arr_offset;
                        }
                    }
                }
                break;
            // By-value kinds: no pointer to set up.
            case c_data:
            case c_void_ptr:
            case c_void_ptr_ptr:
            case c_cean_var:
            case c_dv:
                break;

            case c_string_ptr:
            case c_data_ptr:
            case c_string_ptr_ptr:
            case c_data_ptr_ptr:
            case c_cean_var_ptr:
            case c_cean_var_ptr_ptr:
            case c_dv_ptr:
                // Don't need ptr_addr value for variables from stack buffer.
                // Stack buffer address is set at var_desc with #0.
                if (i != 0 && m_vars[i].flags.is_stack_buf) {
                    break;
                }
                // Pointer-to-pointer kinds: the pointer to set lives at an
                // offset (read from the stream) from the current pointee.
                if (TYPE_IS_PTR_TO_PTR(m_vars_extra[i].type_src) ||
                    TYPE_IS_PTR_TO_PTR(m_vars_extra[i].type_dst)) {
                    int64_t offset;

                    m_in.receive_data(&offset, sizeof(offset));
                    ptr_addr = reinterpret_cast<void**>(
                                 reinterpret_cast<char*>(*ptr_addr) + offset);

                }

                if (m_vars[i].alloc_if && !m_vars[i].flags.preallocated) {
                    void *buf = NULL;
                    // The buffer address either comes in the stream
                    // (sink_addr) or is the next entry of m_buffers.
                    if (m_vars[i].flags.sink_addr) {
                        m_in.receive_data(&buf, sizeof(buf));
                    }
                    else {
                        // NOTE(review): assumes m_buffers is not empty here -
                        // confirm the host sends one buffer per such variable.
                        buf = m_buffers.front();
                        m_buffers.pop_front();
                    }
                    if (buf) {
                        if (!is_static) {
                            if (!m_vars[i].flags.sink_addr) {
                                // increment buffer reference
                                OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
                                BufferAddRef(buf);
                                OFFLOAD_TRACE(1, "Calling COIBufferAddRef %p\n", buf);
                                OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
                            }
                            // Second argument tells the bookkeeping whether a
                            // real reference was taken just above.
                            add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
                            // NOTE(review): ref_data is read here without
                            // holding add_ref_lock - confirm this cannot race
                            // with another offload updating the map.
                            OFFLOAD_TRACE(1, " AddRef count = %d\n",
                                          ((RefInfo *) ref_data[buf])->count);
                        }
                        ptr = static_cast<char*>(buf) +
                                  m_vars[i].mic_offset +
                                  (m_vars[i].flags.is_stack_buf ?
                                   0 : m_vars[i].offset);

                    }
                    // ptr stays NULL when no buffer was supplied.
                    *ptr_addr = ptr;
                }
                else if (m_vars[i].flags.sink_addr) {
                    // No allocation requested: only the address from the
                    // stream is applied, no reference is recorded.
                    void *buf;
                    m_in.receive_data(&buf, sizeof(buf));
                    void *ptr = static_cast<char*>(buf) +
                                    m_vars[i].mic_offset +
                                    (m_vars[i].flags.is_stack_buf ?
                                     0 : m_vars[i].offset);
                    *ptr_addr = ptr;
                }
                break;

            // Function pointers are received in the copy-in step below.
            case c_func_ptr:
            case c_func_ptr_ptr:
                break;

            case c_dv_data:
            case c_dv_ptr_data:
            case c_dv_data_slice:
            case c_dv_ptr_data_slice:
                // Same buffer handling as the pointer kinds above, except
                // that preallocated/is_stack_buf are not consulted.
                if (m_vars[i].alloc_if) {
                    void *buf;
                    if (m_vars[i].flags.sink_addr) {
                        m_in.receive_data(&buf, sizeof(buf));
                    }
                    else {
                        buf = m_buffers.front();
                        m_buffers.pop_front();
                    }
                    if (buf) {
                        if (!is_static) {
                            if (!m_vars[i].flags.sink_addr) {
                                // increment buffer reference
                                OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
                                BufferAddRef(buf);
                                OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
                            }
                            add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
                        }
                        ptr = static_cast<char*>(buf) +
                            m_vars[i].mic_offset + m_vars[i].offset;
                    }
                    *ptr_addr = ptr;
                }
                else if (m_vars[i].flags.sink_addr) {
                    void *buf;
                    m_in.receive_data(&buf, sizeof(buf));
                    ptr = static_cast<char*>(buf) +
                          m_vars[i].mic_offset + m_vars[i].offset;
                    *ptr_addr = ptr;
                }
                break;

            default:
                LIBOFFLOAD_ERROR(c_unknown_var_type, type);
                abort();
        }
        // Release obsolete buffers for stack of persistent objects.
        // The vardesc with i==0 and flags.is_stack_buf==TRUE is always for
        // stack buffer pointer.
        if (i == 0 &&
            m_vars[i].flags.is_stack_buf &&
            !m_vars[i].direction.bits &&
            m_vars[i].alloc_if &&
            m_vars[i].size != 0) {
            // Here `size` is used as the number of buffer addresses that
            // follow in the stream.
            for (int j=0; j < m_vars[i].size; j++) {
                void *buf;
                m_in.receive_data(&buf, sizeof(buf));
                OFFLOAD_TRACE(4, "Releasing stack buffer %p\n", buf);
                BufferReleaseRef(buf);
                // NOTE(review): the map entry is erased without holding
                // add_ref_lock and any RefInfo it points to is not deleted -
                // confirm whether that is intended.
                ref_data.erase(buf);
            }
        }
        // Do copyin
        switch (m_vars_extra[i].type_dst) {
            case c_data_ptr_array:
                break;
            case c_data:
            case c_void_ptr:
            case c_void_ptr_ptr:
            case c_cean_var:
                if (m_vars[i].direction.in &&
                    !m_vars[i].flags.is_static_dstn) {
                    int64_t size;
                    int64_t disp;
                    char* ptr = m_vars[i].into ?
                                 static_cast<char*>(m_vars[i].into) :
                                 static_cast<char*>(m_vars[i].ptr);
                    // For c_cean_var size and displacement come from the
                    // stream; otherwise the descriptor's size is used.
                    if (m_vars_extra[i].type_dst == c_cean_var) {
                        m_in.receive_data((&size), sizeof(int64_t));
                        m_in.receive_data((&disp), sizeof(int64_t));
                    }
                    else {
                        size = m_vars[i].size;
                        disp = 0;
                    }
                    m_in.receive_data(ptr + disp, size);
                }
                break;

            case c_dv:
                if (m_vars[i].direction.bits ||
                    m_vars[i].alloc_if ||
                    m_vars[i].free_if) {
                    char* ptr = m_vars[i].into ?
                                 static_cast<char*>(m_vars[i].into) :
                                 static_cast<char*>(m_vars[i].ptr);
                    // The first sizeof(uint64_t) bytes of the destination
                    // are left untouched.
                    m_in.receive_data(ptr + sizeof(uint64_t),
                                      m_vars[i].size - sizeof(uint64_t));
                }
                break;

            // Pointer and dope-vector-data kinds were fully handled above.
            case c_string_ptr:
            case c_data_ptr:
            case c_string_ptr_ptr:
            case c_data_ptr_ptr:
            case c_cean_var_ptr:
            case c_cean_var_ptr_ptr:
            case c_dv_ptr:
            case c_dv_data:
            case c_dv_ptr_data:
            case c_dv_data_slice:
            case c_dv_ptr_data_slice:
                break;

            case c_func_ptr:
            case c_func_ptr_ptr:
                if (m_vars[i].direction.in) {
                    m_in.receive_func_ptr((const void**) m_vars[i].ptr);
                }
                break;

            default:
                LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars_extra[i].type_dst);
                abort();
        }
    }

    OFFLOAD_TRACE(1, "Total copyin data received from host: [%lld] bytes\n",
                  m_in.get_tfr_size());

    OFFLOAD_TIMER_STOP(c_offload_target_scatter_inputs);

    // Matching OFFLOAD_TIMER_STOP is at the top of gather_copyout_data().
    OFFLOAD_TIMER_START(c_offload_target_compute);
}
| |
| void OffloadDescriptor::gather_copyout_data() |
| { |
| OFFLOAD_TIMER_STOP(c_offload_target_compute); |
| |
| OFFLOAD_TIMER_START(c_offload_target_gather_outputs); |
| |
| for (int i = 0; i < m_vars_total; i++) { |
| bool src_is_for_mic = (m_vars[i].direction.out || |
| m_vars[i].into == NULL); |
| if (m_vars[i].flags.is_stack_buf) { |
| continue; |
| } |
| switch (m_vars_extra[i].type_src) { |
| case c_data_ptr_array: |
| break; |
| case c_data: |
| case c_void_ptr: |
| case c_void_ptr_ptr: |
| case c_cean_var: |
| if (m_vars[i].direction.out && |
| !m_vars[i].flags.is_static) { |
| m_out.send_data( |
| static_cast<char*>(m_vars[i].ptr) + m_vars[i].disp, |
| m_vars[i].size); |
| } |
| break; |
| |
| case c_dv: |
| break; |
| |
| case c_string_ptr: |
| case c_data_ptr: |
| case c_string_ptr_ptr: |
| case c_data_ptr_ptr: |
| case c_cean_var_ptr: |
| case c_cean_var_ptr_ptr: |
| case c_dv_ptr: |
| if (m_vars[i].free_if && |
| src_is_for_mic && |
| !m_vars[i].flags.preallocated && |
| !m_vars[i].flags.is_static) { |
| void *buf = *static_cast<char**>(m_vars[i].ptr) - |
| m_vars[i].mic_offset - |
| (m_vars[i].flags.is_stack_buf? |
| 0 : m_vars[i].offset); |
| if (buf == NULL) { |
| break; |
| } |
| // decrement buffer reference count |
| OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs); |
| BufReleaseRef(buf); |
| OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs); |
| } |
| if (m_vars[i].flags.preallocated && m_vars[i].alloc_if) { |
| m_out.send_data((void*) m_vars[i].ptr, sizeof(void*)); |
| } |
| break; |
| |
| case c_func_ptr: |
| case c_func_ptr_ptr: |
| if (m_vars[i].direction.out) { |
| m_out.send_func_ptr(*((void**) m_vars[i].ptr)); |
| } |
| break; |
| |
| case c_dv_data: |
| case c_dv_ptr_data: |
| case c_dv_data_slice: |
| case c_dv_ptr_data_slice: |
| if (src_is_for_mic && |
| m_vars[i].free_if && |
| !m_vars[i].flags.is_static) { |
| ArrDesc *dvp = (m_vars_extra[i].type_src == c_dv_data || |
| m_vars_extra[i].type_src == c_dv_data_slice) ? |
| static_cast<ArrDesc*>(m_vars[i].ptr) : |
| *static_cast<ArrDesc**>(m_vars[i].ptr); |
| |
| void *buf = reinterpret_cast<char*>(dvp->Base) - |
| m_vars[i].mic_offset - |
| m_vars[i].offset; |
| |
| if (buf == NULL) { |
| break; |
| } |
| |
| // decrement buffer reference count |
| OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs); |
| BufReleaseRef(buf); |
| OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs); |
| } |
| break; |
| |
| default: |
| LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars_extra[i].type_dst); |
| abort(); |
| } |
| |
| if (m_vars[i].into) { |
| switch (m_vars_extra[i].type_dst) { |
| case c_data_ptr_array: |
| break; |
| case c_data: |
| case c_void_ptr: |
| case c_void_ptr_ptr: |
| case c_cean_var: |
| case c_dv: |
| break; |
| |
| case c_string_ptr: |
| case c_data_ptr: |
| case c_string_ptr_ptr: |
| case c_data_ptr_ptr: |
| case c_cean_var_ptr: |
| case c_cean_var_ptr_ptr: |
| case c_dv_ptr: |
| if (m_vars[i].direction.in && |
| m_vars[i].free_if && |
| !m_vars[i].flags.is_static_dstn) { |
| void *buf = *static_cast<char**>(m_vars[i].into) - |
| m_vars[i].mic_offset - |
| (m_vars[i].flags.is_stack_buf? |
| 0 : m_vars[i].offset); |
| |
| if (buf == NULL) { |
| break; |
| } |
| // decrement buffer reference count |
| OFFLOAD_TIMER_START( |
| c_offload_target_release_buffer_refs); |
| BufReleaseRef(buf); |
| OFFLOAD_TIMER_STOP( |
| c_offload_target_release_buffer_refs); |
| } |
| break; |
| |
| case c_func_ptr: |
| case c_func_ptr_ptr: |
| break; |
| |
| case c_dv_data: |
| case c_dv_ptr_data: |
| case c_dv_data_slice: |
| case c_dv_ptr_data_slice: |
| if (m_vars[i].free_if && |
| m_vars[i].direction.in && |
| !m_vars[i].flags.is_static_dstn) { |
| ArrDesc *dvp = |
| (m_vars_extra[i].type_dst == c_dv_data_slice || |
| m_vars_extra[i].type_dst == c_dv_data) ? |
| static_cast<ArrDesc*>(m_vars[i].into) : |
| *static_cast<ArrDesc**>(m_vars[i].into); |
| void *buf = reinterpret_cast<char*>(dvp->Base) - |
| m_vars[i].mic_offset - |
| m_vars[i].offset; |
| |
| if (buf == NULL) { |
| break; |
| } |
| // decrement buffer reference count |
| OFFLOAD_TIMER_START( |
| c_offload_target_release_buffer_refs); |
| BufReleaseRef(buf); |
| OFFLOAD_TIMER_STOP( |
| c_offload_target_release_buffer_refs); |
| } |
| break; |
| |
| default: |
| LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars_extra[i].type_dst); |
| abort(); |
| } |
| } |
| } |
| |
| OFFLOAD_DEBUG_TRACE(2, "OUT buffer @ p %p size %lld\n", |
| m_out.get_buffer_start(), |
| m_out.get_buffer_size()); |
| |
| OFFLOAD_DEBUG_DUMP_BYTES(2, |
| m_out.get_buffer_start(), |
| m_out.get_buffer_size()); |
| |
| OFFLOAD_DEBUG_TRACE_1(1, get_offload_number(), c_offload_copyout_data, |
| "Total copyout data sent to host: [%lld] bytes\n", |
| m_out.get_tfr_size()); |
| |
| OFFLOAD_TIMER_STOP(c_offload_target_gather_outputs); |
| } |
| |
| void __offload_target_init(void) |
| { |
| #ifdef SEP_SUPPORT |
| const char* env_var = getenv(sep_monitor_env); |
| if (env_var != 0 && *env_var != '\0') { |
| sep_monitor = atoi(env_var); |
| } |
| env_var = getenv(sep_device_env); |
| if (env_var != 0 && *env_var != '\0') { |
| sep_device = env_var; |
| } |
| #endif // SEP_SUPPORT |
| |
| prefix = report_get_message_str(c_report_mic); |
| |
| // init frequency |
| mic_frequency = COIPerfGetCycleFrequency(); |
| } |
| |
| // User-visible offload API |
| |
| int _Offload_number_of_devices(void) |
| { |
| return mic_engines_total; |
| } |
| |
| int _Offload_get_device_number(void) |
| { |
| return mic_index; |
| } |
| |
| int _Offload_get_physical_device_number(void) |
| { |
| uint32_t index; |
| EngineGetIndex(&index); |
| return index; |
| } |