| /* |
| Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved. |
| |
| Redistribution and use in source and binary forms, with or without |
| modification, are permitted provided that the following conditions |
| are met: |
| |
| * Redistributions of source code must retain the above copyright |
| notice, this list of conditions and the following disclaimer. |
| * Redistributions in binary form must reproduce the above copyright |
| notice, this list of conditions and the following disclaimer in the |
| documentation and/or other materials provided with the distribution. |
| * Neither the name of Intel Corporation nor the names of its |
| contributors may be used to endorse or promote products derived |
| from this software without specific prior written permission. |
| |
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| |
// Forward declarations: the following 2 functions are declared as friend
// in offload_engine.h.
// CLANG does not like "static" to appear after the friend declaration.
static void __offload_init_library_once(void);
static void __offload_fini_library(void);
| |
| #include "offload_host.h" |
| #ifdef MYO_SUPPORT |
| #include "offload_myo_host.h" |
| #endif |
| |
| #include <malloc.h> |
| #ifndef TARGET_WINNT |
| #include <alloca.h> |
| #include <elf.h> |
| #endif // TARGET_WINNT |
| #include <errno.h> |
| #include <fcntl.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <sys/stat.h> |
| #include <sys/types.h> |
| |
| #include <algorithm> |
| #include <bitset> |
| #include <iostream> |
| |
// Character separating entries in path-list environment variables.
#if defined(HOST_WINNT)
#define PATH_SEPARATOR ";"
#else
#define PATH_SEPARATOR ":"
#endif

// Safely extract the offload number from a (possibly NULL) timer-data
// pointer.  The argument and the whole expansion are parenthesized so the
// macro composes correctly inside larger expressions (the unparenthesized
// ternary would otherwise bind to surrounding operators).
#define GET_OFFLOAD_NUMBER(timer_data) \
    ((timer_data) ? (timer_data)->offload_number : 0)
| |
| static void (*task_completion_callback)(void *); |
| |
| extern "C" { |
| #ifdef TARGET_WINNT |
| // Windows does not support imports from libraries without actually |
| // including them as dependence. We don't want to include in the |
| // dependence since is it used only for Fortran when traceback is enabled. |
| // Chose to implement it with GetProcAddress. |
| #define FORTRAN_TRACE_BACK win_for__continue_traceback |
| int win_for__continue_traceback( _Offload_result coi_offload_result ) |
| { |
| HINSTANCE hDLL; |
| int (* TraceBackRoutine)(_Offload_result value); |
| |
| hDLL = LoadLibrary("libifcoremd.dll"); |
| if (hDLL != 0) { |
| TraceBackRoutine = (int (*)(_Offload_result)) GetProcAddress(hDLL, |
| "for__continue_traceback"); |
| if (TraceBackRoutine != 0) { |
| return TraceBackRoutine(coi_offload_result); |
| } |
| else { |
| OFFLOAD_TRACE(3, |
| "Cannot find for__continue_traceback routine in libifcorert.dll\n"); |
| exit(1); |
| } |
| } |
| else { |
| OFFLOAD_TRACE(3, "Cannot load libifcorert.dll\n"); |
| exit(1); |
| } |
| return 0; |
| } |
| |
| #else // TARGET_WINNT |
| |
| #define FORTRAN_TRACE_BACK for__continue_traceback |
| |
| // for__continue_traceback is provided as a dummy to resolve link time symbols |
| // for C/C++ programs. For Fortran the actual fortran library function in |
| // libifcore.so is used. |
| #pragma weak for__continue_traceback |
| int for__continue_traceback( _Offload_result coi_offload_result ) |
| { |
| OFFLOAD_TRACE(3, |
| "liboffload function for_continue_traceback should not be called.\n"); |
| exit(1); |
| } |
| #endif //TARGET_WINNT |
| } // extern "C" |
| |
#ifdef TARGET_WINNT
// Small subset of ELF declarations for Windows which is needed to compile
// this file. ELF header is used to understand what binary type is contained
// in the target image - shared library or executable.

typedef uint16_t Elf64_Half;
typedef uint32_t Elf64_Word;
typedef uint64_t Elf64_Addr;
typedef uint64_t Elf64_Off;

// Size of the e_ident identification array at the start of an ELF header.
#define EI_NIDENT 16

// e_type values of interest: executable vs. shared object.
#define ET_EXEC 2
#define ET_DYN 3

// 64-bit ELF file header (layout mirrors the standard Elf64_Ehdr).
typedef struct
{
    unsigned char e_ident[EI_NIDENT];   // identification bytes (magic etc.)
    Elf64_Half e_type;                  // object file type (ET_EXEC/ET_DYN)
    Elf64_Half e_machine;               // target architecture
    Elf64_Word e_version;               // object file version
    Elf64_Addr e_entry;                 // entry point virtual address
    Elf64_Off e_phoff;                  // program header table offset
    Elf64_Off e_shoff;                  // section header table offset
    Elf64_Word e_flags;                 // processor-specific flags
    Elf64_Half e_ehsize;                // ELF header size
    Elf64_Half e_phentsize;             // program header entry size
    Elf64_Half e_phnum;                 // number of program header entries
    Elf64_Half e_shentsize;             // section header entry size
    Elf64_Half e_shnum;                 // number of section header entries
    Elf64_Half e_shstrndx;              // section name string table index
} Elf64_Ehdr;
#endif // TARGET_WINNT
| |
// Host console and file logging
const char *prefix;           // prefix prepended to each trace line
int console_enabled = 0;      // console tracing level (0 = disabled)
int offload_number = 0;       // running counter of offload regions

// Names of environment variables controlling tracing, reporting and timing
static const char *htrace_envname = "H_TRACE";
static const char *offload_report_envname = "OFFLOAD_REPORT";
static const char *timer_envname = "H_TIME";

// DMA channel count used by COI and set via
// OFFLOAD_DMA_CHANNEL_COUNT environment variable
uint32_t mic_dma_channel_count;

// Trace information
// Readable names for vardesc direction values (indexed by direction).
static const char* vardesc_direction_as_string[] = {
    "NOCOPY",
    "IN",
    "OUT",
    "INOUT"
};
// Readable names for vardesc type values (indexed by type).
static const char* vardesc_type_as_string[] = {
    "unknown",
    "data",
    "data_ptr",
    "func_ptr",
    "void_ptr",
    "string_ptr",
    "dv",
    "dv_data",
    "dv_data_slice",
    "dv_ptr",
    "dv_ptr_data",
    "dv_ptr_data_slice",
    "cean_var",
    "cean_var_ptr",
    "c_data_ptr_array",
    "c_extended_type",
    "c_func_ptr_array",
    "c_void_ptr_array",
    "c_string_ptr_array",
    "c_data_ptr_ptr",
    "c_func_ptr_ptr",
    "c_void_ptr_ptr",
    "c_string_ptr_ptr",
    "c_cean_var_ptr_ptr",
};

Engine* mic_engines = 0;          // array of target-device engine objects
uint32_t mic_engines_total = 0;   // number of elements in mic_engines
pthread_key_t mic_thread_key;     // TLS key for per-thread offload data
MicEnvVar mic_env_vars;           // environment variables passed to devices
uint64_t cpu_frequency = 0;       // host clock frequency used for timing

// MIC_STACKSIZE: main stack size on the target (default 12 MB)
uint32_t mic_stack_size = 12 * 1024 * 1024;

// MIC_BUFFERSIZE
uint64_t mic_buffer_size = 0;

// Preallocated 4K page memory size for buffers on MIC
uint64_t mic_4k_buffer_size = 0;

// Preallocated 2M page memory size for buffers on MIC
uint64_t mic_2m_buffer_size = 0;


// LD_LIBRARY_PATH for KNC
char* knc_library_path = 0;

// LD_LIBRARY_PATH for KNL
char* knl_library_path = 0;


// MIC_PROXY_IO
bool mic_proxy_io = true;

// MIC_PROXY_FS_ROOT
char* mic_proxy_fs_root = 0;

// Threshold for creating buffers with large pages. Buffer is created
// with large pages hint if its size exceeds the threshold value.
// By default large pages are disabled right now (by setting default
// value for threshold to MAX) due to HSD 4114629.
uint64_t __offload_use_2mb_buffers = 0xffffffffffffffffULL;
static const char *mic_use_2mb_buffers_envname =
    "MIC_USE_2MB_BUFFERS";

// Minimum data size for which buffer writes go asynchronous (bytes)
static uint64_t __offload_use_async_buffer_write = 2 * 1024 * 1024;
static const char *mic_use_async_buffer_write_envname =
    "MIC_USE_ASYNC_BUFFER_WRITE";

// Minimum data size for which buffer reads go asynchronous (bytes)
static uint64_t __offload_use_async_buffer_read = 2 * 1024 * 1024;
static const char *mic_use_async_buffer_read_envname =
    "MIC_USE_ASYNC_BUFFER_READ";

// device initialization type
OffloadInitType __offload_init_type = c_init_on_offload_all;
static const char *offload_init_envname = "OFFLOAD_INIT";

// active wait
static bool __offload_active_wait = true;
static const char *offload_active_wait_envname = "OFFLOAD_ACTIVE_WAIT";

// wait even for asynchronous offload
// true for now until the performance issue with COI is fixed
static bool __offload_always_wait = true;
static const char *offload_always_wait_envname = "OFFLOAD_ALWAYS_WAIT";

// OMP_DEFAULT_DEVICE
int __omp_device_num = 0;
static const char *omp_device_num_envname = "OMP_DEFAULT_DEVICE";

// OFFLOAD_PARALLEL_COPY
static bool __offload_parallel_copy = false;
static const char *parallel_copy_envname = "OFFLOAD_PARALLEL_COPY";

// Use COI interface for noncontiguous transfer if it exists.
static bool __offload_use_coi_noncontiguous_transfer = false;
static const char *use_coi_noncontiguous_transfer_envname =
    "MIC_USE_COI_MULTI_D";

// The list of pending target libraries
static bool __target_libs;                  // true while libraries pending
static TargetImageList __target_libs_list;  // images queued for loading
static mutex_t __target_libs_lock;          // guards the two fields above
static mutex_t stack_alloc_lock;            // NOTE(review): declared but the
                                            // manager below uses its own
                                            // lock — confirm still needed
static mutex_t lock_complete;

// Set of OffloadDescriptors of asynchronous offloads that are not destroyed
std::map<void *, bool> offload_descr_map;

// Target executable
TargetImage* __target_exe;
// is true if last loaded image is dll
bool __current_image_is_dll = false;
// is true if myo library is loaded when dll is loaded
bool __myo_init_in_so = false;
| |
| // Print readable offload flags |
| static void trace_offload_flags( |
| OffloadHostTimerData* timer_data, |
| OffloadFlags offload_flags |
| ) |
| { |
| // Sized big enough for all flag names |
| char fbuffer[256]; |
| bool first = true; |
| if (!OFFLOAD_DO_TRACE && (console_enabled >= 1)) { |
| sprintf(fbuffer, " OffloadFlags=("); |
| if (offload_flags.bits.fortran_traceback) { |
| sprintf(fbuffer+strlen(fbuffer), "fortran_traceback"); |
| first = false; |
| } |
| if (offload_flags.bits.omp_async) { |
| sprintf(fbuffer+strlen(fbuffer), first ? "omp_async" : ",omp_async"); |
| first = false; |
| } |
| OFFLOAD_DEBUG_TRACE_1(1, |
| GET_OFFLOAD_NUMBER(timer_data), c_offload_init_func, |
| "%s)\n", fbuffer); |
| } |
| } |
| |
| // Print readable varDesc flags |
| static void trace_varDesc_flags( |
| OffloadHostTimerData* timer_data, |
| varDescFlags offload_flags |
| ) |
| { |
| // Sized big enough for all flag names |
| char fbuffer[256]; |
| bool first = true; |
| if (!OFFLOAD_DO_TRACE && (console_enabled >= 1)) { |
| sprintf(fbuffer, " varDescFlags=("); |
| if (offload_flags.is_static) { |
| sprintf(fbuffer+strlen(fbuffer), "is_static"); |
| first = false; |
| } |
| if (offload_flags.is_static_dstn) { |
| sprintf(fbuffer+strlen(fbuffer), |
| first ? "is_static_dstn" : ",is_static_dstn"); |
| first = false; |
| } |
| if (offload_flags.has_length) { |
| sprintf(fbuffer+strlen(fbuffer), |
| first ? "has_length" : ",has_length"); |
| first = false; |
| } |
| if (offload_flags.is_stack_buf) { |
| sprintf(fbuffer+strlen(fbuffer), |
| first ? "is_stack_buf" : ",is_stack_buf"); |
| first = false; |
| } |
| if (offload_flags.targetptr) { |
| sprintf(fbuffer+strlen(fbuffer), |
| first ? "targetptr" : ",targetptr"); |
| first = false; |
| } |
| if (offload_flags.preallocated) { |
| sprintf(fbuffer+strlen(fbuffer), |
| first ? "preallocated" : ",preallocated"); |
| first = false; |
| } |
| if (offload_flags.is_pointer) { |
| sprintf(fbuffer+strlen(fbuffer), |
| first ? "is_pointer" : ",is_pointer"); |
| first = false; |
| } |
| if (offload_flags.sink_addr) { |
| sprintf(fbuffer+strlen(fbuffer), |
| first ? "sink_addr" : ",sink_addr"); |
| first = false; |
| } |
| if (offload_flags.alloc_disp) { |
| sprintf(fbuffer+strlen(fbuffer), |
| first ? "alloc_disp" : ",alloc_disp"); |
| first = false; |
| } |
| if (offload_flags.is_noncont_src) { |
| sprintf(fbuffer+strlen(fbuffer), |
| first ? "is_noncont_src" : ",is_noncont_src"); |
| first = false; |
| } |
| if (offload_flags.is_noncont_dst) { |
| sprintf(fbuffer+strlen(fbuffer), |
| first ? "is_noncont_dst" : ",is_noncont_dst"); |
| first = false; |
| } |
| if (offload_flags.always_copy) { |
| sprintf(fbuffer+strlen(fbuffer), |
| first ? "always_copy" : ",always_copy"); |
| first = false; |
| } |
| if (offload_flags.always_delete) { |
| sprintf(fbuffer+strlen(fbuffer), |
| first ? "always_delete" : ",always_delete"); |
| first = false; |
| } |
| if (offload_flags.is_non_cont_struct) { |
| sprintf(fbuffer+strlen(fbuffer), |
| first ? "is_non_cont_struct" : ",is_non_cont_struct"); |
| first = false; |
| } |
| if (offload_flags.pin) { |
| sprintf(fbuffer+strlen(fbuffer), |
| first ? "pin" : ",pin"); |
| first = false; |
| } |
| if (offload_flags.is_device_ptr) { |
| sprintf(fbuffer+strlen(fbuffer), |
| first ? "is_device_ptr" : ",is_device_ptr"); |
| first = false; |
| } |
| if (offload_flags.use_device_ptr) { |
| sprintf(fbuffer+strlen(fbuffer), |
| first ? "use_device_ptr" : ",use_device_ptr"); |
| } |
| OFFLOAD_DEBUG_TRACE_1(1, |
| GET_OFFLOAD_NUMBER(timer_data), c_offload_init_func, |
| "%s)\n", fbuffer); |
| } |
| } |
| |
| static char * offload_get_src_base(void * ptr, uint8_t type) |
| { |
| char *base; |
| if (VAR_TYPE_IS_PTR(type)) { |
| base = *static_cast<char**>(ptr); |
| } |
| else if (VAR_TYPE_IS_SCALAR(type)) { |
| base = static_cast<char*>(ptr); |
| } |
| else if (VAR_TYPE_IS_DV_DATA_SLICE(type) || VAR_TYPE_IS_DV_DATA(type)) { |
| ArrDesc *dvp; |
| if (VAR_TYPE_IS_DV_DATA_SLICE(type)) { |
| const Arr_Desc *ap = static_cast<const Arr_Desc*>(ptr); |
| dvp = (type == c_dv_data_slice) ? |
| reinterpret_cast<ArrDesc*>(ap->base) : |
| *reinterpret_cast<ArrDesc**>(ap->base); |
| } |
| else { |
| dvp = (type == c_dv_data) ? |
| static_cast<ArrDesc*>(ptr) : |
| *static_cast<ArrDesc**>(ptr); |
| } |
| base = reinterpret_cast<char*>(dvp->Base); |
| } |
| else { |
| base = NULL; |
| } |
| return base; |
| } |
| |
// Report a fatal COI error for this offload and terminate the process.
// For COI_PROCESS_DIED the target process is shut down first; otherwise an
// error message keyed by 'msg' is printed (refined to an out-of-memory
// message for failed buffer creation).  This function never returns.
void OffloadDescriptor::report_coi_error(error_types msg, COIRESULT res)
{
    // special case for the 'process died' error
    if (res == COI_PROCESS_DIED) {
        m_device.fini_process(true);
    }
    else {
        switch (msg) {
            case c_buf_create:
                // refine generic creation failure into an OOM message
                if (res == COI_OUT_OF_MEMORY) {
                    msg = c_buf_create_out_of_mem;
                }
                /* fallthru */

            // these messages take the device index and the COI result
            case c_buf_create_from_mem:
            case c_buf_get_address:
            case c_pipeline_create:
            case c_pipeline_run_func:
                LIBOFFLOAD_ERROR(msg, m_device.get_logical_index(), res);
                break;

            // these messages take only the COI result
            case c_buf_read:
            case c_buf_write:
            case c_buf_copy:
            case c_buf_map:
            case c_buf_unmap:
            case c_buf_destroy:
            case c_buf_set_state:
                LIBOFFLOAD_ERROR(msg, res);
                break;

            default:
                break;
        }
    }

    exit(1);
}
| |
| _Offload_result OffloadDescriptor::translate_coi_error(COIRESULT res) const |
| { |
| switch (res) { |
| case COI_SUCCESS: |
| return OFFLOAD_SUCCESS; |
| |
| case COI_PROCESS_DIED: |
| return OFFLOAD_PROCESS_DIED; |
| |
| case COI_OUT_OF_MEMORY: |
| return OFFLOAD_OUT_OF_MEMORY; |
| |
| default: |
| return OFFLOAD_ERROR; |
| } |
| } |
| |
| // is_targetptr == 0 && is_prealloc == 0 - allocation of pointer data; |
| // is_targetptr == 1 && is_prealloc == 0 - allocation of target memory: |
| // allocate memory at target; use its value as base in target table. |
| // is_targetptr == 1 && is_prealloc == 1 - use preallocated target memory: |
| // base - is address at target of preallocated memory; use its value as |
| // base in target table. |
| |
// Create or find the host<->target association for a pointer variable.
//
//  ptr_data    [out] the association entry, new or existing
//  base        host base address; for targetptr (non-preallocated) it is
//              overwritten with the address allocated on the target
//  disp        displacement of the accessed data from base
//  size        length of the accessed data
//  alloc_disp  when nonzero the allocated range carries its own
//              displacement and 'length'/'base' are used as-is
//              (assumption from usage below — confirm against callers)
//  align       requested alignment; must be a power of 2 for the
//              mic_offset optimization to apply
//  is_targptr  allocate or reuse memory on the target (see comment above)
//  is_prealloc use preallocated target memory (see comment above)
//  pin         association for pinned memory: CPU-side buffer only
//
// Returns true on success.  On COI failure returns false after recording
// the translated error in m_status (or exits via report_coi_error when
// the offload is mandatory).
bool OffloadDescriptor::alloc_ptr_data(
    PtrData* &ptr_data,
    void *base,
    int64_t disp,
    int64_t size,
    int64_t alloc_disp,
    int align,
    bool is_targptr,
    bool is_prealloc,
    bool pin
)
{
    // total length of base
    int64_t length = size;
    bool is_new;
    COIBUFFER targptr_buf;
    COIRESULT res;
    uint32_t buffer_flags = 0;
    char * base_disp = reinterpret_cast<char *>(base) + disp;

    // create buffer with large pages if data length exceeds
    // large page threshold
    if (length >= __offload_use_2mb_buffers) {
        buffer_flags = COI_OPTIMIZE_HUGE_PAGE_SIZE;
    }
    // Allocate memory at target for targetptr without preallocated as we need
    // its address as base argument in call to m_device.insert_ptr_data
    if (is_targptr && !is_prealloc) {
        length = alloc_disp ? length : size + disp;
        res = COI::BufferCreate(
            length,
            COI_BUFFER_OPENCL,
            buffer_flags,
            0,
            1,
            &m_device.get_process(),
            &targptr_buf);
        if (res != COI_SUCCESS) {
            if (m_status != 0) {
                m_status->result = translate_coi_error(res);
            }
            else if (m_is_mandatory) {
                report_coi_error(c_buf_create, res);
            }
            return false;
        }

        // replace 'base' with the sink-side address of the new buffer
        res = COI::BufferGetSinkAddress(
            targptr_buf, reinterpret_cast<uint64_t *>(&base));
        if (res != COI_SUCCESS) {
            if (m_status != 0) {
                m_status->result = translate_coi_error(res);
            }
            else if (m_is_mandatory) {
                report_coi_error(c_buf_get_address, res);
            }
            return false;
        }
    }

    OFFLOAD_TRACE(3, "Creating association for data: addr %p, length %lld\n",
                  alloc_disp ? base : base_disp,
                  alloc_disp ? length : size + disp);

    // add new entry

    ptr_data = is_targptr ?
               m_device.find_targetptr_data(base_disp) :
               m_device.find_ptr_data(base_disp);
    // if ptr_data is found just need to check it for overlapping
    if (ptr_data) {
        // NOTE(review): when is_targptr && !is_prealloc a targptr_buf was
        // created above and appears to be abandoned on this path — confirm
        // whether it should be destroyed here.
        is_new = false;
        base = base_disp;
    }
    else {
        // If association is not found we must create it.
        // insert_*_data is presumed to return holding
        // ptr_data->alloc_ptr_data_lock for a new entry; every exit path
        // below releases it — confirm against offload_engine.h.
        length = alloc_disp ? length : size + disp;
        ptr_data = is_targptr ?
                   m_device.insert_targetptr_data(base, length, is_new) :
                   m_device.insert_ptr_data(base, length, is_new);
    }
    if (is_new) {

        OFFLOAD_TRACE(3, "Added new association\n");

        if (length > 0) {
            OffloadTimer timer(get_timer_data(), c_offload_host_alloc_buffers);

            // align should be a power of 2
            if (!pin && !is_targptr &&
                align > 0 && (align & (align - 1)) == 0) {
                // offset within mic_buffer. Can do offset optimization
                // only when source address alignment satisfies requested
                // alignment on the target (cq172736).
                if ((reinterpret_cast<intptr_t>(base) & (align - 1)) == 0) {
                    ptr_data->mic_offset =
                        reinterpret_cast<intptr_t>(base) & 4095;
                }
            }

            // buffer size and flags
            uint64_t buffer_size = length + ptr_data->mic_offset;

            // For targetptr there is no CPU buffer
            if (pin || !is_targptr) {
                // create CPU buffer
                OFFLOAD_DEBUG_TRACE_1(3,
                          GET_OFFLOAD_NUMBER(get_timer_data()),
                          c_offload_create_buf_host,
                          "Creating buffer from source memory %p, "
                          "length %lld\n", base, length);

                // result is not checked because we can continue without cpu
                // buffer. In this case we will use COIBufferRead/Write
                // instead of COIBufferCopy.

                COI::BufferCreateFromMemory(length,
                                            COI_BUFFER_OPENCL,
                                            0,
                                            base,
                                            1,
                                            &m_device.get_process(),
                                            &ptr_data->cpu_buf);
            }

            // create MIC buffer
            if (is_prealloc) {
                // wrap the preallocated sink memory at 'base'
                OFFLOAD_DEBUG_TRACE_1(3,
                          GET_OFFLOAD_NUMBER(get_timer_data()),
                          c_offload_create_buf_mic,
                          "Creating buffer from sink memory: "
                          "addr %p, size %lld, offset %d, flags 0x%x\n",
                          base, buffer_size, ptr_data->mic_offset,
                          buffer_flags);
                res = COI::BufferCreateFromMemory(ptr_data->cpu_addr.length(),
                                                  COI_BUFFER_NORMAL,
                                                  COI_SINK_MEMORY,
                                                  base,
                                                  1,
                                                  &m_device.get_process(),
                                                  &ptr_data->mic_buf);
                if (res != COI_SUCCESS) {
                    if (m_status != 0) {
                        m_status->result = translate_coi_error(res);
                    }
                    else if (m_is_mandatory) {
                        report_coi_error(c_buf_create, res);
                    }
                    ptr_data->alloc_ptr_data_lock.unlock();
                    return false;
                }
            }
            else if (is_targptr) {
                // reuse the target buffer allocated at the top
                ptr_data->mic_buf = targptr_buf;
            }
            else if (!pin) {
                // ordinary case: allocate a fresh sink-side buffer
                OFFLOAD_DEBUG_TRACE_1(3,
                          GET_OFFLOAD_NUMBER(get_timer_data()),
                          c_offload_create_buf_mic,
                          "Creating buffer for sink: size %lld, offset %d, "
                          "flags =0x%x\n", buffer_size,
                          ptr_data->mic_offset, buffer_flags);
                res = COI::BufferCreate(buffer_size,
                                        COI_BUFFER_NORMAL,
                                        buffer_flags,
                                        0,
                                        1,
                                        &m_device.get_process(),
                                        &ptr_data->mic_buf);
                if (res != COI_SUCCESS) {
                    if (m_status != 0) {
                        m_status->result = translate_coi_error(res);
                    }
                    else if (m_is_mandatory) {
                        report_coi_error(c_buf_create, res);
                    }
                    ptr_data->alloc_ptr_data_lock.unlock();
                    return false;
                }
            }

            if (!pin) {
                // make buffer valid on the device.
                res = COI::BufferSetState(ptr_data->mic_buf,
                                          m_device.get_process(),
                                          COI_BUFFER_VALID,
                                          COI_BUFFER_NO_MOVE,
                                          0, 0, 0);
                if (res != COI_SUCCESS) {
                    if (m_status != 0) {
                        m_status->result = translate_coi_error(res);
                    }
                    else if (m_is_mandatory) {
                        report_coi_error(c_buf_set_state, res);
                    }
                    ptr_data->alloc_ptr_data_lock.unlock();
                    return false;
                }

                // and invalid on the host side
                res = COI::BufferSetState(ptr_data->mic_buf,
                                          COI_PROCESS_SOURCE,
                                          COI_BUFFER_INVALID,
                                          COI_BUFFER_NO_MOVE,
                                          0, 0, 0);
                if (res != COI_SUCCESS) {
                    if (m_status != 0) {
                        m_status->result = translate_coi_error(res);
                    }
                    else if (m_is_mandatory) {
                        report_coi_error(c_buf_set_state, res);
                    }
                    ptr_data->alloc_ptr_data_lock.unlock();
                    return false;
                }
            }
        }
        ptr_data->alloc_disp = alloc_disp;
        ptr_data->alloc_ptr_data_lock.unlock();
    }
    else {
        mutex_locker_t locker(ptr_data->alloc_ptr_data_lock);

        OFFLOAD_TRACE(3, "Found existing association: addr %p, length %lld, "
                      "is_static %d\n",
                      ptr_data->cpu_addr.start(), ptr_data->cpu_addr.length(),
                      ptr_data->is_static);

        // This is not a new entry. Make sure that provided address range fits
        // into existing one.
        MemRange addr_range(base, length);
        if (!ptr_data->cpu_addr.contains(addr_range)) {
            LIBOFFLOAD_ERROR(c_bad_ptr_mem_alloc, base, length,
                             const_cast<void *>(ptr_data->cpu_addr.start()),
                             ptr_data->cpu_addr.length());
            exit(1);
        }

        // if the entry is associated with static data it may not have buffers
        // created because they are created on demand.
        if (ptr_data->is_static && !init_static_ptr_data(ptr_data)) {
            return false;
        }
    }

    return true;
}
| |
| bool OffloadDescriptor::find_ptr_data( |
| PtrData* &ptr_data, |
| void *in_base, |
| int64_t disp, |
| int64_t size, |
| bool is_targetptr, |
| bool report_error |
| ) |
| { |
| // total length of base |
| int64_t length = size; |
| char *base = reinterpret_cast<char *>(in_base) + disp; |
| |
| OFFLOAD_TRACE(3, "Looking for association for data: addr %p, " |
| "length %lld\n", base, length); |
| |
| // find existing association in pointer table |
| ptr_data = is_targetptr ? |
| m_device.find_targetptr_data(base) : |
| m_device.find_ptr_data(base); |
| if (ptr_data == 0) { |
| if (report_error) { |
| LIBOFFLOAD_ERROR(c_no_ptr_data, base); |
| exit(1); |
| } |
| OFFLOAD_TRACE(3, "Association does not exist\n"); |
| return true; |
| } |
| |
| OFFLOAD_TRACE(3, "Found association: base %p, length %lld, is_static %d\n", |
| ptr_data->cpu_addr.start(), ptr_data->cpu_addr.length(), |
| ptr_data->is_static); |
| |
| // make sure that provided address range fits into existing one |
| MemRange addr_range(base, length); |
| if (!ptr_data->cpu_addr.contains(addr_range)) { |
| if (report_error) { |
| LIBOFFLOAD_ERROR(c_bad_ptr_mem_range, base, length, |
| const_cast<void *>(ptr_data->cpu_addr.start()), |
| ptr_data->cpu_addr.length()); |
| exit(1); |
| } |
| OFFLOAD_TRACE(3, "Existing association partially overlaps with " |
| "data address range\n"); |
| ptr_data = 0; |
| return true; |
| } |
| |
| // if the entry is associated with static data it may not have buffers |
| // created because they are created on demand. |
| if (ptr_data->is_static && !init_static_ptr_data(ptr_data)) { |
| return false; |
| } |
| |
| return true; |
| } |
| |
// Look up the device-side address corresponding to 'host_ptr' and store it
// into 'device_ptr'.  The association must already exist; otherwise an
// error is reported and the process exits.
void OffloadDescriptor::find_device_ptr(
    int64_t* &device_ptr,
    void *host_ptr
)
{
    PtrData* ptr_data;
    char *base = reinterpret_cast<char *>(host_ptr);

    OFFLOAD_TRACE(3, "Looking for association for data: addr %p\n", base);

    // find existing association in pointer table
    ptr_data = m_device.find_ptr_data(base);

    // MIC address should have been assigned.
    // For now assume does not exist and get the addr
    // if ((ptr_data == 0) || ptr_data->mic_addr) {

    if (ptr_data == 0) {
        OFFLOAD_TRACE(3, "Association does not exist\n");
        LIBOFFLOAD_ERROR(c_no_ptr_data, base);
        exit(1);
    }
    if (!ptr_data->mic_addr) {
        // sink-side address is resolved lazily on first use
        COIRESULT res = COI::BufferGetSinkAddress(ptr_data->mic_buf,
                                                  &ptr_data->mic_addr);
        if (res != COI_SUCCESS) {
            if (m_status != 0)
                m_status->result = translate_coi_error(res);
            // NOTE(review): unlike other helpers, report_coi_error (which
            // exits) is called even when m_status was set — presumably
            // because this function has no failure return value; confirm.
            report_coi_error(c_buf_get_address, res);
        }
    }

    device_ptr = (int64_t *) ptr_data->mic_addr;

    OFFLOAD_TRACE(3, "Found association: host_ptr %p, device_ptr = %p\n",
                  ptr_data->cpu_addr.start(), device_ptr);
}
| |
| bool OffloadDescriptor::init_static_ptr_data(PtrData *ptr_data) |
| { |
| OffloadTimer timer(get_timer_data(), c_offload_host_alloc_buffers); |
| |
| if (ptr_data->cpu_buf == 0) { |
| OFFLOAD_TRACE(3, "Creating buffer from source memory %llx\n", |
| ptr_data->cpu_addr.start()); |
| |
| COIRESULT res = COI::BufferCreateFromMemory( |
| ptr_data->cpu_addr.length(), |
| COI_BUFFER_OPENCL, |
| 0, |
| const_cast<void*>(ptr_data->cpu_addr.start()), |
| 1, &m_device.get_process(), |
| &ptr_data->cpu_buf); |
| |
| if (res != COI_SUCCESS) { |
| if (m_status != 0) { |
| m_status->result = translate_coi_error(res); |
| return false; |
| } |
| report_coi_error(c_buf_create_from_mem, res); |
| } |
| } |
| |
| if (ptr_data->mic_buf == 0) { |
| OFFLOAD_TRACE(3, "Creating buffer from sink memory %llx\n", |
| ptr_data->mic_addr); |
| |
| COIRESULT res = COI::BufferCreateFromMemory( |
| ptr_data->cpu_addr.length(), |
| COI_BUFFER_NORMAL, |
| COI_SINK_MEMORY, |
| reinterpret_cast<void*>(ptr_data->mic_addr), |
| 1, &m_device.get_process(), |
| &ptr_data->mic_buf); |
| |
| if (res != COI_SUCCESS) { |
| if (m_status != 0) { |
| m_status->result = translate_coi_error(res); |
| return false; |
| } |
| report_coi_error(c_buf_create_from_mem, res); |
| } |
| } |
| |
| return true; |
| } |
| |
| bool OffloadDescriptor::init_mic_address(PtrData *ptr_data) |
| { |
| if (ptr_data->mic_buf != 0 && ptr_data->mic_addr == 0) { |
| COIRESULT res = COI::BufferGetSinkAddress(ptr_data->mic_buf, |
| &ptr_data->mic_addr); |
| if (res != COI_SUCCESS) { |
| if (m_status != 0) { |
| m_status->result = translate_coi_error(res); |
| } |
| else if (m_is_mandatory) { |
| report_coi_error(c_buf_get_address, res); |
| } |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| bool OffloadDescriptor::nullify_target_stack( |
| COIBUFFER targ_buf, |
| uint64_t size |
| ) |
| { |
| char * ptr = (char*)malloc(size); |
| if (ptr == NULL) |
| LIBOFFLOAD_ERROR(c_malloc); |
| COIRESULT res; |
| |
| memset(ptr, 0, size); |
| res = COI::BufferWrite( |
| targ_buf, |
| 0, |
| ptr, |
| size, |
| COI_COPY_UNSPECIFIED, |
| 0, 0, 0); |
| free(ptr); |
| if (res != COI_SUCCESS) { |
| if (m_status != 0) { |
| m_status->result = translate_coi_error(res); |
| return false; |
| } |
| report_coi_error(c_buf_write, res); |
| } |
| return true; |
| } |
| |
// Dump one element of the persistent stack list to the trace output
// (trace level 4), prefixed by 'msg'.
static void print_persistList_item(
    const char *msg,
    PersistData *cur_el
)
{
    OFFLOAD_TRACE(4, "%s\n", msg);
    OFFLOAD_TRACE(4, " stack_cpu_addr = %p\n", cur_el->stack_cpu_addr);
    OFFLOAD_TRACE(4, " routine_id = %d\n", cur_el->routine_id);
    OFFLOAD_TRACE(4, " thread_id = %lld\n", cur_el->thread_id);
    OFFLOAD_TRACE(4, " stack_ptr_data = %p\n", cur_el->stack_ptr_data);
    OFFLOAD_TRACE(4, " MIC buffer = %p\n", cur_el->stack_ptr_data->mic_buf);
    OFFLOAD_TRACE(4, " MIC addr = %p\n", cur_el->stack_ptr_data->mic_addr);
    OFFLOAD_TRACE(4, " cpu_stack_addr = %p\n", cur_el->cpu_stack_addr);
}
| |
// Serializes all manipulation of the per-device persistent stack list.
static mutex_t stack_memory_manager_lock;

// Find or create the persistent target-side buffer that mirrors the CPU
// stack section starting at 'stack_begin' for routine 'routine_id'.
// Obsolete entries (stack sections below the current top of stack, or a
// dynamic sibling at the same address) are scheduled for destruction.
// Sets *is_new to true when a fresh buffer was allocated, false when an
// existing one is reused.  Returns false on COI failure.
bool OffloadDescriptor::offload_stack_memory_manager(
    const void * stack_begin,
    int routine_id,
    int buf_size,
    int align,
    bool thread_specific_function_locals,
    bool *is_new)
{
    //mutex_locker_t locker(stack_alloc_lock);
    stack_memory_manager_lock.lock();

    PersistData * new_el;
    PersistDataList::iterator it_begin = m_device.m_persist_list.begin();
    // it_end is only valid once 'erase' > 0 (set alongside each increment)
    PersistDataList::iterator it_end;
    int erase = 0;          // number of obsolete elements to erase
    uint64_t cur_thread_id = m_device.get_thread_id();

    OFFLOAD_TRACE(3, "offload_stack_memory_manager("
                  "stack_begin=%p, routine_id=%d, buf_size=%d,"
                  "align=%d, thread_specific_function_locals=%d, bool=%p)\n",
                  stack_begin, routine_id, buf_size,
                  align, thread_specific_function_locals, is_new);
    OFFLOAD_TRACE(3, "cur_thread_id=%lld\n", cur_thread_id);
    *is_new = false;

    for (PersistDataList::iterator it = m_device.m_persist_list.begin();
         it != m_device.m_persist_list.end(); it++) {
        PersistData cur_el = *it;

        print_persistList_item("Current element in persist list:", &cur_el);
        if (stack_begin > it->stack_cpu_addr) {
            // NOTE: stacks grow downward, so a larger address means the
            // recorded section is below the current top of stack
            if (cur_thread_id == cur_el.thread_id) {
                // this stack data must be destroyed
                m_destroy_stack.push_front(cur_el.stack_ptr_data);
                it_end = it;
                erase++;
                OFFLOAD_TRACE(3, "Current element below TOS: so delete\n");
            }
        }
        else if (stack_begin == it->stack_cpu_addr) {
            if (routine_id != it->routine_id) {
                // this stack data must be destroyed
                // because the current function is a dynamic sibling
                m_destroy_stack.push_front(cur_el.stack_ptr_data);
                it_end = it;
                erase++;
                OFFLOAD_TRACE(3, "Current element is sibling: so delete\n");
                break;
            }
            else if (!thread_specific_function_locals ||
                     cur_thread_id == cur_el.thread_id) {
                // stack data is reused
                m_stack_ptr_data = it->stack_ptr_data;
                if (erase > 0) {
                    // all obsolete stack sections must be erased from the list
                    m_device.m_persist_list.erase(it_begin, ++it_end);
                    // account for the extra target addresses sent with the
                    // offload (one per destroyed buffer)
                    m_in_datalen +=
                        erase * sizeof(new_el->stack_ptr_data->mic_addr);
                }
                OFFLOAD_TRACE(3, "Reuse of stack buffer with addr %p\n",
                              m_stack_ptr_data->mic_addr);
                stack_memory_manager_lock.unlock();
                return true;
            }
        }
        else if (stack_begin < it->stack_cpu_addr &&
                 cur_thread_id == cur_el.thread_id) {
            // remaining entries are above the top of stack; stop scanning
            OFFLOAD_TRACE(3, "Current element is above TOS\n");
            break;
        }
    }

    if (erase > 0) {
        // all obsolete stack sections must be erased from the list
        // (the sizeof operand is unevaluated, so using 'new_el' before
        // assignment here is well-defined)
        m_device.m_persist_list.erase(it_begin, ++it_end);
        m_in_datalen += erase * sizeof(new_el->stack_ptr_data->mic_addr);
    }
    // new stack table is created
    // NOTE(review): m_persist_list.push_front(*new_el) below copies the
    // object; the heap allocation itself appears to be never freed —
    // confirm whether this leak is intentional.
    new_el = new PersistData(stack_begin, routine_id, buf_size, cur_thread_id);
    // create MIC buffer
    COIRESULT res;
    uint32_t buffer_flags = 0;

    // create buffer with large pages if data length exceeds
    // large page threshold
    if (buf_size >= __offload_use_2mb_buffers) {
        buffer_flags = COI_OPTIMIZE_HUGE_PAGE_SIZE;
    }
    res = COI::BufferCreate(buf_size,
                            COI_BUFFER_NORMAL,
                            buffer_flags,
                            0,
                            1,
                            &m_device.get_process(),
                            &new_el->stack_ptr_data->mic_buf);
    if (res != COI_SUCCESS) {
        if (m_status != 0) {
            m_status->result = translate_coi_error(res);
        }
        else if (m_is_mandatory) {
            report_coi_error(c_buf_create, res);
        }
        stack_memory_manager_lock.unlock();
        return false;
    }
    // make buffer valid on the device.
    res = COI::BufferSetState(new_el->stack_ptr_data->mic_buf,
                              m_device.get_process(),
                              COI_BUFFER_VALID,
                              COI_BUFFER_NO_MOVE,
                              0, 0, 0);
    if (res != COI_SUCCESS) {
        if (m_status != 0) {
            m_status->result = translate_coi_error(res);
        }
        else if (m_is_mandatory) {
            report_coi_error(c_buf_set_state, res);
        }
        stack_memory_manager_lock.unlock();
        return false;
    }
    // and invalid on the host side
    res = COI::BufferSetState(new_el->stack_ptr_data->mic_buf,
                              COI_PROCESS_SOURCE,
                              COI_BUFFER_INVALID,
                              COI_BUFFER_NO_MOVE,
                              0, 0, 0);
    if (res != COI_SUCCESS) {
        if (m_status != 0) {
            m_status->result = translate_coi_error(res);
        }
        else if (m_is_mandatory) {
            report_coi_error(c_buf_set_state, res);
        }
        stack_memory_manager_lock.unlock();
        return false;
    }
    // persistence algorithm requires target stack initially to be nullified
    if (!nullify_target_stack(new_el->stack_ptr_data->mic_buf, buf_size)) {
        stack_memory_manager_lock.unlock();
        return false;
    }

    m_stack_ptr_data = new_el->stack_ptr_data;
    init_mic_address(m_stack_ptr_data);
    OFFLOAD_TRACE(3, "Allocating stack buffer with addr %p\n",
                  m_stack_ptr_data->mic_addr);
    m_device.m_persist_list.push_front(*new_el);
    // NOTE(review): m_stack_ptr_data == new_el->stack_ptr_data, so this
    // second init_mic_address call looks redundant — confirm.
    init_mic_address(new_el->stack_ptr_data);
    *is_new = true;

    stack_memory_manager_lock.unlock();
    return true;
}
| |
| // Search through persistent stack buffers |
| // for the top-of-stack buffer for this thread |
| char* OffloadDescriptor::get_this_threads_cpu_stack_addr( |
| const void * stack_begin, |
| int routine_id, |
| bool thread_specific_function_locals |
| ) |
| { |
| uint64_t cur_thread_id = m_device.get_thread_id(); |
| char* matched = 0; |
| |
| OFFLOAD_TRACE(3, "get_this_threads_cpu_stack_addr(" |
| "stack_begin=%p, routine_id=%d, thread_specific_function_locals=%d)\n", |
| stack_begin, routine_id, thread_specific_function_locals); |
| OFFLOAD_TRACE(3, "cur_thread_id=%lld\n", cur_thread_id); |
| |
| stack_memory_manager_lock.lock(); |
| for (PersistDataList::iterator it = m_device.m_persist_list.begin(); |
| it != m_device.m_persist_list.end(); it++) |
| { |
| PersistData cur_el = *it; |
| print_persistList_item("Current element in persist list:", &cur_el); |
| if (stack_begin == cur_el.stack_cpu_addr) |
| { |
| // For OpenMP shared function locals matching is done without |
| // regard to thread id. But, we return the last match, which |
| // corresponds to the outer stack. |
| if (!thread_specific_function_locals) |
| { |
| matched = cur_el.cpu_stack_addr; |
| continue; |
| } |
| // For non-OpenMP shared function-local variables |
| // the thread-id must match |
| if (cur_thread_id == cur_el.thread_id) |
| { |
| matched = cur_el.cpu_stack_addr; |
| break; |
| } |
| } |
| } |
| stack_memory_manager_lock.unlock(); |
| if (matched != 0) |
| { |
| OFFLOAD_TRACE(3, "get_this_threads_cpu_stack_addr() => %p\n", matched); |
| return matched; |
| } |
| |
| OFFLOAD_TRACE(1, |
| "Could not find persistent data; expect Read/Write failure\n"); |
| return 0; |
| } |
| |
| // Search through persistent stack buffers |
| // for the top-of-stack MIC buffer for this thread |
| PtrData* OffloadDescriptor::get_this_threads_mic_stack_addr( |
| const void * stack_begin, |
| int routine_id, |
| bool thread_specific_function_locals |
| ) |
| { |
| uint64_t cur_thread_id = m_device.get_thread_id(); |
| PtrData* matched = 0; |
| |
| OFFLOAD_TRACE(3, "get_this_threads_mic_stack_addr(" |
| "stack_begin=%p, routine_id=%d, thread_specific_function_locals=%d)\n", |
| stack_begin, routine_id, thread_specific_function_locals); |
| OFFLOAD_TRACE(3, "cur_thread_id=%lld\n", cur_thread_id); |
| |
| stack_memory_manager_lock.lock(); |
| for (PersistDataList::iterator it = m_device.m_persist_list.begin(); |
| it != m_device.m_persist_list.end(); it++) |
| { |
| PersistData cur_el = *it; |
| print_persistList_item("Current element in persist list:", &cur_el); |
| if (stack_begin == cur_el.stack_cpu_addr) |
| { |
| // For OpenMP shared function locals matching is done without |
| // regard to thread id. But, we return the last match, which |
| // corresponds to the outer stack. |
| if (!thread_specific_function_locals) |
| { |
| matched = cur_el.stack_ptr_data; |
| continue; |
| } |
| // For non-OpenMP shared function-local variables |
| // the thread-id must match |
| if (cur_thread_id == cur_el.thread_id) |
| { |
| matched = cur_el.stack_ptr_data; |
| break; |
| } |
| } |
| } |
| stack_memory_manager_lock.unlock(); |
| if (matched != 0) |
| { |
| OFFLOAD_TRACE(3, "get_this_threads_mic_stack_addr() => %p\n", matched); |
| return matched; |
| } |
| |
| OFFLOAD_TRACE(1, |
| "Could not find persistent data; expect Read/Write failure\n"); |
| return 0; |
| } |
| |
// Process a use_device_ptr variable descriptor at index i.
// For direction "in": replace the host pointer in m_vars[i] with its
// associated device pointer and record the host pointer in the
// targetptr table so it can be restored later.
// For direction "out": look up the saved host pointer, write it back
// over the device pointer, and drop the table entry.
void OffloadDescriptor::setup_use_device_ptr(int i)
{
    PtrData *ptr_data;
    ArrDesc *dvp;   // dope vector; set and used only when type_src == c_dv_ptr
    void *base;     // pointer value extracted from the variable
    // Read the base address either out of the dope vector or directly
    // from the pointer variable.
    if (m_vars_extra[i].type_src == c_dv_ptr) {
        dvp = *static_cast<ArrDesc**>(m_vars[i].ptr);
        base = reinterpret_cast<void*>(dvp->Base);
    }
    else {
        base = *static_cast<void**>(m_vars[i].ptr);
    }
    if (m_vars[i].direction.in) {
        int64_t *device_ptr;
        bool is_new = true;

        // Map the host base address to its device pointer.
        find_device_ptr(device_ptr, base);

        // Create an entry in the targetptr table using device_ptr
        // as the lookup key so the host pointer can be recovered
        // later by the matching "out" transfer.
        ptr_data = m_device.insert_targetptr_data(device_ptr,
                                                  0, is_new);

        // Actually the base is a host pointer and cpu_addr is a
        // device pointer. This is a special case where the 2
        // address usages are reversed to enable using the existing
        // PtrData structure instead of adding new fields.
        ptr_data->mic_addr = (uint64_t) base;

        // insert_targetptr_data returns with the entry's lock held.
        ptr_data->alloc_ptr_data_lock.unlock();

        // Replace host pointer with device pointer
        if (m_vars_extra[i].type_src == c_dv_ptr) {
            dvp->Base = reinterpret_cast<dv_size>(device_ptr);
        }
        else {
            *static_cast<void**>(m_vars[i].ptr) = device_ptr;
        }
    }
    else if (m_vars[i].direction.out) {
        // For use_device_ptr and "out": find the associated host ptr
        // (stored in mic_addr by the "in" branch) and restore it.
        ptr_data = m_device.find_targetptr_data(base);
        if (!ptr_data) {
            // No matching "in" entry exists: fatal error.
            LIBOFFLOAD_ERROR(c_no_ptr_data, base);
            exit(1);
        }
        if (m_vars_extra[i].type_src == c_dv_ptr) {
            dvp->Base = ptr_data->mic_addr;
        }
        else {
            *static_cast<void**>(m_vars[i].ptr) =
                reinterpret_cast<void*>(ptr_data->mic_addr);
        }
        // The association is one-shot; remove it once restored.
        m_device.remove_targetptr_data(
            ptr_data->cpu_addr.start());
    }
}
| |
| bool OffloadDescriptor::setup_descriptors( |
| VarDesc *vars, |
| VarDesc2 *vars2, |
| int vars_total, |
| int entry_id, |
| const void *stack_addr |
| ) |
| { |
| COIRESULT res; |
| // To enable caching the CPU stack base address for stack variables |
| char* this_threads_cpu_stack_addr = 0; |
| // To properly deal with non-OpenMP threading and function-local variables |
| // For OpenMP threading we support all function-locals in shared mode only |
| bool thread_specific_function_locals = !omp_in_parallel(); |
| |
| OffloadTimer timer(get_timer_data(), c_offload_host_setup_buffers); |
| // make a copy of variable descriptors |
| m_vars_total = vars_total; |
| if (vars_total > 0) { |
| m_vars = (VarDesc*) malloc(m_vars_total * sizeof(VarDesc)); |
| if (m_vars == NULL) |
| LIBOFFLOAD_ERROR(c_malloc); |
| memcpy(m_vars, vars, m_vars_total * sizeof(VarDesc)); |
| m_vars_extra = (VarExtra*) malloc(m_vars_total * sizeof(VarExtra)); |
| if (m_vars_extra == NULL) |
| LIBOFFLOAD_ERROR(c_malloc); |
| } |
| |
| // dependencies |
| m_in_deps_allocated = m_vars_total + 1; |
| m_in_deps = (COIEVENT*) malloc(sizeof(COIEVENT) * m_in_deps_allocated); |
| if (m_in_deps == NULL) |
| LIBOFFLOAD_ERROR(c_malloc); |
| if (m_vars_total > 0) { |
| m_out_deps_allocated = m_vars_total; |
| m_out_deps = (COIEVENT*) malloc(sizeof(COIEVENT) * m_out_deps_allocated); |
| if (m_out_deps == NULL) |
| LIBOFFLOAD_ERROR(c_malloc); |
| } |
| // copyin/copyout data length |
| m_in_datalen = 0; |
| m_out_datalen = 0; |
| |
| // First pass over variable descriptors |
| // - Calculate size of the input and output non-pointer data |
| // - Allocate buffers for input and output pointers |
| for (int i = 0; i < m_vars_total; i++) { |
| void* alloc_base = NULL; |
| int64_t alloc_disp = 0; |
| int64_t alloc_size = 0; |
| bool src_is_for_mic = (m_vars[i].direction.out || |
| m_vars[i].into == NULL); |
| bool src_is_for_host = (m_vars[i].direction.in || |
| m_vars[i].into == NULL); |
| const char *var_sname = ""; |
| if (vars2 != NULL && i < vars_total) { |
| if (vars2[i].sname != NULL) { |
| var_sname = vars2[i].sname; |
| } |
| } |
| |
| // instead of m_vars[i].type.src we will use m_vars_extra[i].type_src |
| if (m_vars[i].type.src == c_extended_type) { |
| VarDescExtendedType *etype = |
| reinterpret_cast<VarDescExtendedType*>(m_vars[i].ptr); |
| m_vars_extra[i].type_src = etype->extended_type; |
| m_vars[i].ptr = etype->ptr; |
| } |
| else { |
| m_vars_extra[i].type_src = m_vars[i].type.src; |
| } |
| // instead of m_vars[i].type.dst we will use m_vars_extra[i].type_dst |
| if (m_vars[i].type.dst == c_extended_type) { |
| VarDescExtendedType *etype = |
| reinterpret_cast<VarDescExtendedType*>(m_vars[i].into); |
| if (etype) { |
| m_vars_extra[i].type_dst = etype->extended_type; |
| m_vars[i].into = etype->ptr; |
| } |
| else { |
| m_vars_extra[i].type_dst = m_vars_extra[i].type_src; |
| } |
| } |
| else { |
| m_vars_extra[i].type_dst = m_vars[i].type.dst; |
| } |
| OFFLOAD_TRACE(2, " VarDesc %d, var=%s, %s, %s\n", |
| i, var_sname, |
| vardesc_direction_as_string[m_vars[i].direction.bits], |
| vardesc_type_as_string[m_vars_extra[i].type_src]); |
| if (vars2 != NULL && i < vars_total && vars2[i].dname != NULL) { |
| OFFLOAD_TRACE(2, " into=%s, %s\n", vars2[i].dname, |
| vardesc_type_as_string[m_vars_extra[i].type_dst]); |
| } |
| OFFLOAD_TRACE(2, |
| " type_src=%d, type_dstn=%d, direction=%d, " |
| "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, " |
| "offset=%lld, size=%lld, count/disp=%lld, ptr=%p, into=%p\n", |
| m_vars_extra[i].type_src, |
| m_vars_extra[i].type_dst, |
| m_vars[i].direction.bits, |
| m_vars[i].alloc_if, |
| m_vars[i].free_if, |
| m_vars[i].align, |
| m_vars[i].mic_offset, |
| m_vars[i].flags.bits, |
| m_vars[i].offset, |
| m_vars[i].size, |
| m_vars[i].count, |
| m_vars[i].ptr, |
| m_vars[i].into); |
| // If any varDesc flags bits set, show them |
| if (console_enabled >= 1 && m_vars[i].flags.bits != 0) { |
| trace_varDesc_flags(get_timer_data(), m_vars[i].flags); |
| } |
| |
| // preallocated implies targetptr |
| if (m_vars[i].flags.preallocated) { |
| // targetptr preallocated alloc_if(1) may not be used with |
| // an in clause |
| if (m_vars[i].direction.in && m_vars[i].alloc_if) { |
| LIBOFFLOAD_ERROR(c_in_with_preallocated); |
| exit(1); |
| } |
| m_vars[i].flags.targetptr = 1; |
| } |
| if (m_vars[i].alloc != NULL) { |
| // array descriptor |
| const Arr_Desc *ap = |
| static_cast<const Arr_Desc*>(m_vars[i].alloc); |
| |
| // debug dump |
| ARRAY_DESC_DUMP(" ", "ALLOC", ap, 0, 1); |
| |
| __arr_data_offset_and_length(ap, alloc_disp, alloc_size); |
| |
| alloc_base = reinterpret_cast<void*>(ap->base); |
| } |
| |
| m_vars_extra[i].alloc = m_vars[i].alloc; |
| m_vars_extra[i].auto_data = 0; |
| m_vars_extra[i].cpu_disp = 0; |
| m_vars_extra[i].cpu_offset = 0; |
| m_vars_extra[i].src_data = 0; |
| m_vars_extra[i].read_rng_src = 0; |
| m_vars_extra[i].read_rng_dst = 0; |
| m_vars_extra[i].omp_last_event_type = c_last_not; |
| // flag is_arr_ptr_el is 1 only for var_descs generated |
| // for c_data_ptr_array type |
| if (i < vars_total) { |
| m_vars_extra[i].is_arr_ptr_el = 0; |
| } |
| if (TYPE_IS_PTR_TO_PTR(m_vars_extra[i].type_src) || |
| TYPE_IS_PTR_TO_PTR(m_vars_extra[i].type_dst) || |
| m_vars[i].flags.is_pointer) { |
| m_vars_extra[i].pointer_offset = m_vars[i].offset; |
| m_vars[i].offset = 0; |
| m_in_datalen += sizeof(m_vars[i].offset); |
| } |
| |
| switch (m_vars_extra[i].type_src) { |
| case c_data_ptr_array: |
| { |
| const Arr_Desc *ap; |
| const VarDesc3 *vd3 = |
| static_cast<const VarDesc3*>(m_vars[i].ptr); |
| int flags = vd3->array_fields; |
| OFFLOAD_TRACE(2, |
| " pointer array flags = %04x\n", flags); |
| OFFLOAD_TRACE(2, |
| " pointer array type is %s\n", |
| vardesc_type_as_string[flags & 0x3f]); |
| ap = static_cast<const Arr_Desc*>(vd3->ptr_array); |
| ARRAY_DESC_DUMP(" ", "ptr array", ap, |
| m_vars[i].flags.is_pointer, 1); |
| if (m_vars[i].into) { |
| ap = static_cast<const Arr_Desc*>(m_vars[i].into); |
| ARRAY_DESC_DUMP( |
| " ", "into array", ap, 0, 1); |
| } |
| if ((flags & (1<<flag_align_is_array)) != 0) { |
| ap = static_cast<const Arr_Desc*>(vd3->align_array); |
| ARRAY_DESC_DUMP( |
| " ", "align array", ap, 0, 1); |
| } |
| if ((flags & (1<<flag_alloc_if_is_array)) != 0) { |
| ap = static_cast<const Arr_Desc*>(vd3->alloc_if_array); |
| ARRAY_DESC_DUMP( |
| " ", "alloc_if array", ap, 0, 1); |
| } |
| if ((flags & (1<<flag_free_if_is_array)) != 0) { |
| ap = static_cast<const Arr_Desc*>(vd3->free_if_array); |
| ARRAY_DESC_DUMP( |
| " ", "free_if array", ap, 0, 1); |
| } |
| if ((flags & (1<<flag_extent_start_is_array)) != 0) { |
| ap = static_cast<const Arr_Desc*>(vd3->extent_start); |
| ARRAY_DESC_DUMP( |
| " ", "extent_start array", ap, 0, 1); |
| } else if ((flags & |
| (1<<flag_extent_start_is_scalar)) != 0) { |
| OFFLOAD_TRACE(2, |
| " extent_start scalar = %d\n", |
| (int64_t)vd3->extent_start); |
| } |
| if ((flags & (1<<flag_extent_elements_is_array)) != 0) { |
| ap = static_cast<const Arr_Desc*> |
| (vd3->extent_elements); |
| ARRAY_DESC_DUMP(" ", |
| "extent_elements array", ap, 0, 1); |
| } else if ((flags & |
| (1<<flag_extent_elements_is_scalar)) != 0) { |
| OFFLOAD_TRACE(2, |
| " extent_elements scalar = %d\n", |
| (int64_t)vd3->extent_elements); |
| } |
| if ((flags & (1<<flag_into_start_is_array)) != 0) { |
| ap = static_cast<const Arr_Desc*>(vd3->into_start); |
| ARRAY_DESC_DUMP( |
| " ", "into_start array", ap, 0, 1); |
| } else if ((flags & |
| (1<<flag_into_start_is_scalar)) != 0) { |
| OFFLOAD_TRACE(2, |
| " into_start scalar = %d\n", |
| (int64_t)vd3->into_start); |
| } |
| if ((flags & (1<<flag_into_elements_is_array)) != 0) { |
| ap = static_cast<const Arr_Desc*>(vd3->into_elements); |
| ARRAY_DESC_DUMP( |
| " ", "into_elements array", ap, 0, 1); |
| } else if ((flags & |
| (1<<flag_into_elements_is_scalar)) != 0) { |
| OFFLOAD_TRACE(2, |
| " into_elements scalar = %d\n", |
| (int64_t)vd3->into_elements); |
| } |
| if ((flags & (1<<flag_alloc_start_is_array)) != 0) { |
| ap = static_cast<const Arr_Desc*>(vd3->alloc_start); |
| ARRAY_DESC_DUMP( |
| " ", "alloc_start array", ap, 0, 1); |
| } else if ((flags & |
| (1<<flag_alloc_start_is_scalar)) != 0) { |
| OFFLOAD_TRACE(2, |
| " alloc_start scalar = %d\n", |
| (int64_t)vd3->alloc_start); |
| } |
| if ((flags & (1<<flag_alloc_elements_is_array)) != 0) { |
| ap = static_cast<const Arr_Desc*>(vd3->alloc_elements); |
| ARRAY_DESC_DUMP(" ", |
| "alloc_elements array", ap, 0, 1); |
| } else if ((flags & |
| (1<<flag_alloc_elements_is_scalar)) != 0) { |
| OFFLOAD_TRACE(2, |
| " alloc_elements scalar = %d\n", |
| (int64_t)vd3->alloc_elements); |
| } |
| } |
| if (!gen_var_descs_for_pointer_array(i)) { |
| return false; |
| } |
| break; |
| |
| case c_data: |
| case c_void_ptr: |
| case c_void_ptr_ptr: |
| case c_cean_var: |
| // In all uses later |
| // VarDesc.size will have the length of the data to be |
| // transferred |
| // VarDesc.disp will have an offset from base |
| |
| if (m_vars[i].flags.is_non_cont_struct && src_is_for_host) { |
| NonContigDesc *desc = |
| static_cast<NonContigDesc*>(m_vars[i].ptr); |
| noncont_struct_dump(" ", "DATA", desc); |
| m_vars_extra[i].noncont_desc = desc; |
| m_vars[i].ptr = reinterpret_cast<void*>(desc->base); |
| m_vars[i].size = get_noncont_struct_size(desc); |
| m_vars[i].disp = 0; |
| } |
| else if (m_vars_extra[i].type_src == c_cean_var) { |
| // array descriptor |
| const Arr_Desc *ap = |
| static_cast<const Arr_Desc*>(m_vars[i].ptr); |
| |
| // debug dump |
| ARRAY_DESC_DUMP("", "IN/OUT", ap, 0, !src_is_for_mic); |
| |
| // offset and length are derived from the array descriptor |
| __arr_data_offset_and_length(ap, m_vars[i].disp, |
| m_vars[i].size); |
| if (!is_arr_desc_contiguous(ap)) { |
| m_vars[i].flags.is_noncont_src = 1; |
| m_vars_extra[i].read_rng_src = |
| init_read_ranges_arr_desc(ap); |
| } |
| // all necessary information about length and offset is |
| // transferred in var descriptor. There is no need to send |
| // array descriptor to the target side. |
| m_vars[i].ptr = reinterpret_cast<void*>(ap->base); |
| } |
| else { |
| m_vars[i].size *= m_vars[i].count; |
| m_vars[i].disp = 0; |
| } |
| |
| if (m_vars[i].direction.bits) { |
| // make sure that transfer size > 0 |
| if (m_vars[i].size <= 0) { |
| LIBOFFLOAD_ERROR(c_zero_or_neg_transfer_size); |
| exit(1); |
| } |
| |
| if (m_vars[i].flags.is_static) { |
| PtrData *ptr_data; |
| // find data associated with variable |
| if (!find_ptr_data(ptr_data, |
| m_vars[i].ptr, |
| m_vars[i].disp, |
| m_vars[i].size, |
| false, false)) { |
| return false; |
| } |
| |
| if (ptr_data != 0) { |
| // offset to base from the beginning of the buffer |
| // memory |
| m_vars[i].offset = |
| (char*) m_vars[i].ptr - |
| (char*) ptr_data->cpu_addr.start(); |
| } |
| else { |
| m_vars[i].flags.is_static = false; |
| if (m_vars[i].into == NULL) { |
| m_vars[i].flags.is_static_dstn = false; |
| } |
| } |
| m_vars_extra[i].src_data = ptr_data; |
| } |
| |
| if (m_vars[i].direction.in && |
| !m_vars[i].flags.is_static && |
| !m_vars[i].flags.is_stack_buf) { |
| m_in_datalen += m_vars[i].size; |
| |
| // for non-static target destination defined as CEAN |
| // expression we pass to target its size and dist |
| if (m_vars[i].into == NULL && |
| m_vars_extra[i].type_src == c_cean_var) { |
| m_in_datalen += 2 * sizeof(uint64_t); |
| } |
| m_need_runfunction = true; |
| } |
| if (m_vars[i].direction.out && |
| !m_vars[i].flags.is_static && |
| !m_vars[i].flags.is_stack_buf) { |
| m_out_datalen += m_vars[i].size; |
| m_need_runfunction = true; |
| } |
| } |
| if (m_is_openmp && src_is_for_host && |
| !m_vars[i].flags.is_device_ptr) { |
| if (m_vars[i].flags.is_static) { |
| PtrData *ptr_data = m_vars_extra[i].src_data; |
| // Static data is transferred either by omp target |
| // update construct which passes zeros for |
| // alloc_if and free_if or by always modifier. |
| // Implicit openmp reference is transfered also |
| // if its reference count is equal to 1 |
| if (ptr_data && |
| IS_OPENMP_IMPLICIT_OR_LINK(ptr_data->var_alloc_type)) { |
| if (m_vars[i].alloc_if) { |
| ptr_data->add_reference(); |
| } |
| |
| if (!m_vars[i].flags.always_copy && |
| (m_vars[i].alloc_if || m_vars[i].free_if) && |
| ptr_data->get_reference() != 1) { |
| m_vars[i].direction.bits = c_parameter_nocopy; |
| } |
| } |
| else if ( |
| !m_vars[i].flags.always_copy && |
| (m_vars[i].alloc_if || m_vars[i].free_if)) { |
| m_vars[i].direction.bits = c_parameter_nocopy; |
| } |
| } |
| else { |
| AutoData *auto_data; |
| if (m_vars[i].alloc_if) { |
| auto_data = m_device.insert_auto_data( |
| m_vars[i].ptr, m_vars[i].size); |
| auto_data->add_reference(); |
| } |
| else { |
| // TODO: what should be done if var is not in |
| // the table? |
| auto_data = m_device.find_auto_data( |
| m_vars[i].ptr); |
| } |
| |
| // For automatic variables data is transferred: |
| // - if always modifier is used OR |
| // - if alloc_if == 0 && free_if == 0 OR |
| // - if reference count is 1 |
| if (!m_vars[i].flags.always_copy && |
| (m_vars[i].alloc_if || m_vars[i].free_if) && |
| auto_data != 0 && |
| auto_data->get_reference() != 1) { |
| m_vars[i].direction.bits = c_parameter_nocopy; |
| } |
| |
| // save data for later use |
| m_vars_extra[i].auto_data = auto_data; |
| } |
| } |
| break; |
| |
| case c_dv: |
| if (m_vars[i].flags.use_device_ptr) { |
| setup_use_device_ptr(i); |
| break; |
| } |
| else if (m_vars[i].direction.bits || |
| m_vars[i].alloc_if || |
| m_vars[i].free_if) { |
| ArrDesc *dvp = static_cast<ArrDesc*>(m_vars[i].ptr); |
| |
| // debug dump |
| __dv_desc_dump("IN/OUT", dvp); |
| |
| // send dope vector contents excluding base |
| m_in_datalen += m_vars[i].size - sizeof(uint64_t); |
| m_need_runfunction = true; |
| } |
| break; |
| |
| case c_string_ptr: |
| case c_string_ptr_ptr: |
| if ((m_vars[i].direction.bits || |
| m_vars[i].alloc_if || |
| m_vars[i].free_if) && |
| m_vars[i].size == 0) { |
| m_vars[i].size = 1; |
| m_vars[i].count = |
| strlen(*static_cast<char**>(m_vars[i].ptr)) + 1; |
| } |
| /* fallthru */ |
| |
| case c_data_ptr: |
| case c_data_ptr_ptr: |
| if (m_vars[i].flags.is_stack_buf && |
| !m_vars[i].direction.bits && |
| m_vars[i].alloc_if) { |
| // this var_desc is for stack buffer |
| bool is_new; |
| |
| if (!offload_stack_memory_manager( |
| stack_addr, entry_id, |
| m_vars[i].count, m_vars[i].align, |
| thread_specific_function_locals, &is_new)) { |
| return false; |
| } |
| if (is_new) { |
| m_compute_buffers.push_back( |
| m_stack_ptr_data->mic_buf); |
| m_device.m_persist_list.front().cpu_stack_addr = |
| static_cast<char*>(m_vars[i].ptr); |
| PersistData *new_el = &m_device.m_persist_list.front(); |
| print_persistList_item( |
| "New element in persist list:", |
| new_el); |
| } |
| else { |
| m_vars[i].flags.sink_addr = 1; |
| m_in_datalen += sizeof(m_stack_ptr_data->mic_addr); |
| if (thread_specific_function_locals) { |
| m_stack_ptr_data = get_this_threads_mic_stack_addr( |
| stack_addr, entry_id, |
| thread_specific_function_locals); |
| } |
| } |
| m_vars[i].size = m_destroy_stack.size(); |
| m_vars_extra[i].src_data = m_stack_ptr_data; |
| |
| // need to add or remove references for stack buffer at target |
| if (is_new || m_destroy_stack.size()) { |
| m_need_runfunction = true; |
| } |
| |
| break; |
| } |
| /* fallthru */ |
| |
| case c_cean_var_ptr: |
| case c_cean_var_ptr_ptr: |
| case c_dv_ptr: |
| if (m_vars[i].flags.is_non_cont_struct && src_is_for_host) { |
| NonContigDesc *desc = |
| static_cast<NonContigDesc*>(m_vars[i].ptr); |
| noncont_struct_dump(" ", "PTR", desc); |
| m_vars_extra[i].noncont_desc = desc; |
| m_vars[i].ptr = reinterpret_cast<void*>(desc->base); |
| m_vars[i].disp = 0; |
| } |
| else if (m_vars_extra[i].type_src == c_cean_var_ptr || |
| m_vars_extra[i].type_src == c_cean_var_ptr_ptr) { |
| // array descriptor |
| const Arr_Desc *ap = |
| static_cast<const Arr_Desc*>(m_vars[i].ptr); |
| |
| // debug dump |
| ARRAY_DESC_DUMP("", "IN/OUT", ap, 1, !src_is_for_mic); |
| |
| // offset and length are derived from the array descriptor |
| __arr_data_offset_and_length(ap, m_vars[i].disp, |
| m_vars[i].size); |
| |
| if (!is_arr_desc_contiguous(ap)) { |
| m_vars[i].flags.is_noncont_src = 1; |
| m_vars_extra[i].read_rng_src = |
| init_read_ranges_arr_desc(ap); |
| } |
| // all necessary information about length and offset is |
| // transferred in var descriptor. There is no need to send |
| // array descriptor to the target side. |
| m_vars[i].ptr = reinterpret_cast<void*>(ap->base); |
| } |
| else if (m_vars_extra[i].type_src == c_dv_ptr) { |
| // need to send DV to the device unless it is 'nocopy' |
| if (m_vars[i].direction.bits || |
| m_vars[i].alloc_if || |
| m_vars[i].free_if) { |
| ArrDesc *dvp = *static_cast<ArrDesc**>(m_vars[i].ptr); |
| |
| // debug dump |
| __dv_desc_dump("IN/OUT", dvp); |
| |
| // for use_device_ptr don't need to change |
| // OUT direction to IN direction |
| if (!m_vars[i].flags.use_device_ptr) { |
| m_vars[i].direction.bits = c_parameter_in; |
| } |
| } |
| |
| // no displacement |
| m_vars[i].disp = 0; |
| } |
| else { |
| // For "use_device_ptr" if direction is "in" then need to |
| // find the associated device pointer and replace the host |
| // pointer with device pointer. Also save the host pointer |
| // to restore when "out" is encountered. |
| // For "out" find the host pointer associated with the |
| // device pointer and restore the host pointer |
| if (m_vars[i].flags.use_device_ptr && src_is_for_host) { |
| setup_use_device_ptr(i); |
| break; |
| } |
| |
| // c_data_ptr or c_string_ptr |
| m_vars[i].size *= m_vars[i].count; |
| m_vars[i].disp = 0; |
| } |
| |
| if (m_vars[i].direction.bits || |
| m_vars[i].alloc_if || |
| m_vars[i].free_if) { |
| PtrData *ptr_data; |
| |
| // check that buffer length > 0 |
| if (m_vars[i].alloc_if && |
| m_vars[i].disp + m_vars[i].size < |
| (m_is_openmp ? 0 : 1)) { |
| LIBOFFLOAD_ERROR(c_zero_or_neg_ptr_len); |
| exit(1); |
| } |
| |
| // base address |
| void *base = *static_cast<void**>(m_vars[i].ptr); |
| |
| // allocate buffer if we have no INTO and don't need |
| // allocation for the ptr at target |
| if (src_is_for_mic) { |
| if (m_vars[i].flags.is_stack_buf) { |
| // for stack persistent objects ptr data is created |
| // by var_desc with number 0. |
| // Its ptr_data is stored at m_stack_ptr_data |
| ptr_data = m_stack_ptr_data; |
| } |
| else if (m_vars[i].alloc_if) { |
| if (m_vars[i].flags.preallocated) { |
| m_out_datalen += sizeof(void*); |
| m_need_runfunction = true; |
| break; |
| } |
| // add new entry |
| if (!alloc_ptr_data( |
| ptr_data, |
| reinterpret_cast<char *>(base) + alloc_disp, |
| (alloc_base != NULL) ? |
| alloc_disp : m_vars[i].disp, |
| (alloc_base != NULL) ? |
| alloc_size : m_vars[i].size, |
| alloc_disp, |
| (alloc_base != NULL) ? |
| 0 : m_vars[i].align, |
| m_vars[i].flags.targetptr, |
| 0, |
| m_vars[i].flags.pin)) { |
| return false; |
| } |
| if (m_vars[i].flags.targetptr) { |
| if (!init_mic_address(ptr_data)) { |
| return false; |
| } |
| *static_cast<void**>(m_vars[i].ptr) = base = |
| reinterpret_cast<void*>(ptr_data->mic_addr); |
| } |
| if (ptr_data->add_reference() == 0 && |
| ptr_data->mic_buf != 0) { |
| // add buffer to the list of buffers that |
| // are passed to dispatch call |
| m_compute_buffers.push_back( |
| ptr_data->mic_buf); |
| } |
| else if (!m_vars[i].flags.pin && |
| !m_vars[i].flags.preallocated) { |
| // will send buffer address to device |
| m_vars[i].flags.sink_addr = 1; |
| m_in_datalen += sizeof(ptr_data->mic_addr); |
| } |
| |
| if (!m_vars[i].flags.pin && |
| !ptr_data->is_static) { |
| // need to add reference for buffer |
| m_need_runfunction = true; |
| } |
| } |
| else { |
| bool error_if_not_found = true; |
| if (m_is_openmp) { |
| // For omp target update variable is ignored |
| // if it does not exist. |
| if (m_vars[i].flags.always_copy || |
| (!m_vars[i].alloc_if && |
| !m_vars[i].free_if)) { |
| error_if_not_found = false; |
| } |
| } |
| |
| // use existing association from pointer table |
| if (!find_ptr_data(ptr_data, |
| base, |
| m_vars[i].disp, |
| m_vars[i].size, |
| m_vars[i].flags.targetptr, |
| error_if_not_found)) { |
| return false; |
| } |
| |
| if (m_is_openmp) { |
| // make var nocopy if it does not exist |
| if (ptr_data == 0) { |
| m_vars[i].direction.bits = |
| c_parameter_nocopy; |
| } |
| } |
| |
| if (ptr_data != 0) { |
| m_vars[i].flags.sink_addr = 1; |
| m_in_datalen += sizeof(ptr_data->mic_addr); |
| } |
| } |
| |
| if (ptr_data != 0) { |
| |
| if (ptr_data->alloc_disp != 0) { |
| m_vars[i].flags.alloc_disp = 1; |
| m_in_datalen += sizeof(alloc_disp); |
| } |
| |
| if (m_vars[i].flags.sink_addr) { |
| // get buffers's address on the sink |
| if (!init_mic_address(ptr_data)) { |
| return false; |
| } |
| |
| m_in_datalen += sizeof(ptr_data->mic_addr); |
| } |
| |
| if (!m_vars[i].flags.pin && |
| !ptr_data->is_static && m_vars[i].free_if) { |
| // need to decrement buffer reference on target |
| m_need_runfunction = true; |
| } |
| |
| // offset to base from the beginning of the buffer |
| // memory |
| m_vars[i].offset = (char*) base - |
| (char*) ptr_data->cpu_addr.start(); |
| |
| // copy other pointer properties to var descriptor |
| m_vars[i].mic_offset = ptr_data->mic_offset; |
| m_vars[i].flags.is_static = ptr_data->is_static; |
| } |
| } |
| else { |
| if (!find_ptr_data(ptr_data, |
| base, |
| m_vars[i].disp, |
| m_vars[i].size, |
| false, false)) { |
| return false; |
| } |
| if (ptr_data) { |
| m_vars[i].offset = |
| (char*) base - |
| (char*) ptr_data->cpu_addr.start(); |
| } |
| } |
| |
| if (m_is_openmp) { |
| if (m_vars[i].flags.use_device_ptr) { |
| setup_use_device_ptr(i); |
| } |
| // for TO transfer of stack buffer's variable |
| if (src_is_for_host && m_vars[i].flags.is_stack_buf) { |
| AutoData *auto_data; |
| char *base = *static_cast<char**>(m_vars[i].ptr); |
| if (m_vars[i].alloc_if) { |
| auto_data =m_device.insert_auto_data( |
| base + m_vars[i].disp, |
| m_vars[i].size); |
| auto_data->add_reference(); |
| } |
| else { |
| auto_data = m_device.find_auto_data( |
| base + m_vars[i].disp); |
| } |
| // save data for later use |
| m_vars_extra[i].auto_data = auto_data; |
| |
| // For automatic variables |
| // data is transferred: |
| // - if always modifier is used OR |
| // - if alloc_if == 0 && free_if == 0 OR |
| // - if reference count is 1 |
| if (!m_vars[i].flags.always_copy && |
| (m_vars[i].alloc_if || |
| m_vars[i].free_if) && |
| auto_data != 0 && |
| auto_data->get_reference() != 1) { |
| m_vars[i].direction.bits = |
| c_parameter_nocopy; |
| } |
| } |
| // for FROM transfer of global pointer variable |
| // FROM transfer of stack buffer's variable |
| // is treated at INTO branch |
| else if (src_is_for_mic && |
| !m_vars[i].flags.is_stack_buf) { |
| // data is transferred only if |
| // alloc_if == 0 && free_if == 0 |
| // or reference count is 1 |
| if (!m_vars[i].flags.always_copy && |
| (m_vars[i].alloc_if || |
| m_vars[i].free_if) && |
| ptr_data && |
| ptr_data->get_reference() != 1) |
| { |
| m_vars[i].direction.bits = |
| c_parameter_nocopy; |
| } |
| } |
| } |
| // save pointer data |
| m_vars_extra[i].src_data = ptr_data; |
| } |
| break; |
| |
| case c_func_ptr: |
| case c_func_ptr_ptr: |
| if (m_vars[i].direction.in) { |
| m_in_datalen += __offload_funcs.max_name_length(); |
| } |
| if (m_vars[i].direction.out) { |
| m_out_datalen += __offload_funcs.max_name_length(); |
| } |
| m_need_runfunction = true; |
| break; |
| |
| case c_dv_data: |
| case c_dv_ptr_data: |
| case c_dv_data_slice: |
| case c_dv_ptr_data_slice: |
| ArrDesc *dvp; |
| if (m_vars[i].flags.is_non_cont_struct) { |
| NonContigDesc *desc = |
| static_cast<NonContigDesc*>(m_vars[i].ptr); |
| noncont_struct_dump(" ", "DV-DATA", desc); |
| dvp = reinterpret_cast<ArrDesc*>(desc->base); |
| } |
| else if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars_extra[i].type_src)) { |
| const Arr_Desc *ap; |
| ap = static_cast<const Arr_Desc*>(m_vars[i].ptr); |
| |
| dvp = (m_vars_extra[i].type_src == c_dv_data_slice) ? |
| reinterpret_cast<ArrDesc*>(ap->base) : |
| *reinterpret_cast<ArrDesc**>(ap->base); |
| } |
| else { |
| dvp = (m_vars_extra[i].type_src == c_dv_data) ? |
| static_cast<ArrDesc*>(m_vars[i].ptr) : |
| *static_cast<ArrDesc**>(m_vars[i].ptr); |
| } |
| |
| // if allocatable dope vector isn't allocated don't |
| // transfer its data |
| if (!__dv_is_allocated(dvp)) { |
| m_vars[i].direction.bits = c_parameter_nocopy; |
| m_vars[i].alloc_if = 0; |
| m_vars[i].free_if = 0; |
| } |
| if (m_vars[i].direction.bits || |
| m_vars[i].alloc_if || |
| m_vars[i].free_if) { |
| const Arr_Desc *ap; |
| |
| if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars_extra[i].type_src)) { |
| ap = static_cast<const Arr_Desc*>(m_vars[i].ptr); |
| |
| // debug dump |
| ARRAY_DESC_DUMP("", "IN/OUT", ap, 0, !src_is_for_mic); |
| } |
| if (!__dv_is_contiguous(dvp)) { |
| m_vars[i].flags.is_noncont_src = 1; |
| m_vars_extra[i].read_rng_src = |
| init_read_ranges_dv(dvp); |
| } |
| |
| // size and displacement |
| if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars_extra[i].type_src)) { |
| // offset and length are derived from the |
| // array descriptor |
| __arr_data_offset_and_length(ap, |
| m_vars[i].disp, |
| m_vars[i].size); |
| if (m_vars[i].direction.bits) { |
| if (!is_arr_desc_contiguous(ap)) { |
| if (m_vars[i].flags.is_noncont_src) { |
| LIBOFFLOAD_ERROR(c_slice_of_noncont_array); |
| return false; |
| } |
| m_vars[i].flags.is_noncont_src = 1; |
| m_vars_extra[i].read_rng_src = |
| init_read_ranges_arr_desc(ap); |
| } |
| } |
| } |
| else { |
| if (m_vars[i].flags.has_length) { |
| m_vars[i].size = |
| __dv_data_length(dvp, m_vars[i].count); |
| } |
| else { |
| m_vars[i].size = __dv_data_length(dvp); |
| } |
| m_vars[i].disp = 0; |
| } |
| |
| // check that length >= 0 |
| if (m_vars[i].alloc_if && |
| (m_vars[i].disp + m_vars[i].size < 0)) { |
| LIBOFFLOAD_ERROR(c_zero_or_neg_ptr_len); |
| exit(1); |
| } |
| |
| // base address |
| void *base = reinterpret_cast<void*>(dvp->Base); |
| PtrData *ptr_data; |
| |
| // allocate buffer if we have no INTO and don't need |
| // allocation for the ptr at target |
| if (src_is_for_mic) { |
| if (m_vars[i].alloc_if) { |
| // add new entry |
| if (!alloc_ptr_data( |
| ptr_data, |
| reinterpret_cast<char *>(base) + alloc_disp, |
| (alloc_base != NULL) ? |
| alloc_disp : m_vars[i].disp, |
| (alloc_base != NULL) ? |
| alloc_size : m_vars[i].size, |
| alloc_disp, |
| (alloc_base != NULL) ? |
| 0 : m_vars[i].align, |
| m_vars[i].flags.targetptr, |
| m_vars[i].flags.preallocated, |
| m_vars[i].flags.pin)) { |
| return false; |
| } |
| |
| if (ptr_data->add_reference() == 0 && |
| ptr_data->mic_buf != 0) { |
| // add buffer to the list of buffers |
| // that are passed to dispatch call |
| m_compute_buffers.push_back( |
| ptr_data->mic_buf); |
| } |
| else { |
| // will send buffer address to device |
| m_vars[i].flags.sink_addr = 1; |
| } |
| |
| if (!ptr_data->is_static) { |
| // need to add reference for buffer |
| m_need_runfunction = true; |
| } |
| } |
| else { |
| bool error_if_not_found = true; |
| if (m_is_openmp) { |
| // For omp target update variable is ignored |
| // if it does not exist. |
| if (m_vars[i].flags.always_copy || |
| (!m_vars[i].alloc_if && |
| !m_vars[i].free_if)) { |
| error_if_not_found = false; |
| } |
| } |
| |
| // use existing association from pointer table |
| if (!find_ptr_data(ptr_data, |
| base, |
| m_vars[i].disp, |
| m_vars[i].size, |
| m_vars[i].flags.targetptr, |
| error_if_not_found)) { |
| return false; |
| } |
| |
| if (m_is_openmp) { |
| // make var nocopy if it does not exist |
| if (ptr_data == 0) { |
| m_vars[i].direction.bits = |
| c_parameter_nocopy; |
| } |
| } |
| |
| if (ptr_data != 0) { |
| // need to update base in dope vector on device |
| m_vars[i].flags.sink_addr = 1; |
| } |
| } |
| |
| if (ptr_data != 0) { |
| if (m_is_openmp) { |
| // data is transferred if |
| // - if always modifier is used OR |
| // - if alloc_if == 0 && free_if == 0 OR |
| // - if reference count is 1 |
| if (!m_vars[i].flags.always_copy && |
| (m_vars[i].alloc_if || |
| m_vars[i].free_if) && |
| ptr_data->get_reference() != 1) { |
| m_vars[i].direction.bits = |
| c_parameter_nocopy; |
| } |
| } |
| |
| if (ptr_data->alloc_disp != 0) { |
| m_vars[i].flags.alloc_disp = 1; |
| m_in_datalen += sizeof(alloc_disp); |
| } |
| |
| if (m_vars[i].flags.sink_addr) { |
| // get buffer's address on the sink |
| if (!init_mic_address(ptr_data)) { |
| return false; |
| } |
| |
| m_in_datalen += sizeof(ptr_data->mic_addr); |
| } |
| |
| if (!ptr_data->is_static && m_vars[i].free_if) { |
| // need to decrement buffer reference on target |
| m_need_runfunction = true; |
| } |
| |
| // offset to base from the beginning of the buffer |
| // memory |
| m_vars[i].offset = |
| (char*) base - |
| (char*) ptr_data->cpu_addr.start(); |
| |
| // copy other pointer properties to var descriptor |
| m_vars[i].mic_offset = ptr_data->mic_offset; |
| m_vars[i].flags.is_static = ptr_data->is_static; |
| } |
| } |
| else { // !src_is_for_mic |
| if (!find_ptr_data(ptr_data, |
| base, |
| m_vars[i].disp, |
| m_vars[i].size, |
| false, false)) { |
| return false; |
| } |
| m_vars[i].offset = !ptr_data ? 0 : |
| (char*) base - |
| (char*) ptr_data->cpu_addr.start(); |
| } |
| |
| // save pointer data |
| m_vars_extra[i].src_data = ptr_data; |
| } |
| break; |
| |
| default: |
| LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars_extra[i].type_src); |
| LIBOFFLOAD_ABORT; |
| } |
| if (m_vars_extra[i].type_src == c_data_ptr_array) { |
| continue; |
| } |
| |
| if (src_is_for_mic && m_vars[i].flags.is_stack_buf) { |
| if (this_threads_cpu_stack_addr == 0) { |
| this_threads_cpu_stack_addr = |
| get_this_threads_cpu_stack_addr( |
| stack_addr, entry_id, thread_specific_function_locals); |
| } |
| m_vars[i].offset = static_cast<char*> |
| (m_vars[i].ptr) - |
| this_threads_cpu_stack_addr; |
| } |
| // if source is used at CPU save its offset and disp |
| if (m_vars[i].into == NULL || m_vars[i].direction.in) { |
| m_vars_extra[i].cpu_offset = m_vars[i].offset; |
| m_vars_extra[i].cpu_disp = m_vars[i].disp; |
| } |
| |
| // If "into" is defined we need to do similar work for it |
| if (!m_vars[i].into) { |
| continue; |
| } |
| |
| int64_t into_disp =0, into_offset = 0; |
| |
| switch (m_vars_extra[i].type_dst) { |
| case c_data_ptr_array: |
| break; |
| case c_data: |
| case c_void_ptr: |
| case c_void_ptr_ptr: |
| case c_cean_var: { |
| int64_t size = m_vars[i].size; |
| |
| if (m_vars[i].flags.is_non_cont_struct && src_is_for_mic) { |
| NonContigDesc *desc = |
| static_cast<NonContigDesc*>(m_vars[i].into); |
| noncont_struct_dump("", "INTO DATA", desc); |
| m_vars_extra[i].noncont_desc = desc; |
| m_vars[i].into = reinterpret_cast<void*>(desc->base); |
| size = get_noncont_struct_size(desc); |
| into_disp = 0; |
| } |
| else if (m_vars_extra[i].type_dst == c_cean_var) { |
| // array descriptor |
| const Arr_Desc *ap = |
| static_cast<const Arr_Desc*>(m_vars[i].into); |
| |
| // debug dump |
| ARRAY_DESC_DUMP(" ", "INTO", ap, 0, src_is_for_mic); |
| |
| // offset and length are derived from the array descriptor |
| __arr_data_offset_and_length(ap, into_disp, size); |
| |
| if (!is_arr_desc_contiguous(ap)) { |
| m_vars[i].flags.is_noncont_dst = 1; |
| m_vars_extra[i].read_rng_dst = |
| init_read_ranges_arr_desc(ap); |
| if (!cean_ranges_match( |
| m_vars_extra[i].read_rng_src, |
| m_vars_extra[i].read_rng_dst)) { |
| LIBOFFLOAD_ERROR(c_ranges_dont_match); |
| exit(1); |
| } |
| } |
| m_vars[i].into = reinterpret_cast<void*>(ap->base); |
| } |
| |
| int64_t size_src = m_vars_extra[i].read_rng_src && |
| !m_vars[i].flags.is_non_cont_struct ? |
| cean_get_transf_size(m_vars_extra[i].read_rng_src) : |
| m_vars[i].size; |
| int64_t size_dst = m_vars_extra[i].read_rng_dst ? |
| cean_get_transf_size(m_vars_extra[i].read_rng_dst) : |
| size; |
| // The "into" (destination) size must be at least |
| // as large as the src size |
| if (size_src > size_dst) { |
| LIBOFFLOAD_ERROR(c_different_src_and_dstn_sizes, |
| size_src, size_dst); |
| exit(1); |
| } |
| |
| if (m_vars[i].direction.bits) { |
| if (m_vars[i].flags.is_static_dstn) { |
| PtrData *ptr_data; |
| |
| // find data associated with variable |
| if (!find_ptr_data(ptr_data, m_vars[i].into, |
| into_disp, size, false, false)) { |
| return false; |
| } |
| if (ptr_data != 0) { |
| // offset to base from the beginning of the buffer |
| // memory |
| into_offset = |
| (char*) m_vars[i].into - |
| (char*) ptr_data->cpu_addr.start(); |
| } |
| else { |
| m_vars[i].flags.is_static_dstn = false; |
| } |
| m_vars_extra[i].dst_data = ptr_data; |
| } |
| } |
| |
| if (m_vars[i].direction.in && |
| !m_vars[i].flags.is_static_dstn) { |
| m_in_datalen += m_vars[i].size; |
| |
| // for non-static target destination defined as CEAN |
| // expression we pass to target its size and disp |
| if (m_vars_extra[i].type_dst == c_cean_var) { |
| m_in_datalen += 2 * sizeof(uint64_t); |
| } |
| m_need_runfunction = true; |
| } |
| |
| if (m_is_openmp && src_is_for_mic) { |
| if (m_vars[i].flags.is_static_dstn) { |
| // Static data is transferred either by omp target |
| // update construct which passes zeros for |
| // alloc_if and free_if or by always modifier. |
| if (!m_vars[i].flags.always_copy && |
| (m_vars[i].alloc_if || m_vars[i].free_if)) { |
| m_vars[i].direction.bits = c_parameter_nocopy; |
| } |
| } |
| else { |
| AutoData *auto_data; |
| if (m_vars[i].alloc_if) { |
| auto_data = m_device.insert_auto_data( |
| m_vars[i].into, size_dst); |
| auto_data->add_reference(); |
| } |
| else { |
| // TODO: what should be done if var is not in |
| // the table? |
| auto_data = m_device.find_auto_data( |
| m_vars[i].into); |
| } |
| |
| // For automatic variables data is transferred: |
| // - if always modifier is used OR |
| // - if alloc_if == 0 && free_if == 0 OR |
| // - if reference count is 1 |
| if (!m_vars[i].flags.always_copy && |
| (m_vars[i].alloc_if || m_vars[i].free_if) && |
| (auto_data == 0 || |
| auto_data->get_reference() != 1)) { |
| m_vars[i].direction.bits = c_parameter_nocopy; |
| } |
| // save data for later use |
| m_vars_extra[i].auto_data = auto_data; |
| } |
| } |
| break; |
| } |
| |
| case c_dv: |
| if (m_vars[i].direction.bits || |
| m_vars[i].alloc_if || |
| m_vars[i].free_if) { |
| ArrDesc *dvp = static_cast<ArrDesc*>(m_vars[i].into); |
| |
| // debug dump |
| __dv_desc_dump("INTO", dvp); |
| |
| // send dope vector contents excluding base |
| m_in_datalen += m_vars[i].size - sizeof(uint64_t); |
| m_need_runfunction = true; |
| } |
| break; |
| |
| case c_string_ptr: |
| case c_data_ptr: |
| case c_string_ptr_ptr: |
| case c_data_ptr_ptr: |
| case c_cean_var_ptr: |
| case c_cean_var_ptr_ptr: |
| case c_dv_ptr: { |
| int64_t size = m_vars[i].size; |
| |
| if (m_vars_extra[i].type_dst == c_cean_var_ptr || |
| m_vars_extra[i].type_dst == c_cean_var_ptr_ptr) { |
| // array descriptor |
| const Arr_Desc *ap = |
| static_cast<const Arr_Desc*>(m_vars[i].into); |
| |
| // debug dump |
| ARRAY_DESC_DUMP(" ", "INTO", ap, 1, src_is_for_mic); |
| |
| // offset and length are derived from the array descriptor |
| __arr_data_offset_and_length(ap, into_disp, size); |
| |
| if (!is_arr_desc_contiguous(ap)) { |
| m_vars[i].flags.is_noncont_src = 1; |
| m_vars_extra[i].read_rng_dst = |
| init_read_ranges_arr_desc(ap); |
| if (!cean_ranges_match( |
| m_vars_extra[i].read_rng_src, |
| m_vars_extra[i].read_rng_dst)) { |
| LIBOFFLOAD_ERROR(c_ranges_dont_match); |
| } |
| } |
| m_vars[i].into = reinterpret_cast<char**>(ap->base); |
| } |
| else if (m_vars_extra[i].type_dst == c_dv_ptr) { |
| // need to send DV to the device unless it is 'nocopy' |
| if (m_vars[i].direction.bits || |
| m_vars[i].alloc_if || |
| m_vars[i].free_if) { |
| ArrDesc *dvp = *static_cast<ArrDesc**>(m_vars[i].into); |
| |
| // debug dump |
| __dv_desc_dump("INTO", dvp); |
| |
| m_vars[i].direction.bits = c_parameter_in; |
| } |
| } |
| |
| int64_t size_src = m_vars_extra[i].read_rng_src && |
| !m_vars[i].flags.is_non_cont_struct ? |
| cean_get_transf_size(m_vars_extra[i].read_rng_src) : |
| m_vars[i].size; |
| int64_t size_dst = m_vars_extra[i].read_rng_dst ? |
| cean_get_transf_size(m_vars_extra[i].read_rng_dst) : |
| size; |
| // The "into" (destination) size must be at least |
| // as large as the src size |
| if (size_src > size_dst) { |
| LIBOFFLOAD_ERROR(c_different_src_and_dstn_sizes, |
| size_src, size_dst); |
| exit(1); |
| } |
| |
| if (m_vars[i].direction.bits) { |
| PtrData *ptr_data; |
| |
| // base address |
| void *base = *static_cast<void**>(m_vars[i].into); |
| |
| if (m_vars[i].direction.in) { |
| // allocate buffer |
| if (m_vars[i].flags.is_stack_buf) { |
| // for stack persistent objects ptr data is created |
| // by var_desc with number 0. |
| // Its ptr_data is stored at m_stack_ptr_data |
| ptr_data = m_stack_ptr_data; |
| } |
| else if (m_vars[i].alloc_if) { |
| if (m_vars[i].flags.preallocated) { |
| m_out_datalen += sizeof(void*); |
| m_need_runfunction = true; |
| break; |
| } |
| // add new entry |
| if (!alloc_ptr_data( |
| ptr_data, |
| reinterpret_cast<char *>(base) + alloc_disp, |
| (alloc_base != NULL) ? |
| alloc_disp : into_disp, |
| (alloc_base != NULL) ? |
| alloc_size : size, |
| alloc_disp, |
| (alloc_base != NULL) ? |
| 0 : m_vars[i].align, |
| m_vars[i].flags.targetptr, |
| m_vars[i].flags.preallocated, |
| m_vars[i].flags.pin)) { |
| return false; |
| } |
| if (m_vars[i].flags.targetptr) { |
| if (!init_mic_address(ptr_data)) { |
| return false; |
| } |
| *static_cast<void**>(m_vars[i].into) = base = |
| reinterpret_cast<void*>(ptr_data->mic_addr); |
| } |
| if (ptr_data->add_reference() == 0 && |
| ptr_data->mic_buf != 0) { |
| // add buffer to the list of buffers that |
| // are passed to dispatch call |
| m_compute_buffers.push_back( |
| ptr_data->mic_buf); |
| } |
| else { |
| // will send buffer address to device |
| m_vars[i].flags.sink_addr = 1; |
| } |
| |
| if (!ptr_data->is_static) { |
| // need to add reference for buffer |
| m_need_runfunction = true; |
| } |
| } |
| else { |
| // use existing association from pointer table |
| if (!find_ptr_data(ptr_data, base, into_disp, |
| size, m_vars[i].flags.targetptr, true)) { |
| return false; |
| } |
| m_vars[i].flags.sink_addr = 1; |
| } |
| |
| if (ptr_data->alloc_disp != 0) { |
| m_vars[i].flags.alloc_disp = 1; |
| m_in_datalen += sizeof(alloc_disp); |
| } |
| |
| if (m_vars[i].flags.sink_addr) { |
| // get buffer's address on the sink |
| if (!init_mic_address(ptr_data)) { |
| return false; |
| } |
| |
| m_in_datalen += sizeof(ptr_data->mic_addr); |
| } |
| |
| if (!ptr_data->is_static && m_vars[i].free_if) { |
| // need to decrement buffer reference on target |
| m_need_runfunction = true; |
| } |
| |
| // copy other pointer properties to var descriptor |
| m_vars[i].mic_offset = ptr_data->mic_offset; |
| m_vars[i].flags.is_static_dstn = ptr_data->is_static; |
| } |
| else { |
| if (!find_ptr_data(ptr_data, |
| base, |
| into_disp, |
| m_vars[i].size, |
| false, false)) { |
| return false; |
| } |
| } |
| if (ptr_data) { |
| into_offset = ptr_data ? |
| (char*) base - |
| (char*) ptr_data->cpu_addr.start() : |
| 0; |
| } |
| |
| if (m_is_openmp) { |
| // for FROM transfer of stack buffer's variable |
| if (src_is_for_mic && m_vars[i].flags.is_stack_buf) { |
| AutoData *auto_data; |
| char *base = *static_cast<char**>(m_vars[i].into); |
| if (m_vars[i].alloc_if) { |
| auto_data =m_device.insert_auto_data( |
| base + into_disp, |
| size); |
| auto_data->add_reference(); |
| } |
| else { |
| auto_data = m_device.find_auto_data( |
| base + into_disp); |
| } |
| // save data for later use |
| m_vars_extra[i].auto_data = auto_data; |
| // For automatic variables |
| // data is transferred: |
| // - if always modifier is used OR |
| // - if alloc_if == 0 && free_if == 0 OR |
| // - if reference count is 1 |
| if (!m_vars[i].flags.always_copy && |
| (m_vars[i].alloc_if || |
| m_vars[i].free_if) && |
| auto_data != 0 && |
| auto_data->get_reference() != 1) { |
| m_vars[i].direction.bits = |
| c_parameter_nocopy; |
| } |
| } |
| } |
| // save pointer data |
| m_vars_extra[i].dst_data = ptr_data; |
| } |
| break; |
| } |
| |
| case c_func_ptr: |
| case c_func_ptr_ptr: |
| break; |
| |
| case c_dv_data: |
| case c_dv_ptr_data: |
| case c_dv_data_slice: |
| case c_dv_ptr_data_slice: |
| if (m_vars[i].direction.bits || |
| m_vars[i].alloc_if || |
| m_vars[i].free_if) { |
| const Arr_Desc *ap; |
| ArrDesc *dvp; |
| PtrData *ptr_data; |
| int64_t disp; |
| int64_t size; |
| |
| if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars_extra[i].type_dst)) { |
| ap = static_cast<const Arr_Desc*>(m_vars[i].into); |
| |
| // debug dump |
| ARRAY_DESC_DUMP(" ", "INTO", ap, 0, src_is_for_mic); |
| |
| dvp = (m_vars_extra[i].type_dst == c_dv_data_slice) ? |
| reinterpret_cast<ArrDesc*>(ap->base) : |
| *reinterpret_cast<ArrDesc**>(ap->base); |
| } |
| else { |
| dvp = (m_vars_extra[i].type_dst == c_dv_data) ? |
| static_cast<ArrDesc*>(m_vars[i].into) : |
| *static_cast<ArrDesc**>(m_vars[i].into); |
| } |
| if (!__dv_is_contiguous(dvp)) { |
| m_vars[i].flags.is_noncont_dst = 1; |
| m_vars_extra[i].read_rng_dst = |
| init_read_ranges_dv(dvp); |
| } |
| // size and displacement |
| if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars_extra[i].type_dst)) { |
| // offset and length are derived from the array |
| // descriptor |
| __arr_data_offset_and_length(ap, into_disp, size); |
| if (m_vars[i].direction.bits) { |
| if (!is_arr_desc_contiguous(ap)) { |
| if (m_vars[i].flags.is_noncont_dst) { |
| LIBOFFLOAD_ERROR(c_slice_of_noncont_array); |
| return false; |
| } |
| m_vars[i].flags.is_noncont_dst = 1; |
| m_vars_extra[i].read_rng_dst = |
| init_read_ranges_arr_desc(ap); |
| if (!cean_ranges_match( |
| m_vars_extra[i].read_rng_src, |
| m_vars_extra[i].read_rng_dst)) { |
| LIBOFFLOAD_ERROR(c_ranges_dont_match); |
| } |
| } |
| } |
| } |
| else { |
| if (m_vars[i].flags.has_length) { |
| size = __dv_data_length(dvp, m_vars[i].count); |
| } |
| else { |
| size = __dv_data_length(dvp); |
| } |
| disp = 0; |
| } |
| |
| int64_t size_src = |
| m_vars_extra[i].read_rng_src && |
| (!m_vars[i].flags.is_non_cont_struct || |
| src_is_for_mic) ? |
| cean_get_transf_size(m_vars_extra[i].read_rng_src) : |
| m_vars[i].size; |
| int64_t size_dst = |
| m_vars_extra[i].read_rng_dst ? |
| cean_get_transf_size(m_vars_extra[i].read_rng_dst) : |
| size; |
| // The "into" (destination) size must be at least |
| // as large as the src size |
| if (size_src > size_dst) { |
| LIBOFFLOAD_ERROR(c_different_src_and_dstn_sizes, |
| size_src, size_dst); |
| exit(1); |
| } |
| |
| // base address |
| void *base = reinterpret_cast<void*>(dvp->Base); |
| |
| // allocate buffer |
| if (m_vars[i].direction.in) { |
| if (m_vars[i].alloc_if) { |
| // add new entry |
| if (!alloc_ptr_data( |
| ptr_data, |
| reinterpret_cast<char *>(base) + alloc_disp, |
| (alloc_base != NULL) ? |
| alloc_disp : into_disp, |
| (alloc_base != NULL) ? |
| alloc_size : size, |
| alloc_disp, |
| (alloc_base != NULL) ? |
| 0 : m_vars[i].align, |
| m_vars[i].flags.targetptr, |
| m_vars[i].flags.preallocated, |
| m_vars[i].flags.pin)) { |
| return false; |
| } |
| if (ptr_data->add_reference() == 0 && |
| ptr_data->mic_buf !=0) { |
| // add buffer to the list of buffers |
| // that are passed to dispatch call |
| m_compute_buffers.push_back( |
| ptr_data->mic_buf); |
| } |
| else { |
| // will send buffer address to device |
| m_vars[i].flags.sink_addr = 1; |
| } |
| |
| if (!ptr_data->is_static) { |
| // need to add reference for buffer |
| m_need_runfunction = true; |
| } |
| } |
| else { |
| // use existing association from pointer table |
| if (!find_ptr_data(ptr_data, base, into_disp, |
| size, m_vars[i].flags.targetptr, true)) { |
| return false; |
| } |
| |
| // need to update base in dope vector on device |
| m_vars[i].flags.sink_addr = 1; |
| } |
| |
| if (ptr_data->alloc_disp != 0) { |
| m_vars[i].flags.alloc_disp = 1; |
| m_in_datalen += sizeof(alloc_disp); |
| } |
| |
| if (m_vars[i].flags.sink_addr) { |
| // get buffer's address on the sink |
| if (!init_mic_address(ptr_data)) { |
| return false; |
| } |
| m_in_datalen += sizeof(ptr_data->mic_addr); |
| } |
| |
| if (!ptr_data->is_static && m_vars[i].free_if) { |
| // need to decrement buffer reference on target |
| m_need_runfunction = true; |
| } |
| |
| // offset to base from the beginning of the buffer |
| // memory |
| into_offset = |
| (char*) base - (char*) ptr_data->cpu_addr.start(); |
| |
| // copy other pointer properties to var descriptor |
| m_vars[i].mic_offset = ptr_data->mic_offset; |
| m_vars[i].flags.is_static_dstn = ptr_data->is_static; |
| } |
| else { // src_is_for_mic |
| if (!find_ptr_data(ptr_data, |
| base, |
| into_disp, |
| size, |
| false, false)) { |
| return false; |
| } |
| into_offset = !ptr_data ? |
| 0 : |
| (char*) base - (char*) ptr_data->cpu_addr.start(); |
| } |
| |
| // save pointer data |
| m_vars_extra[i].dst_data = ptr_data; |
| } |
| break |