| /* |
| Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved. |
| |
| Redistribution and use in source and binary forms, with or without |
| modification, are permitted provided that the following conditions |
| are met: |
| |
| * Redistributions of source code must retain the above copyright |
| notice, this list of conditions and the following disclaimer. |
| * Redistributions in binary form must reproduce the above copyright |
| notice, this list of conditions and the following disclaimer in the |
| documentation and/or other materials provided with the distribution. |
| * Neither the name of Intel Corporation nor the names of its |
| contributors may be used to endorse or promote products derived |
| from this software without specific prior written permission. |
| |
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| |
| /*! \file |
| \brief The parts of the runtime library used only on the host |
| */ |
| |
| #ifndef OFFLOAD_HOST_H_INCLUDED |
| #define OFFLOAD_HOST_H_INCLUDED |
| |
| #ifndef TARGET_WINNT |
| #include <unistd.h> |
| #endif // TARGET_WINNT |
| #include "offload_common.h" |
| #include "offload_util.h" |
| #include "offload_engine.h" |
| #include "offload_env.h" |
| #include "offload_orsl.h" |
| #include "coi/coi_client.h" |
| |
| // MIC engines. |
| DLL_LOCAL extern Engine* mic_engines; |
| DLL_LOCAL extern uint32_t mic_engines_total; |
| |
| // DMA channel count used by COI and set via |
| // OFFLOAD_DMA_CHANNEL_COUNT environment variable |
| DLL_LOCAL extern uint32_t mic_dma_channel_count; |
| |
//! Header of a packed target image.
/*!
    The target image is packed as follows:
      1. 8 bytes containing the size of the target binary;
      2. a null-terminated string which is the binary name;
      3. <size> number of bytes that are the contents of the image.

    The address of symbol __offload_target_image is the address of this
    structure.
*/
struct Image {
    //! Size in bytes of the target binary name and contents.
    //! NOTE(review): item 1 above describes this field as the size of the
    //! target binary only -- confirm which of the two is written.
    int64_t size;

    //! The name and contents of the target image (flexible array member).
    char data[];
};
| |
| // The offload descriptor. |
| class OffloadDescriptor |
| { |
| public: |
| enum OmpAsyncLastEventType { |
| c_last_not, // not last event |
| c_last_write, // the last event that is write |
| c_last_read, // the last event that is read |
| c_last_runfunc // the last event that is runfunction |
| }; |
| |
| OffloadDescriptor( |
| int index, |
| _Offload_status *status, |
| bool is_mandatory, |
| bool is_openmp, |
| OffloadHostTimerData * timer_data |
| ) : |
| m_device(mic_engines[index == -1 ? 0 : index % mic_engines_total]), |
| m_is_mandatory(is_mandatory), |
| m_is_openmp(is_openmp), |
| m_inout_buf(0), |
| m_func_desc(0), |
| m_func_desc_size(0), |
| m_num_in_dependencies(0), |
| m_p_in_dependencies(0), |
| m_in_deps(0), |
| m_in_deps_total(0), |
| m_in_deps_allocated(0), |
| m_out_deps(0), |
| m_out_deps_total(0), |
| m_out_deps_allocated(0), |
| m_vars(0), |
| m_vars_extra(0), |
| m_status(status), |
| m_timer_data(timer_data), |
| m_out_with_preallocated(false), |
| m_preallocated_alloc(false), |
| m_traceback_called(false), |
| m_stream(-1), |
| m_signal(0), |
| m_has_signal(0), |
| m_omp_async_last_event_type(c_last_not) |
| { |
| m_wait_all_devices = index == -1; |
| } |
| |
| ~OffloadDescriptor() |
| { |
| if (m_in_deps != 0) { |
| free(m_in_deps); |
| } |
| if (m_out_deps != 0) { |
| free(m_out_deps); |
| } |
| if (m_func_desc != 0) { |
| free(m_func_desc); |
| } |
| if (m_vars != 0) { |
| free(m_vars); |
| free(m_vars_extra); |
| } |
| } |
| |
| bool offload(const char *name, bool is_empty, |
| VarDesc *vars, VarDesc2 *vars2, int vars_total, |
| const void **waits, int num_waits, const void **signal, |
| int entry_id, const void *stack_addr, |
| OffloadFlags offload_flags); |
| |
| bool offload_finish(bool is_traceback); |
| |
| bool is_signaled(); |
| |
| OffloadHostTimerData* get_timer_data() const { |
| return m_timer_data; |
| } |
| |
| void set_stream(_Offload_stream stream) { |
| m_stream = stream; |
| } |
| |
| _Offload_stream get_stream() { |
| return(m_stream); |
| } |
| |
| Engine& get_device() { |
| return m_device; |
| } |
| |
| void* get_signal() { |
| return(m_signal); |
| } |
| |
| void set_signal(const void* signal) { |
| m_has_signal = 1; |
| m_signal = const_cast<void*>(signal); |
| } |
| |
| void cleanup(); |
| |
| uint32_t m_event_count; |
| bool m_has_signal; |
| |
| private: |
| bool offload_wrap(const char *name, bool is_empty, |
| VarDesc *vars, VarDesc2 *vars2, int vars_total, |
| const void **waits, int num_waits, const void **signal, |
| int entry_id, const void *stack_addr, |
| OffloadFlags offload_flags); |
| bool wait_dependencies(const void **waits, int num_waits, |
| _Offload_stream stream); |
| bool setup_descriptors(VarDesc *vars, VarDesc2 *vars2, int vars_total, |
| int entry_id, const void *stack_addr); |
| bool setup_misc_data(const char *name); |
| bool send_pointer_data(bool is_async, void* info); |
| bool send_noncontiguous_pointer_data( |
| int i, |
| PtrData* src_buf, |
| PtrData* dst_buf, |
| COIEVENT *event, |
| uint64_t &sent_data, |
| uint32_t in_deps_amount, |
| COIEVENT *in_deps |
| ); |
| bool receive_noncontiguous_pointer_data( |
| int i, |
| COIBUFFER dst_buf, |
| COIEVENT *event, |
| uint64_t &received_data, |
| uint32_t in_deps_amount, |
| COIEVENT *in_deps |
| ); |
| |
| bool gather_copyin_data(); |
| |
| bool compute(void *); |
| |
| bool receive_pointer_data(bool is_async, bool first_run, void * info); |
| bool scatter_copyout_data(); |
| |
| bool find_ptr_data(PtrData* &ptr_data, void *base, int64_t disp, |
| int64_t length, bool is_targptr, |
| bool error_does_not_exist = true); |
| |
| void find_device_ptr( int64_t* &device_ptr, |
| void *host_ptr); |
| |
| bool alloc_ptr_data(PtrData* &ptr_data, void *base, int64_t disp, |
| int64_t length, int64_t alloc_disp, int align, |
| bool is_targptr, bool is_prealloc, bool pin); |
| bool create_preallocated_buffer(PtrData* ptr_data, void *base); |
| bool init_static_ptr_data(PtrData *ptr_data); |
| bool init_mic_address(PtrData *ptr_data); |
| bool offload_stack_memory_manager( |
| const void * stack_begin, |
| int routine_id, |
| int buf_size, |
| int align, |
| bool thread_specific_function_locals, |
| bool *is_new); |
| char *get_this_threads_cpu_stack_addr( |
| const void * stack_begin, |
| int routine_id, |
| bool thread_specific_function_locals); |
| PtrData *get_this_threads_mic_stack_addr( |
| const void * stack_begin, |
| int routine_id, |
| bool thread_specific_function_locals); |
| bool nullify_target_stack(COIBUFFER targ_buf, uint64_t size); |
| |
| bool gen_var_descs_for_pointer_array(int i); |
| |
| void get_stream_in_dependencies(uint32_t &in_deps_amount, |
| COIEVENT* &in_deps); |
| |
| void report_coi_error(error_types msg, COIRESULT res); |
| _Offload_result translate_coi_error(COIRESULT res) const; |
| |
| void setup_omp_async_info(); |
| |
| void setup_use_device_ptr(int i); |
| |
| void register_event_call_back(void (*)( |
| COIEVENT, |
| const COIRESULT, |
| const void*), |
| const COIEVENT *event, |
| const void *info); |
| |
| void register_omp_event_call_back(const COIEVENT *event, const void *info); |
| |
| private: |
| typedef std::list<COIBUFFER> BufferList; |
| |
| // extra data associated with each variable descriptor |
| struct VarExtra { |
| PtrData* src_data; |
| PtrData* dst_data; |
| AutoData* auto_data; |
| int64_t cpu_disp; |
| int64_t cpu_offset; |
| void *alloc; |
| union { |
| CeanReadRanges *read_rng_src; |
| NonContigDesc *noncont_desc; |
| }; |
| CeanReadRanges *read_rng_dst; |
| int64_t ptr_arr_offset; |
| bool is_arr_ptr_el; |
| OmpAsyncLastEventType omp_last_event_type; |
| int64_t pointer_offset; |
| uint16_t type_src; |
| uint16_t type_dst; |
| }; |
| |
| template<typename T> class ReadArrElements { |
| public: |
| ReadArrElements(): |
| ranges(NULL), |
| el_size(sizeof(T)), |
| offset(0), |
| count(0), |
| is_empty(true), |
| base(NULL) |
| {} |
| |
| bool read_next(bool flag) |
| { |
| if (flag != 0) { |
| if (is_empty) { |
| if (ranges) { |
| if (!get_next_range(ranges, &offset)) { |
| // ranges are over |
| return false; |
| } |
| } |
| // all contiguous elements are over |
| else if (count != 0) { |
| return false; |
| } |
| |
| length_cur = size; |
| } |
| else { |
| offset += el_size; |
| } |
| val = (T)get_el_value(base, offset, el_size); |
| length_cur -= el_size; |
| count++; |
| is_empty = length_cur == 0; |
| } |
| return true; |
| } |
| public: |
| CeanReadRanges * ranges; |
| T val; |
| int el_size; |
| int64_t size, |
| offset, |
| length_cur; |
| bool is_empty; |
| int count; |
| char *base; |
| }; |
| |
| // ptr_data for persistent auto objects |
| PtrData* m_stack_ptr_data; |
| PtrDataList m_destroy_stack; |
| |
| // Engine |
| Engine& m_device; |
| |
| // true for offload_wait target(mic) stream(0) |
| bool m_wait_all_devices; |
| |
| // if true offload is mandatory |
| bool m_is_mandatory; |
| |
| // if true offload has openmp origin |
| const bool m_is_openmp; |
| |
| // The Marshaller for the inputs of the offloaded region. |
| Marshaller m_in; |
| |
| // The Marshaller for the outputs of the offloaded region. |
| Marshaller m_out; |
| |
| // List of buffers that are passed to dispatch call |
| BufferList m_compute_buffers; |
| |
| // List of buffers that need to be destroyed at the end of offload |
| BufferList m_destroy_buffers; |
| |
| // Variable descriptors |
| VarDesc* m_vars; |
| VarExtra* m_vars_extra; |
| int m_vars_total; |
| |
| // Pointer to a user-specified status variable |
| _Offload_status *m_status; |
| |
| // Function descriptor |
| FunctionDescriptor* m_func_desc; |
| uint32_t m_func_desc_size; |
| |
| // Buffer for transferring copyin/copyout data |
| COIBUFFER m_inout_buf; |
| |
| |
| // Dependencies |
| COIEVENT *m_in_deps; |
| uint32_t m_in_deps_total; |
| uint32_t m_in_deps_allocated; |
| COIEVENT *m_out_deps; |
| uint32_t m_out_deps_total; |
| uint32_t m_out_deps_allocated; |
| |
| // 2 variables defines input dependencies for current COI API. |
| // The calls to routines as BufferWrite/PipelineRunFunction/BufferRead |
| // is supposed to have input dependencies. |
| // 2 variables below defines the number and vector of dependencies |
| // in every current moment of offload. |
| // So any phase of offload can use its values as input dependencies |
| // for the COI API that the phase calls. |
| // It means that all phases (of Write, RunFunction,Read) must keep |
| // the variables correct to be used by following phase. |
| // If some consequent offloads are connected (i.e. by the same stream) |
| // the final 2 variables of the offload is used as initial inputs |
| // for the next offload. |
| uint32_t m_num_in_dependencies; |
| COIEVENT *m_p_in_dependencies; |
| |
| // Stream |
| _Offload_stream m_stream; |
| |
| // Signal |
| void* m_signal; |
| |
| // Timer data |
| OffloadHostTimerData *m_timer_data; |
| |
| // copyin/copyout data length |
| uint64_t m_in_datalen; |
| uint64_t m_out_datalen; |
| |
| // a boolean value calculated in setup_descriptors. If true we need to do |
| // a run function on the target. Otherwise it may be optimized away. |
| bool m_need_runfunction; |
| |
| // initialized value of m_need_runfunction; |
| // is used to recognize offload_transfer |
| bool m_initial_need_runfunction; |
| |
| // a Boolean value set to true when OUT clauses with preallocated targetptr |
| // is encountered to indicate that call receive_pointer_data needs to be |
| // invoked again after call to scatter_copyout_data. |
| bool m_out_with_preallocated; |
| |
| // a Boolean value set to true if an alloc_if(1) is used with preallocated |
| // targetptr to indicate the need to scatter_copyout_data even for |
| // async offload |
| bool m_preallocated_alloc; |
| |
| // a Boolean value set to true if traceback routine is called |
| bool m_traceback_called; |
| |
| OmpAsyncLastEventType m_omp_async_last_event_type; |
| }; |
| |
//! Initialization types for MIC.
enum OffloadInitType {
    //! all devices before entering main
    c_init_on_start       = 0,
    //! single device before starting the first offload
    c_init_on_offload     = 1,
    //! all devices before starting the first offload
    c_init_on_offload_all = 2
};
| |
| // Determines if MIC code is an executable or a shared library |
| extern "C" bool __offload_target_image_is_executable(const void *target_image); |
| |
| // Initializes library and registers specified offload image. |
| extern "C" bool __offload_register_image(const void* image); |
| extern "C" void __offload_unregister_image(const void* image); |
| |
| // Registers asynchronous task completion callback |
| extern "C" void __offload_register_task_callback(void (*cb)(void *)); |
| |
| // Initializes offload runtime library. |
| DLL_LOCAL extern int __offload_init_library(void); |
| |
| // thread data for associating pipelines with threads |
| DLL_LOCAL extern pthread_key_t mic_thread_key; |
| |
| // location of offload_main executable |
| // To be used if the main application has no offload and is not built |
| // with -offload but dynamic library linked in has offload pragma |
| DLL_LOCAL extern char* mic_device_main; |
| |
| // Environment variables for devices |
| DLL_LOCAL extern MicEnvVar mic_env_vars; |
| |
| // CPU frequency |
| DLL_LOCAL extern uint64_t cpu_frequency; |
| |
| // LD_LIBRARY_PATH for KNC libraries |
| DLL_LOCAL extern char* knc_library_path; |
| |
| // LD_LIBRARY_PATH for KNL libraries |
| DLL_LOCAL extern char* knl_library_path; |
| |
| // stack size for target |
| DLL_LOCAL extern uint32_t mic_stack_size; |
| |
| // Preallocated memory size for buffers on MIC |
| DLL_LOCAL extern uint64_t mic_buffer_size; |
| |
| // Preallocated 4K page memory size for buffers on MIC |
| DLL_LOCAL extern uint64_t mic_4k_buffer_size; |
| |
| // Preallocated 2M page memory size for buffers on MIC |
| DLL_LOCAL extern uint64_t mic_2m_buffer_size; |
| |
| // Setting controlling inout proxy |
| DLL_LOCAL extern bool mic_proxy_io; |
| DLL_LOCAL extern char* mic_proxy_fs_root; |
| |
| // Threshold for creating buffers with large pages |
| DLL_LOCAL extern uint64_t __offload_use_2mb_buffers; |
| |
| // offload initialization type |
| DLL_LOCAL extern OffloadInitType __offload_init_type; |
| |
| // Device number to offload to when device is not explicitly specified. |
| DLL_LOCAL extern int __omp_device_num; |
| |
| // target executable |
| DLL_LOCAL extern TargetImage* __target_exe; |
| |
| // is true if last loaded image is dll |
| DLL_LOCAL extern bool __current_image_is_dll; |
| // is true if myo library is loaded when dll is loaded |
| DLL_LOCAL extern bool __myo_init_in_so; |
| |
// IDB support

// Size of the __dbg_target_exe_name buffer.
#define MAX_TARGET_NAME 512

// Everything in this block has C language linkage. The variables keep an
// explicit `extern` so that they stay declarations rather than definitions.
extern "C" {

//! Called by the offload runtime after initialization of offload
//! infrastructure has been completed.
void __dbg_target_so_loaded();

//! Called by the offload runtime when the offload infrastructure is about
//! to be shut down, currently at application exit.
void __dbg_target_so_unloaded();

//! Null-terminated string containing path to the process image of the
//! hosting application (offload_main).
extern char __dbg_target_exe_name[MAX_TARGET_NAME];

//! Integer specifying the process id.
extern pid_t __dbg_target_so_pid;

//! Integer specifying the 0-based device number.
extern int __dbg_target_id;

//! Set to non-zero by the host-side debugger to enable offload debugging
//! support.
extern int __dbg_is_attached;

//! Major version of the debugger support API.
extern const int __dbg_api_major_version;

//! Minor version of the debugger support API.
extern const int __dbg_api_minor_version;

} // extern "C"
| |
| #endif // OFFLOAD_HOST_H_INCLUDED |