Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/ompt-general.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/ompt-general.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/ompt-general.cpp (nonexistent) @@ -1,724 +0,0 @@ -/* - * ompt-general.cpp -- OMPT implementation of interface functions - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -/***************************************************************************** - * system include files - ****************************************************************************/ - -#include <assert.h> - -#include <stdint.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#if KMP_OS_UNIX -#include <dlfcn.h> -#endif - -/***************************************************************************** - * ompt include files - ****************************************************************************/ - -#include "ompt-specific.cpp" - -/***************************************************************************** - * macros - ****************************************************************************/ - -#define ompt_get_callback_success 1 -#define ompt_get_callback_failure 0 - -#define no_tool_present 0 - -#define OMPT_API_ROUTINE static - -#ifndef OMPT_STR_MATCH -#define OMPT_STR_MATCH(haystack, needle) (!strcasecmp(haystack, needle)) -#endif - -/***************************************************************************** - * types - ****************************************************************************/ - -typedef struct { - const char *state_name; - ompt_state_t state_id; -} ompt_state_info_t; - -typedef struct { - const char *name; - kmp_mutex_impl_t id; -} kmp_mutex_impl_info_t; - -enum tool_setting_e { - omp_tool_error, - omp_tool_unset, - omp_tool_disabled, - omp_tool_enabled -}; - -/***************************************************************************** - * global variables - ****************************************************************************/ - -ompt_callbacks_active_t ompt_enabled; - -ompt_state_info_t ompt_state_info[] = { -#define ompt_state_macro(state, code) {#state, state}, - FOREACH_OMPT_STATE(ompt_state_macro) -#undef ompt_state_macro -}; - -kmp_mutex_impl_info_t kmp_mutex_impl_info[] = { -#define kmp_mutex_impl_macro(name, id) {#name, name}, - FOREACH_KMP_MUTEX_IMPL(kmp_mutex_impl_macro) -#undef kmp_mutex_impl_macro -}; - -ompt_callbacks_internal_t ompt_callbacks; - -static ompt_start_tool_result_t *ompt_start_tool_result = NULL; - -/***************************************************************************** - * forward declarations - ****************************************************************************/ - -static ompt_interface_fn_t ompt_fn_lookup(const char *s); - -OMPT_API_ROUTINE ompt_data_t *ompt_get_thread_data(void); - -/***************************************************************************** - * initialization and finalization (private operations) - ****************************************************************************/ - -typedef ompt_start_tool_result_t *(*ompt_start_tool_t)(unsigned int, - const char *); - -#if KMP_OS_DARWIN - -// While Darwin supports weak symbols, the library that wishes to provide a
new -// implementation has to link against this runtime which defeats the purpose -// of having tools that are agnostic of the underlying runtime implementation. -// -// Fortunately, the linker includes all symbols of an executable in the global -// symbol table by default so dlsym() even finds static implementations of -// ompt_start_tool. For this to work on Linux, -Wl,--export-dynamic needs to be -// passed when building the application which we don't want to rely on. - -static ompt_start_tool_result_t *ompt_tool_darwin(unsigned int omp_version, - const char *runtime_version) { - ompt_start_tool_result_t *ret = NULL; - // Search symbol in the current address space. - ompt_start_tool_t start_tool = - (ompt_start_tool_t)dlsym(RTLD_DEFAULT, "ompt_start_tool"); - if (start_tool) { - ret = start_tool(omp_version, runtime_version); - } - return ret; -} - -#elif OMPT_HAVE_WEAK_ATTRIBUTE - -// On Unix-like systems that support weak symbols the following implementation -// of ompt_start_tool() will be used in case no tool-supplied implementation of -// this function is present in the address space of a process. - -_OMP_EXTERN OMPT_WEAK_ATTRIBUTE ompt_start_tool_result_t * -ompt_start_tool(unsigned int omp_version, const char *runtime_version) { - ompt_start_tool_result_t *ret = NULL; - // Search next symbol in the current address space. This can happen if the - // runtime library is linked before the tool. Since glibc 2.2 strong symbols - // don't override weak symbols that have been found before unless the user - // sets the environment variable LD_DYNAMIC_WEAK. - ompt_start_tool_t next_tool = - (ompt_start_tool_t)dlsym(RTLD_NEXT, "ompt_start_tool"); - if (next_tool) { - ret = next_tool(omp_version, runtime_version); - } - return ret; -} - -#elif OMPT_HAVE_PSAPI - -// On Windows, the ompt_tool_windows function is used to find the -// ompt_start_tool symbol across all modules loaded by a process. If -// ompt_start_tool is found, ompt_start_tool's return value is used to -// initialize the tool. Otherwise, NULL is returned and OMPT won't be enabled. - -#include <psapi.h> -#pragma comment(lib, "psapi.lib") - -// The number of loaded modules to start enumeration with EnumProcessModules() -#define NUM_MODULES 128 - -static ompt_start_tool_result_t * -ompt_tool_windows(unsigned int omp_version, const char *runtime_version) { - int i; - DWORD needed, new_size; - HMODULE *modules; - HANDLE process = GetCurrentProcess(); - modules = (HMODULE *)malloc(NUM_MODULES * sizeof(HMODULE)); - ompt_start_tool_t ompt_tool_p = NULL; - -#if OMPT_DEBUG - printf("ompt_tool_windows(): looking for ompt_start_tool\n"); -#endif - if (!EnumProcessModules(process, modules, NUM_MODULES * sizeof(HMODULE), - &needed)) { - // Regardless of the error reason use the stub initialization function - free(modules); - return NULL; - } - // Check if NUM_MODULES is enough to list all modules - new_size = needed / sizeof(HMODULE); - if (new_size > NUM_MODULES) { -#if OMPT_DEBUG - printf("ompt_tool_windows(): resize buffer to %d bytes\n", needed); -#endif - modules = (HMODULE *)realloc(modules, needed); - // If resizing failed use the stub function.
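- // EnumProcessModules reported the exact byte count in "needed" above, so the retry below uses a right-sized buffer before falling back to no tool.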
- if (!EnumProcessModules(process, modules, needed, &needed)) { - free(modules); - return NULL; - } - } - for (i = 0; i < new_size; ++i) { - (FARPROC &)ompt_tool_p = GetProcAddress(modules[i], "ompt_start_tool"); - if (ompt_tool_p) { -#if OMPT_DEBUG - TCHAR modName[MAX_PATH]; - if (GetModuleFileName(modules[i], modName, MAX_PATH)) - printf("ompt_tool_windows(): ompt_start_tool found in module %s\n", - modName); -#endif - free(modules); - return (*ompt_tool_p)(omp_version, runtime_version); - } -#if OMPT_DEBUG - else { - TCHAR modName[MAX_PATH]; - if (GetModuleFileName(modules[i], modName, MAX_PATH)) - printf("ompt_tool_windows(): ompt_start_tool not found in module %s\n", - modName); - } -#endif - } - free(modules); - return NULL; -} -#else -#error Activation of OMPT is not supported on this platform. -#endif - -static ompt_start_tool_result_t * -ompt_try_start_tool(unsigned int omp_version, const char *runtime_version) { - ompt_start_tool_result_t *ret = NULL; - ompt_start_tool_t start_tool = NULL; -#if KMP_OS_WINDOWS - // Cannot use colon to describe a list of absolute paths on Windows - const char *sep = ";"; -#else - const char *sep = ":"; -#endif - -#if KMP_OS_DARWIN - // Try in the current address space - ret = ompt_tool_darwin(omp_version, runtime_version); -#elif OMPT_HAVE_WEAK_ATTRIBUTE - ret = ompt_start_tool(omp_version, runtime_version); -#elif OMPT_HAVE_PSAPI - ret = ompt_tool_windows(omp_version, runtime_version); -#else -#error Activation of OMPT is not supported on this platform. -#endif - if (ret) - return ret; - - // Try tool-libraries-var ICV - const char *tool_libs = getenv("OMP_TOOL_LIBRARIES"); - if (tool_libs) { - char *libs = __kmp_str_format("%s", tool_libs); - char *buf; - char *fname = __kmp_str_token(libs, sep, &buf); - while (fname) { -#if KMP_OS_UNIX - void *h = dlopen(fname, RTLD_LAZY); - if (h) { - start_tool = (ompt_start_tool_t)dlsym(h, "ompt_start_tool"); -#elif KMP_OS_WINDOWS - HMODULE h = LoadLibrary(fname); - if (h) { - start_tool = (ompt_start_tool_t)GetProcAddress(h, "ompt_start_tool"); -#else -#error Activation of OMPT is not supported on this platform. -#endif - if (start_tool && (ret = (*start_tool)(omp_version, runtime_version))) - break; - } - fname = __kmp_str_token(NULL, sep, &buf); - } - __kmp_str_free(&libs); - } - return ret; -} - -void ompt_pre_init() { - //-------------------------------------------------- - // Execute the pre-initialization logic only once. - //-------------------------------------------------- - static int ompt_pre_initialized = 0; - - if (ompt_pre_initialized) - return; - - ompt_pre_initialized = 1; - - //-------------------------------------------------- - // Use a tool iff a tool is enabled and available. 
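- // (OMP_TOOL=disabled suppresses tool startup; unset or "enabled" falls through to the tool search below.)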
- //-------------------------------------------------- - const char *ompt_env_var = getenv("OMP_TOOL"); - tool_setting_e tool_setting = omp_tool_error; - - if (!ompt_env_var || !strcmp(ompt_env_var, "")) - tool_setting = omp_tool_unset; - else if (OMPT_STR_MATCH(ompt_env_var, "disabled")) - tool_setting = omp_tool_disabled; - else if (OMPT_STR_MATCH(ompt_env_var, "enabled")) - tool_setting = omp_tool_enabled; - -#if OMPT_DEBUG - printf("ompt_pre_init(): tool_setting = %d\n", tool_setting); -#endif - switch (tool_setting) { - case omp_tool_disabled: - break; - - case omp_tool_unset: - case omp_tool_enabled: - - //-------------------------------------------------- - // Load tool iff specified in environment variable - //-------------------------------------------------- - ompt_start_tool_result = - ompt_try_start_tool(__kmp_openmp_version, ompt_get_runtime_version()); - - memset(&ompt_enabled, 0, sizeof(ompt_enabled)); - break; - - case omp_tool_error: - fprintf(stderr, "Warning: OMP_TOOL has invalid value \"%s\".\n" - " legal values are (NULL,\"\",\"disabled\"," - "\"enabled\").\n", - ompt_env_var); - break; - } -#if OMPT_DEBUG - printf("ompt_pre_init(): ompt_enabled = %d\n", ompt_enabled); -#endif -} - -extern "C" int omp_get_initial_device(void); - -void ompt_post_init() { - //-------------------------------------------------- - // Execute the post-initialization logic only once. - //-------------------------------------------------- - static int ompt_post_initialized = 0; - - if (ompt_post_initialized) - return; - - ompt_post_initialized = 1; - - //-------------------------------------------------- - // Initialize the tool if so indicated. - //-------------------------------------------------- - if (ompt_start_tool_result) { - ompt_enabled.enabled = !!ompt_start_tool_result->initialize( - ompt_fn_lookup, omp_get_initial_device(), &(ompt_start_tool_result->tool_data)); - - if (!ompt_enabled.enabled) { - // tool not enabled, zero out the bitmap, and done - memset(&ompt_enabled, 0, sizeof(ompt_enabled)); - return; - } - - kmp_info_t *root_thread = ompt_get_thread(); - - ompt_set_thread_state(root_thread, ompt_state_overhead); - - if (ompt_enabled.ompt_callback_thread_begin) { - ompt_callbacks.ompt_callback(ompt_callback_thread_begin)( - ompt_thread_initial, __ompt_get_thread_data_internal()); - } - ompt_data_t *task_data; - __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL); - if (ompt_enabled.ompt_callback_task_create) { - ompt_callbacks.ompt_callback(ompt_callback_task_create)( - NULL, NULL, task_data, ompt_task_initial, 0, NULL); - } - - ompt_set_thread_state(root_thread, ompt_state_work_serial); - } -} - -void ompt_fini() { - if (ompt_enabled.enabled) { - ompt_start_tool_result->finalize(&(ompt_start_tool_result->tool_data)); - } - - memset(&ompt_enabled, 0, sizeof(ompt_enabled)); -} - -/***************************************************************************** - * interface operations - ****************************************************************************/ - -/***************************************************************************** - * state - ****************************************************************************/ - -OMPT_API_ROUTINE int ompt_enumerate_states(int current_state, int *next_state, - const char **next_state_name) { - const static int len = sizeof(ompt_state_info) / sizeof(ompt_state_info_t); - int i = 0; - - for (i = 0; i < len - 1; i++) { - if (ompt_state_info[i].state_id == current_state) { - *next_state = ompt_state_info[i + 
1].state_id; - *next_state_name = ompt_state_info[i + 1].state_name; - return 1; - } - } - - return 0; -} - -OMPT_API_ROUTINE int ompt_enumerate_mutex_impls(int current_impl, - int *next_impl, - const char **next_impl_name) { - const static int len = - sizeof(kmp_mutex_impl_info) / sizeof(kmp_mutex_impl_info_t); - int i = 0; - for (i = 0; i < len - 1; i++) { - if (kmp_mutex_impl_info[i].id != current_impl) - continue; - *next_impl = kmp_mutex_impl_info[i + 1].id; - *next_impl_name = kmp_mutex_impl_info[i + 1].name; - return 1; - } - return 0; -} - -/***************************************************************************** - * callbacks - ****************************************************************************/ - -OMPT_API_ROUTINE ompt_set_result_t ompt_set_callback(ompt_callbacks_t which, - ompt_callback_t callback) { - switch (which) { - -#define ompt_event_macro(event_name, callback_type, event_id) \ - case event_name: \ - if (ompt_event_implementation_status(event_name)) { \ - ompt_callbacks.ompt_callback(event_name) = (callback_type)callback; \ - ompt_enabled.event_name = (callback != 0); \ - } \ - if (callback) \ - return ompt_event_implementation_status(event_name); \ - else \ - return ompt_set_always; - - FOREACH_OMPT_EVENT(ompt_event_macro) - -#undef ompt_event_macro - - default: - return ompt_set_error; - } -} - -OMPT_API_ROUTINE int ompt_get_callback(ompt_callbacks_t which, - ompt_callback_t *callback) { - switch (which) { - -#define ompt_event_macro(event_name, callback_type, event_id) \ - case event_name: \ - if (ompt_event_implementation_status(event_name)) { \ - ompt_callback_t mycb = \ - (ompt_callback_t)ompt_callbacks.ompt_callback(event_name); \ - if (mycb) { \ - *callback = mycb; \ - return ompt_get_callback_success; \ - } \ - } \ - return ompt_get_callback_failure; - - FOREACH_OMPT_EVENT(ompt_event_macro) - -#undef ompt_event_macro - - default: - return ompt_get_callback_failure; - } -} - -/***************************************************************************** - * parallel regions - ****************************************************************************/ - -OMPT_API_ROUTINE int ompt_get_parallel_info(int ancestor_level, - ompt_data_t **parallel_data, - int *team_size) { - return __ompt_get_parallel_info_internal(ancestor_level, parallel_data, - team_size); -} - -OMPT_API_ROUTINE int ompt_get_state(ompt_wait_id_t *wait_id) { - int thread_state = __ompt_get_state_internal(wait_id); - - if (thread_state == ompt_state_undefined) { - thread_state = ompt_state_work_serial; - } - - return thread_state; -} - -/***************************************************************************** - * tasks - ****************************************************************************/ - -OMPT_API_ROUTINE ompt_data_t *ompt_get_thread_data(void) { - return __ompt_get_thread_data_internal(); -} - -OMPT_API_ROUTINE int ompt_get_task_info(int ancestor_level, int *type, - ompt_data_t **task_data, - ompt_frame_t **task_frame, - ompt_data_t **parallel_data, - int *thread_num) { - return __ompt_get_task_info_internal(ancestor_level, type, task_data, - task_frame, parallel_data, thread_num); -} - -OMPT_API_ROUTINE int ompt_get_task_memory(void **addr, size_t *size, - int block) { - // stub - return 0; -} - -/***************************************************************************** - * num_procs - ****************************************************************************/ - -OMPT_API_ROUTINE int ompt_get_num_procs(void) { - // copied from kmp_ftn_entry.h (but modified: OMPT 
can only be called when - // runtime is initialized) - return __kmp_avail_proc; -} - -/***************************************************************************** - * places - ****************************************************************************/ - -OMPT_API_ROUTINE int ompt_get_num_places(void) { -// copied from kmp_ftn_entry.h (but modified) -#if !KMP_AFFINITY_SUPPORTED - return 0; -#else - if (!KMP_AFFINITY_CAPABLE()) - return 0; - return __kmp_affinity_num_masks; -#endif -} - -OMPT_API_ROUTINE int ompt_get_place_proc_ids(int place_num, int ids_size, - int *ids) { -// copied from kmp_ftn_entry.h (but modified) -#if !KMP_AFFINITY_SUPPORTED - return 0; -#else - int i, count; - int tmp_ids[ids_size]; - if (!KMP_AFFINITY_CAPABLE()) - return 0; - if (place_num < 0 || place_num >= (int)__kmp_affinity_num_masks) - return 0; - /* TODO: Is this safe for asynchronous call from signal handler during runtime - * shutdown? */ - kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, place_num); - count = 0; - KMP_CPU_SET_ITERATE(i, mask) { - if ((!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) || - (!KMP_CPU_ISSET(i, mask))) { - continue; - } - if (count < ids_size) - tmp_ids[count] = i; - count++; - } - if (ids_size >= count) { - for (i = 0; i < count; i++) { - ids[i] = tmp_ids[i]; - } - } - return count; -#endif -} - -OMPT_API_ROUTINE int ompt_get_place_num(void) { -// copied from kmp_ftn_entry.h (but modified) -#if !KMP_AFFINITY_SUPPORTED - return -1; -#else - if (__kmp_get_gtid() < 0) - return -1; - - int gtid; - kmp_info_t *thread; - if (!KMP_AFFINITY_CAPABLE()) - return -1; - gtid = __kmp_entry_gtid(); - thread = __kmp_thread_from_gtid(gtid); - if (thread == NULL || thread->th.th_current_place < 0) - return -1; - return thread->th.th_current_place; -#endif -} - -OMPT_API_ROUTINE int ompt_get_partition_place_nums(int place_nums_size, - int *place_nums) { -// copied from kmp_ftn_entry.h (but modified) -#if !KMP_AFFINITY_SUPPORTED - return 0; -#else - if (__kmp_get_gtid() < 0) - return 0; - - int i, gtid, place_num, first_place, last_place, start, end; - kmp_info_t *thread; - if (!KMP_AFFINITY_CAPABLE()) - return 0; - gtid = __kmp_entry_gtid(); - thread = __kmp_thread_from_gtid(gtid); - if (thread == NULL) - return 0; - first_place = thread->th.th_first_place; - last_place = thread->th.th_last_place; - if (first_place < 0 || last_place < 0) - return 0; - if (first_place <= last_place) { - start = first_place; - end = last_place; - } else { - start = last_place; - end = first_place; - } - if (end - start <= place_nums_size) - for (i = 0, place_num = start; place_num <= end; ++place_num, ++i) { - place_nums[i] = place_num; - } - return end - start + 1; -#endif -} - -/***************************************************************************** - * places - ****************************************************************************/ - -OMPT_API_ROUTINE int ompt_get_proc_id(void) { - if (__kmp_get_gtid() < 0) - return -1; -#if KMP_OS_LINUX - return sched_getcpu(); -#elif KMP_OS_WINDOWS - PROCESSOR_NUMBER pn; - GetCurrentProcessorNumberEx(&pn); - return 64 * pn.Group + pn.Number; -#else - return -1; -#endif -} - -/***************************************************************************** - * compatibility - ****************************************************************************/ - -/* - * Currently unused function -OMPT_API_ROUTINE int ompt_get_ompt_version() { return OMPT_VERSION; } -*/ - -/***************************************************************************** -* 
application-facing API - ****************************************************************************/ - -/*---------------------------------------------------------------------------- - | control - ---------------------------------------------------------------------------*/ - -int __kmp_control_tool(uint64_t command, uint64_t modifier, void *arg) { - - if (ompt_enabled.enabled) { - if (ompt_enabled.ompt_callback_control_tool) { - return ompt_callbacks.ompt_callback(ompt_callback_control_tool)( - command, modifier, arg, OMPT_LOAD_RETURN_ADDRESS(__kmp_entry_gtid())); - } else { - return -1; - } - } else { - return -2; - } -} - -/***************************************************************************** - * misc - ****************************************************************************/ - -OMPT_API_ROUTINE uint64_t ompt_get_unique_id(void) { - return __ompt_get_unique_id_internal(); -} - -OMPT_API_ROUTINE void ompt_finalize_tool(void) { - // stub -} - -/***************************************************************************** - * Target - ****************************************************************************/ - -OMPT_API_ROUTINE int ompt_get_target_info(uint64_t *device_num, - ompt_id_t *target_id, - ompt_id_t *host_op_id) { - return 0; // thread is not in a target region -} - -OMPT_API_ROUTINE int ompt_get_num_devices(void) { - return 1; // only one device (the current device) is available -} - -/***************************************************************************** - * API inquiry for tool - ****************************************************************************/ - -static ompt_interface_fn_t ompt_fn_lookup(const char *s) { - -#define ompt_interface_fn(fn) \ - fn##_t fn##_f = fn; \ - if (strcmp(s, #fn) == 0) \ - return (ompt_interface_fn_t)fn##_f; - - FOREACH_OMPT_INQUIRY_FN(ompt_interface_fn) - - return (ompt_interface_fn_t)0; -} Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/ompt-general.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_ftn_extra.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_ftn_extra.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_ftn_extra.cpp (nonexistent) @@ -1,33 +0,0 @@ -/* - * kmp_ftn_extra.cpp -- Fortran 'extra' linkage support for OpenMP. - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#include "kmp.h" -#include "kmp_affinity.h" - -#if KMP_OS_WINDOWS -#define KMP_FTN_ENTRIES KMP_FTN_PLAIN -#elif KMP_OS_UNIX -#define KMP_FTN_ENTRIES KMP_FTN_APPEND -#endif - -// Note: This string is not printed when KMP_VERSION=1. 
-char const __kmp_version_ftnextra[] = - KMP_VERSION_PREFIX "Fortran \"extra\" OMP support: " -#ifdef KMP_FTN_ENTRIES - "yes"; -#define FTN_STDCALL /* nothing to do */ -#include "kmp_ftn_os.h" -#include "kmp_ftn_entry.h" -#else - "no"; -#endif /* KMP_FTN_ENTRIES */ Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_ftn_extra.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/i18n/en_US.txt =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/i18n/en_US.txt (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/i18n/en_US.txt (nonexistent) @@ -1,493 +0,0 @@ -# en_US.txt # - -# -#//===----------------------------------------------------------------------===// -#// -#// The LLVM Compiler Infrastructure -#// -#// This file is dual licensed under the MIT and the University of Illinois Open -#// Source Licenses. See LICENSE.txt for details. -#// -#//===----------------------------------------------------------------------===// -# - -# Default messages, embedded into the OpenMP RTL, and source for English catalog. - - -# Compatible changes (which do not require version bumping): -# * Editing message (number and type of placeholders must remain, relative order of -# placeholders may be changed, e.g. "File %1$s line %2$d" may be safely edited to -# "Line %2$d file %1$s"). -# * Adding new message to the end of section. -# Incompatible changes (version must be bumped by 1): -# * Introducing new placeholders to existing messages. -# * Changing type of placeholders (e.g. "line %1$d" -> "line %1$s"). -# * Rearranging order of messages. -# * Deleting messages. -# Use special "OBSOLETE" pseudoidentifier for obsolete entries, which are kept only for backward -# compatibility. When version is bumped, do not forget to delete all obsolete entries. - - -# -------------------------------------------------------------------------------------------------- --*- META -*- -# -------------------------------------------------------------------------------------------------- - -# Meta information about message catalog. - -Language "English" -Country "USA" -LangId "1033" -Version "2" -Revision "20170523" - - - -# -------------------------------------------------------------------------------------------------- --*- STRINGS -*- -# -------------------------------------------------------------------------------------------------- - -# Strings are not complete messages, just fragments. We need to work on it and reduce the number of -# strings (to zero?).
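The %1$d/%2$s markers used throughout this catalog are POSIX positional conversion specifications, which is what makes the reordering edit described above compatible: a translation can permute placeholders while the call site keeps one fixed argument list. A minimal sketch of that mechanism, using plain POSIX printf rather than the runtime's kmp_i18n machinery (positional specifiers are a POSIX extension, not ISO C, and the file name and line number here are illustrative):

    #include <stdio.h>

    int main(void) {
      const char *file = "foo.c";
      int line = 42;
      /* Both formats consume the same (file, line) argument list; the
         positional indices, not the argument order, select the values. */
      printf("File %1$s line %2$d\n", file, line);
      printf("Line %2$d file %1$s\n", file, line);
      return 0;
    }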
- -Error "Error" -UnknownFile "(unknown file)" -NotANumber "not a number" -BadUnit "bad unit" -IllegalCharacters "illegal characters" -ValueTooLarge "value too large" -ValueTooSmall "value too small" -NotMultiple4K "value is not a multiple of 4k" -UnknownTopology "Unknown processor topology" -CantOpenCpuinfo "Cannot open /proc/cpuinfo" -ProcCpuinfo "/proc/cpuinfo" -NoProcRecords "cpuinfo file invalid (No processor records)" -TooManyProcRecords "cpuinfo file invalid (Too many processor records)" -CantRewindCpuinfo "Cannot rewind cpuinfo file" -LongLineCpuinfo "cpuinfo file invalid (long line)" -TooManyEntries "cpuinfo file contains too many entries" -MissingProcField "cpuinfo file missing processor field" -MissingPhysicalIDField "cpuinfo file missing physical id field" -MissingValCpuinfo "cpuinfo file invalid (missing val)" -DuplicateFieldCpuinfo "cpuinfo file invalid (duplicate field)" -PhysicalIDsNotUnique "Physical node/pkg/core/thread ids not unique" -ApicNotPresent "APIC not present" -InvalidCpuidInfo "Invalid cpuid info" -OBSOLETE "APIC ids not unique" -InconsistentCpuidInfo "Inconsistent cpuid info" -OutOfHeapMemory "Out of heap memory" -MemoryAllocFailed "Memory allocation failed" -Core "core" -Thread "thread" -Package "package" -Node "node" -OBSOLETE "" -DecodingLegacyAPIC "decoding legacy APIC ids" -OBSOLETE "parsing /proc/cpuinfo" -NotDefined "value is not defined" -EffectiveSettings "Effective settings:" -UserSettings "User settings:" -StorageMapWarning "warning: pointers or size don't make sense" -OBSOLETE "CPU" -OBSOLETE "TPU" -OBSOLETE "TPUs per package" -OBSOLETE "HT enabled" -OBSOLETE "HT disabled" -Decodingx2APIC "decoding x2APIC ids" -NoLeaf11Support "cpuid leaf 11 not supported" -NoLeaf4Support "cpuid leaf 4 not supported" -ThreadIDsNotUnique "thread ids not unique" -UsingPthread "using pthread info" -LegacyApicIDsNotUnique "legacy APIC ids not unique" -x2ApicIDsNotUnique "x2APIC ids not unique" -DisplayEnvBegin "OPENMP DISPLAY ENVIRONMENT BEGIN" -DisplayEnvEnd "OPENMP DISPLAY ENVIRONMENT END" -Device "[device]" -Host "[host]" -Tile "tile" - - - -# -------------------------------------------------------------------------------------------------- --*- FORMATS -*- -# -------------------------------------------------------------------------------------------------- - -Info "OMP: Info #%1$d: %2$s\n" -Warning "OMP: Warning #%1$d: %2$s\n" -Fatal "OMP: Error #%1$d: %2$s\n" -SysErr "OMP: System error #%1$d: %2$s\n" -Hint "OMP: Hint %1$s\n" - -Pragma "%1$s pragma (at %2$s:%3$s():%4$s)" - # %1 is pragma name (like "parallel" or "master", - # %2 is file name, - # %3 is function (routine) name, - # %4 is the line number (as string, so "s" type specifier should be used). - - - -# -------------------------------------------------------------------------------------------------- --*- MESSAGES -*- -# -------------------------------------------------------------------------------------------------- - -# Messages of any severity: informational, warning, or fatal. -# To maintain message numbers (they are visible to customers), add new messages to the end. - -# Use following prefixes for messages and hints when appropriate: -# Aff -- Affinity messages. -# Cns -- Consistency check failures (KMP_CONSISTENCY_CHECK). -# Itt -- ITT Notify-related messages. - -LibraryIsSerial "Library is \"serial\"." -CantOpenMessageCatalog "Cannot open message catalog \"%1$s\":" -WillUseDefaultMessages "Default messages will be used." 
-LockIsUninitialized "%1$s: Lock is uninitialized" -LockSimpleUsedAsNestable "%1$s: Lock was initialized as simple, but used as nestable" -LockNestableUsedAsSimple "%1$s: Lock was initialized as nestable, but used as simple" -LockIsAlreadyOwned "%1$s: Lock is already owned by requesting thread" -LockStillOwned "%1$s: Lock is still owned by a thread" -LockUnsettingFree "%1$s: Attempt to release a lock not owned by any thread" -LockUnsettingSetByAnother "%1$s: Attempt to release a lock owned by another thread" -StackOverflow "Stack overflow detected for OpenMP thread #%1$d" -StackOverlap "Stack overlap detected. " -AssertionFailure "Assertion failure at %1$s(%2$d)." -CantRegisterNewThread "Unable to register a new user thread." -DuplicateLibrary "Initializing %1$s, but found %2$s already initialized." -CantOpenFileForReading "Cannot open file \"%1$s\" for reading:" -CantGetEnvVar "Getting environment variable \"%1$s\" failed:" -CantSetEnvVar "Setting environment variable \"%1$s\" failed:" -CantGetEnvironment "Getting environment failed:" -BadBoolValue "%1$s=\"%2$s\": Wrong value, boolean expected." -SSPNotBuiltIn "No Helper Thread support built in this OMP library." -SPPSotfTerminateFailed "Helper thread failed to soft terminate." -BufferOverflow "Buffer overflow detected." -RealTimeSchedNotSupported "Real-time scheduling policy is not supported." -RunningAtMaxPriority "OMP application is running at maximum priority with real-time scheduling policy. " -CantChangeMonitorPriority "Changing priority of the monitor thread failed:" -MonitorWillStarve "Deadlocks are highly possible due to monitor thread starvation." -CantSetMonitorStackSize "Unable to set monitor thread stack size to %1$lu bytes:" -CantSetWorkerStackSize "Unable to set OMP thread stack size to %1$lu bytes:" -CantInitThreadAttrs "Thread attribute initialization failed:" -CantDestroyThreadAttrs "Thread attribute destroying failed:" -CantSetWorkerState "OMP thread joinable state setting failed:" -CantSetMonitorState "Monitor thread joinable state setting failed:" -NoResourcesForWorkerThread "System unable to allocate necessary resources for OMP thread:" -NoResourcesForMonitorThread "System unable to allocate necessary resources for the monitor thread:" -CantTerminateWorkerThread "Unable to terminate OMP thread:" -ScheduleKindOutOfRange "Wrong schedule type %1$d, see <omp.h> or <omp_lib.h> file for the list of values supported." -UnknownSchedulingType "Unknown scheduling type \"%1$d\"." -InvalidValue "%1$s value \"%2$s\" is invalid." -SmallValue "%1$s value \"%2$s\" is too small." -LargeValue "%1$s value \"%2$s\" is too large." -StgInvalidValue "%1$s: \"%2$s\" is an invalid value; ignored." -BarrReleaseValueInvalid "%1$s release value \"%2$s\" is invalid." -BarrGatherValueInvalid "%1$s gather value \"%2$s\" is invalid." -OBSOLETE "%1$s supported only on debug builds; ignored." -ParRangeSyntax "Syntax error: Usage: %1$s=[ routine=<func> | filename=<file> | range=<lb>:<ub> " "| excl_range=<lb>:<ub> ],..." -UnbalancedQuotes "Unbalanced quotes in %1$s." -EmptyString "Empty string specified for %1$s; ignored." -LongValue "%1$s value is too long; ignored." -InvalidClause "%1$s: Invalid clause in \"%2$s\"." -EmptyClause "Empty clause in %1$s." -InvalidChunk "%1$s value \"%2$s\" is invalid chunk size." -LargeChunk "%1$s value \"%2$s\" is too large a chunk size." -IgnoreChunk "%1$s value \"%2$s\" is ignored." -CantGetProcFreq "Cannot get processor frequency, using zero KMP_ITT_PREPARE_DELAY." -EnvParallelWarn "%1$s must be set prior to first parallel region; ignored."
-AffParamDefined "%1$s: parameter has been specified already, ignoring \"%2$s\"." -AffInvalidParam "%1$s: parameter invalid, ignoring \"%2$s\"." -AffManyParams "%1$s: too many integer parameters specified, ignoring \"%2$s\"." -AffManyParamsForLogic "%1$s: too many integer parameters specified for logical or physical type, ignoring \"%2$d\"." -AffNoParam "%1$s: '%2$s' type does not take any integer parameters, ignoring them." -AffNoProcList "%1$s: proclist not specified with explicit affinity type, using \"none\"." -AffProcListNoType "%1$s: proclist specified, setting affinity type to \"explicit\"." -AffProcListNotExplicit "%1$s: proclist specified without \"explicit\" affinity type, proclist ignored." -AffSyntaxError "%1$s: syntax error, not using affinity." -AffZeroStride "%1$s: range error (zero stride), not using affinity." -AffStartGreaterEnd "%1$s: range error (%2$d > %3$d), not using affinity." -AffStrideLessZero "%1$s: range error (%2$d < %3$d & stride < 0), not using affinity." -AffRangeTooBig "%1$s: range error ((%2$d-%3$d)/%4$d too big), not using affinity." -OBSOLETE "%1$s: %2$s is defined. %3$s will be ignored." -AffNotSupported "%1$s: affinity not supported, using \"disabled\"." -OBSOLETE "%1$s: affinity only supported for Intel(R) Architecture Processors." -GetAffSysCallNotSupported "%1$s: getaffinity system call not supported." -SetAffSysCallNotSupported "%1$s: setaffinity system call not supported." -OBSOLETE "%1$s: pthread_aff_set_np call not found." -OBSOLETE "%1$s: pthread_get_num_resources_np call not found." -OBSOLETE "%1$s: the OS kernel does not support affinity." -OBSOLETE "%1$s: pthread_get_num_resources_np returned %2$d." -AffCantGetMaskSize "%1$s: cannot determine proper affinity mask size." -ParseSizeIntWarn "%1$s=\"%2$s\": %3$s." -ParseExtraCharsWarn "%1$s: extra trailing characters ignored: \"%2$s\"." -UnknownForceReduction "%1$s: unknown method \"%2$s\"." -TimerUseGettimeofday "KMP_STATS_TIMER: clock_gettime is undefined, using gettimeofday." -TimerNeedMoreParam "KMP_STATS_TIMER: \"%1$s\" needs additional parameter, e.g. 'clock_gettime,2'. Using gettimeofday." -TimerInvalidParam "KMP_STATS_TIMER: clock_gettime parameter \"%1$s\" is invalid, using gettimeofday." -TimerGettimeFailed "KMP_STATS_TIMER: clock_gettime failed, using gettimeofday." -TimerUnknownFunction "KMP_STATS_TIMER: clock function unknown (ignoring value \"%1$s\")." -UnknownSchedTypeDetected "Unknown scheduling type detected." -DispatchManyThreads "Too many threads to use analytical guided scheduling - switching to iterative guided scheduling." -IttLookupFailed "ittnotify: Lookup of \"%1$s\" function in \"%2$s\" library failed." -IttLoadLibFailed "ittnotify: Loading \"%1$s\" library failed." -IttAllNotifDisabled "ittnotify: All itt notifications disabled." -IttObjNotifDisabled "ittnotify: Object state itt notifications disabled." -IttMarkNotifDisabled "ittnotify: Mark itt notifications disabled." -IttUnloadLibFailed "ittnotify: Unloading \"%1$s\" library failed." -CantFormThrTeam "Cannot form a team with %1$d threads, using %2$d instead." -ActiveLevelsNegative "Requested number of active parallel levels \"%1$d\" is negative; ignored." -ActiveLevelsExceedLimit "Requested number of active parallel levels \"%1$d\" exceeds supported limit; " - "the following limit value will be used: \"%1$d\"." -SetLibraryIncorrectCall "kmp_set_library must only be called from the top level serial thread; ignored." -FatalSysError "Fatal system error detected." -OutOfHeapMemory "Out of heap memory." 
-OBSOLETE "Clearing __KMP_REGISTERED_LIB env var failed." -OBSOLETE "Registering library with env var failed." -Using_int_Value "%1$s value \"%2$d\" will be used." -Using_uint_Value "%1$s value \"%2$u\" will be used." -Using_uint64_Value "%1$s value \"%2$s\" will be used." -Using_str_Value "%1$s value \"%2$s\" will be used." -MaxValueUsing "%1$s maximum value \"%2$d\" will be used." -MinValueUsing "%1$s minimum value \"%2$d\" will be used." -MemoryAllocFailed "Memory allocation failed." -FileNameTooLong "File name too long." -OBSOLETE "Lock table overflow." -ManyThreadsForTPDirective "Too many threads to use threadprivate directive." -AffinityInvalidMask "%1$s: invalid mask." -WrongDefinition "Wrong definition." -TLSSetValueFailed "Windows* OS: TLS Set Value failed." -TLSOutOfIndexes "Windows* OS: TLS out of indexes." -OBSOLETE "PDONE directive must be nested within a DO directive." -CantGetNumAvailCPU "Cannot get number of available CPUs." -AssumedNumCPU "Assumed number of CPUs is 2." -ErrorInitializeAffinity "Error initializing affinity - not using affinity." -AffThreadsMayMigrate "Threads may migrate across all available OS procs (granularity setting too coarse)." -AffIgnoreInvalidProcID "Ignoring invalid OS proc ID %1$d." -AffNoValidProcID "No valid OS proc IDs specified - not using affinity." -UsingFlatOS "%1$s - using \"flat\" OS <-> physical proc mapping." -UsingFlatOSFile "%1$s: %2$s - using \"flat\" OS <-> physical proc mapping." -UsingFlatOSFileLine "%1$s, line %2$d: %3$s - using \"flat\" OS <-> physical proc mapping." -FileMsgExiting "%1$s: %2$s - exiting." -FileLineMsgExiting "%1$s, line %2$d: %3$s - exiting." -ConstructIdentInvalid "Construct identifier invalid." -ThreadIdentInvalid "Thread identifier invalid." -RTLNotInitialized "runtime library not initialized." -TPCommonBlocksInconsist "Inconsistent THREADPRIVATE common block declarations are non-conforming " - "and are unsupported. Either all threadprivate common blocks must be declared " - "identically, or the largest instance of each threadprivate common block " - "must be referenced first during the run." -CantSetThreadAffMask "Cannot set thread affinity mask." -CantSetThreadPriority "Cannot set thread priority." -CantCreateThread "Cannot create thread." -CantCreateEvent "Cannot create event." -CantSetEvent "Cannot set event." -CantCloseHandle "Cannot close handle." -UnknownLibraryType "Unknown library type: %1$d." -ReapMonitorError "Monitor did not reap properly." -ReapWorkerError "Worker thread failed to join." -ChangeThreadAffMaskError "Cannot change thread affinity mask." 
-ThreadsMigrate "%1$s: Threads may migrate across %2$d innermost levels of machine" -DecreaseToThreads "%1$s: decrease to %2$d threads" -IncreaseToThreads "%1$s: increase to %2$d threads" -OBSOLETE "%1$s: Internal thread %2$d bound to OS proc set %3$s" -AffCapableUseCpuinfo "%1$s: Affinity capable, using cpuinfo file" -AffUseGlobCpuid "%1$s: Affinity capable, using global cpuid info" -AffCapableUseFlat "%1$s: Affinity capable, using default \"flat\" topology" -AffNotCapableUseLocCpuid "%1$s: Affinity not capable, using local cpuid info" -AffNotCapableUseCpuinfo "%1$s: Affinity not capable, using cpuinfo file" -AffFlatTopology "%1$s: Affinity not capable, assuming \"flat\" topology" -InitOSProcSetRespect "%1$s: Initial OS proc set respected: %2$s" -InitOSProcSetNotRespect "%1$s: Initial OS proc set not respected: %2$s" -AvailableOSProc "%1$s: %2$d available OS procs" -Uniform "%1$s: Uniform topology" -NonUniform "%1$s: Nonuniform topology" -Topology "%1$s: %2$d packages x %3$d cores/pkg x %4$d threads/core (%5$d total cores)" -OBSOLETE "%1$s: OS proc to physical thread map ([] => level not in map):" -OSProcToPackage "%1$s: OS proc <n> maps to <n>th package core 0" -OBSOLETE "%1$s: OS proc %2$d maps to package %3$d [core %4$d] [thread %5$d]" -OBSOLETE "%1$s: OS proc %2$d maps to [package %3$d] [core %4$d] [thread %5$d]" -OBSOLETE "%1$s: OS proc %2$d maps to [package %3$d] [core %4$d] thread %5$d" -OBSOLETE "%1$s: OS proc %2$d maps to [package %3$d] core %4$d [thread %5$d]" -OBSOLETE "%1$s: OS proc %2$d maps to package %3$d [core %4$d] [thread %5$d]" -OBSOLETE "%1$s: OS proc %2$d maps to [package %3$d] core %4$d thread %5$d" -OBSOLETE "%1$s: OS proc %2$d maps to package %3$d core %4$d [thread %5$d]" -OBSOLETE "%1$s: OS proc %2$d maps to package %3$d [core %4$d] thread %5$d" -OBSOLETE "%1$s: OS proc %2$d maps to package %3$d core %4$d thread %5$d" -OSProcMapToPack "%1$s: OS proc %2$d maps to %3$s" -OBSOLETE "%1$s: Internal thread %2$d changed affinity mask from %3$s to %4$s" -OBSOLETE "%1$s: OS proc %2$d maps to package %3$d, CPU %4$d, TPU %5$d" -OBSOLETE "%1$s: OS proc %2$d maps to package %3$d, CPU %4$d" -OBSOLETE "%1$s: HT enabled; %2$d packages; %3$d TPU; %4$d TPUs per package" -OBSOLETE "%1$s: HT disabled; %2$d packages" -BarriersInDifferentOrder "Threads encountered barriers in different order. " -FunctionError "Function %1$s failed:" -TopologyExtra "%1$s: %2$s packages x %3$d cores/pkg x %4$d threads/core (%5$d total cores)" -WrongMessageCatalog "Incompatible message catalog \"%1$s\": Version \"%2$s\" found, version \"%3$s\" expected." -StgIgnored "%1$s: ignored because %2$s has been defined" - # %1 -- name of ignored variable, %2 -- name of variable with higher priority. -OBSOLETE "%1$s: overrides %3$s specified before" - # %1, %2 -- name and value of the overriding variable, %3 -- name of overridden variable. -AffTilesNoHWLOC "%1$s: Tiles are only supported if KMP_TOPOLOGY_METHOD=hwloc, using granularity=package instead" -AffTilesNoTiles "%1$s: Tiles requested but were not detected on this HW, using granularity=package instead" -TopologyExtraTile "%1$s: %2$d packages x %3$d tiles/pkg x %4$d cores/tile x %5$d threads/core (%6$d total cores)" -TopologyExtraNode "%1$s: %2$d packages x %3$d nodes/pkg x %4$d cores/node x %5$d threads/core (%6$d total cores)" -TopologyExtraNoTi "%1$s: %2$d packages x %3$d nodes/pkg x %4$d tiles/node x %5$d cores/tile x %6$d threads/core (%7$d total cores)" -OmptOutdatedWorkshare "OMPT: Cannot determine workshare type; using the default (loop) instead. " - "This issue is fixed in an up-to-date compiler." -OmpNoAllocator "Allocator %1$s is not available, will use default allocator." - -# --- OpenMP errors detected at runtime --- -# -# %1 is the name of OpenMP construct (formatted with "Pragma" format). -# -CnsBoundToWorksharing "%1$s must be bound to a work-sharing or work-queuing construct with an \"ordered\" clause" -CnsDetectedEnd "Detected end of %1$s without first executing a corresponding beginning." -CnsIterationRangeTooLarge "Iteration range too large in %1$s." -CnsLoopIncrZeroProhibited "%1$s must not have a loop increment that evaluates to zero." -# -# %1 is the name of the first OpenMP construct, %2 -- the name of the second one (both formatted with "Pragma" format). -# -CnsExpectedEnd "Expected end of %1$s; %2$s, however, has most recently begun execution." -CnsInvalidNesting "%1$s is incorrectly nested within %2$s" -CnsMultipleNesting "%1$s cannot be executed multiple times during execution of one parallel iteration/section of %2$s" -CnsNestingSameName "%1$s is incorrectly nested within %2$s of the same name" -CnsNoOrderedClause "%1$s is incorrectly nested within %2$s that does not have an \"ordered\" clause" -CnsNotInTaskConstruct "%1$s is incorrectly nested within %2$s but not within any of its \"task\" constructs" -CnsThreadsAtBarrier "One thread at %1$s while another thread is at %2$s." - -# New errors -CantConnect "Cannot connect to %1$s" -CantConnectUsing "Cannot connect to %1$s - Using %2$s" -LibNotSupport "%1$s does not support %2$s. Continuing without using %2$s." -LibNotSupportFor "%1$s does not support %2$s for %3$s. Continuing without using %2$s." -StaticLibNotSupport "Static %1$s does not support %2$s. Continuing without using %2$s." -OBSOLETE "KMP_DYNAMIC_MODE=irml cannot be used with KMP_USE_IRML=0" -IttUnknownGroup "ittnotify: Unknown group \"%2$s\" specified in environment variable \"%1$s\"." -IttEnvVarTooLong "ittnotify: Environment variable \"%1$s\" too long: Actual length is %2$lu, max allowed length is %3$lu." -AffUseGlobCpuidL11 "%1$s: Affinity capable, using global cpuid leaf 11 info" -AffNotCapableUseLocCpuidL11 "%1$s: Affinity not capable, using local cpuid leaf 11 info" -AffInfoStr "%1$s: %2$s." -AffInfoStrStr "%1$s: %2$s - %3$s." -OSProcToPhysicalThreadMap "%1$s: OS proc to physical thread map:" -AffUsingFlatOS "%1$s: using \"flat\" OS <-> physical proc mapping." -AffParseFilename "%1$s: parsing %2$s." -MsgExiting "%1$s - exiting." -IncompatibleLibrary "Incompatible %1$s library with version %2$s found." -IttFunctionError "ittnotify: Function %1$s failed:" -IttUnknownError "ittnotify: Error #%1$d." -EnvMiddleWarn "%1$s must be set prior to first parallel region or certain API calls; ignored." -CnsLockNotDestroyed "Lock initialized at %1$s(%2$d) was not destroyed" - # %1, %2, %3, %4 -- file, line, func, col -CantLoadBalUsing "Cannot determine machine load balance - Using %1$s" -AffNotCapableUsePthread "%1$s: Affinity not capable, using pthread info" -AffUsePthread "%1$s: Affinity capable, using pthread info" -OBSOLETE "Loading \"%1$s\" library failed:" -OBSOLETE "Lookup of \"%1$s\" function failed:" -OBSOLETE "Buffer too small." -OBSOLETE "Error #%1$d." -NthSyntaxError "%1$s: Invalid symbols found. Check the value \"%2$s\"." -NthSpacesNotAllowed "%1$s: Spaces between digits are not allowed \"%2$s\"." -AffStrParseFilename "%1$s: %2$s - parsing %3$s." -OBSOLETE "%1$s cannot be specified via kmp_set_defaults() on this machine because it has more than one processor group."
-AffTypeCantUseMultGroups "Cannot use affinity type \"%1$s\" with multiple Windows* OS processor groups, using \"%2$s\"." -AffGranCantUseMultGroups "Cannot use affinity granularity \"%1$s\" with multiple Windows* OS processor groups, using \"%2$s\"." -AffWindowsProcGroupMap "%1$s: Mapping Windows* OS processor group <i> proc <j> to OS proc 64*<i>+<j>." -AffOSProcToGroup "%1$s: OS proc %2$d maps to Windows* OS processor group %3$d proc %4$d" -AffBalancedNotAvail "%1$s: Affinity balanced is not available." -OBSOLETE "%1$s: granularity=core will be used." -EnvLockWarn "%1$s must be set prior to first OMP lock call or critical section; ignored." -FutexNotSupported "futex system call not supported; %1$s=%2$s ignored." -AffGranUsing "%1$s: granularity=%2$s will be used." -AffHWSubsetInvalid "%1$s: invalid value \"%2$s\", valid format is \"N<item>[@N][,...][,Nt] " "(<item> can be S, N, L2, C, T for Socket, NUMA Node, L2 Cache, Core, Thread)\"." -AffHWSubsetUnsupported "KMP_HW_SUBSET ignored: unsupported architecture." -AffHWSubsetManyCores "KMP_HW_SUBSET ignored: too many cores requested." -SyntaxErrorUsing "%1$s: syntax error, using %2$s." -AdaptiveNotSupported "%1$s: Adaptive locks are not supported; using queuing." -EnvSyntaxError "%1$s: Invalid symbols found. Check the value \"%2$s\"." -EnvSpacesNotAllowed "%1$s: Spaces between digits are not allowed \"%2$s\"." -BoundToOSProcSet "%1$s: pid %2$d tid %3$d thread %4$d bound to OS proc set %5$s" -CnsLoopIncrIllegal "%1$s error: parallel loop increment and condition are inconsistent." -NoGompCancellation "libgomp cancellation is not currently supported." -AffHWSubsetNonUniform "KMP_HW_SUBSET ignored: non-uniform topology." -AffHWSubsetNonThreeLevel "KMP_HW_SUBSET ignored: only three-level topology is supported." -AffGranTopGroup "%1$s: granularity=%2$s is not supported with KMP_TOPOLOGY_METHOD=group. Using \"granularity=fine\"." -AffGranGroupType "%1$s: granularity=group is not supported with KMP_AFFINITY=%2$s. Using \"granularity=core\"." -AffHWSubsetManySockets "KMP_HW_SUBSET ignored: too many sockets requested." -AffHWSubsetDeprecated "KMP_HW_SUBSET \"o\" offset designator deprecated, please use @ prefix for offset value." -AffUsingHwloc "%1$s: Affinity capable, using hwloc." -AffIgnoringHwloc "%1$s: Ignoring hwloc mechanism." -AffHwlocErrorOccurred "%1$s: Hwloc failed in %2$s. Relying on internal affinity mechanisms." -EnvSerialWarn "%1$s must be set prior to OpenMP runtime library initialization; ignored." -EnvVarDeprecated "%1$s variable deprecated, please use %2$s instead." -RedMethodNotSupported "KMP_FORCE_REDUCTION: %1$s method is not supported; using critical." -AffHWSubsetNoHWLOC "KMP_HW_SUBSET ignored: unsupported item requested for non-HWLOC topology method (KMP_TOPOLOGY_METHOD)" -AffHWSubsetManyNodes "KMP_HW_SUBSET ignored: too many NUMA Nodes requested." -AffHWSubsetManyTiles "KMP_HW_SUBSET ignored: too many L2 Caches requested." -AffHWSubsetManyProcs "KMP_HW_SUBSET ignored: too many Procs requested." -HierSchedInvalid "Hierarchy ignored: unsupported level: %1$s." -AffFormatDefault "OMP: pid %1$s tid %2$s thread %3$s bound to OS proc set {%4$s}" - - -# -------------------------------------------------------------------------------------------------- --*- HINTS -*- -# -------------------------------------------------------------------------------------------------- - -# Hints. A hint may be printed after a message. Usually it is a longer explanation or suggestion. -# To maintain hint numbers (they are visible to customers), add new hints to the end.
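# Note: a single entry may be written as several adjacent quoted fragments that are
# concatenated into one message (SubmitBugReport below is the first such case).
# A hypothetical entry, for illustration only (not part of the catalog):
#
#   ExampleHint "First fragment of the hint text, "
#               "second fragment of the same message."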
- -SubmitBugReport "Please submit a bug report with this message, compile and run " "commands used, and machine configuration info including native " "compiler and operating system versions. Faster response will be " "obtained by including all program sources. For information on " "submitting this issue, please see " "https://bugs.llvm.org/." -OBSOLETE "Check NLSPATH environment variable, its value is \"%1$s\"." -ChangeStackLimit "Please try changing the shell stack limit or adjusting the " "OMP_STACKSIZE environment variable." -Unset_ALL_THREADS "Consider unsetting KMP_DEVICE_THREAD_LIMIT (KMP_ALL_THREADS), KMP_TEAMS_THREAD_LIMIT, and OMP_THREAD_LIMIT (if any are set)." -Set_ALL_THREADPRIVATE "Consider setting KMP_ALL_THREADPRIVATE to a value larger than %1$d." -PossibleSystemLimitOnThreads "This could also be due to a system-related limit on the number of threads." -DuplicateLibrary "This means that multiple copies of the OpenMP runtime have been " "linked into the program. That is dangerous, since it can degrade " "performance or cause incorrect results. " "The best thing to do is to ensure that only a single OpenMP runtime is " "linked into the process, e.g. by avoiding static linking of the OpenMP " "runtime in any library. As an unsafe, unsupported, undocumented workaround " "you can set the environment variable KMP_DUPLICATE_LIB_OK=TRUE to allow " "the program to continue to execute, but that may cause crashes or " "silently produce incorrect results. " "For more information, please see http://openmp.llvm.org/" -NameComesFrom_CPUINFO_FILE "This name is specified in environment variable KMP_CPUINFO_FILE." -NotEnoughMemory "It seems the application required too much memory." -ValidBoolValues "Use \"0\", \"FALSE\", \".F.\", \"off\", \"no\" as false values, " "\"1\", \"TRUE\", \".T.\", \"on\", \"yes\" as true values." -BufferOverflow "Perhaps too many threads." -RunningAtMaxPriority "Decrease priority of application. " "This will allow the monitor thread to run at a higher priority than other threads." -ChangeMonitorStackSize "Try changing KMP_MONITOR_STACKSIZE or the shell stack limit." -ChangeWorkerStackSize "Try changing OMP_STACKSIZE and/or the shell stack limit." -IncreaseWorkerStackSize "Try increasing OMP_STACKSIZE or the shell stack limit." -DecreaseWorkerStackSize "Try decreasing OMP_STACKSIZE." -Decrease_NUM_THREADS "Try decreasing the value of OMP_NUM_THREADS." -IncreaseMonitorStackSize "Try increasing KMP_MONITOR_STACKSIZE." -DecreaseMonitorStackSize "Try decreasing KMP_MONITOR_STACKSIZE." -DecreaseNumberOfThreadsInUse "Try decreasing the number of threads in use simultaneously." -DefaultScheduleKindUsed "Will use default schedule type (%1$s)." -GetNewerLibrary "It could be a result of using an older OMP library with a newer " "compiler or of memory corruption. You may check that the proper OMP library " "is linked to the application." -CheckEnvVar "Check %1$s environment variable, its value is \"%2$s\"." -OBSOLETE "You may want to use an %1$s library that supports %2$s interface with version %3$s." -OBSOLETE "You may want to use an %1$s library with version %2$s." -BadExeFormat "System error #193 is \"Bad format of EXE or DLL file\". " "Usually it means the file is found, but it is corrupted or " "a file for another architecture. " "Check whether \"%1$s\" is a file for %2$s architecture." -SystemLimitOnThreads "System-related limit on the number of threads."
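The runtime refers to these entries by identifier rather than by text. A hedged sketch of the general scheme, with hypothetical names (the real tables are generated from this file; this is not the kmp_i18n API):

    #include <stdio.h>

    /* Each catalog entry becomes an index into a generated string table; the
       runtime fetches the format by index and feeds it to the formatter. */
    enum hint_id { HINT_ChangeStackLimit, HINT_COUNT };

    static const char *hint_catalog[HINT_COUNT] = {
        "Please try changing the shell stack limit or adjusting the "
        "OMP_STACKSIZE environment variable.",
    };

    int main(void) {
      /* "Hint" format from the FORMATS section: "OMP: Hint %1$s\n" */
      fprintf(stderr, "OMP: Hint %1$s\n", hint_catalog[HINT_ChangeStackLimit]);
      return 0;
    }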
- - - -# -------------------------------------------------------------------------------------------------- -# end of file # -# -------------------------------------------------------------------------------------------------- - Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/i18n/en_US.txt ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_threadprivate.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_threadprivate.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_threadprivate.cpp (nonexistent) @@ -1,800 +0,0 @@ -/* - * kmp_threadprivate.cpp -- OpenMP threadprivate support library - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#include "kmp.h" -#include "kmp_i18n.h" -#include "kmp_itt.h" - -#define USE_CHECKS_COMMON - -#define KMP_INLINE_SUBR 1 - -void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr, - void *data_addr, size_t pc_size); -struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr, - void *data_addr, - size_t pc_size); - -struct shared_table __kmp_threadprivate_d_table; - -static -#ifdef KMP_INLINE_SUBR - __forceinline -#endif - struct private_common * - __kmp_threadprivate_find_task_common(struct common_table *tbl, int gtid, - void *pc_addr) - -{ - struct private_common *tn; - -#ifdef KMP_TASK_COMMON_DEBUG - KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, called with " - "address %p\n", - gtid, pc_addr)); - dump_list(); -#endif - - for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) { - if (tn->gbl_addr == pc_addr) { -#ifdef KMP_TASK_COMMON_DEBUG - KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, found " - "node %p on list\n", - gtid, pc_addr)); -#endif - return tn; - } - } - return 0; -} - -static -#ifdef KMP_INLINE_SUBR - __forceinline -#endif - struct shared_common * - __kmp_find_shared_task_common(struct shared_table *tbl, int gtid, - void *pc_addr) { - struct shared_common *tn; - - for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) { - if (tn->gbl_addr == pc_addr) { -#ifdef KMP_TASK_COMMON_DEBUG - KC_TRACE( - 10, - ("__kmp_find_shared_task_common: thread#%d, found node %p on list\n", - gtid, pc_addr)); -#endif - return tn; - } - } - return 0; -} - -// Create a template for the data initialized storage. Either the template is -// NULL indicating zero fill, or the template is a copy of the original data. 
-static struct private_data *__kmp_init_common_data(void *pc_addr, - size_t pc_size) { - struct private_data *d; - size_t i; - char *p; - - d = (struct private_data *)__kmp_allocate(sizeof(struct private_data)); - /* - d->data = 0; // AC: commented out because __kmp_allocate zeroes the - memory - d->next = 0; - */ - d->size = pc_size; - d->more = 1; - - p = (char *)pc_addr; - - for (i = pc_size; i > 0; --i) { - if (*p++ != '\0') { - d->data = __kmp_allocate(pc_size); - KMP_MEMCPY(d->data, pc_addr, pc_size); - break; - } - } - - return d; -} - -// Initialize the data area from the template. -static void __kmp_copy_common_data(void *pc_addr, struct private_data *d) { - char *addr = (char *)pc_addr; - int i, offset; - - for (offset = 0; d != 0; d = d->next) { - for (i = d->more; i > 0; --i) { - if (d->data == 0) - memset(&addr[offset], '\0', d->size); - else - KMP_MEMCPY(&addr[offset], d->data, d->size); - offset += d->size; - } - } -} - -/* we are called from __kmp_serial_initialize() with __kmp_initz_lock held. */ -void __kmp_common_initialize(void) { - if (!TCR_4(__kmp_init_common)) { - int q; -#ifdef KMP_DEBUG - int gtid; -#endif - - __kmp_threadpriv_cache_list = NULL; - -#ifdef KMP_DEBUG - /* verify the uber masters were initialized */ - for (gtid = 0; gtid < __kmp_threads_capacity; gtid++) - if (__kmp_root[gtid]) { - KMP_DEBUG_ASSERT(__kmp_root[gtid]->r.r_uber_thread); - for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) - KMP_DEBUG_ASSERT( - !__kmp_root[gtid]->r.r_uber_thread->th.th_pri_common->data[q]); - /* __kmp_root[ gitd ]-> r.r_uber_thread -> - * th.th_pri_common -> data[ q ] = 0;*/ - } -#endif /* KMP_DEBUG */ - - for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) - __kmp_threadprivate_d_table.data[q] = 0; - - TCW_4(__kmp_init_common, TRUE); - } -} - -/* Call all destructors for threadprivate data belonging to all threads. - Currently unused! */ -void __kmp_common_destroy(void) { - if (TCR_4(__kmp_init_common)) { - int q; - - TCW_4(__kmp_init_common, FALSE); - - for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) { - int gtid; - struct private_common *tn; - struct shared_common *d_tn; - - /* C++ destructors need to be called once per thread before exiting. - Don't call destructors for master thread though unless we used copy - constructor */ - - for (d_tn = __kmp_threadprivate_d_table.data[q]; d_tn; - d_tn = d_tn->next) { - if (d_tn->is_vec) { - if (d_tn->dt.dtorv != 0) { - for (gtid = 0; gtid < __kmp_all_nth; ++gtid) { - if (__kmp_threads[gtid]) { - if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid)) - : (!KMP_UBER_GTID(gtid))) { - tn = __kmp_threadprivate_find_task_common( - __kmp_threads[gtid]->th.th_pri_common, gtid, - d_tn->gbl_addr); - if (tn) { - (*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len); - } - } - } - } - if (d_tn->obj_init != 0) { - (*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len); - } - } - } else { - if (d_tn->dt.dtor != 0) { - for (gtid = 0; gtid < __kmp_all_nth; ++gtid) { - if (__kmp_threads[gtid]) { - if ((__kmp_foreign_tp) ? 
(!KMP_INITIAL_GTID(gtid)) - : (!KMP_UBER_GTID(gtid))) { - tn = __kmp_threadprivate_find_task_common( - __kmp_threads[gtid]->th.th_pri_common, gtid, - d_tn->gbl_addr); - if (tn) { - (*d_tn->dt.dtor)(tn->par_addr); - } - } - } - } - if (d_tn->obj_init != 0) { - (*d_tn->dt.dtor)(d_tn->obj_init); - } - } - } - } - __kmp_threadprivate_d_table.data[q] = 0; - } - } -} - -/* Call all destructors for threadprivate data belonging to this thread */ -void __kmp_common_destroy_gtid(int gtid) { - struct private_common *tn; - struct shared_common *d_tn; - - if (!TCR_4(__kmp_init_gtid)) { - // This is possible when one of multiple roots initiates early library - // termination in a sequential region while other teams are active, and its - // child threads are about to end. - return; - } - - KC_TRACE(10, ("__kmp_common_destroy_gtid: T#%d called\n", gtid)); - if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid)) : (!KMP_UBER_GTID(gtid))) { - - if (TCR_4(__kmp_init_common)) { - - /* Cannot do this here since not all threads have destroyed their data */ - /* TCW_4(__kmp_init_common, FALSE); */ - - for (tn = __kmp_threads[gtid]->th.th_pri_head; tn; tn = tn->link) { - - d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid, - tn->gbl_addr); - - KMP_DEBUG_ASSERT(d_tn); - - if (d_tn->is_vec) { - if (d_tn->dt.dtorv != 0) { - (void)(*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len); - } - if (d_tn->obj_init != 0) { - (void)(*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len); - } - } else { - if (d_tn->dt.dtor != 0) { - (void)(*d_tn->dt.dtor)(tn->par_addr); - } - if (d_tn->obj_init != 0) { - (void)(*d_tn->dt.dtor)(d_tn->obj_init); - } - } - } - KC_TRACE(30, ("__kmp_common_destroy_gtid: T#%d threadprivate destructors " - "complete\n", - gtid)); - } - } -} - -#ifdef KMP_TASK_COMMON_DEBUG -static void dump_list(void) { - int p, q; - - for (p = 0; p < __kmp_all_nth; ++p) { - if (!__kmp_threads[p]) - continue; - for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) { - if (__kmp_threads[p]->th.th_pri_common->data[q]) { - struct private_common *tn; - - KC_TRACE(10, ("\tdump_list: gtid:%d addresses\n", p)); - - for (tn = __kmp_threads[p]->th.th_pri_common->data[q]; tn; - tn = tn->next) { - KC_TRACE(10, - ("\tdump_list: THREADPRIVATE: Serial %p -> Parallel %p\n", - tn->gbl_addr, tn->par_addr)); - } - } - } - } -} -#endif /* KMP_TASK_COMMON_DEBUG */ - -// NOTE: this routine is to be called only from the serial part of the program. 
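// A simplified, hypothetical sketch of what __kmp_common_destroy_gtid above
// does in the non-vector case: walk this thread's list of privatized
// variables, look up each one's shared descriptor, and run the registered
// destructor on the thread's private copy. find_dtor stands in for the
// __kmp_find_shared_task_common lookup plus the dt.dtor field.
struct demo_priv { void *gbl_addr; void *par_addr; demo_priv *link; };
typedef void (*demo_dtor_t)(void *);

static void demo_destroy_thread_data(demo_priv *head,
                                     demo_dtor_t (*find_dtor)(void *)) {
  for (demo_priv *tn = head; tn != nullptr; tn = tn->link) {
    demo_dtor_t dtor = find_dtor(tn->gbl_addr);
    if (dtor != nullptr)
      dtor(tn->par_addr); // destroy this thread's copy only
  }
}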
-void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr, - void *data_addr, size_t pc_size) { - struct shared_common **lnk_tn, *d_tn; - KMP_DEBUG_ASSERT(__kmp_threads[gtid] && - __kmp_threads[gtid]->th.th_root->r.r_active == 0); - - d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid, - pc_addr); - - if (d_tn == 0) { - d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common)); - - d_tn->gbl_addr = pc_addr; - d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size); - /* - d_tn->obj_init = 0; // AC: commented out because __kmp_allocate - zeroes the memory - d_tn->ct.ctor = 0; - d_tn->cct.cctor = 0;; - d_tn->dt.dtor = 0; - d_tn->is_vec = FALSE; - d_tn->vec_len = 0L; - */ - d_tn->cmn_size = pc_size; - - __kmp_acquire_lock(&__kmp_global_lock, gtid); - - lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]); - - d_tn->next = *lnk_tn; - *lnk_tn = d_tn; - - __kmp_release_lock(&__kmp_global_lock, gtid); - } -} - -struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr, - void *data_addr, - size_t pc_size) { - struct private_common *tn, **tt; - struct shared_common *d_tn; - - /* +++++++++ START OF CRITICAL SECTION +++++++++ */ - __kmp_acquire_lock(&__kmp_global_lock, gtid); - - tn = (struct private_common *)__kmp_allocate(sizeof(struct private_common)); - - tn->gbl_addr = pc_addr; - - d_tn = __kmp_find_shared_task_common( - &__kmp_threadprivate_d_table, gtid, - pc_addr); /* Only the MASTER data table exists. */ - - if (d_tn != 0) { - /* This threadprivate variable has already been seen. */ - - if (d_tn->pod_init == 0 && d_tn->obj_init == 0) { - d_tn->cmn_size = pc_size; - - if (d_tn->is_vec) { - if (d_tn->ct.ctorv != 0) { - /* Construct from scratch so no prototype exists */ - d_tn->obj_init = 0; - } else if (d_tn->cct.cctorv != 0) { - /* Now data initialize the prototype since it was previously - * registered */ - d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size); - (void)(*d_tn->cct.cctorv)(d_tn->obj_init, pc_addr, d_tn->vec_len); - } else { - d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size); - } - } else { - if (d_tn->ct.ctor != 0) { - /* Construct from scratch so no prototype exists */ - d_tn->obj_init = 0; - } else if (d_tn->cct.cctor != 0) { - /* Now data initialize the prototype since it was previously - registered */ - d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size); - (void)(*d_tn->cct.cctor)(d_tn->obj_init, pc_addr); - } else { - d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size); - } - } - } - } else { - struct shared_common **lnk_tn; - - d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common)); - d_tn->gbl_addr = pc_addr; - d_tn->cmn_size = pc_size; - d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size); - /* - d_tn->obj_init = 0; // AC: commented out because __kmp_allocate - zeroes the memory - d_tn->ct.ctor = 0; - d_tn->cct.cctor = 0; - d_tn->dt.dtor = 0; - d_tn->is_vec = FALSE; - d_tn->vec_len = 0L; - */ - lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]); - - d_tn->next = *lnk_tn; - *lnk_tn = d_tn; - } - - tn->cmn_size = d_tn->cmn_size; - - if ((__kmp_foreign_tp) ? 
(KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid))) { - tn->par_addr = (void *)pc_addr; - } else { - tn->par_addr = (void *)__kmp_allocate(tn->cmn_size); - } - - __kmp_release_lock(&__kmp_global_lock, gtid); -/* +++++++++ END OF CRITICAL SECTION +++++++++ */ - -#ifdef USE_CHECKS_COMMON - if (pc_size > d_tn->cmn_size) { - KC_TRACE( - 10, ("__kmp_threadprivate_insert: THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC - " ,%" KMP_UINTPTR_SPEC ")\n", - pc_addr, pc_size, d_tn->cmn_size)); - KMP_FATAL(TPCommonBlocksInconsist); - } -#endif /* USE_CHECKS_COMMON */ - - tt = &(__kmp_threads[gtid]->th.th_pri_common->data[KMP_HASH(pc_addr)]); - -#ifdef KMP_TASK_COMMON_DEBUG - if (*tt != 0) { - KC_TRACE( - 10, - ("__kmp_threadprivate_insert: WARNING! thread#%d: collision on %p\n", - gtid, pc_addr)); - } -#endif - tn->next = *tt; - *tt = tn; - -#ifdef KMP_TASK_COMMON_DEBUG - KC_TRACE(10, - ("__kmp_threadprivate_insert: thread#%d, inserted node %p on list\n", - gtid, pc_addr)); - dump_list(); -#endif - - /* Link the node into a simple list */ - - tn->link = __kmp_threads[gtid]->th.th_pri_head; - __kmp_threads[gtid]->th.th_pri_head = tn; - - if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid))) - return tn; - - /* if C++ object with copy constructor, use it; - * else if C++ object with constructor, use it for the non-master copies only; - * else use pod_init and memcpy - * - * C++ constructors need to be called once for each non-master thread on - * allocate - * C++ copy constructors need to be called once for each thread on allocate */ - - /* C++ object with constructors/destructors; don't call constructors for - master thread though */ - if (d_tn->is_vec) { - if (d_tn->ct.ctorv != 0) { - (void)(*d_tn->ct.ctorv)(tn->par_addr, d_tn->vec_len); - } else if (d_tn->cct.cctorv != 0) { - (void)(*d_tn->cct.cctorv)(tn->par_addr, d_tn->obj_init, d_tn->vec_len); - } else if (tn->par_addr != tn->gbl_addr) { - __kmp_copy_common_data(tn->par_addr, d_tn->pod_init); - } - } else { - if (d_tn->ct.ctor != 0) { - (void)(*d_tn->ct.ctor)(tn->par_addr); - } else if (d_tn->cct.cctor != 0) { - (void)(*d_tn->cct.cctor)(tn->par_addr, d_tn->obj_init); - } else if (tn->par_addr != tn->gbl_addr) { - __kmp_copy_common_data(tn->par_addr, d_tn->pod_init); - } - } - /* !BUILD_OPENMP_C - if (tn->par_addr != tn->gbl_addr) - __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); */ - - return tn; -} - -/* ------------------------------------------------------------------------ */ -/* We are currently parallel, and we know the thread id. */ -/* ------------------------------------------------------------------------ */ - -/*! - @ingroup THREADPRIVATE - - @param loc source location information - @param data pointer to data being privatized - @param ctor pointer to constructor function for data - @param cctor pointer to copy constructor function for data - @param dtor pointer to destructor function for data - - Register constructors and destructors for thread private data. - This function is called when executing in parallel, when we know the thread id. -*/ -void __kmpc_threadprivate_register(ident_t *loc, void *data, kmpc_ctor ctor, - kmpc_cctor cctor, kmpc_dtor dtor) { - struct shared_common *d_tn, **lnk_tn; - - KC_TRACE(10, ("__kmpc_threadprivate_register: called\n")); - -#ifdef USE_CHECKS_COMMON - /* copy constructor must be zero for current code gen (Nov 2002 - jph) */ - KMP_ASSERT(cctor == 0); -#endif /* USE_CHECKS_COMMON */ - - /* Only the global data table exists. 
*/ - d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, -1, data); - - if (d_tn == 0) { - d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common)); - d_tn->gbl_addr = data; - - d_tn->ct.ctor = ctor; - d_tn->cct.cctor = cctor; - d_tn->dt.dtor = dtor; - /* - d_tn->is_vec = FALSE; // AC: commented out because __kmp_allocate - zeroes the memory - d_tn->vec_len = 0L; - d_tn->obj_init = 0; - d_tn->pod_init = 0; - */ - lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]); - - d_tn->next = *lnk_tn; - *lnk_tn = d_tn; - } -} - -void *__kmpc_threadprivate(ident_t *loc, kmp_int32 global_tid, void *data, - size_t size) { - void *ret; - struct private_common *tn; - - KC_TRACE(10, ("__kmpc_threadprivate: T#%d called\n", global_tid)); - -#ifdef USE_CHECKS_COMMON - if (!__kmp_init_serial) - KMP_FATAL(RTLNotInitialized); -#endif /* USE_CHECKS_COMMON */ - - if (!__kmp_threads[global_tid]->th.th_root->r.r_active && !__kmp_foreign_tp) { - /* The parallel address will NEVER overlap with the data_address */ - /* dkp: 3rd arg to kmp_threadprivate_insert_private_data() is the - * data_address; use data_address = data */ - - KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting private data\n", - global_tid)); - kmp_threadprivate_insert_private_data(global_tid, data, data, size); - - ret = data; - } else { - KC_TRACE( - 50, - ("__kmpc_threadprivate: T#%d try to find private data at address %p\n", - global_tid, data)); - tn = __kmp_threadprivate_find_task_common( - __kmp_threads[global_tid]->th.th_pri_common, global_tid, data); - - if (tn) { - KC_TRACE(20, ("__kmpc_threadprivate: T#%d found data\n", global_tid)); -#ifdef USE_CHECKS_COMMON - if ((size_t)size > tn->cmn_size) { - KC_TRACE(10, ("THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC - " ,%" KMP_UINTPTR_SPEC ")\n", - data, size, tn->cmn_size)); - KMP_FATAL(TPCommonBlocksInconsist); - } -#endif /* USE_CHECKS_COMMON */ - } else { - /* The parallel address will NEVER overlap with the data_address */ - /* dkp: 3rd arg to kmp_threadprivate_insert() is the data_address; use - * data_address = data */ - KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting data\n", global_tid)); - tn = kmp_threadprivate_insert(global_tid, data, data, size); - } - - ret = tn->par_addr; - } - KC_TRACE(10, ("__kmpc_threadprivate: T#%d exiting; return value = %p\n", - global_tid, ret)); - - return ret; -} - -static kmp_cached_addr_t *__kmp_find_cache(void *data) { - kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list; - while (ptr && ptr->data != data) - ptr = ptr->next; - return ptr; -} - -/*! - @ingroup THREADPRIVATE - @param loc source location information - @param global_tid global thread number - @param data pointer to data to privatize - @param size size of data to privatize - @param cache pointer to cache - @return pointer to private storage - - Allocate private storage for threadprivate data. -*/ -void * -__kmpc_threadprivate_cached(ident_t *loc, - kmp_int32 global_tid, // gtid. - void *data, // Pointer to original global variable. - size_t size, // Size of original global variable. 
- void ***cache) { - KC_TRACE(10, ("__kmpc_threadprivate_cached: T#%d called with cache: %p, " - "address: %p, size: %" KMP_SIZE_T_SPEC "\n", - global_tid, *cache, data, size)); - - if (TCR_PTR(*cache) == 0) { - __kmp_acquire_lock(&__kmp_global_lock, global_tid); - - if (TCR_PTR(*cache) == 0) { - __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock); - // Compiler often passes in NULL cache, even if it's already been created - void **my_cache; - kmp_cached_addr_t *tp_cache_addr; - // Look for an existing cache - tp_cache_addr = __kmp_find_cache(data); - if (!tp_cache_addr) { // Cache was never created; do it now - __kmp_tp_cached = 1; - KMP_ITT_IGNORE(my_cache = (void **)__kmp_allocate( - sizeof(void *) * __kmp_tp_capacity + - sizeof(kmp_cached_addr_t));); - // No need to zero the allocated memory; __kmp_allocate does that. - KC_TRACE(50, ("__kmpc_threadprivate_cached: T#%d allocated cache at " - "address %p\n", - global_tid, my_cache)); - /* TODO: free all this memory in __kmp_common_destroy using - * __kmp_threadpriv_cache_list */ - /* Add address of mycache to linked list for cleanup later */ - tp_cache_addr = (kmp_cached_addr_t *)&my_cache[__kmp_tp_capacity]; - tp_cache_addr->addr = my_cache; - tp_cache_addr->data = data; - tp_cache_addr->compiler_cache = cache; - tp_cache_addr->next = __kmp_threadpriv_cache_list; - __kmp_threadpriv_cache_list = tp_cache_addr; - } else { // A cache was already created; use it - my_cache = tp_cache_addr->addr; - tp_cache_addr->compiler_cache = cache; - } - KMP_MB(); - - TCW_PTR(*cache, my_cache); - __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock); - - KMP_MB(); - } - __kmp_release_lock(&__kmp_global_lock, global_tid); - } - - void *ret; - if ((ret = TCR_PTR((*cache)[global_tid])) == 0) { - ret = __kmpc_threadprivate(loc, global_tid, data, (size_t)size); - - TCW_PTR((*cache)[global_tid], ret); - } - KC_TRACE(10, - ("__kmpc_threadprivate_cached: T#%d exiting; return value = %p\n", - global_tid, ret)); - return ret; -} - -// This function should only be called when both __kmp_tp_cached_lock and -// kmp_forkjoin_lock are held. -void __kmp_threadprivate_resize_cache(int newCapacity) { - KC_TRACE(10, ("__kmp_threadprivate_resize_cache: called with size: %d\n", - newCapacity)); - - kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list; - - while (ptr) { - if (ptr->data) { // this location has an active cache; resize it - void **my_cache; - KMP_ITT_IGNORE(my_cache = - (void **)__kmp_allocate(sizeof(void *) * newCapacity + - sizeof(kmp_cached_addr_t));); - // No need to zero the allocated memory; __kmp_allocate does that. - KC_TRACE(50, ("__kmp_threadprivate_resize_cache: allocated cache at %p\n", - my_cache)); - // Now copy old cache into new cache - void **old_cache = ptr->addr; - for (int i = 0; i < __kmp_tp_capacity; ++i) { - my_cache[i] = old_cache[i]; - } - - // Add address of new my_cache to linked list for cleanup later - kmp_cached_addr_t *tp_cache_addr; - tp_cache_addr = (kmp_cached_addr_t *)&my_cache[newCapacity]; - tp_cache_addr->addr = my_cache; - tp_cache_addr->data = ptr->data; - tp_cache_addr->compiler_cache = ptr->compiler_cache; - tp_cache_addr->next = __kmp_threadpriv_cache_list; - __kmp_threadpriv_cache_list = tp_cache_addr; - - // Copy new cache to compiler's location: We can copy directly - // to (*compiler_cache) if compiler guarantees it will keep - // using the same location for the cache. 
This is not yet true - // for some compilers, in which case we have to check if - // compiler_cache is still pointing at old cache, and if so, we - // can point it at the new cache with an atomic compare&swap - // operation. (Old method will always work, but we should shift - // to new method (commented line below) when Intel and Clang - // compilers use new method.) - (void)KMP_COMPARE_AND_STORE_PTR(tp_cache_addr->compiler_cache, old_cache, - my_cache); - // TCW_PTR(*(tp_cache_addr->compiler_cache), my_cache); - - // If the store doesn't happen here, the compiler's old behavior will - // inevitably call __kmpc_threadprivate_cache with a new location for the - // cache, and that function will store the resized cache there at that - // point. - - // Nullify old cache's data pointer so we skip it next time - ptr->data = NULL; - } - ptr = ptr->next; - } - // After all caches are resized, update __kmp_tp_capacity to the new size - *(volatile int *)&__kmp_tp_capacity = newCapacity; -} - -/*! - @ingroup THREADPRIVATE - @param loc source location information - @param data pointer to data being privatized - @param ctor pointer to constructor function for data - @param cctor pointer to copy constructor function for data - @param dtor pointer to destructor function for data - @param vector_length length of the vector (bytes or elements?) - Register vector constructors and destructors for thread private data. -*/ -void __kmpc_threadprivate_register_vec(ident_t *loc, void *data, - kmpc_ctor_vec ctor, kmpc_cctor_vec cctor, - kmpc_dtor_vec dtor, - size_t vector_length) { - struct shared_common *d_tn, **lnk_tn; - - KC_TRACE(10, ("__kmpc_threadprivate_register_vec: called\n")); - -#ifdef USE_CHECKS_COMMON - /* copy constructor must be zero for current code gen (Nov 2002 - jph) */ - KMP_ASSERT(cctor == 0); -#endif /* USE_CHECKS_COMMON */ - - d_tn = __kmp_find_shared_task_common( - &__kmp_threadprivate_d_table, -1, - data); /* Only the global data table exists. */ - - if (d_tn == 0) { - d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common)); - d_tn->gbl_addr = data; - - d_tn->ct.ctorv = ctor; - d_tn->cct.cctorv = cctor; - d_tn->dt.dtorv = dtor; - d_tn->is_vec = TRUE; - d_tn->vec_len = (size_t)vector_length; - // d_tn->obj_init = 0; // AC: __kmp_allocate zeroes the memory - // d_tn->pod_init = 0; - lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]); - - d_tn->next = *lnk_tn; - *lnk_tn = d_tn; - } -} - -void __kmp_cleanup_threadprivate_caches() { - kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list; - - while (ptr) { - void **cache = ptr->addr; - __kmp_threadpriv_cache_list = ptr->next; - if (*ptr->compiler_cache) - *ptr->compiler_cache = NULL; - ptr->compiler_cache = NULL; - ptr->data = NULL; - ptr->addr = NULL; - ptr->next = NULL; - // Threadprivate data pointed at by cache entries are destroyed at end of - // __kmp_launch_thread with __kmp_common_destroy_gtid. 
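// A self-contained sketch of the compare-and-swap idea described in the
// comment above; std::atomic stands in for KMP_COMPARE_AND_STORE_PTR and the
// names are invented. The compiler's cache pointer is swung to the resized
// cache only if it still points at the old one, so a concurrent update (or a
// later __kmpc_threadprivate_cached call) is never clobbered.
#include <atomic>

static void demo_repoint_cache(std::atomic<void **> &compiler_cache,
                               void **old_cache, void **new_cache) {
  void **expected = old_cache;
  // On failure someone else already moved the pointer; leave it alone.
  compiler_cache.compare_exchange_strong(expected, new_cache);
}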
- __kmp_free(cache); // implicitly frees ptr too - ptr = __kmp_threadpriv_cache_list; - } -} Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_threadprivate.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/ompt-internal.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/ompt-internal.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/ompt-internal.h (nonexistent) @@ -1,129 +0,0 @@ -/* - * ompt-internal.h - header of OMPT internal data structures - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#ifndef __OMPT_INTERNAL_H__ -#define __OMPT_INTERNAL_H__ - -#include "ompt-event-specific.h" -#include "omp-tools.h" - -#define OMPT_VERSION 1 - -#define _OMP_EXTERN extern "C" - -#define OMPT_INVOKER(x) \ - ((x == fork_context_gnu) ? ompt_parallel_invoker_program \ - : ompt_parallel_invoker_runtime) - -#define ompt_callback(e) e##_callback - -typedef struct ompt_callbacks_internal_s { -#define ompt_event_macro(event, callback, eventid) \ - callback ompt_callback(event); - - FOREACH_OMPT_EVENT(ompt_event_macro) - -#undef ompt_event_macro -} ompt_callbacks_internal_t; - -typedef struct ompt_callbacks_active_s { - unsigned int enabled : 1; -#define ompt_event_macro(event, callback, eventid) unsigned int event : 1; - - FOREACH_OMPT_EVENT(ompt_event_macro) - -#undef ompt_event_macro -} ompt_callbacks_active_t; - -#define TASK_TYPE_DETAILS_FORMAT(info) \ - ((info->td_flags.task_serial || info->td_flags.tasking_ser) \ - ? ompt_task_undeferred \ - : 0x0) | \ - ((!(info->td_flags.tiedness)) ? ompt_task_untied : 0x0) | \ - (info->td_flags.final ? ompt_task_final : 0x0) | \ - (info->td_flags.merged_if0 ? 
ompt_task_mergeable : 0x0) - -typedef struct { - ompt_frame_t frame; - ompt_data_t task_data; - struct kmp_taskdata *scheduling_parent; - int thread_num; -#if OMP_40_ENABLED - int ndeps; - ompt_dependence_t *deps; -#endif /* OMP_40_ENABLED */ -} ompt_task_info_t; - -typedef struct { - ompt_data_t parallel_data; - void *master_return_address; -} ompt_team_info_t; - -typedef struct ompt_lw_taskteam_s { - ompt_team_info_t ompt_team_info; - ompt_task_info_t ompt_task_info; - int heap; - struct ompt_lw_taskteam_s *parent; -} ompt_lw_taskteam_t; - -typedef struct { - ompt_data_t thread_data; - ompt_data_t task_data; /* stored here from implicit barrier-begin until - implicit-task-end */ - void *return_address; /* stored here on entry of runtime */ - ompt_state_t state; - ompt_wait_id_t wait_id; - int ompt_task_yielded; - void *idle_frame; -} ompt_thread_info_t; - -extern ompt_callbacks_internal_t ompt_callbacks; - -#if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_OPTIONAL -#if USE_FAST_MEMORY -#define KMP_OMPT_DEPS_ALLOC __kmp_fast_allocate -#define KMP_OMPT_DEPS_FREE __kmp_fast_free -#else -#define KMP_OMPT_DEPS_ALLOC __kmp_thread_malloc -#define KMP_OMPT_DEPS_FREE __kmp_thread_free -#endif -#endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_OPTIONAL */ - -#ifdef __cplusplus -extern "C" { -#endif - -void ompt_pre_init(void); -void ompt_post_init(void); -void ompt_fini(void); - -#define OMPT_GET_RETURN_ADDRESS(level) __builtin_return_address(level) -#define OMPT_GET_FRAME_ADDRESS(level) __builtin_frame_address(level) - -int __kmp_control_tool(uint64_t command, uint64_t modifier, void *arg); - -extern ompt_callbacks_active_t ompt_enabled; - -#if KMP_OS_WINDOWS -#define UNLIKELY(x) (x) -#define OMPT_NOINLINE __declspec(noinline) -#else -#define UNLIKELY(x) __builtin_expect(!!(x), 0) -#define OMPT_NOINLINE __attribute__((noinline)) -#endif - -#ifdef __cplusplus -}; -#endif - -#endif Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/ompt-internal.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_itt.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_itt.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_itt.cpp (nonexistent) @@ -1,161 +0,0 @@ -#include "kmp_config.h" - -#if USE_ITT_BUILD -/* - * kmp_itt.cpp -- ITT Notify interface. - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
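// Looking back at the ompt-internal.h block above: a minimal demonstration of
// its X-macro pattern, where a single event list expands into a one-bit-per-
// event "enabled" bitfield (and, analogously, into the struct of callback
// pointers). All names here are invented for the demo.
#define FOREACH_DEMO_EVENT(macro)                                             \
  macro(task_create)                                                           \
  macro(task_schedule)

typedef struct demo_active_s {
  unsigned int enabled : 1;
#define demo_event_macro(event) unsigned int event : 1;
  FOREACH_DEMO_EVENT(demo_event_macro)
#undef demo_event_macro
} demo_active_t; // one machine word; testing a callback is a single bit test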
-// -//===----------------------------------------------------------------------===// - -#include "kmp_itt.h" - -#if KMP_DEBUG -#include "kmp_itt.inl" -#endif - -#if USE_ITT_NOTIFY - -#include "ittnotify_config.h" -__itt_global __kmp_ittapi_clean_global; -extern __itt_global __kmp_itt__ittapi_global; -kmp_int32 __kmp_barrier_domain_count; -kmp_int32 __kmp_region_domain_count; -__itt_domain *__kmp_itt_barrier_domains[KMP_MAX_FRAME_DOMAINS]; -__itt_domain *__kmp_itt_region_domains[KMP_MAX_FRAME_DOMAINS]; -__itt_domain *__kmp_itt_imbalance_domains[KMP_MAX_FRAME_DOMAINS]; -kmp_int32 __kmp_itt_region_team_size[KMP_MAX_FRAME_DOMAINS]; -__itt_domain *metadata_domain = NULL; -__itt_string_handle *string_handle_imbl = NULL; -__itt_string_handle *string_handle_loop = NULL; -__itt_string_handle *string_handle_sngl = NULL; - -#include "kmp_i18n.h" -#include "kmp_str.h" -#include "kmp_version.h" - -KMP_BUILD_ASSERT(sizeof(kmp_itt_mark_t) == sizeof(__itt_mark_type)); - -/* Previously used warnings: - - KMP_WARNING( IttAllNotifDisabled ); - KMP_WARNING( IttObjNotifDisabled ); - KMP_WARNING( IttMarkNotifDisabled ); - KMP_WARNING( IttUnloadLibFailed, libittnotify ); -*/ - -kmp_int32 __kmp_itt_prepare_delay = 0; -kmp_bootstrap_lock_t __kmp_itt_debug_lock = - KMP_BOOTSTRAP_LOCK_INITIALIZER(__kmp_itt_debug_lock); - -#endif // USE_ITT_NOTIFY - -void __kmp_itt_reset() { -#if USE_ITT_NOTIFY - __kmp_itt__ittapi_global = __kmp_ittapi_clean_global; -#endif -} - -void __kmp_itt_initialize() { - -// ITTNotify library is loaded and initialized at first call to any ittnotify -// function, so we do not need to explicitly load it any more. Just report OMP -// RTL version to ITTNotify. - -#if USE_ITT_NOTIFY - // Backup a clean global state - __kmp_ittapi_clean_global = __kmp_itt__ittapi_global; - - // Report OpenMP RTL version. 
- kmp_str_buf_t buf; - __itt_mark_type version; - __kmp_str_buf_init(&buf); - __kmp_str_buf_print(&buf, "OMP RTL Version %d.%d.%d", __kmp_version_major, - __kmp_version_minor, __kmp_version_build); - if (__itt_api_version_ptr != NULL) { - __kmp_str_buf_print(&buf, ":%s", __itt_api_version()); - } - version = __itt_mark_create(buf.str); - __itt_mark(version, NULL); - __kmp_str_buf_free(&buf); -#endif - -} // __kmp_itt_initialize - -void __kmp_itt_destroy() { -#if USE_ITT_NOTIFY - __kmp_itt_fini_ittlib(); -#endif -} // __kmp_itt_destroy - -extern "C" void __itt_error_handler(__itt_error_code err, va_list args) { - - switch (err) { - case __itt_error_no_module: { - char const *library = va_arg(args, char const *); -#if KMP_OS_WINDOWS - int sys_err = va_arg(args, int); - kmp_msg_t err_code = KMP_SYSERRCODE(sys_err); - __kmp_msg(kmp_ms_warning, KMP_MSG(IttLoadLibFailed, library), err_code, - __kmp_msg_null); - if (__kmp_generate_warnings == kmp_warnings_off) { - __kmp_str_free(&err_code.str); - } -#else - char const *sys_err = va_arg(args, char const *); - kmp_msg_t err_code = KMP_SYSERRMESG(sys_err); - __kmp_msg(kmp_ms_warning, KMP_MSG(IttLoadLibFailed, library), err_code, - __kmp_msg_null); - if (__kmp_generate_warnings == kmp_warnings_off) { - __kmp_str_free(&err_code.str); - } -#endif - } break; - case __itt_error_no_symbol: { - char const *library = va_arg(args, char const *); - char const *symbol = va_arg(args, char const *); - KMP_WARNING(IttLookupFailed, symbol, library); - } break; - case __itt_error_unknown_group: { - char const *var = va_arg(args, char const *); - char const *group = va_arg(args, char const *); - KMP_WARNING(IttUnknownGroup, var, group); - } break; - case __itt_error_env_too_long: { - char const *var = va_arg(args, char const *); - size_t act_len = va_arg(args, size_t); - size_t max_len = va_arg(args, size_t); - KMP_WARNING(IttEnvVarTooLong, var, (unsigned long)act_len, - (unsigned long)max_len); - } break; - case __itt_error_cant_read_env: { - char const *var = va_arg(args, char const *); - int sys_err = va_arg(args, int); - kmp_msg_t err_code = KMP_ERR(sys_err); - __kmp_msg(kmp_ms_warning, KMP_MSG(CantGetEnvVar, var), err_code, - __kmp_msg_null); - if (__kmp_generate_warnings == kmp_warnings_off) { - __kmp_str_free(&err_code.str); - } - } break; - case __itt_error_system: { - char const *func = va_arg(args, char const *); - int sys_err = va_arg(args, int); - kmp_msg_t err_code = KMP_SYSERRCODE(sys_err); - __kmp_msg(kmp_ms_warning, KMP_MSG(IttFunctionError, func), err_code, - __kmp_msg_null); - if (__kmp_generate_warnings == kmp_warnings_off) { - __kmp_str_free(&err_code.str); - } - } break; - default: { KMP_WARNING(IttUnknownError, err); } - } -} // __itt_error_handler - -#endif /* USE_ITT_BUILD */ Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_itt.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_runtime.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_runtime.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_runtime.cpp (nonexistent) @@ -1,8192 +0,0 @@ -/* - * kmp_runtime.cpp -- KPTS 
runtime support library - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#include "kmp.h" -#include "kmp_affinity.h" -#include "kmp_atomic.h" -#include "kmp_environment.h" -#include "kmp_error.h" -#include "kmp_i18n.h" -#include "kmp_io.h" -#include "kmp_itt.h" -#include "kmp_settings.h" -#include "kmp_stats.h" -#include "kmp_str.h" -#include "kmp_wait_release.h" -#include "kmp_wrapper_getpid.h" -#include "kmp_dispatch.h" -#if KMP_USE_HIER_SCHED -#include "kmp_dispatch_hier.h" -#endif - -#if OMPT_SUPPORT -#include "ompt-specific.h" -#endif - -/* these are temporary issues to be dealt with */ -#define KMP_USE_PRCTL 0 - -#if KMP_OS_WINDOWS -#include -#endif - -#include "tsan_annotations.h" - -#if defined(KMP_GOMP_COMPAT) -char const __kmp_version_alt_comp[] = - KMP_VERSION_PREFIX "alternative compiler support: yes"; -#endif /* defined(KMP_GOMP_COMPAT) */ - -char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: " -#if OMP_50_ENABLED - "5.0 (201611)"; -#elif OMP_45_ENABLED - "4.5 (201511)"; -#elif OMP_40_ENABLED - "4.0 (201307)"; -#else - "3.1 (201107)"; -#endif - -#ifdef KMP_DEBUG -char const __kmp_version_lock[] = - KMP_VERSION_PREFIX "lock type: run time selectable"; -#endif /* KMP_DEBUG */ - -#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y)) - -/* ------------------------------------------------------------------------ */ - -#if KMP_USE_MONITOR -kmp_info_t __kmp_monitor; -#endif - -/* Forward declarations */ - -void __kmp_cleanup(void); - -static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid, - int gtid); -static void __kmp_initialize_team(kmp_team_t *team, int new_nproc, - kmp_internal_control_t *new_icvs, - ident_t *loc); -#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED -static void __kmp_partition_places(kmp_team_t *team, - int update_master_only = 0); -#endif -static void __kmp_do_serial_initialize(void); -void __kmp_fork_barrier(int gtid, int tid); -void __kmp_join_barrier(int gtid); -void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, - kmp_internal_control_t *new_icvs, ident_t *loc); - -#ifdef USE_LOAD_BALANCE -static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc); -#endif - -static int __kmp_expand_threads(int nNeed); -#if KMP_OS_WINDOWS -static int __kmp_unregister_root_other_thread(int gtid); -#endif -static void __kmp_unregister_library(void); // called by __kmp_internal_end() -static void __kmp_reap_thread(kmp_info_t *thread, int is_root); -kmp_info_t *__kmp_thread_pool_insert_pt = NULL; - -/* Calculate the identifier of the current thread */ -/* fast (and somewhat portable) way to get unique identifier of executing - thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */ -int __kmp_get_global_thread_id() { - int i; - kmp_info_t **other_threads; - size_t stack_data; - char *stack_addr; - size_t stack_size; - char *stack_base; - - KA_TRACE( - 1000, - ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n", - __kmp_nth, __kmp_all_nth)); - - /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to - a parallel region, made it return KMP_GTID_DNE to force serial_initialize - by caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee - __kmp_init_gtid for this to work. 
*/ - - if (!TCR_4(__kmp_init_gtid)) - return KMP_GTID_DNE; - -#ifdef KMP_TDATA_GTID - if (TCR_4(__kmp_gtid_mode) >= 3) { - KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n")); - return __kmp_gtid; - } -#endif - if (TCR_4(__kmp_gtid_mode) >= 2) { - KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n")); - return __kmp_gtid_get_specific(); - } - KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n")); - - stack_addr = (char *)&stack_data; - other_threads = __kmp_threads; - - /* ATT: The code below is a source of potential bugs due to unsynchronized - access to __kmp_threads array. For example: - 1. Current thread loads other_threads[i] to thr and checks it, it is - non-NULL. - 2. Current thread is suspended by OS. - 3. Another thread unregisters and finishes (debug versions of free() - may fill memory with something like 0xEF). - 4. Current thread is resumed. - 5. Current thread reads junk from *thr. - TODO: Fix it. --ln */ - - for (i = 0; i < __kmp_threads_capacity; i++) { - - kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]); - if (!thr) - continue; - - stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize); - stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase); - - /* stack grows down -- search through all of the active threads */ - - if (stack_addr <= stack_base) { - size_t stack_diff = stack_base - stack_addr; - - if (stack_diff <= stack_size) { - /* The only way we can be closer than the allocated */ - /* stack size is if we are running on this thread. */ - KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i); - return i; - } - } - } - - /* get specific to try and determine our gtid */ - KA_TRACE(1000, - ("*** __kmp_get_global_thread_id: internal alg. failed to find " - "thread, using TLS\n")); - i = __kmp_gtid_get_specific(); - - /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */ - - /* if we havn't been assigned a gtid, then return code */ - if (i < 0) - return i; - - /* dynamically updated stack window for uber threads to avoid get_specific - call */ - if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) { - KMP_FATAL(StackOverflow, i); - } - - stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase; - if (stack_addr > stack_base) { - TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr); - TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, - other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - - stack_base); - } else { - TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, - stack_base - stack_addr); - } - - /* Reprint stack bounds for ubermaster since they have been refined */ - if (__kmp_storage_map) { - char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase; - char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize; - __kmp_print_storage_map_gtid(i, stack_beg, stack_end, - other_threads[i]->th.th_info.ds.ds_stacksize, - "th_%d stack (refinement)", i); - } - return i; -} - -int __kmp_get_global_thread_id_reg() { - int gtid; - - if (!__kmp_init_serial) { - gtid = KMP_GTID_DNE; - } else -#ifdef KMP_TDATA_GTID - if (TCR_4(__kmp_gtid_mode) >= 3) { - KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n")); - gtid = __kmp_gtid; - } else -#endif - if (TCR_4(__kmp_gtid_mode) >= 2) { - KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n")); - gtid = __kmp_gtid_get_specific(); - } else { - KA_TRACE(1000, - ("*** __kmp_get_global_thread_id_reg: using internal alg.\n")); - gtid = __kmp_get_global_thread_id(); - } - - 
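// A hypothetical, simplified illustration of the "internal algorithm" used by
// __kmp_get_global_thread_id above: identify the calling thread by checking
// whether the address of a local variable falls inside a registered thread's
// stack window. Stacks grow down, so the window is [base - size, base].
#include <cstddef>

struct demo_thr { char *stack_base; size_t stack_size; };

static int demo_gtid_from_stack(const demo_thr *thr, int nthreads) {
  char local; // lives on the caller's stack
  char *addr = &local;
  for (int i = 0; i < nthreads; ++i) {
    char *base = thr[i].stack_base;
    if (addr <= base && (size_t)(base - addr) <= thr[i].stack_size)
      return i; // only the owning thread can be this close to the base
  }
  return -1; // unknown: fall back to thread-local storage
}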
/* we must be a new uber master sibling thread */ - if (gtid == KMP_GTID_DNE) { - KA_TRACE(10, - ("__kmp_get_global_thread_id_reg: Encountered new root thread. " - "Registering a new gtid.\n")); - __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); - if (!__kmp_init_serial) { - __kmp_do_serial_initialize(); - gtid = __kmp_gtid_get_specific(); - } else { - gtid = __kmp_register_root(FALSE); - } - __kmp_release_bootstrap_lock(&__kmp_initz_lock); - /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */ - } - - KMP_DEBUG_ASSERT(gtid >= 0); - - return gtid; -} - -/* caller must hold forkjoin_lock */ -void __kmp_check_stack_overlap(kmp_info_t *th) { - int f; - char *stack_beg = NULL; - char *stack_end = NULL; - int gtid; - - KA_TRACE(10, ("__kmp_check_stack_overlap: called\n")); - if (__kmp_storage_map) { - stack_end = (char *)th->th.th_info.ds.ds_stackbase; - stack_beg = stack_end - th->th.th_info.ds.ds_stacksize; - - gtid = __kmp_gtid_from_thread(th); - - if (gtid == KMP_GTID_MONITOR) { - __kmp_print_storage_map_gtid( - gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize, - "th_%s stack (%s)", "mon", - (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual"); - } else { - __kmp_print_storage_map_gtid( - gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize, - "th_%d stack (%s)", gtid, - (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual"); - } - } - - /* No point in checking ubermaster threads since they use refinement and - * cannot overlap */ - gtid = __kmp_gtid_from_thread(th); - if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) { - KA_TRACE(10, - ("__kmp_check_stack_overlap: performing extensive checking\n")); - if (stack_beg == NULL) { - stack_end = (char *)th->th.th_info.ds.ds_stackbase; - stack_beg = stack_end - th->th.th_info.ds.ds_stacksize; - } - - for (f = 0; f < __kmp_threads_capacity; f++) { - kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]); - - if (f_th && f_th != th) { - char *other_stack_end = - (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase); - char *other_stack_beg = - other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize); - if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) || - (stack_end > other_stack_beg && stack_end < other_stack_end)) { - - /* Print the other stack values before the abort */ - if (__kmp_storage_map) - __kmp_print_storage_map_gtid( - -1, other_stack_beg, other_stack_end, - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize), - "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th)); - - __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit), - __kmp_msg_null); - } - } - } - } - KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n")); -} - -/* ------------------------------------------------------------------------ */ - -void __kmp_infinite_loop(void) { - static int done = FALSE; - - while (!done) { - KMP_YIELD(1); - } -} - -#define MAX_MESSAGE 512 - -void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size, - char const *format, ...) 
{ - char buffer[MAX_MESSAGE]; - va_list ap; - - va_start(ap, format); - KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, - p2, (unsigned long)size, format); - __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock); - __kmp_vprintf(kmp_err, buffer, ap); -#if KMP_PRINT_DATA_PLACEMENT - int node; - if (gtid >= 0) { - if (p1 <= p2 && (char *)p2 - (char *)p1 == size) { - if (__kmp_storage_map_verbose) { - node = __kmp_get_host_node(p1); - if (node < 0) /* doesn't work, so don't try this next time */ - __kmp_storage_map_verbose = FALSE; - else { - char *last; - int lastNode; - int localProc = __kmp_get_cpu_from_gtid(gtid); - - const int page_size = KMP_GET_PAGE_SIZE(); - - p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1)); - p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1)); - if (localProc >= 0) - __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid, - localProc >> 1); - else - __kmp_printf_no_lock(" GTID %d\n", gtid); -#if KMP_USE_PRCTL - /* The more elaborate format is disabled for now because of the prctl - * hanging bug. */ - do { - last = p1; - lastNode = node; - /* This loop collates adjacent pages with the same host node. */ - do { - (char *)p1 += page_size; - } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode); - __kmp_printf_no_lock(" %p-%p memNode %d\n", last, (char *)p1 - 1, - lastNode); - } while (p1 <= p2); -#else - __kmp_printf_no_lock(" %p-%p memNode %d\n", p1, - (char *)p1 + (page_size - 1), - __kmp_get_host_node(p1)); - if (p1 < p2) { - __kmp_printf_no_lock(" %p-%p memNode %d\n", p2, - (char *)p2 + (page_size - 1), - __kmp_get_host_node(p2)); - } -#endif - } - } - } else - __kmp_printf_no_lock(" %s\n", KMP_I18N_STR(StorageMapWarning)); - } -#endif /* KMP_PRINT_DATA_PLACEMENT */ - __kmp_release_bootstrap_lock(&__kmp_stdio_lock); -} - -void __kmp_warn(char const *format, ...) { - char buffer[MAX_MESSAGE]; - va_list ap; - - if (__kmp_generate_warnings == kmp_warnings_off) { - return; - } - - va_start(ap, format); - - KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format); - __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock); - __kmp_vprintf(kmp_err, buffer, ap); - __kmp_release_bootstrap_lock(&__kmp_stdio_lock); - - va_end(ap); -} - -void __kmp_abort_process() { - // Later threads may stall here, but that's ok because abort() will kill them. - __kmp_acquire_bootstrap_lock(&__kmp_exit_lock); - - if (__kmp_debug_buf) { - __kmp_dump_debug_buffer(); - } - - if (KMP_OS_WINDOWS) { - // Let other threads know of abnormal termination and prevent deadlock - // if abort happened during library initialization or shutdown - __kmp_global.g.g_abort = SIGABRT; - - /* On Windows* OS by default abort() causes pop-up error box, which stalls - nightly testing. Unfortunately, we cannot reliably suppress pop-up error - boxes. _set_abort_behavior() works well, but this function is not - available in VS7 (this is not problem for DLL, but it is a problem for - static OpenMP RTL). SetErrorMode (and so, timelimit utility) does not - help, at least in some versions of MS C RTL. - - It seems following sequence is the only way to simulate abort() and - avoid pop-up error box. */ - raise(SIGABRT); - _exit(3); // Just in case, if signal ignored, exit anyway. - } else { - abort(); - } - - __kmp_infinite_loop(); - __kmp_release_bootstrap_lock(&__kmp_exit_lock); - -} // __kmp_abort_process - -void __kmp_abort_thread(void) { - // TODO: Eliminate g_abort global variable and this function. - // In case of abort just call abort(), it will kill all the threads. 
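// A small sketch of the pattern __kmp_warn above relies on (invented names):
// wrap the caller's format string in a fixed prefix, then forward the
// original varargs to vfprintf while holding a lock so that concurrent
// warnings do not interleave.
#include <cstdarg>
#include <cstdio>
#include <mutex>

static std::mutex demo_io_lock; // stands in for __kmp_stdio_lock

static void demo_warn(const char *format, ...) {
  char buffer[512];
  std::snprintf(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  va_list ap;
  va_start(ap, format);
  {
    std::lock_guard<std::mutex> guard(demo_io_lock);
    std::vfprintf(stderr, buffer, ap);
  }
  va_end(ap);
}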
- __kmp_infinite_loop(); -} // __kmp_abort_thread - -/* Print out the storage map for the major kmp_info_t thread data structures - that are allocated together. */ - -static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) { - __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", - gtid); - - __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team, - sizeof(kmp_desc_t), "th_%d.th_info", gtid); - - __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head, - sizeof(kmp_local_t), "th_%d.th_local", gtid); - - __kmp_print_storage_map_gtid( - gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier], - sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid); - - __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier], - &thr->th.th_bar[bs_plain_barrier + 1], - sizeof(kmp_balign_t), "th_%d.th_bar[plain]", - gtid); - - __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier], - &thr->th.th_bar[bs_forkjoin_barrier + 1], - sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", - gtid); - -#if KMP_FAST_REDUCTION_BARRIER - __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier], - &thr->th.th_bar[bs_reduction_barrier + 1], - sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", - gtid); -#endif // KMP_FAST_REDUCTION_BARRIER -} - -/* Print out the storage map for the major kmp_team_t team data structures - that are allocated together. */ - -static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team, - int team_id, int num_thr) { - int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2; - __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d", - header, team_id); - - __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0], - &team->t.t_bar[bs_last_barrier], - sizeof(kmp_balign_team_t) * bs_last_barrier, - "%s_%d.t_bar", header, team_id); - - __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier], - &team->t.t_bar[bs_plain_barrier + 1], - sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", - header, team_id); - - __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier], - &team->t.t_bar[bs_forkjoin_barrier + 1], - sizeof(kmp_balign_team_t), - "%s_%d.t_bar[forkjoin]", header, team_id); - -#if KMP_FAST_REDUCTION_BARRIER - __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier], - &team->t.t_bar[bs_reduction_barrier + 1], - sizeof(kmp_balign_team_t), - "%s_%d.t_bar[reduction]", header, team_id); -#endif // KMP_FAST_REDUCTION_BARRIER - - __kmp_print_storage_map_gtid( - -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr], - sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id); - - __kmp_print_storage_map_gtid( - -1, &team->t.t_threads[0], &team->t.t_threads[num_thr], - sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id); - - __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0], - &team->t.t_disp_buffer[num_disp_buff], - sizeof(dispatch_shared_info_t) * num_disp_buff, - "%s_%d.t_disp_buffer", header, team_id); - - __kmp_print_storage_map_gtid(-1, &team->t.t_taskq, &team->t.t_copypriv_data, - sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, - team_id); -} - -static void __kmp_init_allocator() { -#if OMP_50_ENABLED - __kmp_init_memkind(); -#endif -} -static void __kmp_fini_allocator() { -#if OMP_50_ENABLED - __kmp_fini_memkind(); -#endif -} - -/* ------------------------------------------------------------------------ */ - -#if KMP_DYNAMIC_LIB -#if KMP_OS_WINDOWS - -static 
void __kmp_reset_lock(kmp_bootstrap_lock_t *lck) { - // TODO: Change to __kmp_break_bootstrap_lock(). - __kmp_init_bootstrap_lock(lck); // make the lock released -} - -static void __kmp_reset_locks_on_process_detach(int gtid_req) { - int i; - int thread_count; - - // PROCESS_DETACH is expected to be called by a thread that executes - // ProcessExit() or FreeLibrary(). OS terminates other threads (except the one - // calling ProcessExit or FreeLibrary). So, it might be safe to access the - // __kmp_threads[] without taking the forkjoin_lock. However, in fact, some - // threads can be still alive here, although being about to be terminated. The - // threads in the array with ds_thread==0 are most suspicious. Actually, it - // can be not safe to access the __kmp_threads[]. - - // TODO: does it make sense to check __kmp_roots[] ? - - // Let's check that there are no other alive threads registered with the OMP - // lib. - while (1) { - thread_count = 0; - for (i = 0; i < __kmp_threads_capacity; ++i) { - if (!__kmp_threads) - continue; - kmp_info_t *th = __kmp_threads[i]; - if (th == NULL) - continue; - int gtid = th->th.th_info.ds.ds_gtid; - if (gtid == gtid_req) - continue; - if (gtid < 0) - continue; - DWORD exit_val; - int alive = __kmp_is_thread_alive(th, &exit_val); - if (alive) { - ++thread_count; - } - } - if (thread_count == 0) - break; // success - } - - // Assume that I'm alone. Now it might be safe to check and reset locks. - // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset. - __kmp_reset_lock(&__kmp_forkjoin_lock); -#ifdef KMP_DEBUG - __kmp_reset_lock(&__kmp_stdio_lock); -#endif // KMP_DEBUG -} - -BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) { - //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); - - switch (fdwReason) { - - case DLL_PROCESS_ATTACH: - KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n")); - - return TRUE; - - case DLL_PROCESS_DETACH: - KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific())); - - if (lpReserved != NULL) { - // lpReserved is used for telling the difference: - // lpReserved == NULL when FreeLibrary() was called, - // lpReserved != NULL when the process terminates. - // When FreeLibrary() is called, worker threads remain alive. So they will - // release the forkjoin lock by themselves. When the process terminates, - // worker threads disappear triggering the problem of unreleased forkjoin - // lock as described below. - - // A worker thread can take the forkjoin lock. The problem comes up if - // that worker thread becomes dead before it releases the forkjoin lock. - // The forkjoin lock remains taken, while the thread executing - // DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below will try - // to take the forkjoin lock and will always fail, so that the application - // will never finish [normally]. This scenario is possible if - // __kmpc_end() has not been executed. It looks like it's not a corner - // case, but common cases: - // - the main function was compiled by an alternative compiler; - // - the main function was compiled by icl but without /Qopenmp - // (application with plugins); - // - application terminates by calling C exit(), Fortran CALL EXIT() or - // Fortran STOP. - // - alive foreign thread prevented __kmpc_end from doing cleanup. - // - // This is a hack to work around the problem. - // TODO: !!! figure out something better. 
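// A skeletal illustration (not the runtime's full logic) of the lpReserved
// convention discussed above: NULL means FreeLibrary() is unloading the DLL
// while worker threads are still alive and can release their own locks;
// non-NULL means the process is terminating, the OS has already killed those
// threads, and any locks they held must be forcibly reset first.
#include <windows.h>

BOOL WINAPI DemoDllMain(HINSTANCE, DWORD fdwReason, LPVOID lpReserved) {
  if (fdwReason == DLL_PROCESS_DETACH) {
    if (lpReserved != NULL) {
      // Process exit: dead threads may still "own" locks.
      // demo_reset_orphaned_locks(); // hypothetical helper
    }
    // demo_shutdown_library(); // hypothetical helper
  }
  return TRUE;
}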
- __kmp_reset_locks_on_process_detach(__kmp_gtid_get_specific()); - } - - __kmp_internal_end_library(__kmp_gtid_get_specific()); - - return TRUE; - - case DLL_THREAD_ATTACH: - KA_TRACE(10, ("DllMain: THREAD_ATTACH\n")); - - /* if we want to register new siblings all the time here call - * __kmp_get_gtid(); */ - return TRUE; - - case DLL_THREAD_DETACH: - KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific())); - - __kmp_internal_end_thread(__kmp_gtid_get_specific()); - return TRUE; - } - - return TRUE; -} - -#endif /* KMP_OS_WINDOWS */ -#endif /* KMP_DYNAMIC_LIB */ - -/* Change the library type to "status" and return the old type */ -/* called from within initialization routines where __kmp_initz_lock is held */ -int __kmp_change_library(int status) { - int old_status; - - old_status = __kmp_yield_init & - 1; // check whether KMP_LIBRARY=throughput (even init count) - - if (status) { - __kmp_yield_init |= 1; // throughput => turnaround (odd init count) - } else { - __kmp_yield_init &= ~1; // turnaround => throughput (even init count) - } - - return old_status; // return previous setting of whether - // KMP_LIBRARY=throughput -} - -/* __kmp_parallel_deo -- Wait until it's our turn. */ -void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) { - int gtid = *gtid_ref; -#ifdef BUILD_PARALLEL_ORDERED - kmp_team_t *team = __kmp_team_from_gtid(gtid); -#endif /* BUILD_PARALLEL_ORDERED */ - - if (__kmp_env_consistency_check) { - if (__kmp_threads[gtid]->th.th_root->r.r_active) -#if KMP_USE_DYNAMIC_LOCK - __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0); -#else - __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL); -#endif - } -#ifdef BUILD_PARALLEL_ORDERED - if (!team->t.t_serialized) { - KMP_MB(); - KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), - KMP_EQ, NULL); - KMP_MB(); - } -#endif /* BUILD_PARALLEL_ORDERED */ -} - -/* __kmp_parallel_dxo -- Signal the next task. */ -void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) { - int gtid = *gtid_ref; -#ifdef BUILD_PARALLEL_ORDERED - int tid = __kmp_tid_from_gtid(gtid); - kmp_team_t *team = __kmp_team_from_gtid(gtid); -#endif /* BUILD_PARALLEL_ORDERED */ - - if (__kmp_env_consistency_check) { - if (__kmp_threads[gtid]->th.th_root->r.r_active) - __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref); - } -#ifdef BUILD_PARALLEL_ORDERED - if (!team->t.t_serialized) { - KMP_MB(); /* Flush all pending memory write invalidates. */ - - /* use the tid of the next thread in this team */ - /* TODO replace with general release procedure */ - team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc); - - KMP_MB(); /* Flush all pending memory write invalidates. */ - } -#endif /* BUILD_PARALLEL_ORDERED */ -} - -/* ------------------------------------------------------------------------ */ -/* The BARRIER for a SINGLE process section is always explicit */ - -int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) { - int status; - kmp_info_t *th; - kmp_team_t *team; - - if (!TCR_4(__kmp_init_parallel)) - __kmp_parallel_initialize(); - - th = __kmp_threads[gtid]; - team = th->th.th_team; - status = 0; - - th->th.th_ident = id_ref; - - if (team->t.t_serialized) { - status = 1; - } else { - kmp_int32 old_this = th->th.th_local.this_construct; - - ++th->th.th_local.this_construct; - /* try to set team count to thread count--success means thread got the - single block */ - /* TODO: Should this be acquire or release? 
*/ - if (team->t.t_construct == old_this) { - status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this, - th->th.th_local.this_construct); - } -#if USE_ITT_BUILD - if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && - KMP_MASTER_GTID(gtid) && -#if OMP_40_ENABLED - th->th.th_teams_microtask == NULL && -#endif - team->t.t_active_level == - 1) { // Only report metadata by master of active team at level 1 - __kmp_itt_metadata_single(id_ref); - } -#endif /* USE_ITT_BUILD */ - } - - if (__kmp_env_consistency_check) { - if (status && push_ws) { - __kmp_push_workshare(gtid, ct_psingle, id_ref); - } else { - __kmp_check_workshare(gtid, ct_psingle, id_ref); - } - } -#if USE_ITT_BUILD - if (status) { - __kmp_itt_single_start(gtid); - } -#endif /* USE_ITT_BUILD */ - return status; -} - -void __kmp_exit_single(int gtid) { -#if USE_ITT_BUILD - __kmp_itt_single_end(gtid); -#endif /* USE_ITT_BUILD */ - if (__kmp_env_consistency_check) - __kmp_pop_workshare(gtid, ct_psingle, NULL); -} - -/* determine if we can go parallel or must use a serialized parallel region and - * how many threads we can use - * set_nproc is the number of threads requested for the team - * returns 0 if we should serialize or only use one thread, - * otherwise the number of threads to use - * The forkjoin lock is held by the caller. */ -static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team, - int master_tid, int set_nthreads -#if OMP_40_ENABLED - , - int enter_teams -#endif /* OMP_40_ENABLED */ - ) { - int capacity; - int new_nthreads; - KMP_DEBUG_ASSERT(__kmp_init_serial); - KMP_DEBUG_ASSERT(root && parent_team); - - // If dyn-var is set, dynamically adjust the number of desired threads, - // according to the method specified by dynamic_mode. - new_nthreads = set_nthreads; - if (!get__dynamic_2(parent_team, master_tid)) { - ; - } -#ifdef USE_LOAD_BALANCE - else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) { - new_nthreads = __kmp_load_balance_nproc(root, set_nthreads); - if (new_nthreads == 1) { - KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced " - "reservation to 1 thread\n", - master_tid)); - return 1; - } - if (new_nthreads < set_nthreads) { - KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced " - "reservation to %d threads\n", - master_tid, new_nthreads)); - } - } -#endif /* USE_LOAD_BALANCE */ - else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) { - new_nthreads = __kmp_avail_proc - __kmp_nth + - (root->r.r_active ? 
1 : root->r.r_hot_team->t.t_nproc); - if (new_nthreads <= 1) { - KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced " - "reservation to 1 thread\n", - master_tid)); - return 1; - } - if (new_nthreads < set_nthreads) { - KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced " - "reservation to %d threads\n", - master_tid, new_nthreads)); - } else { - new_nthreads = set_nthreads; - } - } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) { - if (set_nthreads > 2) { - new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]); - new_nthreads = (new_nthreads % set_nthreads) + 1; - if (new_nthreads == 1) { - KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced " - "reservation to 1 thread\n", - master_tid)); - return 1; - } - if (new_nthreads < set_nthreads) { - KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced " - "reservation to %d threads\n", - master_tid, new_nthreads)); - } - } - } else { - KMP_ASSERT(0); - } - - // Respect KMP_ALL_THREADS/KMP_DEVICE_THREAD_LIMIT. - if (__kmp_nth + new_nthreads - - (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) > - __kmp_max_nth) { - int tl_nthreads = __kmp_max_nth - __kmp_nth + - (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc); - if (tl_nthreads <= 0) { - tl_nthreads = 1; - } - - // If dyn-var is false, emit a 1-time warning. - if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) { - __kmp_reserve_warn = 1; - __kmp_msg(kmp_ms_warning, - KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads), - KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null); - } - if (tl_nthreads == 1) { - KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT " - "reduced reservation to 1 thread\n", - master_tid)); - return 1; - } - KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced " - "reservation to %d threads\n", - master_tid, tl_nthreads)); - new_nthreads = tl_nthreads; - } - - // Respect OMP_THREAD_LIMIT - if (root->r.r_cg_nthreads + new_nthreads - - (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) > - __kmp_cg_max_nth) { - int tl_nthreads = __kmp_cg_max_nth - root->r.r_cg_nthreads + - (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc); - if (tl_nthreads <= 0) { - tl_nthreads = 1; - } - - // If dyn-var is false, emit a 1-time warning. - if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) { - __kmp_reserve_warn = 1; - __kmp_msg(kmp_ms_warning, - KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads), - KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null); - } - if (tl_nthreads == 1) { - KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT " - "reduced reservation to 1 thread\n", - master_tid)); - return 1; - } - KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced " - "reservation to %d threads\n", - master_tid, tl_nthreads)); - new_nthreads = tl_nthreads; - } - - // Check if the threads array is large enough, or needs expanding. - // See comment in __kmp_register_root() about the adjustment if - // __kmp_threads[0] == NULL. - capacity = __kmp_threads_capacity; - if (TCR_PTR(__kmp_threads[0]) == NULL) { - --capacity; - } - if (__kmp_nth + new_nthreads - - (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) > - capacity) { - // Expand the threads array. - int slotsRequired = __kmp_nth + new_nthreads - - (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) - - capacity; - int slotsAdded = __kmp_expand_threads(slotsRequired); - if (slotsAdded < slotsRequired) { - // The threads array was not expanded enough. 
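// The limit checks above (KMP_DEVICE_THREAD_LIMIT, OMP_THREAD_LIMIT, and the
// array-capacity check) all share one shape; this hypothetical helper shows
// it in isolation. in_use counts threads that already exist, reusable is the
// master/hot-team adjustment, and the grant is clamped so that
// in_use + granted - reusable never exceeds the limit.
static int demo_clamp_reservation(int requested, int in_use, int reusable,
                                  int limit) {
  if (in_use + requested - reusable <= limit)
    return requested;
  int granted = limit - in_use + reusable;
  return granted > 0 ? granted : 1; // always leave room for the master
}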
-      new_nthreads -= (slotsRequired - slotsAdded);
-      KMP_ASSERT(new_nthreads >= 1);
-
-      // If dyn-var is false, emit a 1-time warning.
-      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
-        __kmp_reserve_warn = 1;
-        if (__kmp_tp_cached) {
-          __kmp_msg(kmp_ms_warning,
-                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
-                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
-                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
-        } else {
-          __kmp_msg(kmp_ms_warning,
-                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
-                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
-        }
-      }
-    }
-  }
-
-#ifdef KMP_DEBUG
-  if (new_nthreads == 1) {
-    KC_TRACE(10,
-             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
-              "dead roots and rechecking; requested %d threads\n",
-              __kmp_get_gtid(), set_nthreads));
-  } else {
-    KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
-                  " %d threads\n",
-                  __kmp_get_gtid(), new_nthreads, set_nthreads));
-  }
-#endif // KMP_DEBUG
-  return new_nthreads;
-}
-
-/* Allocate threads from the thread pool and assign them to the new team. We
-   are guaranteed that there are enough threads available, because we checked
-   earlier inside the forkjoin critical section. */
-static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
-                                    kmp_info_t *master_th, int master_gtid) {
-  int i;
-  int use_hot_team;
-
-  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
-  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
-  KMP_MB();
-
-  /* first, let's setup the master thread */
-  master_th->th.th_info.ds.ds_tid = 0;
-  master_th->th.th_team = team;
-  master_th->th.th_team_nproc = team->t.t_nproc;
-  master_th->th.th_team_master = master_th;
-  master_th->th.th_team_serialized = FALSE;
-  master_th->th.th_dispatch = &team->t.t_dispatch[0];
-
-/* make sure we are not the optimized hot team */
-#if KMP_NESTED_HOT_TEAMS
-  use_hot_team = 0;
-  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
-  if (hot_teams) { // hot teams array is not allocated if
-    // KMP_HOT_TEAMS_MAX_LEVEL=0
-    int level = team->t.t_active_level - 1; // index in array of hot teams
-    if (master_th->th.th_teams_microtask) { // are we inside the teams?
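-      // Hot-team index sketch (a hedged reading of the adjustments below): a
-      // parallel at active level 2 starts from level = 1; nested inside a
-      // league with nteams > 1 it is bumped once for the team of masters, and
-      // once more for the team of workers before the parallel, so the same
-      // construct can land on index 2 or 3 depending on the teams shape.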
- if (master_th->th.th_teams_size.nteams > 1) { - ++level; // level was not increased in teams construct for - // team_of_masters - } - if (team->t.t_pkfn != (microtask_t)__kmp_teams_master && - master_th->th.th_teams_level == team->t.t_level) { - ++level; // level was not increased in teams construct for - // team_of_workers before the parallel - } // team->t.t_level will be increased inside parallel - } - if (level < __kmp_hot_teams_max_level) { - if (hot_teams[level].hot_team) { - // hot team has already been allocated for given level - KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team); - use_hot_team = 1; // the team is ready to use - } else { - use_hot_team = 0; // AC: threads are not allocated yet - hot_teams[level].hot_team = team; // remember new hot team - hot_teams[level].hot_team_nth = team->t.t_nproc; - } - } else { - use_hot_team = 0; - } - } -#else - use_hot_team = team == root->r.r_hot_team; -#endif - if (!use_hot_team) { - - /* install the master thread */ - team->t.t_threads[0] = master_th; - __kmp_initialize_info(master_th, team, 0, master_gtid); - - /* now, install the worker threads */ - for (i = 1; i < team->t.t_nproc; i++) { - - /* fork or reallocate a new thread and install it in team */ - kmp_info_t *thr = __kmp_allocate_thread(root, team, i); - team->t.t_threads[i] = thr; - KMP_DEBUG_ASSERT(thr); - KMP_DEBUG_ASSERT(thr->th.th_team == team); - /* align team and thread arrived states */ - KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived " - "T#%d(%d:%d) join =%llu, plain=%llu\n", - __kmp_gtid_from_tid(0, team), team->t.t_id, 0, - __kmp_gtid_from_tid(i, team), team->t.t_id, i, - team->t.t_bar[bs_forkjoin_barrier].b_arrived, - team->t.t_bar[bs_plain_barrier].b_arrived)); -#if OMP_40_ENABLED - thr->th.th_teams_microtask = master_th->th.th_teams_microtask; - thr->th.th_teams_level = master_th->th.th_teams_level; - thr->th.th_teams_size = master_th->th.th_teams_size; -#endif - { // Initialize threads' barrier data. - int b; - kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar; - for (b = 0; b < bs_last_barrier; ++b) { - balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; - KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); -#if USE_DEBUGGER - balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; -#endif - } - } - } - -#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED - __kmp_partition_places(team); -#endif - } - -#if OMP_50_ENABLED - if (__kmp_display_affinity && team->t.t_display_affinity != 1) { - for (i = 0; i < team->t.t_nproc; i++) { - kmp_info_t *thr = team->t.t_threads[i]; - if (thr->th.th_prev_num_threads != team->t.t_nproc || - thr->th.th_prev_level != team->t.t_level) { - team->t.t_display_affinity = 1; - break; - } - } - } -#endif - - KMP_MB(); -} - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 -// Propagate any changes to the floating point control registers out to the team -// We try to avoid unnecessary writes to the relevant cache line in the team -// structure, so we don't make changes unless they are needed. -inline static void propagateFPControl(kmp_team_t *team) { - if (__kmp_inherit_fp_control) { - kmp_int16 x87_fpu_control_word; - kmp_uint32 mxcsr; - - // Get master values of FPU control flags (both X87 and vector) - __kmp_store_x87_fpu_control_word(&x87_fpu_control_word); - __kmp_store_mxcsr(&mxcsr); - mxcsr &= KMP_X86_MXCSR_MASK; - - // There is no point looking at t_fp_control_saved here. - // If it is TRUE, we still have to update the values if they are different - // from those we now have. 
If it is FALSE we didn't save anything yet, but
-    // our objective is the same. We have to ensure that the values in the team
-    // are the same as those we have.
-    // So, this code achieves what we need whether or not t_fp_control_saved is
-    // true. By checking whether the value needs updating we avoid unnecessary
-    // writes that would put the cache-line into a written state, causing all
-    // threads in the team to have to read it again.
-    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
-    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
-    // Although we don't use this value, other code in the runtime wants to know
-    // whether it should restore them. So we must ensure it is correct.
-    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
-  } else {
-    // Similarly here. Don't write to this cache-line in the team structure
-    // unless we have to.
-    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
-  }
-}
-
-// Do the opposite, setting the hardware registers to the updated values from
-// the team.
-inline static void updateHWFPControl(kmp_team_t *team) {
-  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
-    // Only reset the fp control regs if they have been changed in the team by
-    // the parallel region that we are exiting.
-    kmp_int16 x87_fpu_control_word;
-    kmp_uint32 mxcsr;
-    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
-    __kmp_store_mxcsr(&mxcsr);
-    mxcsr &= KMP_X86_MXCSR_MASK;
-
-    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
-      __kmp_clear_x87_fpu_status_word();
-      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
-    }
-
-    if (team->t.t_mxcsr != mxcsr) {
-      __kmp_load_mxcsr(&team->t.t_mxcsr);
-    }
-  }
-}
-#else
-#define propagateFPControl(x) ((void)0)
-#define updateHWFPControl(x) ((void)0)
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
-static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
-                                     int realloc); // forward declaration
-
-/* Run a parallel region that has been serialized, so it runs only in a team
-   of the single master thread. */
-void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
-  kmp_info_t *this_thr;
-  kmp_team_t *serial_team;
-
-  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));
-
-  /* Skip all this code for autopar serialized loops since it results in
-     unacceptable overhead */
-  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
-    return;
-
-  if (!TCR_4(__kmp_init_parallel))
-    __kmp_parallel_initialize();
-
-  this_thr = __kmp_threads[global_tid];
-  serial_team = this_thr->th.th_serial_team;
-
-  /* utilize the serialized team held by this thread */
-  KMP_DEBUG_ASSERT(serial_team);
-  KMP_MB();
-
-  if (__kmp_tasking_mode != tskm_immediate_exec) {
-    KMP_DEBUG_ASSERT(
-        this_thr->th.th_task_team ==
-        this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
-    KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
-                     NULL);
-    KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
-                  "team %p, new task_team = NULL\n",
-                  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
-    this_thr->th.th_task_team = NULL;
-  }
-
-#if OMP_40_ENABLED
-  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
-  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
-    proc_bind = proc_bind_false;
-  } else if (proc_bind == proc_bind_default) {
-    // No proc_bind clause was specified, so use the current value
-    // of proc-bind-var for this parallel region.
-    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
-  }
-  // Reset for next parallel region
-  this_thr->th.th_set_proc_bind = proc_bind_default;
-#endif /* OMP_40_ENABLED */
-
-#if OMPT_SUPPORT
-  ompt_data_t ompt_parallel_data = ompt_data_none;
-  ompt_data_t *implicit_task_data;
-  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
-  if (ompt_enabled.enabled &&
-      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
-
-    ompt_task_info_t *parent_task_info;
-    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
-
-    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
-    if (ompt_enabled.ompt_callback_parallel_begin) {
-      int team_size = 1;
-
-      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
-          &(parent_task_info->task_data), &(parent_task_info->frame),
-          &ompt_parallel_data, team_size, ompt_parallel_invoker_program,
-          codeptr);
-    }
-  }
-#endif // OMPT_SUPPORT
-
-  if (this_thr->th.th_team != serial_team) {
-    // Nested level will be an index in the nested nthreads array
-    int level = this_thr->th.th_team->t.t_level;
-
-    if (serial_team->t.t_serialized) {
-      /* this serial team was already used
-         TODO increase performance by making these locks more specific */
-      kmp_team_t *new_team;
-
-      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
-
-      new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
-#if OMPT_SUPPORT
-                                     ompt_parallel_data,
-#endif
-#if OMP_40_ENABLED
-                                     proc_bind,
-#endif
-                                     &this_thr->th.th_current_task->td_icvs,
-                                     0 USE_NESTED_HOT_ARG(NULL));
-      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
-      KMP_ASSERT(new_team);
-
-      /* setup new serialized team and install it */
-      new_team->t.t_threads[0] = this_thr;
-      new_team->t.t_parent = this_thr->th.th_team;
-      serial_team = new_team;
-      this_thr->th.th_serial_team = serial_team;
-
-      KF_TRACE(
-          10,
-          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
-           global_tid, serial_team));
-
-      /* TODO the above breaks the requirement that if we run out of resources,
-         then we can still guarantee that serialized teams are ok, since we may
-         need to allocate a new one */
-    } else {
-      KF_TRACE(
-          10,
-          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
-           global_tid, serial_team));
-    }
-
-    /* we have to initialize this serial team */
-    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
-    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
-    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
-    serial_team->t.t_ident = loc;
-    serial_team->t.t_serialized = 1;
-    serial_team->t.t_nproc = 1;
-    serial_team->t.t_parent = this_thr->th.th_team;
-    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
-    this_thr->th.th_team = serial_team;
-    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
-
-    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
-                  this_thr->th.th_current_task));
-    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
-    this_thr->th.th_current_task->td_flags.executing = 0;
-
-    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);
-
-    /* TODO: GEH: do ICVs work for nested serialized teams? Don't we need an
-       implicit task for each serialized task represented by
-       team->t.t_serialized?
*/ - copy_icvs(&this_thr->th.th_current_task->td_icvs, - &this_thr->th.th_current_task->td_parent->td_icvs); - - // Thread value exists in the nested nthreads array for the next nested - // level - if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) { - this_thr->th.th_current_task->td_icvs.nproc = - __kmp_nested_nth.nth[level + 1]; - } - -#if OMP_40_ENABLED - if (__kmp_nested_proc_bind.used && - (level + 1 < __kmp_nested_proc_bind.used)) { - this_thr->th.th_current_task->td_icvs.proc_bind = - __kmp_nested_proc_bind.bind_types[level + 1]; - } -#endif /* OMP_40_ENABLED */ - -#if USE_DEBUGGER - serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger. -#endif - this_thr->th.th_info.ds.ds_tid = 0; - - /* set thread cache values */ - this_thr->th.th_team_nproc = 1; - this_thr->th.th_team_master = this_thr; - this_thr->th.th_team_serialized = 1; - - serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1; - serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level; -#if OMP_50_ENABLED - serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save -#endif - - propagateFPControl(serial_team); - - /* check if we need to allocate dispatch buffers stack */ - KMP_DEBUG_ASSERT(serial_team->t.t_dispatch); - if (!serial_team->t.t_dispatch->th_disp_buffer) { - serial_team->t.t_dispatch->th_disp_buffer = - (dispatch_private_info_t *)__kmp_allocate( - sizeof(dispatch_private_info_t)); - } - this_thr->th.th_dispatch = serial_team->t.t_dispatch; - - KMP_MB(); - - } else { - /* this serialized team is already being used, - * that's fine, just add another nested level */ - KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team); - KMP_DEBUG_ASSERT(serial_team->t.t_threads); - KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); - ++serial_team->t.t_serialized; - this_thr->th.th_team_serialized = serial_team->t.t_serialized; - - // Nested level will be an index in the nested nthreads array - int level = this_thr->th.th_team->t.t_level; - // Thread value exists in the nested nthreads array for the next nested - // level - if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) { - this_thr->th.th_current_task->td_icvs.nproc = - __kmp_nested_nth.nth[level + 1]; - } - serial_team->t.t_level++; - KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level " - "of serial team %p to %d\n", - global_tid, serial_team, serial_team->t.t_level)); - - /* allocate/push dispatch buffers stack */ - KMP_DEBUG_ASSERT(serial_team->t.t_dispatch); - { - dispatch_private_info_t *disp_buffer = - (dispatch_private_info_t *)__kmp_allocate( - sizeof(dispatch_private_info_t)); - disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer; - serial_team->t.t_dispatch->th_disp_buffer = disp_buffer; - } - this_thr->th.th_dispatch = serial_team->t.t_dispatch; - - KMP_MB(); - } -#if OMP_40_ENABLED - KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq); -#endif - -#if OMP_50_ENABLED - // Perform the display affinity functionality for - // serialized parallel regions - if (__kmp_display_affinity) { - if (this_thr->th.th_prev_level != serial_team->t.t_level || - this_thr->th.th_prev_num_threads != 1) { - // NULL means use the affinity-format-var ICV - __kmp_aux_display_affinity(global_tid, NULL); - this_thr->th.th_prev_level = serial_team->t.t_level; - this_thr->th.th_prev_num_threads = 1; - } - } -#endif - - if (__kmp_env_consistency_check) - __kmp_push_parallel(global_tid, NULL); -#if OMPT_SUPPORT - 
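-  // Sketch of the tool-facing protocol below (simplified paraphrase of the
-  // calls that follow, not new behavior): even a serialized region is shown
-  // to an OMPT tool as a real one, roughly
-  //   __ompt_lw_taskteam_init(&lw, this_thr, global_tid, &data, codeptr);
-  //   __ompt_lw_taskteam_link(&lw, this_thr, 1); // swaps content into place
-  //   ompt_callback_implicit_task(ompt_scope_begin, ...);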
serial_team->t.ompt_team_info.master_return_address = codeptr; - if (ompt_enabled.enabled && - this_thr->th.ompt_thread_info.state != ompt_state_overhead) { - OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - - ompt_lw_taskteam_t lw_taskteam; - __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid, - &ompt_parallel_data, codeptr); - - __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1); - // don't use lw_taskteam after linking. content was swaped - - /* OMPT implicit task begin */ - implicit_task_data = OMPT_CUR_TASK_DATA(this_thr); - if (ompt_enabled.ompt_callback_implicit_task) { - ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( - ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr), - OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid), ompt_task_implicit); // TODO: Can this be ompt_task_initial? - OMPT_CUR_TASK_INFO(this_thr) - ->thread_num = __kmp_tid_from_gtid(global_tid); - } - - /* OMPT state */ - this_thr->th.ompt_thread_info.state = ompt_state_work_parallel; - OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } -#endif -} - -/* most of the work for a fork */ -/* return true if we really went parallel, false if serialized */ -int __kmp_fork_call(ident_t *loc, int gtid, - enum fork_context_e call_context, // Intel, GNU, ... - kmp_int32 argc, microtask_t microtask, launch_t invoker, -/* TODO: revert workaround for Intel(R) 64 tracker #96 */ -#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX - va_list *ap -#else - va_list ap -#endif - ) { - void **argv; - int i; - int master_tid; - int master_this_cons; - kmp_team_t *team; - kmp_team_t *parent_team; - kmp_info_t *master_th; - kmp_root_t *root; - int nthreads; - int master_active; - int master_set_numthreads; - int level; -#if OMP_40_ENABLED - int active_level; - int teams_level; -#endif -#if KMP_NESTED_HOT_TEAMS - kmp_hot_team_ptr_t **p_hot_teams; -#endif - { // KMP_TIME_BLOCK - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call); - KMP_COUNT_VALUE(OMP_PARALLEL_args, argc); - - KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid)); - if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) { - /* Some systems prefer the stack for the root thread(s) to start with */ - /* some gap from the parent stack to prevent false sharing. 
*/ - void *dummy = KMP_ALLOCA(__kmp_stkpadding); - /* These 2 lines below are so this does not get optimized out */ - if (__kmp_stkpadding > KMP_MAX_STKPADDING) - __kmp_stkpadding += (short)((kmp_int64)dummy); - } - - /* initialize if needed */ - KMP_DEBUG_ASSERT( - __kmp_init_serial); // AC: potentially unsafe, not in sync with shutdown - if (!TCR_4(__kmp_init_parallel)) - __kmp_parallel_initialize(); - - /* setup current data */ - master_th = __kmp_threads[gtid]; // AC: potentially unsafe, not in sync with - // shutdown - parent_team = master_th->th.th_team; - master_tid = master_th->th.th_info.ds.ds_tid; - master_this_cons = master_th->th.th_local.this_construct; - root = master_th->th.th_root; - master_active = root->r.r_active; - master_set_numthreads = master_th->th.th_set_nproc; - -#if OMPT_SUPPORT - ompt_data_t ompt_parallel_data = ompt_data_none; - ompt_data_t *parent_task_data; - ompt_frame_t *ompt_frame; - ompt_data_t *implicit_task_data; - void *return_address = NULL; - - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame, - NULL, NULL); - return_address = OMPT_LOAD_RETURN_ADDRESS(gtid); - } -#endif - - // Nested level will be an index in the nested nthreads array - level = parent_team->t.t_level; - // used to launch non-serial teams even if nested is not allowed - active_level = parent_team->t.t_active_level; -#if OMP_40_ENABLED - // needed to check nesting inside the teams - teams_level = master_th->th.th_teams_level; -#endif -#if KMP_NESTED_HOT_TEAMS - p_hot_teams = &master_th->th.th_hot_teams; - if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) { - *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate( - sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level); - (*p_hot_teams)[0].hot_team = root->r.r_hot_team; - // it is either actual or not needed (when active_level > 0) - (*p_hot_teams)[0].hot_team_nth = 1; - } -#endif - -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - if (ompt_enabled.ompt_callback_parallel_begin) { - int team_size = master_set_numthreads - ? master_set_numthreads - : get__nproc_2(parent_team, master_tid); - ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)( - parent_task_data, ompt_frame, &ompt_parallel_data, team_size, - OMPT_INVOKER(call_context), return_address); - } - master_th->th.ompt_thread_info.state = ompt_state_overhead; - } -#endif - - master_th->th.th_ident = loc; - -#if OMP_40_ENABLED - if (master_th->th.th_teams_microtask && ap && - microtask != (microtask_t)__kmp_teams_master && level == teams_level) { - // AC: This is start of parallel that is nested inside teams construct. - // The team is actual (hot), all workers are ready at the fork barrier. - // No lock needed to initialize the team a bit, then free workers. 
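-    // In outline (condensed restatement of the branch below, not new logic):
-    // reuse the hot parent_team, marshal the outlined args into t.t_argv,
-    // then either take the lw-taskteam path when the parent is the serial
-    // team, or
-    //   __kmp_internal_fork(loc, gtid, parent_team); // release the workers
-    //   parent_team->t.t_invoke(gtid);               // master runs microtask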
- parent_team->t.t_ident = loc; - __kmp_alloc_argv_entries(argc, parent_team, TRUE); - parent_team->t.t_argc = argc; - argv = (void **)parent_team->t.t_argv; - for (i = argc - 1; i >= 0; --i) -/* TODO: revert workaround for Intel(R) 64 tracker #96 */ -#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX - *argv++ = va_arg(*ap, void *); -#else - *argv++ = va_arg(ap, void *); -#endif - // Increment our nested depth levels, but not increase the serialization - if (parent_team == master_th->th.th_serial_team) { - // AC: we are in serialized parallel - __kmpc_serialized_parallel(loc, gtid); - KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1); - // AC: need this in order enquiry functions work - // correctly, will restore at join time - parent_team->t.t_serialized--; -#if OMPT_SUPPORT - void *dummy; - void **exit_runtime_p; - - ompt_lw_taskteam_t lw_taskteam; - - if (ompt_enabled.enabled) { - __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, - &ompt_parallel_data, return_address); - exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr); - - __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0); - // don't use lw_taskteam after linking. content was swaped - - /* OMPT implicit task begin */ - implicit_task_data = OMPT_CUR_TASK_DATA(master_th); - if (ompt_enabled.ompt_callback_implicit_task) { - ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( - ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), - implicit_task_data, 1, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial? - OMPT_CUR_TASK_INFO(master_th) - ->thread_num = __kmp_tid_from_gtid(gtid); - } - - /* OMPT state */ - master_th->th.ompt_thread_info.state = ompt_state_work_parallel; - } else { - exit_runtime_p = &dummy; - } -#endif - - { - KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); - KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); - __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv -#if OMPT_SUPPORT - , - exit_runtime_p -#endif - ); - } - -#if OMPT_SUPPORT - *exit_runtime_p = NULL; - if (ompt_enabled.enabled) { - OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none; - if (ompt_enabled.ompt_callback_implicit_task) { - ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( - ompt_scope_end, NULL, implicit_task_data, 1, - OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial? 
- } - __ompt_lw_taskteam_unlink(master_th); - - if (ompt_enabled.ompt_callback_parallel_end) { - ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( - OMPT_CUR_TEAM_DATA(master_th), OMPT_CUR_TASK_DATA(master_th), - OMPT_INVOKER(call_context), return_address); - } - master_th->th.ompt_thread_info.state = ompt_state_overhead; - } -#endif - return TRUE; - } - - parent_team->t.t_pkfn = microtask; - parent_team->t.t_invoke = invoker; - KMP_ATOMIC_INC(&root->r.r_in_parallel); - parent_team->t.t_active_level++; - parent_team->t.t_level++; -#if OMP_50_ENABLED - parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save -#endif - - /* Change number of threads in the team if requested */ - if (master_set_numthreads) { // The parallel has num_threads clause - if (master_set_numthreads < master_th->th.th_teams_size.nth) { - // AC: only can reduce number of threads dynamically, can't increase - kmp_info_t **other_threads = parent_team->t.t_threads; - parent_team->t.t_nproc = master_set_numthreads; - for (i = 0; i < master_set_numthreads; ++i) { - other_threads[i]->th.th_team_nproc = master_set_numthreads; - } - // Keep extra threads hot in the team for possible next parallels - } - master_th->th.th_set_nproc = 0; - } - -#if USE_DEBUGGER - if (__kmp_debugging) { // Let debugger override number of threads. - int nth = __kmp_omp_num_threads(loc); - if (nth > 0) { // 0 means debugger doesn't want to change num threads - master_set_numthreads = nth; - } - } -#endif - - KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, " - "master_th=%p, gtid=%d\n", - root, parent_team, master_th, gtid)); - __kmp_internal_fork(loc, gtid, parent_team); - KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, " - "master_th=%p, gtid=%d\n", - root, parent_team, master_th, gtid)); - - /* Invoke microtask for MASTER thread */ - KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid, - parent_team->t.t_id, parent_team->t.t_pkfn)); - - if (!parent_team->t.t_invoke(gtid)) { - KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread"); - } - KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid, - parent_team->t.t_id, parent_team->t.t_pkfn)); - KMP_MB(); /* Flush all pending memory write invalidates. */ - - KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid)); - - return TRUE; - } // Parallel closely nested in teams construct -#endif /* OMP_40_ENABLED */ - -#if KMP_DEBUG - if (__kmp_tasking_mode != tskm_immediate_exec) { - KMP_DEBUG_ASSERT(master_th->th.th_task_team == - parent_team->t.t_task_team[master_th->th.th_task_state]); - } -#endif - - if (parent_team->t.t_active_level >= - master_th->th.th_current_task->td_icvs.max_active_levels) { - nthreads = 1; - } else { -#if OMP_40_ENABLED - int enter_teams = ((ap == NULL && active_level == 0) || - (ap && teams_level > 0 && teams_level == level)); -#endif - nthreads = - master_set_numthreads - ? master_set_numthreads - : get__nproc_2( - parent_team, - master_tid); // TODO: get nproc directly from current task - - // Check if we need to take forkjoin lock? (no need for serialized - // parallel out of teams construct). This code moved here from - // __kmp_reserve_threads() to speedup nested serialized parallels. 
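-      // Decision sketch (condensed from the checks below): serialize when
-      // nesting is off and we are already inside a parallel region (unless
-      // entering a teams construct), or when KMP_LIBRARY=serial; otherwise
-      // take the forkjoin lock and let __kmp_reserve_threads() clip the
-      // request against dyn-var, the thread limits and the threads array.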
- if (nthreads > 1) { - if ((!get__nested(master_th) && (root->r.r_in_parallel -#if OMP_40_ENABLED - && !enter_teams -#endif /* OMP_40_ENABLED */ - )) || - (__kmp_library == library_serial)) { - KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d" - " threads\n", - gtid, nthreads)); - nthreads = 1; - } - } - if (nthreads > 1) { - /* determine how many new threads we can use */ - __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); - nthreads = __kmp_reserve_threads( - root, parent_team, master_tid, nthreads -#if OMP_40_ENABLED - /* AC: If we execute teams from parallel region (on host), then - teams should be created but each can only have 1 thread if - nesting is disabled. If teams called from serial region, then - teams and their threads should be created regardless of the - nesting setting. */ - , - enter_teams -#endif /* OMP_40_ENABLED */ - ); - if (nthreads == 1) { - // Free lock for single thread execution here; for multi-thread - // execution it will be freed later after team of threads created - // and initialized - __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); - } - } - } - KMP_DEBUG_ASSERT(nthreads > 0); - - // If we temporarily changed the set number of threads then restore it now - master_th->th.th_set_nproc = 0; - - /* create a serialized parallel region? */ - if (nthreads == 1) { -/* josh todo: hypothetical question: what do we do for OS X*? */ -#if KMP_OS_LINUX && \ - (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - void *args[argc]; -#else - void **args = (void **)KMP_ALLOCA(argc * sizeof(void *)); -#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || \ - KMP_ARCH_AARCH64) */ - - KA_TRACE(20, - ("__kmp_fork_call: T#%d serializing parallel region\n", gtid)); - - __kmpc_serialized_parallel(loc, gtid); - - if (call_context == fork_context_intel) { - /* TODO this sucks, use the compiler itself to pass args! :) */ - master_th->th.th_serial_team->t.t_ident = loc; -#if OMP_40_ENABLED - if (!ap) { - // revert change made in __kmpc_serialized_parallel() - master_th->th.th_serial_team->t.t_level--; -// Get args from parent team for teams construct - -#if OMPT_SUPPORT - void *dummy; - void **exit_runtime_p; - ompt_task_info_t *task_info; - - ompt_lw_taskteam_t lw_taskteam; - - if (ompt_enabled.enabled) { - __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, - &ompt_parallel_data, return_address); - - __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0); - // don't use lw_taskteam after linking. content was swaped - - task_info = OMPT_CUR_TASK_INFO(master_th); - exit_runtime_p = &(task_info->frame.exit_frame.ptr); - if (ompt_enabled.ompt_callback_implicit_task) { - ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( - ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), - &(task_info->task_data), 1, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial? 
- OMPT_CUR_TASK_INFO(master_th) - ->thread_num = __kmp_tid_from_gtid(gtid); - } - - /* OMPT state */ - master_th->th.ompt_thread_info.state = ompt_state_work_parallel; - } else { - exit_runtime_p = &dummy; - } -#endif - - { - KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); - KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); - __kmp_invoke_microtask(microtask, gtid, 0, argc, - parent_team->t.t_argv -#if OMPT_SUPPORT - , - exit_runtime_p -#endif - ); - } - -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - exit_runtime_p = NULL; - if (ompt_enabled.ompt_callback_implicit_task) { - ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( - ompt_scope_end, NULL, &(task_info->task_data), 1, - OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial? - } - - __ompt_lw_taskteam_unlink(master_th); - if (ompt_enabled.ompt_callback_parallel_end) { - ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( - OMPT_CUR_TEAM_DATA(master_th), parent_task_data, - OMPT_INVOKER(call_context), return_address); - } - master_th->th.ompt_thread_info.state = ompt_state_overhead; - } -#endif - } else if (microtask == (microtask_t)__kmp_teams_master) { - KMP_DEBUG_ASSERT(master_th->th.th_team == - master_th->th.th_serial_team); - team = master_th->th.th_team; - // team->t.t_pkfn = microtask; - team->t.t_invoke = invoker; - __kmp_alloc_argv_entries(argc, team, TRUE); - team->t.t_argc = argc; - argv = (void **)team->t.t_argv; - if (ap) { - for (i = argc - 1; i >= 0; --i) -// TODO: revert workaround for Intel(R) 64 tracker #96 -#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX - *argv++ = va_arg(*ap, void *); -#else - *argv++ = va_arg(ap, void *); -#endif - } else { - for (i = 0; i < argc; ++i) - // Get args from parent team for teams construct - argv[i] = parent_team->t.t_argv[i]; - } - // AC: revert change made in __kmpc_serialized_parallel() - // because initial code in teams should have level=0 - team->t.t_level--; - // AC: call special invoker for outer "parallel" of teams construct - invoker(gtid); - } else { -#endif /* OMP_40_ENABLED */ - argv = args; - for (i = argc - 1; i >= 0; --i) -// TODO: revert workaround for Intel(R) 64 tracker #96 -#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX - *argv++ = va_arg(*ap, void *); -#else - *argv++ = va_arg(ap, void *); -#endif - KMP_MB(); - -#if OMPT_SUPPORT - void *dummy; - void **exit_runtime_p; - ompt_task_info_t *task_info; - - ompt_lw_taskteam_t lw_taskteam; - - if (ompt_enabled.enabled) { - __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, - &ompt_parallel_data, return_address); - __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0); - // don't use lw_taskteam after linking. content was swaped - task_info = OMPT_CUR_TASK_INFO(master_th); - exit_runtime_p = &(task_info->frame.exit_frame.ptr); - - /* OMPT implicit task begin */ - implicit_task_data = OMPT_CUR_TASK_DATA(master_th); - if (ompt_enabled.ompt_callback_implicit_task) { - ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( - ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), - implicit_task_data, 1, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial? 
- OMPT_CUR_TASK_INFO(master_th) - ->thread_num = __kmp_tid_from_gtid(gtid); - } - - /* OMPT state */ - master_th->th.ompt_thread_info.state = ompt_state_work_parallel; - } else { - exit_runtime_p = &dummy; - } -#endif - - { - KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); - KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); - __kmp_invoke_microtask(microtask, gtid, 0, argc, args -#if OMPT_SUPPORT - , - exit_runtime_p -#endif - ); - } - -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - *exit_runtime_p = NULL; - if (ompt_enabled.ompt_callback_implicit_task) { - ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( - ompt_scope_end, NULL, &(task_info->task_data), 1, - OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial? - } - - ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th); - __ompt_lw_taskteam_unlink(master_th); - if (ompt_enabled.ompt_callback_parallel_end) { - ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( - &ompt_parallel_data, parent_task_data, - OMPT_INVOKER(call_context), return_address); - } - master_th->th.ompt_thread_info.state = ompt_state_overhead; - } -#endif -#if OMP_40_ENABLED - } -#endif /* OMP_40_ENABLED */ - } else if (call_context == fork_context_gnu) { -#if OMPT_SUPPORT - ompt_lw_taskteam_t lwt; - __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data, - return_address); - - lwt.ompt_task_info.frame.exit_frame = ompt_data_none; - __ompt_lw_taskteam_link(&lwt, master_th, 1); -// don't use lw_taskteam after linking. content was swaped -#endif - - // we were called from GNU native code - KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid)); - return FALSE; - } else { - KMP_ASSERT2(call_context < fork_context_last, - "__kmp_fork_call: unknown fork_context parameter"); - } - - KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid)); - KMP_MB(); - return FALSE; - } // if (nthreads == 1) - - // GEH: only modify the executing flag in the case when not serialized - // serialized case is handled in kmpc_serialized_parallel - KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, " - "curtask=%p, curtask_max_aclevel=%d\n", - parent_team->t.t_active_level, master_th, - master_th->th.th_current_task, - master_th->th.th_current_task->td_icvs.max_active_levels)); - // TODO: GEH - cannot do this assertion because root thread not set up as - // executing - // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 ); - master_th->th.th_current_task->td_flags.executing = 0; - -#if OMP_40_ENABLED - if (!master_th->th.th_teams_microtask || level > teams_level) -#endif /* OMP_40_ENABLED */ - { - /* Increment our nested depth level */ - KMP_ATOMIC_INC(&root->r.r_in_parallel); - } - - // See if we need to make a copy of the ICVs. - int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc; - if ((level + 1 < __kmp_nested_nth.used) && - (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) { - nthreads_icv = __kmp_nested_nth.nth[level + 1]; - } else { - nthreads_icv = 0; // don't update - } - -#if OMP_40_ENABLED - // Figure out the proc_bind_policy for the new team. 
- kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind; - kmp_proc_bind_t proc_bind_icv = - proc_bind_default; // proc_bind_default means don't update - if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) { - proc_bind = proc_bind_false; - } else { - if (proc_bind == proc_bind_default) { - // No proc_bind clause specified; use current proc-bind-var for this - // parallel region - proc_bind = master_th->th.th_current_task->td_icvs.proc_bind; - } - /* else: The proc_bind policy was specified explicitly on parallel clause. - This overrides proc-bind-var for this parallel region, but does not - change proc-bind-var. */ - // Figure the value of proc-bind-var for the child threads. - if ((level + 1 < __kmp_nested_proc_bind.used) && - (__kmp_nested_proc_bind.bind_types[level + 1] != - master_th->th.th_current_task->td_icvs.proc_bind)) { - proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1]; - } - } - - // Reset for next parallel region - master_th->th.th_set_proc_bind = proc_bind_default; -#endif /* OMP_40_ENABLED */ - - if ((nthreads_icv > 0) -#if OMP_40_ENABLED - || (proc_bind_icv != proc_bind_default) -#endif /* OMP_40_ENABLED */ - ) { - kmp_internal_control_t new_icvs; - copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs); - new_icvs.next = NULL; - if (nthreads_icv > 0) { - new_icvs.nproc = nthreads_icv; - } - -#if OMP_40_ENABLED - if (proc_bind_icv != proc_bind_default) { - new_icvs.proc_bind = proc_bind_icv; - } -#endif /* OMP_40_ENABLED */ - - /* allocate a new parallel team */ - KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n")); - team = __kmp_allocate_team(root, nthreads, nthreads, -#if OMPT_SUPPORT - ompt_parallel_data, -#endif -#if OMP_40_ENABLED - proc_bind, -#endif - &new_icvs, argc USE_NESTED_HOT_ARG(master_th)); - } else { - /* allocate a new parallel team */ - KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n")); - team = __kmp_allocate_team(root, nthreads, nthreads, -#if OMPT_SUPPORT - ompt_parallel_data, -#endif -#if OMP_40_ENABLED - proc_bind, -#endif - &master_th->th.th_current_task->td_icvs, - argc USE_NESTED_HOT_ARG(master_th)); - } - KF_TRACE( - 10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team)); - - /* setup the new team */ - KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid); - KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons); - KMP_CHECK_UPDATE(team->t.t_ident, loc); - KMP_CHECK_UPDATE(team->t.t_parent, parent_team); - KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask); -#if OMPT_SUPPORT - KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address, - return_address); -#endif - KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe -// TODO: parent_team->t.t_level == INT_MAX ??? 
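-    // KMP_CHECK_UPDATE stores only when the value actually differs; a sketch
-    // of the idiom (assumed shape, not the exact macro text):
-    //   if ((dst) != (src)) (dst) = (src);
-    // so re-running an unchanged hot team leaves its cache lines clean.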
-#if OMP_40_ENABLED - if (!master_th->th.th_teams_microtask || level > teams_level) { -#endif /* OMP_40_ENABLED */ - int new_level = parent_team->t.t_level + 1; - KMP_CHECK_UPDATE(team->t.t_level, new_level); - new_level = parent_team->t.t_active_level + 1; - KMP_CHECK_UPDATE(team->t.t_active_level, new_level); -#if OMP_40_ENABLED - } else { - // AC: Do not increase parallel level at start of the teams construct - int new_level = parent_team->t.t_level; - KMP_CHECK_UPDATE(team->t.t_level, new_level); - new_level = parent_team->t.t_active_level; - KMP_CHECK_UPDATE(team->t.t_active_level, new_level); - } -#endif /* OMP_40_ENABLED */ - kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid); - // set master's schedule as new run-time schedule - KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched); - -#if OMP_40_ENABLED - KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq); -#endif -#if OMP_50_ENABLED - KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator); -#endif - - // Update the floating point rounding in the team if required. - propagateFPControl(team); - - if (__kmp_tasking_mode != tskm_immediate_exec) { - // Set master's task team to team's task team. Unless this is hot team, it - // should be NULL. - KMP_DEBUG_ASSERT(master_th->th.th_task_team == - parent_team->t.t_task_team[master_th->th.th_task_state]); - KA_TRACE(20, ("__kmp_fork_call: Master T#%d pushing task_team %p / team " - "%p, new task_team %p / team %p\n", - __kmp_gtid_from_thread(master_th), - master_th->th.th_task_team, parent_team, - team->t.t_task_team[master_th->th.th_task_state], team)); - - if (active_level || master_th->th.th_task_team) { - // Take a memo of master's task_state - KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack); - if (master_th->th.th_task_state_top >= - master_th->th.th_task_state_stack_sz) { // increase size - kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz; - kmp_uint8 *old_stack, *new_stack; - kmp_uint32 i; - new_stack = (kmp_uint8 *)__kmp_allocate(new_size); - for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) { - new_stack[i] = master_th->th.th_task_state_memo_stack[i]; - } - for (i = master_th->th.th_task_state_stack_sz; i < new_size; - ++i) { // zero-init rest of stack - new_stack[i] = 0; - } - old_stack = master_th->th.th_task_state_memo_stack; - master_th->th.th_task_state_memo_stack = new_stack; - master_th->th.th_task_state_stack_sz = new_size; - __kmp_free(old_stack); - } - // Store master's task_state on stack - master_th->th - .th_task_state_memo_stack[master_th->th.th_task_state_top] = - master_th->th.th_task_state; - master_th->th.th_task_state_top++; -#if KMP_NESTED_HOT_TEAMS - if (master_th->th.th_hot_teams && - active_level < __kmp_hot_teams_max_level && - team == master_th->th.th_hot_teams[active_level].hot_team) { - // Restore master's nested state if nested hot team - master_th->th.th_task_state = - master_th->th - .th_task_state_memo_stack[master_th->th.th_task_state_top]; - } else { -#endif - master_th->th.th_task_state = 0; -#if KMP_NESTED_HOT_TEAMS - } -#endif - } -#if !KMP_NESTED_HOT_TEAMS - KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || - (team == root->r.r_hot_team)); -#endif - } - - KA_TRACE( - 20, - ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n", - gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, - team->t.t_nproc)); - KMP_DEBUG_ASSERT(team != root->r.r_hot_team || - (team->t.t_master_tid == 0 && - (team->t.t_parent == root->r.r_root_team || - 
team->t.t_parent->t.t_serialized))); - KMP_MB(); - - /* now, setup the arguments */ - argv = (void **)team->t.t_argv; -#if OMP_40_ENABLED - if (ap) { -#endif /* OMP_40_ENABLED */ - for (i = argc - 1; i >= 0; --i) { -// TODO: revert workaround for Intel(R) 64 tracker #96 -#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX - void *new_argv = va_arg(*ap, void *); -#else - void *new_argv = va_arg(ap, void *); -#endif - KMP_CHECK_UPDATE(*argv, new_argv); - argv++; - } -#if OMP_40_ENABLED - } else { - for (i = 0; i < argc; ++i) { - // Get args from parent team for teams construct - KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]); - } - } -#endif /* OMP_40_ENABLED */ - - /* now actually fork the threads */ - KMP_CHECK_UPDATE(team->t.t_master_active, master_active); - if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong - root->r.r_active = TRUE; - - __kmp_fork_team_threads(root, team, master_th, gtid); - __kmp_setup_icv_copy(team, nthreads, - &master_th->th.th_current_task->td_icvs, loc); - -#if OMPT_SUPPORT - master_th->th.ompt_thread_info.state = ompt_state_work_parallel; -#endif - - __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); - -#if USE_ITT_BUILD - if (team->t.t_active_level == 1 // only report frames at level 1 -#if OMP_40_ENABLED - && !master_th->th.th_teams_microtask // not in teams construct -#endif /* OMP_40_ENABLED */ - ) { -#if USE_ITT_NOTIFY - if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && - (__kmp_forkjoin_frames_mode == 3 || - __kmp_forkjoin_frames_mode == 1)) { - kmp_uint64 tmp_time = 0; - if (__itt_get_timestamp_ptr) - tmp_time = __itt_get_timestamp(); - // Internal fork - report frame begin - master_th->th.th_frame_time = tmp_time; - if (__kmp_forkjoin_frames_mode == 3) - team->t.t_region_time = tmp_time; - } else -// only one notification scheme (either "submit" or "forking/joined", not both) -#endif /* USE_ITT_NOTIFY */ - if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) && - __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) { - // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer. 
- __kmp_itt_region_forking(gtid, team->t.t_nproc, 0); - } - } -#endif /* USE_ITT_BUILD */ - - /* now go on and do the work */ - KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team); - KMP_MB(); - KF_TRACE(10, - ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n", - root, team, master_th, gtid)); - -#if USE_ITT_BUILD - if (__itt_stack_caller_create_ptr) { - team->t.t_stack_id = - __kmp_itt_stack_caller_create(); // create new stack stitching id - // before entering fork barrier - } -#endif /* USE_ITT_BUILD */ - -#if OMP_40_ENABLED - // AC: skip __kmp_internal_fork at teams construct, let only master - // threads execute - if (ap) -#endif /* OMP_40_ENABLED */ - { - __kmp_internal_fork(loc, gtid, team); - KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, " - "master_th=%p, gtid=%d\n", - root, team, master_th, gtid)); - } - - if (call_context == fork_context_gnu) { - KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid)); - return TRUE; - } - - /* Invoke microtask for MASTER thread */ - KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid, - team->t.t_id, team->t.t_pkfn)); - } // END of timer KMP_fork_call block - - if (!team->t.t_invoke(gtid)) { - KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread"); - } - KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid, - team->t.t_id, team->t.t_pkfn)); - KMP_MB(); /* Flush all pending memory write invalidates. */ - - KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid)); - -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - master_th->th.ompt_thread_info.state = ompt_state_overhead; - } -#endif - - return TRUE; -} - -#if OMPT_SUPPORT -static inline void __kmp_join_restore_state(kmp_info_t *thread, - kmp_team_t *team) { - // restore state outside the region - thread->th.ompt_thread_info.state = - ((team->t.t_serialized) ? 
ompt_state_work_serial - : ompt_state_work_parallel); -} - -static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread, - kmp_team_t *team, ompt_data_t *parallel_data, - fork_context_e fork_context, void *codeptr) { - ompt_task_info_t *task_info = __ompt_get_task_info_object(0); - if (ompt_enabled.ompt_callback_parallel_end) { - ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( - parallel_data, &(task_info->task_data), OMPT_INVOKER(fork_context), - codeptr); - } - - task_info->frame.enter_frame = ompt_data_none; - __kmp_join_restore_state(thread, team); -} -#endif - -void __kmp_join_call(ident_t *loc, int gtid -#if OMPT_SUPPORT - , - enum fork_context_e fork_context -#endif -#if OMP_40_ENABLED - , - int exit_teams -#endif /* OMP_40_ENABLED */ - ) { - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call); - kmp_team_t *team; - kmp_team_t *parent_team; - kmp_info_t *master_th; - kmp_root_t *root; - int master_active; - int i; - - KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid)); - - /* setup current data */ - master_th = __kmp_threads[gtid]; - root = master_th->th.th_root; - team = master_th->th.th_team; - parent_team = team->t.t_parent; - - master_th->th.th_ident = loc; - -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - master_th->th.ompt_thread_info.state = ompt_state_overhead; - } -#endif - -#if KMP_DEBUG - if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) { - KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, " - "th_task_team = %p\n", - __kmp_gtid_from_thread(master_th), team, - team->t.t_task_team[master_th->th.th_task_state], - master_th->th.th_task_team)); - KMP_DEBUG_ASSERT(master_th->th.th_task_team == - team->t.t_task_team[master_th->th.th_task_state]); - } -#endif - - if (team->t.t_serialized) { -#if OMP_40_ENABLED - if (master_th->th.th_teams_microtask) { - // We are in teams construct - int level = team->t.t_level; - int tlevel = master_th->th.th_teams_level; - if (level == tlevel) { - // AC: we haven't incremented it earlier at start of teams construct, - // so do it here - at the end of teams construct - team->t.t_level++; - } else if (level == tlevel + 1) { - // AC: we are exiting parallel inside teams, need to increment - // serialization in order to restore it in the next call to - // __kmpc_end_serialized_parallel - team->t.t_serialized++; - } - } -#endif /* OMP_40_ENABLED */ - __kmpc_end_serialized_parallel(loc, gtid); - -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - __kmp_join_restore_state(master_th, parent_team); - } -#endif - - return; - } - - master_active = team->t.t_master_active; - -#if OMP_40_ENABLED - if (!exit_teams) -#endif /* OMP_40_ENABLED */ - { - // AC: No barrier for internal teams at exit from teams construct. - // But there is barrier for external team (league). - __kmp_internal_join(loc, gtid, team); - } -#if OMP_40_ENABLED - else { - master_th->th.th_task_state = - 0; // AC: no tasking in teams (out of any parallel) - } -#endif /* OMP_40_ENABLED */ - - KMP_MB(); - -#if OMPT_SUPPORT - ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data); - void *codeptr = team->t.ompt_team_info.master_return_address; -#endif - -#if USE_ITT_BUILD - if (__itt_stack_caller_create_ptr) { - __kmp_itt_stack_caller_destroy( - (__itt_caller)team->t - .t_stack_id); // destroy the stack stitching id after join barrier - } - - // Mark end of "parallel" region for Intel(R) VTune(TM) analyzer. 
- if (team->t.t_active_level == 1 -#if OMP_40_ENABLED - && !master_th->th.th_teams_microtask /* not in teams construct */ -#endif /* OMP_40_ENABLED */ - ) { - master_th->th.th_ident = loc; - // only one notification scheme (either "submit" or "forking/joined", not - // both) - if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && - __kmp_forkjoin_frames_mode == 3) - __kmp_itt_frame_submit(gtid, team->t.t_region_time, - master_th->th.th_frame_time, 0, loc, - master_th->th.th_team_nproc, 1); - else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) && - !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames) - __kmp_itt_region_joined(gtid); - } // active_level == 1 -#endif /* USE_ITT_BUILD */ - -#if OMP_40_ENABLED - if (master_th->th.th_teams_microtask && !exit_teams && - team->t.t_pkfn != (microtask_t)__kmp_teams_master && - team->t.t_level == master_th->th.th_teams_level + 1) { - // AC: We need to leave the team structure intact at the end of parallel - // inside the teams construct, so that at the next parallel same (hot) team - // works, only adjust nesting levels - - /* Decrement our nested depth level */ - team->t.t_level--; - team->t.t_active_level--; - KMP_ATOMIC_DEC(&root->r.r_in_parallel); - - /* Restore number of threads in the team if needed */ - if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) { - int old_num = master_th->th.th_team_nproc; - int new_num = master_th->th.th_teams_size.nth; - kmp_info_t **other_threads = team->t.t_threads; - team->t.t_nproc = new_num; - for (i = 0; i < old_num; ++i) { - other_threads[i]->th.th_team_nproc = new_num; - } - // Adjust states of non-used threads of the team - for (i = old_num; i < new_num; ++i) { - // Re-initialize thread's barrier data. - int b; - kmp_balign_t *balign = other_threads[i]->th.th_bar; - for (b = 0; b < bs_last_barrier; ++b) { - balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; - KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); -#if USE_DEBUGGER - balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; -#endif - } - if (__kmp_tasking_mode != tskm_immediate_exec) { - // Synchronize thread's task state - other_threads[i]->th.th_task_state = master_th->th.th_task_state; - } - } - } - -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context, - codeptr); - } -#endif - - return; - } -#endif /* OMP_40_ENABLED */ - - /* do cleanup and restore the parent team */ - master_th->th.th_info.ds.ds_tid = team->t.t_master_tid; - master_th->th.th_local.this_construct = team->t.t_master_this_cons; - - master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid]; - - /* jc: The following lock has instructions with REL and ACQ semantics, - separating the parallel user code called in this parallel region - from the serial user code called after this function returns. 
*/ - __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); - -#if OMP_40_ENABLED - if (!master_th->th.th_teams_microtask || - team->t.t_level > master_th->th.th_teams_level) -#endif /* OMP_40_ENABLED */ - { - /* Decrement our nested depth level */ - KMP_ATOMIC_DEC(&root->r.r_in_parallel); - } - KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0); - -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - ompt_task_info_t *task_info = __ompt_get_task_info_object(0); - if (ompt_enabled.ompt_callback_implicit_task) { - int ompt_team_size = team->t.t_nproc; - ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( - ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size, - OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial? - } - - task_info->frame.exit_frame = ompt_data_none; - task_info->task_data = ompt_data_none; - } -#endif - - KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0, - master_th, team)); - __kmp_pop_current_task_from_thread(master_th); - -#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED - // Restore master thread's partition. - master_th->th.th_first_place = team->t.t_first_place; - master_th->th.th_last_place = team->t.t_last_place; -#endif /* OMP_40_ENABLED */ -#if OMP_50_ENABLED - master_th->th.th_def_allocator = team->t.t_def_allocator; -#endif - - updateHWFPControl(team); - - if (root->r.r_active != master_active) - root->r.r_active = master_active; - - __kmp_free_team(root, team USE_NESTED_HOT_ARG( - master_th)); // this will free worker threads - - /* this race was fun to find. make sure the following is in the critical - region otherwise assertions may fail occasionally since the old team may be - reallocated and the hierarchy appears inconsistent. it is actually safe to - run and won't cause any bugs, but will cause those assertion failures. 
it's - only one deref&assign so might as well put this in the critical region */ - master_th->th.th_team = parent_team; - master_th->th.th_team_nproc = parent_team->t.t_nproc; - master_th->th.th_team_master = parent_team->t.t_threads[0]; - master_th->th.th_team_serialized = parent_team->t.t_serialized; - - /* restore serialized team, if need be */ - if (parent_team->t.t_serialized && - parent_team != master_th->th.th_serial_team && - parent_team != root->r.r_root_team) { - __kmp_free_team(root, - master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL)); - master_th->th.th_serial_team = parent_team; - } - - if (__kmp_tasking_mode != tskm_immediate_exec) { - if (master_th->th.th_task_state_top > - 0) { // Restore task state from memo stack - KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack); - // Remember master's state if we re-use this nested hot team - master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = - master_th->th.th_task_state; - --master_th->th.th_task_state_top; // pop - // Now restore state at this level - master_th->th.th_task_state = - master_th->th - .th_task_state_memo_stack[master_th->th.th_task_state_top]; - } - // Copy the task team from the parent team to the master thread - master_th->th.th_task_team = - parent_team->t.t_task_team[master_th->th.th_task_state]; - KA_TRACE(20, - ("__kmp_join_call: Master T#%d restoring task_team %p / team %p\n", - __kmp_gtid_from_thread(master_th), master_th->th.th_task_team, - parent_team)); - } - - // TODO: GEH - cannot do this assertion because root thread not set up as - // executing - // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 ); - master_th->th.th_current_task->td_flags.executing = 1; - - __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); - -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context, - codeptr); - } -#endif - - KMP_MB(); - KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid)); -} - -/* Check whether we should push an internal control record onto the - serial team stack. If so, do it. 
*/ -void __kmp_save_internal_controls(kmp_info_t *thread) { - - if (thread->th.th_team != thread->th.th_serial_team) { - return; - } - if (thread->th.th_team->t.t_serialized > 1) { - int push = 0; - - if (thread->th.th_team->t.t_control_stack_top == NULL) { - push = 1; - } else { - if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level != - thread->th.th_team->t.t_serialized) { - push = 1; - } - } - if (push) { /* push a record on the serial team's stack */ - kmp_internal_control_t *control = - (kmp_internal_control_t *)__kmp_allocate( - sizeof(kmp_internal_control_t)); - - copy_icvs(control, &thread->th.th_current_task->td_icvs); - - control->serial_nesting_level = thread->th.th_team->t.t_serialized; - - control->next = thread->th.th_team->t.t_control_stack_top; - thread->th.th_team->t.t_control_stack_top = control; - } - } -} - -/* Changes set_nproc */ -void __kmp_set_num_threads(int new_nth, int gtid) { - kmp_info_t *thread; - kmp_root_t *root; - - KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth)); - KMP_DEBUG_ASSERT(__kmp_init_serial); - - if (new_nth < 1) - new_nth = 1; - else if (new_nth > __kmp_max_nth) - new_nth = __kmp_max_nth; - - KMP_COUNT_VALUE(OMP_set_numthreads, new_nth); - thread = __kmp_threads[gtid]; - if (thread->th.th_current_task->td_icvs.nproc == new_nth) - return; // nothing to do - - __kmp_save_internal_controls(thread); - - set__nproc(thread, new_nth); - - // If this omp_set_num_threads() call will cause the hot team size to be - // reduced (in the absence of a num_threads clause), then reduce it now, - // rather than waiting for the next parallel region. - root = thread->th.th_root; - if (__kmp_init_parallel && (!root->r.r_active) && - (root->r.r_hot_team->t.t_nproc > new_nth) -#if KMP_NESTED_HOT_TEAMS - && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode -#endif - ) { - kmp_team_t *hot_team = root->r.r_hot_team; - int f; - - __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); - - // Release the extra threads we don't need any more. - for (f = new_nth; f < hot_team->t.t_nproc; f++) { - KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL); - if (__kmp_tasking_mode != tskm_immediate_exec) { - // When decreasing team size, threads no longer in the team should unref - // task team. - hot_team->t.t_threads[f]->th.th_task_team = NULL; - } - __kmp_free_thread(hot_team->t.t_threads[f]); - hot_team->t.t_threads[f] = NULL; - } - hot_team->t.t_nproc = new_nth; -#if KMP_NESTED_HOT_TEAMS - if (thread->th.th_hot_teams) { - KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team); - thread->th.th_hot_teams[0].hot_team_nth = new_nth; - } -#endif - - __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); - - // Update the t_nproc field in the threads that are still active. - for (f = 0; f < new_nth; f++) { - KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL); - hot_team->t.t_threads[f]->th.th_team_nproc = new_nth; - } - // Special flag in case omp_set_num_threads() call - hot_team->t.t_size_changed = -1; - } -} - -/* Changes max_active_levels */ -void __kmp_set_max_active_levels(int gtid, int max_active_levels) { - kmp_info_t *thread; - - KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread " - "%d = (%d)\n", - gtid, max_active_levels)); - KMP_DEBUG_ASSERT(__kmp_init_serial); - - // validate max_active_levels - if (max_active_levels < 0) { - KMP_WARNING(ActiveLevelsNegative, max_active_levels); - // We ignore this call if the user has specified a negative value. - // The current setting won't be changed. 
The last valid setting will be - // used. A warning will be issued (if warnings are allowed as controlled by - // the KMP_WARNINGS env var). - KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new " - "max_active_levels for thread %d = (%d)\n", - gtid, max_active_levels)); - return; - } - if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) { - // it's OK, the max_active_levels is within the valid range: [ 0; - // KMP_MAX_ACTIVE_LEVELS_LIMIT ] - // We allow a zero value. (implementation defined behavior) - } else { - KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels, - KMP_MAX_ACTIVE_LEVELS_LIMIT); - max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT; - // Current upper limit is MAX_INT. (implementation defined behavior) - // If the input exceeds the upper limit, we correct the input to be the - // upper limit. (implementation defined behavior) - // Actually, the flow should never get here until we use MAX_INT limit. - } - KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new " - "max_active_levels for thread %d = (%d)\n", - gtid, max_active_levels)); - - thread = __kmp_threads[gtid]; - - __kmp_save_internal_controls(thread); - - set__max_active_levels(thread, max_active_levels); -} - -/* Gets max_active_levels */ -int __kmp_get_max_active_levels(int gtid) { - kmp_info_t *thread; - - KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid)); - KMP_DEBUG_ASSERT(__kmp_init_serial); - - thread = __kmp_threads[gtid]; - KMP_DEBUG_ASSERT(thread->th.th_current_task); - KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, " - "curtask_maxaclevel=%d\n", - gtid, thread->th.th_current_task, - thread->th.th_current_task->td_icvs.max_active_levels)); - return thread->th.th_current_task->td_icvs.max_active_levels; -} - -/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */ -void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) { - kmp_info_t *thread; - // kmp_team_t *team; - - KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", - gtid, (int)kind, chunk)); - KMP_DEBUG_ASSERT(__kmp_init_serial); - - // Check if the kind parameter is valid, correct if needed. - // Valid parameters should fit in one of two intervals - standard or extended: - // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper> - // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103 - if (kind <= kmp_sched_lower || kind >= kmp_sched_upper || - (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) { - // TODO: Hint needs attention in case we change the default schedule. - __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind), - KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"), - __kmp_msg_null); - kind = kmp_sched_default; - chunk = 0; // ignore chunk value in case of bad kind - } - - thread = __kmp_threads[gtid]; - - __kmp_save_internal_controls(thread); - - if (kind < kmp_sched_upper_std) { - if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) { - // differ static chunked vs. 
unchunked: chunk should be invalid to - // indicate unchunked schedule (which is the default) - thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static; - } else { - thread->th.th_current_task->td_icvs.sched.r_sched_type = - __kmp_sch_map[kind - kmp_sched_lower - 1]; - } - } else { - // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - - // kmp_sched_lower - 2 ]; - thread->th.th_current_task->td_icvs.sched.r_sched_type = - __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std - - kmp_sched_lower - 2]; - } - if (kind == kmp_sched_auto || chunk < 1) { - // ignore parameter chunk for schedule auto - thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK; - } else { - thread->th.th_current_task->td_icvs.sched.chunk = chunk; - } -} - -/* Gets def_sched_var ICV values */ -void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) { - kmp_info_t *thread; - enum sched_type th_type; - - KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid)); - KMP_DEBUG_ASSERT(__kmp_init_serial); - - thread = __kmp_threads[gtid]; - - th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type; - - switch (th_type) { - case kmp_sch_static: - case kmp_sch_static_greedy: - case kmp_sch_static_balanced: - *kind = kmp_sched_static; - *chunk = 0; // chunk was not set, try to show this fact via zero value - return; - case kmp_sch_static_chunked: - *kind = kmp_sched_static; - break; - case kmp_sch_dynamic_chunked: - *kind = kmp_sched_dynamic; - break; - case kmp_sch_guided_chunked: - case kmp_sch_guided_iterative_chunked: - case kmp_sch_guided_analytical_chunked: - *kind = kmp_sched_guided; - break; - case kmp_sch_auto: - *kind = kmp_sched_auto; - break; - case kmp_sch_trapezoidal: - *kind = kmp_sched_trapezoidal; - break; -#if KMP_STATIC_STEAL_ENABLED - case kmp_sch_static_steal: - *kind = kmp_sched_static_steal; - break; -#endif - default: - KMP_FATAL(UnknownSchedulingType, th_type); - } - - *chunk = thread->th.th_current_task->td_icvs.sched.chunk; -} - -int __kmp_get_ancestor_thread_num(int gtid, int level) { - - int ii, dd; - kmp_team_t *team; - kmp_info_t *thr; - - KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level)); - KMP_DEBUG_ASSERT(__kmp_init_serial); - - // validate level - if (level == 0) - return 0; - if (level < 0) - return -1; - thr = __kmp_threads[gtid]; - team = thr->th.th_team; - ii = team->t.t_level; - if (level > ii) - return -1; - -#if OMP_40_ENABLED - if (thr->th.th_teams_microtask) { - // AC: we are in teams region where multiple nested teams have same level - int tlevel = thr->th.th_teams_level; // the level of the teams construct - if (level <= - tlevel) { // otherwise usual algorithm works (will not touch the teams) - KMP_DEBUG_ASSERT(ii >= tlevel); - // AC: As we need to pass by the teams league, we need to artificially - // increase ii - if (ii == tlevel) { - ii += 2; // three teams have same level - } else { - ii++; // two teams have same level - } - } - } -#endif - - if (ii == level) - return __kmp_tid_from_gtid(gtid); - - dd = team->t.t_serialized; - level++; - while (ii > level) { - for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) { - } - if ((team->t.t_serialized) && (!dd)) { - team = team->t.t_parent; - continue; - } - if (ii > level) { - team = team->t.t_parent; - dd = team->t.t_serialized; - ii--; - } - } - - return (dd > 1) ? 
(0) : (team->t.t_master_tid); -} - -int __kmp_get_team_size(int gtid, int level) { - - int ii, dd; - kmp_team_t *team; - kmp_info_t *thr; - - KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level)); - KMP_DEBUG_ASSERT(__kmp_init_serial); - - // validate level - if (level == 0) - return 1; - if (level < 0) - return -1; - thr = __kmp_threads[gtid]; - team = thr->th.th_team; - ii = team->t.t_level; - if (level > ii) - return -1; - -#if OMP_40_ENABLED - if (thr->th.th_teams_microtask) { - // AC: we are in teams region where multiple nested teams have same level - int tlevel = thr->th.th_teams_level; // the level of the teams construct - if (level <= - tlevel) { // otherwise usual algorithm works (will not touch the teams) - KMP_DEBUG_ASSERT(ii >= tlevel); - // AC: As we need to pass by the teams league, we need to artificially - // increase ii - if (ii == tlevel) { - ii += 2; // three teams have same level - } else { - ii++; // two teams have same level - } - } - } -#endif - - while (ii > level) { - for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) { - } - if (team->t.t_serialized && (!dd)) { - team = team->t.t_parent; - continue; - } - if (ii > level) { - team = team->t.t_parent; - ii--; - } - } - - return team->t.t_nproc; -} - -kmp_r_sched_t __kmp_get_schedule_global() { - // This routine was created because the pairs (__kmp_sched, __kmp_chunk) and - // (__kmp_static, __kmp_guided) may be changed by kmp_set_defaults - // independently. So one can get the updated schedule here. - - kmp_r_sched_t r_sched; - - // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static, - // __kmp_guided. __kmp_sched should keep original value, so that the user can - // set KMP_SCHEDULE multiple times, and thus have different run-time schedules - // in different roots (even in OMP 2.5) - if (__kmp_sched == kmp_sch_static) { - // replace STATIC with more detailed schedule (balanced or greedy) - r_sched.r_sched_type = __kmp_static; - } else if (__kmp_sched == kmp_sch_guided_chunked) { - // replace GUIDED with more detailed schedule (iterative or analytical) - r_sched.r_sched_type = __kmp_guided; - } else { // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other - r_sched.r_sched_type = __kmp_sched; - } - - if (__kmp_chunk < KMP_DEFAULT_CHUNK) { - // __kmp_chunk may be wrong here (if it was not ever set) - r_sched.chunk = KMP_DEFAULT_CHUNK; - } else { - r_sched.chunk = __kmp_chunk; - } - - return r_sched; -} - -/* Allocate (realloc == FALSE) or reallocate (realloc == TRUE) - at least argc number of *t_argv entries for the requested team. */ -static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) { - - KMP_DEBUG_ASSERT(team); - if (!realloc || argc > team->t.t_max_argc) { - - KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, " - "current entries=%d\n", - team->t.t_id, argc, (realloc) ? 
team->t.t_max_argc : 0)); - /* if previously allocated heap space for args, free them */ - if (realloc && team->t.t_argv != &team->t.t_inline_argv[0]) - __kmp_free((void *)team->t.t_argv); - - if (argc <= KMP_INLINE_ARGV_ENTRIES) { - /* use unused space in the cache line for arguments */ - team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES; - KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d " - "argv entries\n", - team->t.t_id, team->t.t_max_argc)); - team->t.t_argv = &team->t.t_inline_argv[0]; - if (__kmp_storage_map) { - __kmp_print_storage_map_gtid( - -1, &team->t.t_inline_argv[0], - &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES], - (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv", - team->t.t_id); - } - } else { - /* allocate space for arguments in the heap */ - team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1)) - ? KMP_MIN_MALLOC_ARGV_ENTRIES - : 2 * argc; - KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d " - "argv entries\n", - team->t.t_id, team->t.t_max_argc)); - team->t.t_argv = - (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc); - if (__kmp_storage_map) { - __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0], - &team->t.t_argv[team->t.t_max_argc], - sizeof(void *) * team->t.t_max_argc, - "team_%d.t_argv", team->t.t_id); - } - } - } -} - -static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) { - int i; - int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2; - team->t.t_threads = - (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth); - team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate( - sizeof(dispatch_shared_info_t) * num_disp_buff); - team->t.t_dispatch = - (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth); - team->t.t_implicit_task_taskdata = - (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth); - team->t.t_max_nproc = max_nth; - - /* setup dispatch buffers */ - for (i = 0; i < num_disp_buff; ++i) { - team->t.t_disp_buffer[i].buffer_index = i; -#if OMP_45_ENABLED - team->t.t_disp_buffer[i].doacross_buf_idx = i; -#endif - } -} - -static void __kmp_free_team_arrays(kmp_team_t *team) { - /* Note: this does not free the threads in t_threads (__kmp_free_threads) */ - int i; - for (i = 0; i < team->t.t_max_nproc; ++i) { - if (team->t.t_dispatch[i].th_disp_buffer != NULL) { - __kmp_free(team->t.t_dispatch[i].th_disp_buffer); - team->t.t_dispatch[i].th_disp_buffer = NULL; - } - } -#if KMP_USE_HIER_SCHED - __kmp_dispatch_free_hierarchies(team); -#endif - __kmp_free(team->t.t_threads); - __kmp_free(team->t.t_disp_buffer); - __kmp_free(team->t.t_dispatch); - __kmp_free(team->t.t_implicit_task_taskdata); - team->t.t_threads = NULL; - team->t.t_disp_buffer = NULL; - team->t.t_dispatch = NULL; - team->t.t_implicit_task_taskdata = 0; -} - -static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) { - kmp_info_t **oldThreads = team->t.t_threads; - - __kmp_free(team->t.t_disp_buffer); - __kmp_free(team->t.t_dispatch); - __kmp_free(team->t.t_implicit_task_taskdata); - __kmp_allocate_team_arrays(team, max_nth); - - KMP_MEMCPY(team->t.t_threads, oldThreads, - team->t.t_nproc * sizeof(kmp_info_t *)); - - __kmp_free(oldThreads); -} - -static kmp_internal_control_t __kmp_get_global_icvs(void) { - - kmp_r_sched_t r_sched = - __kmp_get_schedule_global(); // get current state of scheduling globals - -#if OMP_40_ENABLED - KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0); -#endif /* OMP_40_ENABLED */ - - 
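- // Note: the brace-initializer below is positional, so each value must line
- // up with the corresponding field of kmp_internal_control_t in declaration
- // order (including the bt_intervals field that exists only when
- // KMP_USE_MONITOR is defined). It snapshots the env-derived globals into a
- // fresh per-thread ICV block.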
kmp_internal_control_t g_icvs = { - 0, // int serial_nesting_level; //corresponds to value of th_team_serialized - (kmp_int8)__kmp_dflt_nested, // int nested; //internal control - // for nested parallelism (per thread) - (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic - // adjustment of threads (per thread) - (kmp_int8)__kmp_env_blocktime, // int bt_set; //internal control for - // whether blocktime is explicitly set - __kmp_dflt_blocktime, // int blocktime; //internal control for blocktime -#if KMP_USE_MONITOR - __kmp_bt_intervals, // int bt_intervals; //internal control for blocktime -// intervals -#endif - __kmp_dflt_team_nth, // int nproc; //internal control for # of threads for - // next parallel region (per thread) - // (use a max ub on value if __kmp_parallel_initialize not called yet) - __kmp_dflt_max_active_levels, // int max_active_levels; //internal control - // for max_active_levels - r_sched, // kmp_r_sched_t sched; //internal control for runtime schedule -// {sched,chunk} pair -#if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0], - __kmp_default_device, -#endif /* OMP_40_ENABLED */ - NULL // struct kmp_internal_control *next; - }; - - return g_icvs; -} - -static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) { - - kmp_internal_control_t gx_icvs; - gx_icvs.serial_nesting_level = - 0; // probably =team->t.t_serialized, as in __kmp_save_internal_controls - copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs); - gx_icvs.next = NULL; - - return gx_icvs; -} - -static void __kmp_initialize_root(kmp_root_t *root) { - int f; - kmp_team_t *root_team; - kmp_team_t *hot_team; - int hot_team_max_nth; - kmp_r_sched_t r_sched = - __kmp_get_schedule_global(); // get current state of scheduling globals - kmp_internal_control_t r_icvs = __kmp_get_global_icvs(); - KMP_DEBUG_ASSERT(root); - KMP_ASSERT(!root->r.r_begin); - - /* setup the root state structure */ - __kmp_init_lock(&root->r.r_begin_lock); - root->r.r_begin = FALSE; - root->r.r_active = FALSE; - root->r.r_in_parallel = 0; - root->r.r_blocktime = __kmp_dflt_blocktime; - root->r.r_nested = __kmp_dflt_nested; - root->r.r_cg_nthreads = 1; - - /* setup the root team for this task */ - /* allocate the root team structure */ - KF_TRACE(10, ("__kmp_initialize_root: before root_team\n")); - - root_team = - __kmp_allocate_team(root, - 1, // new_nproc - 1, // max_nproc -#if OMPT_SUPPORT - ompt_data_none, // root parallel id -#endif -#if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0], -#endif - &r_icvs, - 0 // argc - USE_NESTED_HOT_ARG(NULL) // master thread is unknown - ); -#if USE_DEBUGGER - // Non-NULL value should be assigned to make the debugger display the root - // team. 
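- // ((microtask_t)(~0) is not a callable microtask; it is merely an obviously
- // non-NULL sentinel value for the debugger's benefit.)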
- TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0)); -#endif - - KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team)); - - root->r.r_root_team = root_team; - root_team->t.t_control_stack_top = NULL; - - /* initialize root team */ - root_team->t.t_threads[0] = NULL; - root_team->t.t_nproc = 1; - root_team->t.t_serialized = 1; - // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels; - root_team->t.t_sched.sched = r_sched.sched; - KA_TRACE( - 20, - ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n", - root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE)); - - /* setup the hot team for this task */ - /* allocate the hot team structure */ - KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n")); - - hot_team = - __kmp_allocate_team(root, - 1, // new_nproc - __kmp_dflt_team_nth_ub * 2, // max_nproc -#if OMPT_SUPPORT - ompt_data_none, // root parallel id -#endif -#if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0], -#endif - &r_icvs, - 0 // argc - USE_NESTED_HOT_ARG(NULL) // master thread is unknown - ); - KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team)); - - root->r.r_hot_team = hot_team; - root_team->t.t_control_stack_top = NULL; - - /* first-time initialization */ - hot_team->t.t_parent = root_team; - - /* initialize hot team */ - hot_team_max_nth = hot_team->t.t_max_nproc; - for (f = 0; f < hot_team_max_nth; ++f) { - hot_team->t.t_threads[f] = NULL; - } - hot_team->t.t_nproc = 1; - // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels; - hot_team->t.t_sched.sched = r_sched.sched; - hot_team->t.t_size_changed = 0; -} - -#ifdef KMP_DEBUG - -typedef struct kmp_team_list_item { - kmp_team_p const *entry; - struct kmp_team_list_item *next; -} kmp_team_list_item_t; -typedef kmp_team_list_item_t *kmp_team_list_t; - -static void __kmp_print_structure_team_accum( // Add team to list of teams. - kmp_team_list_t list, // List of teams. - kmp_team_p const *team // Team to add. - ) { - - // List must terminate with item where both entry and next are NULL. - // Team is added to the list only once. - // List is sorted in ascending order by team id. - // Team id is *not* a key. - - kmp_team_list_t l; - - KMP_DEBUG_ASSERT(list != NULL); - if (team == NULL) { - return; - } - - __kmp_print_structure_team_accum(list, team->t.t_parent); - __kmp_print_structure_team_accum(list, team->t.t_next_pool); - - // Search list for the team. - l = list; - while (l->next != NULL && l->entry != team) { - l = l->next; - } - if (l->next != NULL) { - return; // Team has been added before, exit. - } - - // Team is not found. Search list again for insertion point. - l = list; - while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) { - l = l->next; - } - - // Insert team. 
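- // Classic O(1) insert-before for a singly-linked list: instead of keeping a
- // pointer to the previous item, copy the current item into the new one and
- // then overwrite the current item in place:
- //   before:  ... -> l{entry, next} -> ...
- //   after:   ... -> l{team, item} -> item{old entry, old next} -> ...
- // This also works when l is the terminating {NULL, NULL} item.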
- { - kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( - sizeof(kmp_team_list_item_t)); - *item = *l; - l->entry = team; - l->next = item; - } -} - -static void __kmp_print_structure_team(char const *title, - kmp_team_p const *team) { - __kmp_printf("%s", title); - if (team != NULL) { - __kmp_printf("%2x %p\n", team->t.t_id, team); - } else { - __kmp_printf(" - (nil)\n"); - } -} - -static void __kmp_print_structure_thread(char const *title, - kmp_info_p const *thread) { - __kmp_printf("%s", title); - if (thread != NULL) { - __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread); - } else { - __kmp_printf(" - (nil)\n"); - } -} - -void __kmp_print_structure(void) { - - kmp_team_list_t list; - - // Initialize list of teams. - list = - (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_team_list_item_t)); - list->entry = NULL; - list->next = NULL; - - __kmp_printf("\n------------------------------\nGlobal Thread " - "Table\n------------------------------\n"); - { - int gtid; - for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) { - __kmp_printf("%2d", gtid); - if (__kmp_threads != NULL) { - __kmp_printf(" %p", __kmp_threads[gtid]); - } - if (__kmp_root != NULL) { - __kmp_printf(" %p", __kmp_root[gtid]); - } - __kmp_printf("\n"); - } - } - - // Print out __kmp_threads array. - __kmp_printf("\n------------------------------\nThreads\n--------------------" - "----------\n"); - if (__kmp_threads != NULL) { - int gtid; - for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) { - kmp_info_t const *thread = __kmp_threads[gtid]; - if (thread != NULL) { - __kmp_printf("GTID %2d %p:\n", gtid, thread); - __kmp_printf(" Our Root: %p\n", thread->th.th_root); - __kmp_print_structure_team(" Our Team: ", thread->th.th_team); - __kmp_print_structure_team(" Serial Team: ", - thread->th.th_serial_team); - __kmp_printf(" Threads: %2d\n", thread->th.th_team_nproc); - __kmp_print_structure_thread(" Master: ", - thread->th.th_team_master); - __kmp_printf(" Serialized?: %2d\n", thread->th.th_team_serialized); - __kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc); -#if OMP_40_ENABLED - __kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind); -#endif - __kmp_print_structure_thread(" Next in pool: ", - thread->th.th_next_pool); - __kmp_printf("\n"); - __kmp_print_structure_team_accum(list, thread->th.th_team); - __kmp_print_structure_team_accum(list, thread->th.th_serial_team); - } - } - } else { - __kmp_printf("Threads array is not allocated.\n"); - } - - // Print out __kmp_root array. 
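- // ("Ubers" are the uber threads, i.e. root threads: one kmp_root_t per
- // registered root; see __kmp_register_root() and KMP_UBER_GTID().)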
- __kmp_printf("\n------------------------------\nUbers\n----------------------" - "--------\n"); - if (__kmp_root != NULL) { - int gtid; - for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) { - kmp_root_t const *root = __kmp_root[gtid]; - if (root != NULL) { - __kmp_printf("GTID %2d %p:\n", gtid, root); - __kmp_print_structure_team(" Root Team: ", root->r.r_root_team); - __kmp_print_structure_team(" Hot Team: ", root->r.r_hot_team); - __kmp_print_structure_thread(" Uber Thread: ", - root->r.r_uber_thread); - __kmp_printf(" Active?: %2d\n", root->r.r_active); - __kmp_printf(" Nested?: %2d\n", root->r.r_nested); - __kmp_printf(" In Parallel: %2d\n", - KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel)); - __kmp_printf("\n"); - __kmp_print_structure_team_accum(list, root->r.r_root_team); - __kmp_print_structure_team_accum(list, root->r.r_hot_team); - } - } - } else { - __kmp_printf("Ubers array is not allocated.\n"); - } - - __kmp_printf("\n------------------------------\nTeams\n----------------------" - "--------\n"); - while (list->next != NULL) { - kmp_team_p const *team = list->entry; - int i; - __kmp_printf("Team %2x %p:\n", team->t.t_id, team); - __kmp_print_structure_team(" Parent Team: ", team->t.t_parent); - __kmp_printf(" Master TID: %2d\n", team->t.t_master_tid); - __kmp_printf(" Max threads: %2d\n", team->t.t_max_nproc); - __kmp_printf(" Levels of serial: %2d\n", team->t.t_serialized); - __kmp_printf(" Number threads: %2d\n", team->t.t_nproc); - for (i = 0; i < team->t.t_nproc; ++i) { - __kmp_printf(" Thread %2d: ", i); - __kmp_print_structure_thread("", team->t.t_threads[i]); - } - __kmp_print_structure_team(" Next in pool: ", team->t.t_next_pool); - __kmp_printf("\n"); - list = list->next; - } - - // Print out __kmp_thread_pool and __kmp_team_pool. - __kmp_printf("\n------------------------------\nPools\n----------------------" - "--------\n"); - __kmp_print_structure_thread("Thread pool: ", - CCAST(kmp_info_t *, __kmp_thread_pool)); - __kmp_print_structure_team("Team pool: ", - CCAST(kmp_team_t *, __kmp_team_pool)); - __kmp_printf("\n"); - - // Free team list. - while (list != NULL) { - kmp_team_list_item_t *item = list; - list = list->next; - KMP_INTERNAL_FREE(item); - } -} - -#endif - -//--------------------------------------------------------------------------- -// Stuff for per-thread fast random number generator -// Table of primes -static const unsigned __kmp_primes[] = { - 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877, - 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231, - 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201, - 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3, - 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7, - 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9, - 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45, - 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7, - 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363, - 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3, - 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f}; - -//--------------------------------------------------------------------------- -// __kmp_get_random: Get a random number using a linear congruential method. 
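- // The recurrence is x_{n+1} = a * x_n + 1 (mod 2^32), with the per-thread
- // multiplier 'a' taken from __kmp_primes in __kmp_init_random() below. Only
- // the high 16 bits of the state are returned: in a power-of-two-modulus
- // LCG, bit k of the state has period at most 2^(k+1), so the low-order bits
- // cycle far too quickly to be useful.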
-unsigned short __kmp_get_random(kmp_info_t *thread) { - unsigned x = thread->th.th_x; - unsigned short r = x >> 16; - - thread->th.th_x = x * thread->th.th_a + 1; - - KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n", - thread->th.th_info.ds.ds_tid, r)); - - return r; -} -//-------------------------------------------------------- -// __kmp_init_random: Initialize a random number generator -void __kmp_init_random(kmp_info_t *thread) { - unsigned seed = thread->th.th_info.ds.ds_tid; - - thread->th.th_a = - __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))]; - thread->th.th_x = (seed + 1) * thread->th.th_a + 1; - KA_TRACE(30, - ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a)); -} - -#if KMP_OS_WINDOWS -/* reclaim array entries for root threads that are already dead, returns number - * reclaimed */ -static int __kmp_reclaim_dead_roots(void) { - int i, r = 0; - - for (i = 0; i < __kmp_threads_capacity; ++i) { - if (KMP_UBER_GTID(i) && - !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) && - !__kmp_root[i] - ->r.r_active) { // AC: reclaim only roots that died in non-active state - r += __kmp_unregister_root_other_thread(i); - } - } - return r; -} -#endif - -/* This function attempts to create free entries in __kmp_threads and - __kmp_root, and returns the number of free entries generated. - - For Windows* OS static library, the first mechanism used is to reclaim array - entries for root threads that are already dead. - - On all platforms, expansion is attempted on the arrays __kmp_threads and - __kmp_root, with appropriate update to __kmp_threads_capacity. Array - capacity is increased by doubling with clipping to __kmp_tp_capacity, if - threadprivate cache array has been created. Synchronization with - __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock. - - After any dead root reclamation, if the clipping value allows array expansion - to result in the generation of a total of nNeed free slots, the function does - that expansion. If not, nothing is done beyond the possible initial root - thread reclamation. - - If any argument is negative, the behavior is undefined. */ -static int __kmp_expand_threads(int nNeed) { - int added = 0; - int minimumRequiredCapacity; - int newCapacity; - kmp_info_t **newThreads; - kmp_root_t **newRoot; - -// All calls to __kmp_expand_threads should be under __kmp_forkjoin_lock, so -// resizing __kmp_threads does not need additional protection if foreign -// threads are present. - -#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB - /* only for Windows static library */ - /* reclaim array entries for root threads that are already dead */ - added = __kmp_reclaim_dead_roots(); - - if (nNeed) { - nNeed -= added; - if (nNeed < 0) - nNeed = 0; - } -#endif - if (nNeed <= 0) - return added; - - // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. If - // __kmp_max_nth is set to some value less than __kmp_sys_max_nth by the - // user via KMP_DEVICE_THREAD_LIMIT, then __kmp_threads_capacity may become - // > __kmp_max_nth in one of two ways: - // - // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0] - // may not be reused by another thread, so we may need to increase - // __kmp_threads_capacity to __kmp_max_nth + 1. - // - // 2) New foreign root(s) are encountered. We always register new foreign - // roots. 
This may cause a smaller # of threads to be allocated at - // subsequent parallel regions, but the worker threads hang around (and - // eventually go to sleep) and need slots in the __kmp_threads[] array. - // - // Anyway, that is the reason for moving the check to see if - // __kmp_max_nth was exceeded into __kmp_reserve_threads() - // instead of having it performed here. -BB - - KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity); - - /* compute expansion headroom to check if we can expand */ - if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) { - /* possible expansion too small -- give up */ - return added; - } - minimumRequiredCapacity = __kmp_threads_capacity + nNeed; - - newCapacity = __kmp_threads_capacity; - do { - newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1) - : __kmp_sys_max_nth; - } while (newCapacity < minimumRequiredCapacity); - newThreads = (kmp_info_t **)__kmp_allocate( - (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE); - newRoot = - (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity); - KMP_MEMCPY(newThreads, __kmp_threads, - __kmp_threads_capacity * sizeof(kmp_info_t *)); - KMP_MEMCPY(newRoot, __kmp_root, - __kmp_threads_capacity * sizeof(kmp_root_t *)); - - kmp_info_t **temp_threads = __kmp_threads; - *(kmp_info_t * *volatile *)&__kmp_threads = newThreads; - *(kmp_root_t * *volatile *)&__kmp_root = newRoot; - __kmp_free(temp_threads); - added += newCapacity - __kmp_threads_capacity; - *(volatile int *)&__kmp_threads_capacity = newCapacity; - - if (newCapacity > __kmp_tp_capacity) { - __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock); - if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) { - __kmp_threadprivate_resize_cache(newCapacity); - } else { // increase __kmp_tp_capacity to correspond with kmp_threads size - *(volatile int *)&__kmp_tp_capacity = newCapacity; - } - __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock); - } - - return added; -} - -/* Register the current thread as a root thread and obtain our gtid. We must - have the __kmp_initz_lock held at this point. Argument TRUE only if we are - the thread that calls from __kmp_do_serial_initialize() */ -int __kmp_register_root(int initial_thread) { - kmp_info_t *root_thread; - kmp_root_t *root; - int gtid; - int capacity; - __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); - KA_TRACE(20, ("__kmp_register_root: entered\n")); - KMP_MB(); - - /* 2007-03-02: - If the initial thread did not invoke the OpenMP RTL yet, and this thread - is not an initial one, the "__kmp_all_nth >= __kmp_threads_capacity" - condition does not work as expected -- it may return false (that means - there is at least one empty slot in the __kmp_threads array), but it is - possible the only free slot is #0, which is reserved for the initial - thread and so cannot be used for this one. The following code works - around this bug. - - However, the right solution seems to be not reserving slot #0 for the - initial thread because: - (1) there is no magic in slot #0, - (2) we cannot detect the initial thread reliably (the first thread which - does serial initialization may not be a real initial thread). 
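-
-   Hence the workaround below: when we are not the initial thread and slot
-   #0 is still empty, the local capacity is decremented by one so the
-   reserved slot is not counted as usable.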
- */ - capacity = __kmp_threads_capacity; - if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) { - --capacity; - } - - /* see if there are too many threads */ - if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) { - if (__kmp_tp_cached) { - __kmp_fatal(KMP_MSG(CantRegisterNewThread), - KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity), - KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null); - } else { - __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads), - __kmp_msg_null); - } - } - - /* find an available thread slot */ - /* Don't reassign the zero slot since we need that to only be used by initial - thread */ - for (gtid = (initial_thread ? 0 : 1); TCR_PTR(__kmp_threads[gtid]) != NULL; - gtid++) - ; - KA_TRACE(1, - ("__kmp_register_root: found slot in threads array: T#%d\n", gtid)); - KMP_ASSERT(gtid < __kmp_threads_capacity); - - /* update global accounting */ - __kmp_all_nth++; - TCW_4(__kmp_nth, __kmp_nth + 1); - - // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low - // numbers of procs, and method #2 (keyed API call) for higher numbers. - if (__kmp_adjust_gtid_mode) { - if (__kmp_all_nth >= __kmp_tls_gtid_min) { - if (TCR_4(__kmp_gtid_mode) != 2) { - TCW_4(__kmp_gtid_mode, 2); - } - } else { - if (TCR_4(__kmp_gtid_mode) != 1) { - TCW_4(__kmp_gtid_mode, 1); - } - } - } - -#ifdef KMP_ADJUST_BLOCKTIME - /* Adjust blocktime to zero if necessary */ - /* Middle initialization might not have occurred yet */ - if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) { - if (__kmp_nth > __kmp_avail_proc) { - __kmp_zero_bt = TRUE; - } - } -#endif /* KMP_ADJUST_BLOCKTIME */ - - /* setup this new hierarchy */ - if (!(root = __kmp_root[gtid])) { - root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t)); - KMP_DEBUG_ASSERT(!root->r.r_root_team); - } - -#if KMP_STATS_ENABLED - // Initialize stats as soon as possible (right after gtid assignment). 
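- // (The per-thread stats node pushed here is keyed by the gtid chosen
- // above, which is why this cannot happen any earlier.)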
- __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid); - __kmp_stats_thread_ptr->startLife(); - KMP_SET_THREAD_STATE(SERIAL_REGION); - KMP_INIT_PARTITIONED_TIMERS(OMP_serial); -#endif - __kmp_initialize_root(root); - - /* setup new root thread structure */ - if (root->r.r_uber_thread) { - root_thread = root->r.r_uber_thread; - } else { - root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t)); - if (__kmp_storage_map) { - __kmp_print_thread_storage_map(root_thread, gtid); - } - root_thread->th.th_info.ds.ds_gtid = gtid; -#if OMPT_SUPPORT - root_thread->th.ompt_thread_info.thread_data = ompt_data_none; -#endif - root_thread->th.th_root = root; - if (__kmp_env_consistency_check) { - root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid); - } -#if USE_FAST_MEMORY - __kmp_initialize_fast_memory(root_thread); -#endif /* USE_FAST_MEMORY */ - -#if KMP_USE_BGET - KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL); - __kmp_initialize_bget(root_thread); -#endif - __kmp_init_random(root_thread); // Initialize random number generator - } - - /* setup the serial team held in reserve by the root thread */ - if (!root_thread->th.th_serial_team) { - kmp_internal_control_t r_icvs = __kmp_get_global_icvs(); - KF_TRACE(10, ("__kmp_register_root: before serial_team\n")); - root_thread->th.th_serial_team = - __kmp_allocate_team(root, 1, 1, -#if OMPT_SUPPORT - ompt_data_none, // root parallel id -#endif -#if OMP_40_ENABLED - proc_bind_default, -#endif - &r_icvs, 0 USE_NESTED_HOT_ARG(NULL)); - } - KMP_ASSERT(root_thread->th.th_serial_team); - KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n", - root_thread->th.th_serial_team)); - - /* drop root_thread into place */ - TCW_SYNC_PTR(__kmp_threads[gtid], root_thread); - - root->r.r_root_team->t.t_threads[0] = root_thread; - root->r.r_hot_team->t.t_threads[0] = root_thread; - root_thread->th.th_serial_team->t.t_threads[0] = root_thread; - // AC: the team created in reserve, not for execution (it is unused for now). - root_thread->th.th_serial_team->t.t_serialized = 0; - root->r.r_uber_thread = root_thread; - - /* initialize the thread, get it ready to go */ - __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid); - TCW_4(__kmp_init_gtid, TRUE); - - /* prepare the master thread for get_gtid() */ - __kmp_gtid_set_specific(gtid); - -#if USE_ITT_BUILD - __kmp_itt_thread_name(gtid); -#endif /* USE_ITT_BUILD */ - -#ifdef KMP_TDATA_GTID - __kmp_gtid = gtid; -#endif - __kmp_create_worker(gtid, root_thread, __kmp_stksize); - KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid); - - KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, " - "plain=%u\n", - gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team), - root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE, - KMP_INIT_BARRIER_STATE)); - { // Initialize barrier data. 
- int b; - for (b = 0; b < bs_last_barrier; ++b) { - root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE; -#if USE_DEBUGGER - root_thread->th.th_bar[b].bb.b_worker_arrived = 0; -#endif - } - } - KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived == - KMP_INIT_BARRIER_STATE); - -#if KMP_AFFINITY_SUPPORTED -#if OMP_40_ENABLED - root_thread->th.th_current_place = KMP_PLACE_UNDEFINED; - root_thread->th.th_new_place = KMP_PLACE_UNDEFINED; - root_thread->th.th_first_place = KMP_PLACE_UNDEFINED; - root_thread->th.th_last_place = KMP_PLACE_UNDEFINED; -#endif - if (TCR_4(__kmp_init_middle)) { - __kmp_affinity_set_init_mask(gtid, TRUE); - } -#endif /* KMP_AFFINITY_SUPPORTED */ -#if OMP_50_ENABLED - root_thread->th.th_def_allocator = __kmp_def_allocator; - root_thread->th.th_prev_level = 0; - root_thread->th.th_prev_num_threads = 1; -#endif - - __kmp_root_counter++; - -#if OMPT_SUPPORT - if (!initial_thread && ompt_enabled.enabled) { - - kmp_info_t *root_thread = ompt_get_thread(); - - ompt_set_thread_state(root_thread, ompt_state_overhead); - - if (ompt_enabled.ompt_callback_thread_begin) { - ompt_callbacks.ompt_callback(ompt_callback_thread_begin)( - ompt_thread_initial, __ompt_get_thread_data_internal()); - } - ompt_data_t *task_data; - __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL); - if (ompt_enabled.ompt_callback_task_create) { - ompt_callbacks.ompt_callback(ompt_callback_task_create)( - NULL, NULL, task_data, ompt_task_initial, 0, NULL); - // initial task has nothing to return to - } - - ompt_set_thread_state(root_thread, ompt_state_work_serial); - } -#endif - - KMP_MB(); - __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); - - return gtid; -} - -#if KMP_NESTED_HOT_TEAMS -static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level, - const int max_level) { - int i, n, nth; - kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams; - if (!hot_teams || !hot_teams[level].hot_team) { - return 0; - } - KMP_DEBUG_ASSERT(level < max_level); - kmp_team_t *team = hot_teams[level].hot_team; - nth = hot_teams[level].hot_team_nth; - n = nth - 1; // master is not freed - if (level < max_level - 1) { - for (i = 0; i < nth; ++i) { - kmp_info_t *th = team->t.t_threads[i]; - n += __kmp_free_hot_teams(root, th, level + 1, max_level); - if (i > 0 && th->th.th_hot_teams) { - __kmp_free(th->th.th_hot_teams); - th->th.th_hot_teams = NULL; - } - } - } - __kmp_free_team(root, team, NULL); - return n; -} -#endif - -// Resets a root thread and clears its root and hot teams. -// Returns the number of __kmp_threads entries directly and indirectly freed. -static int __kmp_reset_root(int gtid, kmp_root_t *root) { - kmp_team_t *root_team = root->r.r_root_team; - kmp_team_t *hot_team = root->r.r_hot_team; - int n = hot_team->t.t_nproc; - int i; - - KMP_DEBUG_ASSERT(!root->r.r_active); - - root->r.r_root_team = NULL; - root->r.r_hot_team = NULL; - // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team - // before the call to __kmp_free_team(). 
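- // The order below matters: free the (distinct) root team first, then any
- // nested hot teams, and only then the hot team itself.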
- __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL)); -#if KMP_NESTED_HOT_TEAMS - if (__kmp_hot_teams_max_level > - 0) { // need to free nested hot teams and their threads if any - for (i = 0; i < hot_team->t.t_nproc; ++i) { - kmp_info_t *th = hot_team->t.t_threads[i]; - if (__kmp_hot_teams_max_level > 1) { - n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level); - } - if (th->th.th_hot_teams) { - __kmp_free(th->th.th_hot_teams); - th->th.th_hot_teams = NULL; - } - } - } -#endif - __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL)); - - // Before we can reap the thread, we need to make certain that all other - // threads in the teams that had this root as ancestor have stopped trying to - // steal tasks. - if (__kmp_tasking_mode != tskm_immediate_exec) { - __kmp_wait_to_unref_task_teams(); - } - -#if KMP_OS_WINDOWS - /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */ - KA_TRACE( - 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC - "\n", - (LPVOID) & (root->r.r_uber_thread->th), - root->r.r_uber_thread->th.th_info.ds.ds_thread)); - __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread); -#endif /* KMP_OS_WINDOWS */ - -#if OMPT_SUPPORT - if (ompt_enabled.ompt_callback_thread_end) { - ompt_callbacks.ompt_callback(ompt_callback_thread_end)( - &(root->r.r_uber_thread->th.ompt_thread_info.thread_data)); - } -#endif - - TCW_4(__kmp_nth, - __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth. - root->r.r_cg_nthreads--; - - __kmp_reap_thread(root->r.r_uber_thread, 1); - - // We cannot put the root thread into __kmp_thread_pool, so we have to reap - // it instead of freeing it. - root->r.r_uber_thread = NULL; - /* mark root as no longer in use */ - root->r.r_begin = FALSE; - - return n; -} - -void __kmp_unregister_root_current_thread(int gtid) { - KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid)); - /* this lock should be ok, since unregister_root_current_thread is never - called during an abort, only during a normal close. 
furthermore, if you - have the forkjoin lock, you should never try to get the initz lock */ - __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); - if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) { - KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, " - "exiting T#%d\n", - gtid)); - __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); - return; - } - kmp_root_t *root = __kmp_root[gtid]; - - KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]); - KMP_ASSERT(KMP_UBER_GTID(gtid)); - KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root); - KMP_ASSERT(root->r.r_active == FALSE); - - KMP_MB(); - -#if OMP_45_ENABLED - kmp_info_t *thread = __kmp_threads[gtid]; - kmp_team_t *team = thread->th.th_team; - kmp_task_team_t *task_team = thread->th.th_task_team; - - // we need to wait for the proxy tasks before finishing the thread - if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) { -#if OMPT_SUPPORT - // the runtime is shutting down so we won't report any events - thread->th.ompt_thread_info.state = ompt_state_undefined; -#endif - __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL)); - } -#endif - - __kmp_reset_root(gtid, root); - - /* free up this thread slot */ - __kmp_gtid_set_specific(KMP_GTID_DNE); -#ifdef KMP_TDATA_GTID - __kmp_gtid = KMP_GTID_DNE; -#endif - - KMP_MB(); - KC_TRACE(10, - ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid)); - - __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); -} - -#if KMP_OS_WINDOWS -/* __kmp_forkjoin_lock must be already held - Unregisters a root thread that is not the current thread. Returns the number - of __kmp_threads entries freed as a result. */ -static int __kmp_unregister_root_other_thread(int gtid) { - kmp_root_t *root = __kmp_root[gtid]; - int r; - - KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid)); - KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]); - KMP_ASSERT(KMP_UBER_GTID(gtid)); - KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root); - KMP_ASSERT(root->r.r_active == FALSE); - - r = __kmp_reset_root(gtid, root); - KC_TRACE(10, - ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid)); - return r; -} -#endif - -#if KMP_DEBUG -void __kmp_task_info() { - - kmp_int32 gtid = __kmp_entry_gtid(); - kmp_int32 tid = __kmp_tid_from_gtid(gtid); - kmp_info_t *this_thr = __kmp_threads[gtid]; - kmp_team_t *steam = this_thr->th.th_serial_team; - kmp_team_t *team = this_thr->th.th_team; - - __kmp_printf( - "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p " - "ptask=%p\n", - gtid, tid, this_thr, team, steam, this_thr->th.th_current_task, - team->t.t_implicit_task_taskdata[tid].td_parent); -} -#endif // KMP_DEBUG - -/* TODO optimize with one big memclr, take out what isn't needed, split - responsibility to workers as much as possible, and delay initialization of - features as much as possible */ -static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team, - int tid, int gtid) { - /* this_thr->th.th_info.ds.ds_gtid is setup in - kmp_allocate_thread/create_worker. 
- this_thr->th.th_serial_team is setup in __kmp_allocate_thread */ - kmp_info_t *master = team->t.t_threads[0]; - KMP_DEBUG_ASSERT(this_thr != NULL); - KMP_DEBUG_ASSERT(this_thr->th.th_serial_team); - KMP_DEBUG_ASSERT(team); - KMP_DEBUG_ASSERT(team->t.t_threads); - KMP_DEBUG_ASSERT(team->t.t_dispatch); - KMP_DEBUG_ASSERT(master); - KMP_DEBUG_ASSERT(master->th.th_root); - - KMP_MB(); - - TCW_SYNC_PTR(this_thr->th.th_team, team); - - this_thr->th.th_info.ds.ds_tid = tid; - this_thr->th.th_set_nproc = 0; - if (__kmp_tasking_mode != tskm_immediate_exec) - // When tasking is possible, threads are not safe to reap until they are - // done tasking; this will be set when tasking code is exited in wait - this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP; - else // no tasking --> always safe to reap - this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; -#if OMP_40_ENABLED - this_thr->th.th_set_proc_bind = proc_bind_default; -#if KMP_AFFINITY_SUPPORTED - this_thr->th.th_new_place = this_thr->th.th_current_place; -#endif -#endif - this_thr->th.th_root = master->th.th_root; - - /* setup the thread's cache of the team structure */ - this_thr->th.th_team_nproc = team->t.t_nproc; - this_thr->th.th_team_master = master; - this_thr->th.th_team_serialized = team->t.t_serialized; - TCW_PTR(this_thr->th.th_sleep_loc, NULL); - - KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata); - - KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n", - tid, gtid, this_thr, this_thr->th.th_current_task)); - - __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr, - team, tid, TRUE); - - KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n", - tid, gtid, this_thr, this_thr->th.th_current_task)); - // TODO: Initialize ICVs from parent; GEH - isn't that already done in - // __kmp_initialize_team()? - - /* TODO no worksharing in speculative threads */ - this_thr->th.th_dispatch = &team->t.t_dispatch[tid]; - - this_thr->th.th_local.this_construct = 0; - - if (!this_thr->th.th_pri_common) { - this_thr->th.th_pri_common = - (struct common_table *)__kmp_allocate(sizeof(struct common_table)); - if (__kmp_storage_map) { - __kmp_print_storage_map_gtid( - gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1, - sizeof(struct common_table), "th_%d.th_pri_common\n", gtid); - } - this_thr->th.th_pri_head = NULL; - } - - /* Initialize dynamic dispatch */ - { - volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch; - // Use team max_nproc since this will never change for the team. - size_t disp_size = - sizeof(dispatch_private_info_t) * - (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers); - KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, - team->t.t_max_nproc)); - KMP_ASSERT(dispatch); - KMP_DEBUG_ASSERT(team->t.t_dispatch); - KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]); - - dispatch->th_disp_index = 0; -#if OMP_45_ENABLED - dispatch->th_doacross_buf_idx = 0; -#endif - if (!dispatch->th_disp_buffer) { - dispatch->th_disp_buffer = - (dispatch_private_info_t *)__kmp_allocate(disp_size); - - if (__kmp_storage_map) { - __kmp_print_storage_map_gtid( - gtid, &dispatch->th_disp_buffer[0], - &dispatch->th_disp_buffer[team->t.t_max_nproc == 1 - ? 
1 - : __kmp_dispatch_num_buffers], - disp_size, "th_%d.th_dispatch.th_disp_buffer " - "(team_%d.t_dispatch[%d].th_disp_buffer)", - gtid, team->t.t_id, gtid); - } - } else { - memset(&dispatch->th_disp_buffer[0], '\0', disp_size); - } - - dispatch->th_dispatch_pr_current = 0; - dispatch->th_dispatch_sh_current = 0; - - dispatch->th_deo_fcn = 0; /* ORDERED */ - dispatch->th_dxo_fcn = 0; /* END ORDERED */ - } - - this_thr->th.th_next_pool = NULL; - - if (!this_thr->th.th_task_state_memo_stack) { - size_t i; - this_thr->th.th_task_state_memo_stack = - (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8)); - this_thr->th.th_task_state_top = 0; - this_thr->th.th_task_state_stack_sz = 4; - for (i = 0; i < this_thr->th.th_task_state_stack_sz; - ++i) // zero init the stack - this_thr->th.th_task_state_memo_stack[i] = 0; - } - - KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here); - KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0); - - KMP_MB(); -} - -/* allocate a new thread for the requesting team. this is only called from - within a forkjoin critical section. we will first try to get an available - thread from the thread pool. if none is available, we will fork a new one - assuming we are able to create a new one. this should be assured, as the - caller should check on this first. */ -kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team, - int new_tid) { - kmp_team_t *serial_team; - kmp_info_t *new_thr; - int new_gtid; - - KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid())); - KMP_DEBUG_ASSERT(root && team); -#if !KMP_NESTED_HOT_TEAMS - KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid())); -#endif - KMP_MB(); - - /* first, try to get one from the thread pool */ - if (__kmp_thread_pool) { - - new_thr = CCAST(kmp_info_t *, __kmp_thread_pool); - __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool; - if (new_thr == __kmp_thread_pool_insert_pt) { - __kmp_thread_pool_insert_pt = NULL; - } - TCW_4(new_thr->th.th_in_pool, FALSE); - // Don't touch th_active_in_pool or th_active. - // The worker thread adjusts those flags as it sleeps/awakens. - __kmp_thread_pool_nth--; - - KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n", - __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid)); - KMP_ASSERT(!new_thr->th.th_team); - KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity); - KMP_DEBUG_ASSERT(__kmp_thread_pool_nth >= 0); - - /* setup the thread structure */ - __kmp_initialize_info(new_thr, team, new_tid, - new_thr->th.th_info.ds.ds_gtid); - KMP_DEBUG_ASSERT(new_thr->th.th_serial_team); - - TCW_4(__kmp_nth, __kmp_nth + 1); - root->r.r_cg_nthreads++; - - new_thr->th.th_task_state = 0; - new_thr->th.th_task_state_top = 0; - new_thr->th.th_task_state_stack_sz = 4; - -#ifdef KMP_ADJUST_BLOCKTIME - /* Adjust blocktime back to zero if necessary */ - /* Middle initialization might not have occurred yet */ - if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) { - if (__kmp_nth > __kmp_avail_proc) { - __kmp_zero_bt = TRUE; - } - } -#endif /* KMP_ADJUST_BLOCKTIME */ - -#if KMP_DEBUG - // If thread entered pool via __kmp_free_thread, wait_flag should != - // KMP_BARRIER_PARENT_FLAG. 
- int b; - kmp_balign_t *balign = new_thr->th.th_bar; - for (b = 0; b < bs_last_barrier; ++b) - KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); -#endif - - KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n", - __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid)); - - KMP_MB(); - return new_thr; - } - - /* no, we'll fork a new one */ - KMP_ASSERT(__kmp_nth == __kmp_all_nth); - KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity); - -#if KMP_USE_MONITOR - // If this is the first worker thread the RTL is creating, then also - // launch the monitor thread. We try to do this as early as possible. - if (!TCR_4(__kmp_init_monitor)) { - __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock); - if (!TCR_4(__kmp_init_monitor)) { - KF_TRACE(10, ("before __kmp_create_monitor\n")); - TCW_4(__kmp_init_monitor, 1); - __kmp_create_monitor(&__kmp_monitor); - KF_TRACE(10, ("after __kmp_create_monitor\n")); -#if KMP_OS_WINDOWS - // AC: wait until the monitor has started. This is a fix for CQ232808. - // The reason is that if the library is loaded/unloaded in a loop with - // small (parallel) work in between, then there is a high probability that - // the monitor thread starts only after the library shutdown. At shutdown - // it is too late to cope with the problem, because when the master is in - // DllMain (process detach) the monitor has no chance to start (it is - // blocked), and the master has no means to inform the monitor that the - // library has gone, because all the memory which the monitor can access - // is going to be released/reset. - while (TCR_4(__kmp_init_monitor) < 2) { - KMP_YIELD(TRUE); - } - KF_TRACE(10, ("after monitor thread has started\n")); -#endif - } - __kmp_release_bootstrap_lock(&__kmp_monitor_lock); - } -#endif - - KMP_MB(); - for (new_gtid = 1; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid) { - KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity); - } - - /* allocate space for it. */ - new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t)); - - TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr); - - if (__kmp_storage_map) { - __kmp_print_thread_storage_map(new_thr, new_gtid); - } - - // add the reserve serialized team, initialized from the team's master thread - { - kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team); - KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n")); - new_thr->th.th_serial_team = serial_team = - (kmp_team_t *)__kmp_allocate_team(root, 1, 1, -#if OMPT_SUPPORT - ompt_data_none, // root parallel id -#endif -#if OMP_40_ENABLED - proc_bind_default, -#endif - &r_icvs, 0 USE_NESTED_HOT_ARG(NULL)); - } - KMP_ASSERT(serial_team); - serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for - // execution (it is unused for now). 
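- // Same reserve-team convention as in __kmp_register_root() above:
- // t_serialized == 0 marks a serial team that is parked, not executing.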
- serial_team->t.t_threads[0] = new_thr; - KF_TRACE(10, - ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n", - new_thr)); - - /* setup the thread structures */ - __kmp_initialize_info(new_thr, team, new_tid, new_gtid); - -#if USE_FAST_MEMORY - __kmp_initialize_fast_memory(new_thr); -#endif /* USE_FAST_MEMORY */ - -#if KMP_USE_BGET - KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL); - __kmp_initialize_bget(new_thr); -#endif - - __kmp_init_random(new_thr); // Initialize random number generator - - /* Initialize these only once when thread is grabbed for a team allocation */ - KA_TRACE(20, - ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n", - __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE)); - - int b; - kmp_balign_t *balign = new_thr->th.th_bar; - for (b = 0; b < bs_last_barrier; ++b) { - balign[b].bb.b_go = KMP_INIT_BARRIER_STATE; - balign[b].bb.team = NULL; - balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING; - balign[b].bb.use_oncore_barrier = 0; - } - - new_thr->th.th_spin_here = FALSE; - new_thr->th.th_next_waiting = 0; -#if KMP_OS_UNIX - new_thr->th.th_blocking = false; -#endif - -#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED - new_thr->th.th_current_place = KMP_PLACE_UNDEFINED; - new_thr->th.th_new_place = KMP_PLACE_UNDEFINED; - new_thr->th.th_first_place = KMP_PLACE_UNDEFINED; - new_thr->th.th_last_place = KMP_PLACE_UNDEFINED; -#endif -#if OMP_50_ENABLED - new_thr->th.th_def_allocator = __kmp_def_allocator; - new_thr->th.th_prev_level = 0; - new_thr->th.th_prev_num_threads = 1; -#endif - - TCW_4(new_thr->th.th_in_pool, FALSE); - new_thr->th.th_active_in_pool = FALSE; - TCW_4(new_thr->th.th_active, TRUE); - - /* adjust the global counters */ - __kmp_all_nth++; - __kmp_nth++; - - root->r.r_cg_nthreads++; - - // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low - // numbers of procs, and method #2 (keyed API call) for higher numbers. - if (__kmp_adjust_gtid_mode) { - if (__kmp_all_nth >= __kmp_tls_gtid_min) { - if (TCR_4(__kmp_gtid_mode) != 2) { - TCW_4(__kmp_gtid_mode, 2); - } - } else { - if (TCR_4(__kmp_gtid_mode) != 1) { - TCW_4(__kmp_gtid_mode, 1); - } - } - } - -#ifdef KMP_ADJUST_BLOCKTIME - /* Adjust blocktime back to zero if necessary */ - /* Middle initialization might not have occurred yet */ - if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) { - if (__kmp_nth > __kmp_avail_proc) { - __kmp_zero_bt = TRUE; - } - } -#endif /* KMP_ADJUST_BLOCKTIME */ - - /* actually fork it and create the new worker thread */ - KF_TRACE( - 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr)); - __kmp_create_worker(new_gtid, new_thr, __kmp_stksize); - KF_TRACE(10, - ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr)); - - KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), - new_gtid)); - KMP_MB(); - return new_thr; -} - -/* Reinitialize team for reuse. - The hot team code calls this at every fork barrier, so EPCC barrier - tests are extremely sensitive to changes in it, esp. writes to the team - struct, which cause a cache invalidation in all threads. - IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! 
*/ -static void __kmp_reinitialize_team(kmp_team_t *team, - kmp_internal_control_t *new_icvs, - ident_t *loc) { - KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n", - team->t.t_threads[0], team)); - KMP_DEBUG_ASSERT(team && new_icvs); - KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc); - KMP_CHECK_UPDATE(team->t.t_ident, loc); - - KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID()); - // Copy ICVs to the master thread's implicit taskdata - __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE); - copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs); - - KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n", - team->t.t_threads[0], team)); -} - -/* Initialize the team data structure. - This assumes the t_threads and t_max_nproc are already set. - Also, we don't touch the arguments */ -static void __kmp_initialize_team(kmp_team_t *team, int new_nproc, - kmp_internal_control_t *new_icvs, - ident_t *loc) { - KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team)); - - /* verify */ - KMP_DEBUG_ASSERT(team); - KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc); - KMP_DEBUG_ASSERT(team->t.t_threads); - KMP_MB(); - - team->t.t_master_tid = 0; /* not needed */ - /* team->t.t_master_bar; not needed */ - team->t.t_serialized = new_nproc > 1 ? 0 : 1; - team->t.t_nproc = new_nproc; - - /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */ - team->t.t_next_pool = NULL; - /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess - * up hot team */ - - TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */ - team->t.t_invoke = NULL; /* not needed */ - - // TODO???: team->t.t_max_active_levels = new_max_active_levels; - team->t.t_sched.sched = new_icvs->sched.sched; - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - team->t.t_fp_control_saved = FALSE; /* not needed */ - team->t.t_x87_fpu_control_word = 0; /* not needed */ - team->t.t_mxcsr = 0; /* not needed */ -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - - team->t.t_construct = 0; - - team->t.t_ordered.dt.t_value = 0; - team->t.t_master_active = FALSE; - - memset(&team->t.t_taskq, '\0', sizeof(kmp_taskq_t)); - -#ifdef KMP_DEBUG - team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */ -#endif -#if KMP_OS_WINDOWS - team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */ -#endif - - team->t.t_control_stack_top = NULL; - - __kmp_reinitialize_team(team, new_icvs, loc); - - KMP_MB(); - KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team)); -} - -#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED -/* Sets full mask for thread and returns old mask, no changes to structures. */ -static void -__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) { - if (KMP_AFFINITY_CAPABLE()) { - int status; - if (old_mask != NULL) { - status = __kmp_get_system_affinity(old_mask, TRUE); - int error = errno; - if (status != 0) { - __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error), - __kmp_msg_null); - } - } - __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE); - } -} -#endif - -#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED - -// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism. -// It calculates the worker + master thread's partition based upon the parent -// thread's partition, and binds each worker to a thread in their partition. -// The master thread's partition should already include its current binding. 
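- // Worked example for the proc_bind_close overflow path below (n_th >
- // n_places): with n_th = 7 threads over n_places = 3 places starting at
- // the master's place, S = 7 / 3 = 2, rem = 1 and gap = 3 / 1 = 3, so the
- // places receive 3, 2 and 2 threads respectively: the first 'rem' gap
- // boundaries take one extra thread each, and the rotation ends back at
- // masters_place (checked by the KMP_DEBUG_ASSERT at the end of that branch).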
-static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
- // Copy the master thread's place partition to the team struct
- kmp_info_t *master_th = team->t.t_threads[0];
- KMP_DEBUG_ASSERT(master_th != NULL);
- kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
- int first_place = master_th->th.th_first_place;
- int last_place = master_th->th.th_last_place;
- int masters_place = master_th->th.th_current_place;
- team->t.t_first_place = first_place;
- team->t.t_last_place = last_place;
-
- KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
- "bound to place %d partition = [%d,%d]\n",
- proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
- team->t.t_id, masters_place, first_place, last_place));
-
- switch (proc_bind) {
-
- case proc_bind_default:
- // serial teams might have the proc_bind policy set to proc_bind_default. It
- // doesn't matter, as we don't rebind master thread for any proc_bind policy
- KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
- break;
-
- case proc_bind_master: {
- int f;
- int n_th = team->t.t_nproc;
- for (f = 1; f < n_th; f++) {
- kmp_info_t *th = team->t.t_threads[f];
- KMP_DEBUG_ASSERT(th != NULL);
- th->th.th_first_place = first_place;
- th->th.th_last_place = last_place;
- th->th.th_new_place = masters_place;
-#if OMP_50_ENABLED
- if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
- team->t.t_display_affinity != 1) {
- team->t.t_display_affinity = 1;
- }
-#endif
-
- KA_TRACE(100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d "
- "partition = [%d,%d]\n",
- __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
- f, masters_place, first_place, last_place));
- }
- } break;
-
- case proc_bind_close: {
- int f;
- int n_th = team->t.t_nproc;
- int n_places;
- if (first_place <= last_place) {
- n_places = last_place - first_place + 1;
- } else {
- n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
- }
- if (n_th <= n_places) {
- int place = masters_place;
- for (f = 1; f < n_th; f++) {
- kmp_info_t *th = team->t.t_threads[f];
- KMP_DEBUG_ASSERT(th != NULL);
-
- if (place == last_place) {
- place = first_place;
- } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
- place = 0;
- } else {
- place++;
- }
- th->th.th_first_place = first_place;
- th->th.th_last_place = last_place;
- th->th.th_new_place = place;
-#if OMP_50_ENABLED
- if (__kmp_display_affinity && place != th->th.th_current_place &&
- team->t.t_display_affinity != 1) {
- team->t.t_display_affinity = 1;
- }
-#endif
-
- KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
- "partition = [%d,%d]\n",
- __kmp_gtid_from_thread(team->t.t_threads[f]),
- team->t.t_id, f, place, first_place, last_place));
- }
- } else {
- int S, rem, gap, s_count;
- S = n_th / n_places;
- s_count = 0;
- rem = n_th - (S * n_places);
- gap = rem > 0 ? 
n_places / rem : n_places;
- int place = masters_place;
- int gap_ct = gap;
- for (f = 0; f < n_th; f++) {
- kmp_info_t *th = team->t.t_threads[f];
- KMP_DEBUG_ASSERT(th != NULL);
-
- th->th.th_first_place = first_place;
- th->th.th_last_place = last_place;
- th->th.th_new_place = place;
-#if OMP_50_ENABLED
- if (__kmp_display_affinity && place != th->th.th_current_place &&
- team->t.t_display_affinity != 1) {
- team->t.t_display_affinity = 1;
- }
-#endif
- s_count++;
-
- if ((s_count == S) && rem && (gap_ct == gap)) {
- // do nothing, add an extra thread to place on next iteration
- } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
- // we added an extra thread to this place; move to next place
- if (place == last_place) {
- place = first_place;
- } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
- place = 0;
- } else {
- place++;
- }
- s_count = 0;
- gap_ct = 1;
- rem--;
- } else if (s_count == S) { // place full; don't add extra
- if (place == last_place) {
- place = first_place;
- } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
- place = 0;
- } else {
- place++;
- }
- gap_ct++;
- s_count = 0;
- }
-
- KA_TRACE(100,
- ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
- "partition = [%d,%d]\n",
- __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
- th->th.th_new_place, first_place, last_place));
- }
- KMP_DEBUG_ASSERT(place == masters_place);
- }
- } break;
-
- case proc_bind_spread: {
- int f;
- int n_th = team->t.t_nproc;
- int n_places;
- int thidx;
- if (first_place <= last_place) {
- n_places = last_place - first_place + 1;
- } else {
- n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
- }
- if (n_th <= n_places) {
- int place = -1;
-
- if (n_places != static_cast<int>(__kmp_affinity_num_masks)) {
- int S = n_places / n_th;
- int s_count, rem, gap, gap_ct;
-
- place = masters_place;
- rem = n_places - n_th * S;
- gap = rem ? n_th / rem : 1;
- gap_ct = gap;
- thidx = n_th;
- if (update_master_only == 1)
- thidx = 1;
- for (f = 0; f < thidx; f++) {
- kmp_info_t *th = team->t.t_threads[f];
- KMP_DEBUG_ASSERT(th != NULL);
-
- th->th.th_first_place = place;
- th->th.th_new_place = place;
-#if OMP_50_ENABLED
- if (__kmp_display_affinity && place != th->th.th_current_place &&
- team->t.t_display_affinity != 1) {
- team->t.t_display_affinity = 1;
- }
-#endif
- s_count = 1;
- while (s_count < S) {
- if (place == last_place) {
- place = first_place;
- } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
- place = 0;
- } else {
- place++;
- }
- s_count++;
- }
- if (rem && (gap_ct == gap)) {
- if (place == last_place) {
- place = first_place;
- } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
- place = 0;
- } else {
- place++;
- }
- rem--;
- gap_ct = 0;
- }
- th->th.th_last_place = place;
- gap_ct++;
-
- if (place == last_place) {
- place = first_place;
- } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
- place = 0;
- } else {
- place++;
- }
-
- KA_TRACE(100,
- ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
- "partition = [%d,%d], __kmp_affinity_num_masks: %u\n",
- __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
- f, th->th.th_new_place, th->th.th_first_place,
- th->th.th_last_place, __kmp_affinity_num_masks));
- }
- } else {
- /* Given a uniform space of available computation places, create T
- partitions of round(P/T) size and put the threads into the first
- place of each partition. 
*/
- double current = static_cast<double>(masters_place);
- double spacing =
- (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
- int first, last;
- kmp_info_t *th;
-
- thidx = n_th + 1;
- if (update_master_only == 1)
- thidx = 1;
- for (f = 0; f < thidx; f++) {
- first = static_cast<int>(current);
- last = static_cast<int>(current + spacing) - 1;
- KMP_DEBUG_ASSERT(last >= first);
- if (first >= n_places) {
- if (masters_place) {
- first -= n_places;
- last -= n_places;
- if (first == (masters_place + 1)) {
- KMP_DEBUG_ASSERT(f == n_th);
- first--;
- }
- if (last == masters_place) {
- KMP_DEBUG_ASSERT(f == (n_th - 1));
- last--;
- }
- } else {
- KMP_DEBUG_ASSERT(f == n_th);
- first = 0;
- last = 0;
- }
- }
- if (last >= n_places) {
- last = (n_places - 1);
- }
- place = first;
- current += spacing;
- if (f < n_th) {
- KMP_DEBUG_ASSERT(0 <= first);
- KMP_DEBUG_ASSERT(n_places > first);
- KMP_DEBUG_ASSERT(0 <= last);
- KMP_DEBUG_ASSERT(n_places > last);
- KMP_DEBUG_ASSERT(last_place >= first_place);
- th = team->t.t_threads[f];
- KMP_DEBUG_ASSERT(th);
- th->th.th_first_place = first;
- th->th.th_new_place = place;
- th->th.th_last_place = last;
-#if OMP_50_ENABLED
- if (__kmp_display_affinity && place != th->th.th_current_place &&
- team->t.t_display_affinity != 1) {
- team->t.t_display_affinity = 1;
- }
-#endif
- KA_TRACE(100,
- ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
- "partition = [%d,%d], spacing = %.4f\n",
- __kmp_gtid_from_thread(team->t.t_threads[f]),
- team->t.t_id, f, th->th.th_new_place,
- th->th.th_first_place, th->th.th_last_place, spacing));
- }
- }
- }
- KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
- } else {
- int S, rem, gap, s_count;
- S = n_th / n_places;
- s_count = 0;
- rem = n_th - (S * n_places);
- gap = rem > 0 ? n_places / rem : n_places;
- int place = masters_place;
- int gap_ct = gap;
- thidx = n_th;
- if (update_master_only == 1)
- thidx = 1;
- for (f = 0; f < thidx; f++) {
- kmp_info_t *th = team->t.t_threads[f];
- KMP_DEBUG_ASSERT(th != NULL);
-
- th->th.th_first_place = place;
- th->th.th_last_place = place;
- th->th.th_new_place = place;
-#if OMP_50_ENABLED
- if (__kmp_display_affinity && place != th->th.th_current_place &&
- team->t.t_display_affinity != 1) {
- team->t.t_display_affinity = 1;
- }
-#endif
- s_count++;
-
- if ((s_count == S) && rem && (gap_ct == gap)) {
- // do nothing, add an extra thread to place on next iteration
- } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
- // we added an extra thread to this place; move on to next place
- if (place == last_place) {
- place = first_place;
- } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
- place = 0;
- } else {
- place++;
- }
- s_count = 0;
- gap_ct = 1;
- rem--;
- } else if (s_count == S) { // place is full; don't add extra thread
- if (place == last_place) {
- place = first_place;
- } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
- place = 0;
- } else {
- place++;
- }
- gap_ct++;
- s_count = 0;
- }
-
- KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
- "partition = [%d,%d]\n",
- __kmp_gtid_from_thread(team->t.t_threads[f]),
- team->t.t_id, f, th->th.th_new_place,
- th->th.th_first_place, th->th.th_last_place));
- }
- KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
- }
- } break;
-
- default:
- break;
- }
-
- KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
-}
-
-#endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */
-
-/* allocate a new team data structure to use. 
take one off of the free pool if
- available */
-kmp_team_t *
-__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
-#if OMPT_SUPPORT
- ompt_data_t ompt_parallel_data,
-#endif
-#if OMP_40_ENABLED
- kmp_proc_bind_t new_proc_bind,
-#endif
- kmp_internal_control_t *new_icvs,
- int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
- KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
- int f;
- kmp_team_t *team;
- int use_hot_team = !root->r.r_active;
- int level = 0;
-
- KA_TRACE(20, ("__kmp_allocate_team: called\n"));
- KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
- KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
- KMP_MB();
-
-#if KMP_NESTED_HOT_TEAMS
- kmp_hot_team_ptr_t *hot_teams;
- if (master) {
- team = master->th.th_team;
- level = team->t.t_active_level;
- if (master->th.th_teams_microtask) { // in teams construct?
- if (master->th.th_teams_size.nteams > 1 &&
- ( // #teams > 1
- team->t.t_pkfn ==
- (microtask_t)__kmp_teams_master || // inner fork of the teams
- master->th.th_teams_level <
- team->t.t_level)) { // or nested parallel inside the teams
- ++level; // don't increment if #teams==1, or for the outer fork of the
- // teams; increment otherwise
- }
- }
- hot_teams = master->th.th_hot_teams;
- if (level < __kmp_hot_teams_max_level && hot_teams &&
- hot_teams[level]
- .hot_team) { // hot team has already been allocated for given level
- use_hot_team = 1;
- } else {
- use_hot_team = 0;
- }
- }
-#endif
- // Optimization to use a "hot" team
- if (use_hot_team && new_nproc > 1) {
- KMP_DEBUG_ASSERT(new_nproc == max_nproc);
-#if KMP_NESTED_HOT_TEAMS
- team = hot_teams[level].hot_team;
-#else
- team = root->r.r_hot_team;
-#endif
-#if KMP_DEBUG
- if (__kmp_tasking_mode != tskm_immediate_exec) {
- KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
- "task_team[1] = %p before reinit\n",
- team->t.t_task_team[0], team->t.t_task_team[1]));
- }
-#endif
-
- // Has the number of threads changed?
- /* Let's assume the most common case is that the number of threads is
- unchanged, and put that case first. 
*/ - if (team->t.t_nproc == new_nproc) { // Check changes in number of threads - KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n")); - // This case can mean that omp_set_num_threads() was called and the hot - // team size was already reduced, so we check the special flag - if (team->t.t_size_changed == -1) { - team->t.t_size_changed = 1; - } else { - KMP_CHECK_UPDATE(team->t.t_size_changed, 0); - } - - // TODO???: team->t.t_max_active_levels = new_max_active_levels; - kmp_r_sched_t new_sched = new_icvs->sched; - // set master's schedule as new run-time schedule - KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched); - - __kmp_reinitialize_team(team, new_icvs, - root->r.r_uber_thread->th.th_ident); - - KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0, - team->t.t_threads[0], team)); - __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0); - -#if OMP_40_ENABLED -#if KMP_AFFINITY_SUPPORTED - if ((team->t.t_size_changed == 0) && - (team->t.t_proc_bind == new_proc_bind)) { - if (new_proc_bind == proc_bind_spread) { - __kmp_partition_places( - team, 1); // add flag to update only master for spread - } - KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: " - "proc_bind = %d, partition = [%d,%d]\n", - team->t.t_id, new_proc_bind, team->t.t_first_place, - team->t.t_last_place)); - } else { - KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); - __kmp_partition_places(team); - } -#else - KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); -#endif /* KMP_AFFINITY_SUPPORTED */ -#endif /* OMP_40_ENABLED */ - } else if (team->t.t_nproc > new_nproc) { - KA_TRACE(20, - ("__kmp_allocate_team: decreasing hot team thread count to %d\n", - new_nproc)); - - team->t.t_size_changed = 1; -#if KMP_NESTED_HOT_TEAMS - if (__kmp_hot_teams_mode == 0) { - // AC: saved number of threads should correspond to team's value in this - // mode, can be bigger in mode 1, when hot team has threads in reserve - KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc); - hot_teams[level].hot_team_nth = new_nproc; -#endif // KMP_NESTED_HOT_TEAMS - /* release the extra threads we don't need any more */ - for (f = new_nproc; f < team->t.t_nproc; f++) { - KMP_DEBUG_ASSERT(team->t.t_threads[f]); - if (__kmp_tasking_mode != tskm_immediate_exec) { - // When decreasing team size, threads no longer in the team should - // unref task team. 
- team->t.t_threads[f]->th.th_task_team = NULL; - } - __kmp_free_thread(team->t.t_threads[f]); - team->t.t_threads[f] = NULL; - } -#if KMP_NESTED_HOT_TEAMS - } // (__kmp_hot_teams_mode == 0) - else { - // When keeping extra threads in team, switch threads to wait on own - // b_go flag - for (f = new_nproc; f < team->t.t_nproc; ++f) { - KMP_DEBUG_ASSERT(team->t.t_threads[f]); - kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar; - for (int b = 0; b < bs_last_barrier; ++b) { - if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) { - balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG; - } - KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0); - } - } - } -#endif // KMP_NESTED_HOT_TEAMS - team->t.t_nproc = new_nproc; - // TODO???: team->t.t_max_active_levels = new_max_active_levels; - KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched); - __kmp_reinitialize_team(team, new_icvs, - root->r.r_uber_thread->th.th_ident); - - /* update the remaining threads */ - for (f = 0; f < new_nproc; ++f) { - team->t.t_threads[f]->th.th_team_nproc = new_nproc; - } - // restore the current task state of the master thread: should be the - // implicit task - KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0, - team->t.t_threads[0], team)); - - __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0); - -#ifdef KMP_DEBUG - for (f = 0; f < team->t.t_nproc; f++) { - KMP_DEBUG_ASSERT(team->t.t_threads[f] && - team->t.t_threads[f]->th.th_team_nproc == - team->t.t_nproc); - } -#endif - -#if OMP_40_ENABLED - KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); -#if KMP_AFFINITY_SUPPORTED - __kmp_partition_places(team); -#endif -#endif - } else { // team->t.t_nproc < new_nproc -#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED - kmp_affin_mask_t *old_mask; - if (KMP_AFFINITY_CAPABLE()) { - KMP_CPU_ALLOC(old_mask); - } -#endif - - KA_TRACE(20, - ("__kmp_allocate_team: increasing hot team thread count to %d\n", - new_nproc)); - - team->t.t_size_changed = 1; - -#if KMP_NESTED_HOT_TEAMS - int avail_threads = hot_teams[level].hot_team_nth; - if (new_nproc < avail_threads) - avail_threads = new_nproc; - kmp_info_t **other_threads = team->t.t_threads; - for (f = team->t.t_nproc; f < avail_threads; ++f) { - // Adjust barrier data of reserved threads (if any) of the team - // Other data will be set in __kmp_initialize_info() below. 
- int b;
- kmp_balign_t *balign = other_threads[f]->th.th_bar;
- for (b = 0; b < bs_last_barrier; ++b) {
- balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
- KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
-#if USE_DEBUGGER
- balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
-#endif
- }
- }
- if (hot_teams[level].hot_team_nth >= new_nproc) {
- // we have all needed threads in reserve, no need to allocate any
- // this is only possible in mode 1, cannot have reserved threads in mode 0
- KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
- team->t.t_nproc = new_nproc; // just get reserved threads involved
- } else {
- // we may have some threads in reserve, but not enough
- team->t.t_nproc =
- hot_teams[level]
- .hot_team_nth; // get reserved threads involved if any
- hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
-#endif // KMP_NESTED_HOT_TEAMS
- if (team->t.t_max_nproc < new_nproc) {
- /* reallocate larger arrays */
- __kmp_reallocate_team_arrays(team, new_nproc);
- __kmp_reinitialize_team(team, new_icvs, NULL);
- }
-
-#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
- /* Temporarily set full mask for master thread before creation of
- workers. The reason is that workers inherit the affinity from master,
- so if a lot of workers are created on the single core quickly, they
- don't get a chance to set their own affinity for a long time. */
- __kmp_set_thread_affinity_mask_full_tmp(old_mask);
-#endif
-
- /* allocate new threads for the hot team */
- for (f = team->t.t_nproc; f < new_nproc; f++) {
- kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
- KMP_DEBUG_ASSERT(new_worker);
- team->t.t_threads[f] = new_worker;
-
- KA_TRACE(20,
- ("__kmp_allocate_team: team %d init T#%d arrived: "
- "join=%llu, plain=%llu\n",
- team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
- team->t.t_bar[bs_forkjoin_barrier].b_arrived,
- team->t.t_bar[bs_plain_barrier].b_arrived));
-
- { // Initialize barrier data for new threads.
- int b;
- kmp_balign_t *balign = new_worker->th.th_bar;
- for (b = 0; b < bs_last_barrier; ++b) {
- balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
- KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
- KMP_BARRIER_PARENT_FLAG);
-#if USE_DEBUGGER
- balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
-#endif
- }
- }
- }
-
-#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
- if (KMP_AFFINITY_CAPABLE()) {
- /* Restore initial master thread's affinity mask */
- __kmp_set_system_affinity(old_mask, TRUE);
- KMP_CPU_FREE(old_mask);
- }
-#endif
-#if KMP_NESTED_HOT_TEAMS
- } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
-#endif // KMP_NESTED_HOT_TEAMS
- /* make sure everyone is synchronized */
- int old_nproc = team->t.t_nproc; // save old value and use to update only
- // new threads below
- __kmp_initialize_team(team, new_nproc, new_icvs,
- root->r.r_uber_thread->th.th_ident);
-
- /* reinitialize the threads */
- KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
- for (f = 0; f < team->t.t_nproc; ++f)
- __kmp_initialize_info(team->t.t_threads[f], team, f,
- __kmp_gtid_from_tid(f, team));
- if (level) { // set th_task_state for new threads in nested hot team
- // __kmp_initialize_info() no longer zeroes th_task_state, so we should
- // only need to set the th_task_state for the new threads. th_task_state
- // for master thread will not be accurate until after this in
- // __kmp_fork_call(), so we look to the master's memo_stack to get the
- // correct value. 
- for (f = old_nproc; f < team->t.t_nproc; ++f) - team->t.t_threads[f]->th.th_task_state = - team->t.t_threads[0]->th.th_task_state_memo_stack[level]; - } else { // set th_task_state for new threads in non-nested hot team - int old_state = - team->t.t_threads[0]->th.th_task_state; // copy master's state - for (f = old_nproc; f < team->t.t_nproc; ++f) - team->t.t_threads[f]->th.th_task_state = old_state; - } - -#ifdef KMP_DEBUG - for (f = 0; f < team->t.t_nproc; ++f) { - KMP_DEBUG_ASSERT(team->t.t_threads[f] && - team->t.t_threads[f]->th.th_team_nproc == - team->t.t_nproc); - } -#endif - -#if OMP_40_ENABLED - KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); -#if KMP_AFFINITY_SUPPORTED - __kmp_partition_places(team); -#endif -#endif - } // Check changes in number of threads - -#if OMP_40_ENABLED - kmp_info_t *master = team->t.t_threads[0]; - if (master->th.th_teams_microtask) { - for (f = 1; f < new_nproc; ++f) { - // propagate teams construct specific info to workers - kmp_info_t *thr = team->t.t_threads[f]; - thr->th.th_teams_microtask = master->th.th_teams_microtask; - thr->th.th_teams_level = master->th.th_teams_level; - thr->th.th_teams_size = master->th.th_teams_size; - } - } -#endif /* OMP_40_ENABLED */ -#if KMP_NESTED_HOT_TEAMS - if (level) { - // Sync barrier state for nested hot teams, not needed for outermost hot - // team. - for (f = 1; f < new_nproc; ++f) { - kmp_info_t *thr = team->t.t_threads[f]; - int b; - kmp_balign_t *balign = thr->th.th_bar; - for (b = 0; b < bs_last_barrier; ++b) { - balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; - KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); -#if USE_DEBUGGER - balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; -#endif - } - } - } -#endif // KMP_NESTED_HOT_TEAMS - - /* reallocate space for arguments if necessary */ - __kmp_alloc_argv_entries(argc, team, TRUE); - KMP_CHECK_UPDATE(team->t.t_argc, argc); - // The hot team re-uses the previous task team, - // if untouched during the previous release->gather phase. - - KF_TRACE(10, (" hot_team = %p\n", team)); - -#if KMP_DEBUG - if (__kmp_tasking_mode != tskm_immediate_exec) { - KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p " - "task_team[1] = %p after reinit\n", - team->t.t_task_team[0], team->t.t_task_team[1])); - } -#endif - -#if OMPT_SUPPORT - __ompt_team_assign_id(team, ompt_parallel_data); -#endif - - KMP_MB(); - - return team; - } - - /* next, let's try to take one from the team pool */ - KMP_MB(); - for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) { - /* TODO: consider resizing undersized teams instead of reaping them, now - that we have a resizing mechanism */ - if (team->t.t_max_nproc >= max_nproc) { - /* take this team from the team pool */ - __kmp_team_pool = team->t.t_next_pool; - - /* setup the team for fresh use */ - __kmp_initialize_team(team, new_nproc, new_icvs, NULL); - - KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and " - "task_team[1] %p to NULL\n", - &team->t.t_task_team[0], &team->t.t_task_team[1])); - team->t.t_task_team[0] = NULL; - team->t.t_task_team[1] = NULL; - - /* reallocate space for arguments if necessary */ - __kmp_alloc_argv_entries(argc, team, TRUE); - KMP_CHECK_UPDATE(team->t.t_argc, argc); - - KA_TRACE( - 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n", - team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE)); - { // Initialize barrier data. 
- int b; - for (b = 0; b < bs_last_barrier; ++b) { - team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE; -#if USE_DEBUGGER - team->t.t_bar[b].b_master_arrived = 0; - team->t.t_bar[b].b_team_arrived = 0; -#endif - } - } - -#if OMP_40_ENABLED - team->t.t_proc_bind = new_proc_bind; -#endif - - KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n", - team->t.t_id)); - -#if OMPT_SUPPORT - __ompt_team_assign_id(team, ompt_parallel_data); -#endif - - KMP_MB(); - - return team; - } - - /* reap team if it is too small, then loop back and check the next one */ - // not sure if this is wise, but, will be redone during the hot-teams - // rewrite. - /* TODO: Use technique to find the right size hot-team, don't reap them */ - team = __kmp_reap_team(team); - __kmp_team_pool = team; - } - - /* nothing available in the pool, no matter, make a new team! */ - KMP_MB(); - team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t)); - - /* and set it up */ - team->t.t_max_nproc = max_nproc; - /* NOTE well, for some reason allocating one big buffer and dividing it up - seems to really hurt performance a lot on the P4, so, let's not use this */ - __kmp_allocate_team_arrays(team, max_nproc); - - KA_TRACE(20, ("__kmp_allocate_team: making a new team\n")); - __kmp_initialize_team(team, new_nproc, new_icvs, NULL); - - KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] " - "%p to NULL\n", - &team->t.t_task_team[0], &team->t.t_task_team[1])); - team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes - // memory, no need to duplicate - team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes - // memory, no need to duplicate - - if (__kmp_storage_map) { - __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc); - } - - /* allocate space for arguments */ - __kmp_alloc_argv_entries(argc, team, FALSE); - team->t.t_argc = argc; - - KA_TRACE(20, - ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n", - team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE)); - { // Initialize barrier data. - int b; - for (b = 0; b < bs_last_barrier; ++b) { - team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE; -#if USE_DEBUGGER - team->t.t_bar[b].b_master_arrived = 0; - team->t.t_bar[b].b_team_arrived = 0; -#endif - } - } - -#if OMP_40_ENABLED - team->t.t_proc_bind = new_proc_bind; -#endif - -#if OMPT_SUPPORT - __ompt_team_assign_id(team, ompt_parallel_data); - team->t.ompt_serialized_team_info = NULL; -#endif - - KMP_MB(); - - KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n", - team->t.t_id)); - - return team; -} - -/* TODO implement hot-teams at all levels */ -/* TODO implement lazy thread release on demand (disband request) */ - -/* free the team. return it to the team pool. release all the threads - * associated with it */ -void __kmp_free_team(kmp_root_t *root, - kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) { - int f; - KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), - team->t.t_id)); - - /* verify state */ - KMP_DEBUG_ASSERT(root); - KMP_DEBUG_ASSERT(team); - KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc); - KMP_DEBUG_ASSERT(team->t.t_threads); - - int use_hot_team = team == root->r.r_hot_team; -#if KMP_NESTED_HOT_TEAMS - int level; - kmp_hot_team_ptr_t *hot_teams; - if (master) { - level = team->t.t_active_level - 1; - if (master->th.th_teams_microtask) { // in teams construct? 
- if (master->th.th_teams_size.nteams > 1) { - ++level; // level was not increased in teams construct for - // team_of_masters - } - if (team->t.t_pkfn != (microtask_t)__kmp_teams_master && - master->th.th_teams_level == team->t.t_level) { - ++level; // level was not increased in teams construct for - // team_of_workers before the parallel - } // team->t.t_level will be increased inside parallel - } - hot_teams = master->th.th_hot_teams; - if (level < __kmp_hot_teams_max_level) { - KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team); - use_hot_team = 1; - } - } -#endif // KMP_NESTED_HOT_TEAMS - - /* team is done working */ - TCW_SYNC_PTR(team->t.t_pkfn, - NULL); // Important for Debugging Support Library. -#if KMP_OS_WINDOWS - team->t.t_copyin_counter = 0; // init counter for possible reuse -#endif - // Do not reset pointer to parent team to NULL for hot teams. - - /* if we are non-hot team, release our threads */ - if (!use_hot_team) { - if (__kmp_tasking_mode != tskm_immediate_exec) { - // Wait for threads to reach reapable state - for (f = 1; f < team->t.t_nproc; ++f) { - KMP_DEBUG_ASSERT(team->t.t_threads[f]); - kmp_info_t *th = team->t.t_threads[f]; - volatile kmp_uint32 *state = &th->th.th_reap_state; - while (*state != KMP_SAFE_TO_REAP) { -#if KMP_OS_WINDOWS - // On Windows a thread can be killed at any time, check this - DWORD ecode; - if (!__kmp_is_thread_alive(th, &ecode)) { - *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread - break; - } -#endif - // first check if thread is sleeping - kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th); - if (fl.is_sleeping()) - fl.resume(__kmp_gtid_from_thread(th)); - KMP_CPU_PAUSE(); - } - } - - // Delete task teams - int tt_idx; - for (tt_idx = 0; tt_idx < 2; ++tt_idx) { - kmp_task_team_t *task_team = team->t.t_task_team[tt_idx]; - if (task_team != NULL) { - for (f = 0; f < team->t.t_nproc; - ++f) { // Have all threads unref task teams - team->t.t_threads[f]->th.th_task_team = NULL; - } - KA_TRACE( - 20, - ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n", - __kmp_get_gtid(), task_team, team->t.t_id)); -#if KMP_NESTED_HOT_TEAMS - __kmp_free_task_team(master, task_team); -#endif - team->t.t_task_team[tt_idx] = NULL; - } - } - } - - // Reset pointer to parent team only for non-hot teams. - team->t.t_parent = NULL; - team->t.t_level = 0; - team->t.t_active_level = 0; - - /* free the worker threads */ - for (f = 1; f < team->t.t_nproc; ++f) { - KMP_DEBUG_ASSERT(team->t.t_threads[f]); - __kmp_free_thread(team->t.t_threads[f]); - team->t.t_threads[f] = NULL; - } - - /* put the team back in the team pool */ - /* TODO limit size of team pool, call reap_team if pool too large */ - team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool); - __kmp_team_pool = (volatile kmp_team_t *)team; - } - - KMP_MB(); -} - -/* reap the team. destroy it, reclaim all its resources and free its memory */ -kmp_team_t *__kmp_reap_team(kmp_team_t *team) { - kmp_team_t *next_pool = team->t.t_next_pool; - - KMP_DEBUG_ASSERT(team); - KMP_DEBUG_ASSERT(team->t.t_dispatch); - KMP_DEBUG_ASSERT(team->t.t_disp_buffer); - KMP_DEBUG_ASSERT(team->t.t_threads); - KMP_DEBUG_ASSERT(team->t.t_argv); - - /* TODO clean the threads that are a part of this? */ - - /* free stuff */ - __kmp_free_team_arrays(team); - if (team->t.t_argv != &team->t.t_inline_argv[0]) - __kmp_free((void *)team->t.t_argv); - __kmp_free(team); - - KMP_MB(); - return next_pool; -} - -// Free the thread. Don't reap it, just place it on the pool of available -// threads. 
-
-//
-// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
-// binding for the affinity mechanism to be useful.
-//
-// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
-// However, we want to avoid a potential performance problem by always
-// scanning through the list to find the correct point at which to insert
-// the thread (potential N**2 behavior). To do this we keep track of the
-// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
-// With single-level parallelism, threads will always be added to the tail
-// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
-// parallelism, all bets are off and we may need to scan through the entire
-// free list.
-//
-// This change also has a potentially large performance benefit, for some
-// applications. Previously, as threads were freed from the hot team, they
-// would be placed back on the free list in inverse order. If the hot team
-// grew back to its original size, then the freed thread would be placed
-// back on the hot team in reverse order. This could cause bad cache
-// locality problems on programs where the size of the hot team regularly
-// grew and shrunk.
-//
-// Now, for single-level parallelism, the OMP tid is always == gtid.
-void __kmp_free_thread(kmp_info_t *this_th) {
- int gtid;
- kmp_info_t **scan;
- kmp_root_t *root = this_th->th.th_root;
-
- KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
- __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
-
- KMP_DEBUG_ASSERT(this_th);
-
- // When moving thread to pool, switch thread to wait on own b_go flag, and
- // an uninitialized (NULL) team.
- int b;
- kmp_balign_t *balign = this_th->th.th_bar;
- for (b = 0; b < bs_last_barrier; ++b) {
- if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
- balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
- balign[b].bb.team = NULL;
- balign[b].bb.leaf_kids = 0;
- }
- this_th->th.th_task_state = 0;
- this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
-
- /* put thread back on the free pool */
- TCW_PTR(this_th->th.th_team, NULL);
- TCW_PTR(this_th->th.th_root, NULL);
- TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */
-
- /* If the implicit task assigned to this thread can be used by other threads
- * -> multiple threads can share the data and try to free the task at
- * __kmp_reap_thread at exit. This duplicate use of the task data can happen
- * with higher probability when hot team is disabled but can occur even when
- * the hot team is enabled */
- __kmp_free_implicit_task(this_th);
- this_th->th.th_current_task = NULL;
-
- // If the __kmp_thread_pool_insert_pt is already past the new insert
- // point, then we need to re-scan the entire list.
- gtid = this_th->th.th_info.ds.ds_gtid;
- if (__kmp_thread_pool_insert_pt != NULL) {
- KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
- if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
- __kmp_thread_pool_insert_pt = NULL;
- }
- }
-
- // Scan down the list to find the place to insert the thread.
- // scan is the address of a link in the list, possibly the address of
- // __kmp_thread_pool itself.
- //
- // In the absence of nested parallelism, the for loop will have 0 iterations. 
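
Before the scan itself, a minimal standalone sketch of the sorted-insert-with-cached-hint scheme the comment above describes; the node type and globals here are illustrative stand-ins for kmp_info_t, __kmp_thread_pool, and __kmp_thread_pool_insert_pt:

    struct node {
      int gtid;
      node *next;
    };

    static node *pool = nullptr;      // stand-in for __kmp_thread_pool
    static node *insert_pt = nullptr; // stand-in for __kmp_thread_pool_insert_pt

    void pool_insert(node *n) {
      // If the cached hint is already past the new key, fall back to a
      // scan from the head of the list.
      if (insert_pt != nullptr && insert_pt->gtid > n->gtid)
        insert_pt = nullptr;
      node **scan = insert_pt ? &insert_pt->next : &pool;
      // Walk forward to the first element with a larger gtid; with
      // single-level parallelism this loop runs zero iterations.
      while (*scan != nullptr && (*scan)->gtid < n->gtid)
        scan = &(*scan)->next;
      n->next = *scan; // splice in, keeping the list sorted by gtid
      *scan = n;
      insert_pt = n;   // remember the insertion point for next time
    }

The hint makes the common case (appending at the tail) constant time while still degrading gracefully to a linear scan under nested parallelism.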
- if (__kmp_thread_pool_insert_pt != NULL) { - scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool); - } else { - scan = CCAST(kmp_info_t **, &__kmp_thread_pool); - } - for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid); - scan = &((*scan)->th.th_next_pool)) - ; - - // Insert the new element on the list, and set __kmp_thread_pool_insert_pt - // to its address. - TCW_PTR(this_th->th.th_next_pool, *scan); - __kmp_thread_pool_insert_pt = *scan = this_th; - KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) || - (this_th->th.th_info.ds.ds_gtid < - this_th->th.th_next_pool->th.th_info.ds.ds_gtid)); - TCW_4(this_th->th.th_in_pool, TRUE); - __kmp_thread_pool_nth++; - - TCW_4(__kmp_nth, __kmp_nth - 1); - root->r.r_cg_nthreads--; - -#ifdef KMP_ADJUST_BLOCKTIME - /* Adjust blocktime back to user setting or default if necessary */ - /* Middle initialization might never have occurred */ - if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) { - KMP_DEBUG_ASSERT(__kmp_avail_proc > 0); - if (__kmp_nth <= __kmp_avail_proc) { - __kmp_zero_bt = FALSE; - } - } -#endif /* KMP_ADJUST_BLOCKTIME */ - - KMP_MB(); -} - -/* ------------------------------------------------------------------------ */ - -void *__kmp_launch_thread(kmp_info_t *this_thr) { - int gtid = this_thr->th.th_info.ds.ds_gtid; - /* void *stack_data;*/ - kmp_team_t *(*volatile pteam); - - KMP_MB(); - KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid)); - - if (__kmp_env_consistency_check) { - this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid); // ATT: Memory leak? - } - -#if OMPT_SUPPORT - ompt_data_t *thread_data; - if (ompt_enabled.enabled) { - thread_data = &(this_thr->th.ompt_thread_info.thread_data); - *thread_data = ompt_data_none; - - this_thr->th.ompt_thread_info.state = ompt_state_overhead; - this_thr->th.ompt_thread_info.wait_id = 0; - this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0); - if (ompt_enabled.ompt_callback_thread_begin) { - ompt_callbacks.ompt_callback(ompt_callback_thread_begin)( - ompt_thread_worker, thread_data); - } - } -#endif - -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - this_thr->th.ompt_thread_info.state = ompt_state_idle; - } -#endif - /* This is the place where threads wait for work */ - while (!TCR_4(__kmp_global.g.g_done)) { - KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]); - KMP_MB(); - - /* wait for work to do */ - KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid)); - - /* No tid yet since not part of a team */ - __kmp_fork_barrier(gtid, KMP_GTID_DNE); - -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - this_thr->th.ompt_thread_info.state = ompt_state_overhead; - } -#endif - - pteam = (kmp_team_t * (*))(&this_thr->th.th_team); - - /* have we been allocated? 
*/
- if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
- /* we were just woken up, so run our new task */
- if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
- int rc;
- KA_TRACE(20,
- ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
- gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
- (*pteam)->t.t_pkfn));
-
- updateHWFPControl(*pteam);
-
-#if OMPT_SUPPORT
- if (ompt_enabled.enabled) {
- this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
- }
-#endif
-
- rc = (*pteam)->t.t_invoke(gtid);
- KMP_ASSERT(rc);
-
- KMP_MB();
- KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
- gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
- (*pteam)->t.t_pkfn));
- }
-#if OMPT_SUPPORT
- if (ompt_enabled.enabled) {
- /* no frame set while outside task */
- __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
-
- this_thr->th.ompt_thread_info.state = ompt_state_overhead;
- }
-#endif
- /* join barrier after parallel region */
- __kmp_join_barrier(gtid);
- }
- }
- TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
-
-#if OMPT_SUPPORT
- if (ompt_enabled.ompt_callback_thread_end) {
- ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
- }
-#endif
-
- this_thr->th.th_task_team = NULL;
- /* run the destructors for the threadprivate data for this thread */
- __kmp_common_destroy_gtid(gtid);
-
- KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
- KMP_MB();
- return this_thr;
-}
-
-/* ------------------------------------------------------------------------ */
-
-void __kmp_internal_end_dest(void *specific_gtid) {
-#if KMP_COMPILER_ICC
-#pragma warning(push)
-#pragma warning(disable : 810) // conversion from "void *" to "int" may lose
-// significant bits
-#endif
- // Make sure no significant bits are lost
- int gtid = (kmp_intptr_t)specific_gtid - 1;
-#if KMP_COMPILER_ICC
-#pragma warning(pop)
-#endif
-
- KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
- /* NOTE: the gtid is stored as gtid+1 in the thread-local-storage
- * this is because 0 is reserved for the nothing-stored case */
-
- /* josh: One reason for setting the gtid specific data even when it is being
- destroyed by pthread is to allow gtid lookup through thread specific data
- (__kmp_gtid_get_specific). Some of the code, especially stat code,
- that gets executed in the call to __kmp_internal_end_thread, actually
- gets the gtid through the thread specific data. Setting it here seems
- rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread
- to run smoothly.
- todo: get rid of this after we remove the dependence on
- __kmp_gtid_get_specific */
- if (gtid >= 0 && KMP_UBER_GTID(gtid))
- __kmp_gtid_set_specific(gtid);
-#ifdef KMP_TDATA_GTID
- __kmp_gtid = gtid;
-#endif
- __kmp_internal_end_thread(gtid);
-}
-
-#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
-
-// 2009-09-08 (lev): It looks like the destructor does not work. In simple test
-// cases destructors work perfectly, but in real libomp.so I have no evidence
-// it is ever called. However, the -fini linker option in makefile.mk works
-// fine. 
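
For reference, a tiny standalone illustration (a hedged example, not part of the runtime) of the two shutdown hooks this comment weighs: a GCC/Clang destructor attribute, and a plain function wired in with the linker's -fini option.

    #include <cstdio>

    // Runs when the shared object is unloaded (or at process exit), the same
    // mechanism __kmp_internal_end_dtor relies on below.
    __attribute__((destructor)) static void on_unload(void) {
      std::puts("destructor hook ran");
    }

    // Alternative: build with -Wl,-fini,my_fini so the dynamic linker calls
    // this at unload instead, the same mechanism as __kmp_internal_end_fini.
    extern "C" void my_fini(void) { std::puts("-fini hook ran"); }
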
-
-__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
- __kmp_internal_end_atexit();
-}
-
-void __kmp_internal_end_fini(void) { __kmp_internal_end_atexit(); }
-
-#endif
-
-/* [Windows] josh: when the atexit handler is called, there may still be more
- than one thread alive */
-void __kmp_internal_end_atexit(void) {
- KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
- /* [Windows]
- josh: ideally, we want to completely shutdown the library in this atexit
- handler, but stat code that depends on thread specific data for gtid fails
- because that data becomes unavailable at some point during the shutdown, so
- we call __kmp_internal_end_thread instead. We should eventually remove the
- dependency on __kmp_get_specific_gtid in the stat code and use
- __kmp_internal_end_library to cleanly shutdown the library.
-
- // TODO: Can some of this comment about GVS be removed?
- I suspect that the offending stat code is executed when the calling thread
- tries to clean up a dead root thread's data structures, resulting in GVS
- code trying to close the GVS structures for that thread, but since the stat
- code uses __kmp_get_specific_gtid to get the gtid with the assumption that
- the calling thread is cleaning up itself instead of another thread, it gets
- confused. This happens because allowing a thread to unregister and cleanup
- another thread is a recent modification for addressing an issue.
- Based on the current design (20050722), a thread may end up
- trying to unregister another thread only if thread death does not trigger
- the calling of __kmp_internal_end_thread. For Linux* OS, there is the
- thread specific data destructor function to detect thread death. For
- Windows dynamic, there is DllMain(THREAD_DETACH). For Windows static, there
- is nothing. Thus, the workaround is applicable only for Windows static
- stat library. */
- __kmp_internal_end_library(-1);
-#if KMP_OS_WINDOWS
- __kmp_close_console();
-#endif
-}
-
-static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
- // It is assumed __kmp_forkjoin_lock is acquired.
-
- int gtid;
-
- KMP_DEBUG_ASSERT(thread != NULL);
-
- gtid = thread->th.th_info.ds.ds_gtid;
-
- if (!is_root) {
-
- if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
- /* Assume the threads are at the fork barrier here */
- KA_TRACE(
- 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
- gtid));
- /* Need release fence here to prevent seg faults for tree forkjoin barrier
- * (GEH) */
- ANNOTATE_HAPPENS_BEFORE(thread);
- kmp_flag_64 flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
- __kmp_release_64(&flag);
- }
-
- // Terminate OS thread.
- __kmp_reap_worker(thread);
-
- // The thread was killed asynchronously. If it was actively
- // spinning in the thread pool, decrement the global count.
- //
- // There is a small timing hole here - if the worker thread was just waking
- // up after sleeping in the pool, had reset its th_active_in_pool flag but
- // not decremented the global counter __kmp_thread_pool_active_nth yet, then
- // the global counter might not get updated.
- //
- // Currently, this can only happen as the library is unloaded,
- // so there are no harmful side effects.
- if (thread->th.th_active_in_pool) {
- thread->th.th_active_in_pool = FALSE;
- KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
- KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
- }
-
- // Decrement # of [worker] threads in the pool. 
- KMP_DEBUG_ASSERT(__kmp_thread_pool_nth > 0);
- --__kmp_thread_pool_nth;
- }
-
- __kmp_free_implicit_task(thread);
-
-// Free the fast memory for tasking
-#if USE_FAST_MEMORY
- __kmp_free_fast_memory(thread);
-#endif /* USE_FAST_MEMORY */
-
- __kmp_suspend_uninitialize_thread(thread);
-
- KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
- TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
-
- --__kmp_all_nth;
-// __kmp_nth was decremented when thread is added to the pool.
-
-#ifdef KMP_ADJUST_BLOCKTIME
- /* Adjust blocktime back to user setting or default if necessary */
- /* Middle initialization might never have occurred */
- if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
- KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
- if (__kmp_nth <= __kmp_avail_proc) {
- __kmp_zero_bt = FALSE;
- }
- }
-#endif /* KMP_ADJUST_BLOCKTIME */
-
- /* free the memory being used */
- if (__kmp_env_consistency_check) {
- if (thread->th.th_cons) {
- __kmp_free_cons_stack(thread->th.th_cons);
- thread->th.th_cons = NULL;
- }
- }
-
- if (thread->th.th_pri_common != NULL) {
- __kmp_free(thread->th.th_pri_common);
- thread->th.th_pri_common = NULL;
- }
-
- if (thread->th.th_task_state_memo_stack != NULL) {
- __kmp_free(thread->th.th_task_state_memo_stack);
- thread->th.th_task_state_memo_stack = NULL;
- }
-
-#if KMP_USE_BGET
- if (thread->th.th_local.bget_data != NULL) {
- __kmp_finalize_bget(thread);
- }
-#endif
-
-#if KMP_AFFINITY_SUPPORTED
- if (thread->th.th_affin_mask != NULL) {
- KMP_CPU_FREE(thread->th.th_affin_mask);
- thread->th.th_affin_mask = NULL;
- }
-#endif /* KMP_AFFINITY_SUPPORTED */
-
-#if KMP_USE_HIER_SCHED
- if (thread->th.th_hier_bar_data != NULL) {
- __kmp_free(thread->th.th_hier_bar_data);
- thread->th.th_hier_bar_data = NULL;
- }
-#endif
-
- __kmp_reap_team(thread->th.th_serial_team);
- thread->th.th_serial_team = NULL;
- __kmp_free(thread);
-
- KMP_MB();
-
-} // __kmp_reap_thread
-
-static void __kmp_internal_end(void) {
- int i;
-
- /* First, unregister the library */
- __kmp_unregister_library();
-
-#if KMP_OS_WINDOWS
- /* In Win static library, we can't tell when a root actually dies, so we
- reclaim the data structures for any root threads that have died but not
- unregistered themselves, in order to shut down cleanly.
- In Win dynamic library we also can't tell when a thread dies. */
- __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of
-// dead roots
-#endif
-
- for (i = 0; i < __kmp_threads_capacity; i++)
- if (__kmp_root[i])
- if (__kmp_root[i]->r.r_active)
- break;
- KMP_MB(); /* Flush all pending memory write invalidates. */
- TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
-
- if (i < __kmp_threads_capacity) {
-#if KMP_USE_MONITOR
- // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- // Need to check that monitor was initialized before reaping it. If we are
- // called from __kmp_atfork_child (which sets __kmp_init_parallel = 0), then
- // __kmp_monitor will appear to contain valid data, but it is only valid in
- // the parent process, not the child.
- // New behavior (201008): instead of keying off of the flag
- // __kmp_init_parallel, the monitor thread creation is keyed off
- // of the new flag __kmp_init_monitor. 
- __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
- if (TCR_4(__kmp_init_monitor)) {
- __kmp_reap_monitor(&__kmp_monitor);
- TCW_4(__kmp_init_monitor, 0);
- }
- __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
- KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
-#endif // KMP_USE_MONITOR
- } else {
-/* TODO move this to cleanup code */
-#ifdef KMP_DEBUG
- /* make sure that everything has properly ended */
- for (i = 0; i < __kmp_threads_capacity; i++) {
- if (__kmp_root[i]) {
- // KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC:
- // there can be uber threads alive here
- KMP_ASSERT(!__kmp_root[i]->r.r_active); // TODO: can they be active?
- }
- }
-#endif
-
- KMP_MB();
-
- // Reap the worker threads.
- // This is valid for now, but be careful if threads are reaped sooner.
- while (__kmp_thread_pool != NULL) { // Loop thru all the threads in the pool.
- // Get the next thread from the pool.
- kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
- __kmp_thread_pool = thread->th.th_next_pool;
- // Reap it.
- KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
- thread->th.th_next_pool = NULL;
- thread->th.th_in_pool = FALSE;
- __kmp_reap_thread(thread, 0);
- }
- __kmp_thread_pool_insert_pt = NULL;
-
- // Reap teams.
- while (__kmp_team_pool != NULL) { // Loop thru all the teams in the pool.
- // Get the next team from the pool.
- kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
- __kmp_team_pool = team->t.t_next_pool;
- // Reap it.
- team->t.t_next_pool = NULL;
- __kmp_reap_team(team);
- }
-
- __kmp_reap_task_teams();
-
-#if KMP_OS_UNIX
- // Threads that are not reaped should not access any resources since they
- // are going to be deallocated soon, so the shutdown sequence should wait
- // until all threads either exit the final spin-waiting loop or begin
- // sleeping after the given blocktime.
- for (i = 0; i < __kmp_threads_capacity; i++) {
- kmp_info_t *thr = __kmp_threads[i];
- while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
- KMP_CPU_PAUSE();
- }
-#endif
-
- for (i = 0; i < __kmp_threads_capacity; ++i) {
- // TBD: Add some checking...
- // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
- }
-
- /* Make sure all threadprivate destructors get run by joining with all
- worker threads before resetting this flag */
- TCW_SYNC_4(__kmp_init_common, FALSE);
-
- KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"));
- KMP_MB();
-
-#if KMP_USE_MONITOR
- // See note above: One of the possible fixes for CQ138434 / CQ140126
- //
- // FIXME: push both code fragments down and CSE them?
- // push them into __kmp_cleanup() ?
- __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
- if (TCR_4(__kmp_init_monitor)) {
- __kmp_reap_monitor(&__kmp_monitor);
- TCW_4(__kmp_init_monitor, 0);
- }
- __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
- KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
-#endif
- } /* else !__kmp_global.t_active */
- TCW_4(__kmp_init_gtid, FALSE);
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- __kmp_cleanup();
-#if OMPT_SUPPORT
- ompt_fini();
-#endif
-}
-
-void __kmp_internal_end_library(int gtid_req) {
- /* if we have already cleaned up, don't try again, it wouldn't be pretty */
- /* this shouldn't be a race condition because __kmp_internal_end() is the
- only place to clear __kmp_serial_init */
- /* we'll check this later too, after we get the lock */
- // 2009-09-06: We do not set g_abort without setting g_done. This check looks
- // redundant, because the next check will work in any case. 
- if (__kmp_global.g.g_abort) { - KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n")); - /* TODO abort? */ - return; - } - if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) { - KA_TRACE(10, ("__kmp_internal_end_library: already finished\n")); - return; - } - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - /* find out who we are and what we should do */ - { - int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific(); - KA_TRACE( - 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req)); - if (gtid == KMP_GTID_SHUTDOWN) { - KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system " - "already shutdown\n")); - return; - } else if (gtid == KMP_GTID_MONITOR) { - KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not " - "registered, or system shutdown\n")); - return; - } else if (gtid == KMP_GTID_DNE) { - KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system " - "shutdown\n")); - /* we don't know who we are, but we may still shutdown the library */ - } else if (KMP_UBER_GTID(gtid)) { - /* unregister ourselves as an uber thread. gtid is no longer valid */ - if (__kmp_root[gtid]->r.r_active) { - __kmp_global.g.g_abort = -1; - TCW_SYNC_4(__kmp_global.g.g_done, TRUE); - KA_TRACE(10, - ("__kmp_internal_end_library: root still active, abort T#%d\n", - gtid)); - return; - } else { - KA_TRACE( - 10, - ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid)); - __kmp_unregister_root_current_thread(gtid); - } - } else { -/* worker threads may call this function through the atexit handler, if they - * call exit() */ -/* For now, skip the usual subsequent processing and just dump the debug buffer. - TODO: do a thorough shutdown instead */ -#ifdef DUMP_DEBUG_ON_EXIT - if (__kmp_debug_buf) - __kmp_dump_debug_buffer(); -#endif - return; - } - } - /* synchronize the termination process */ - __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); - - /* have we already finished */ - if (__kmp_global.g.g_abort) { - KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n")); - /* TODO abort? */ - __kmp_release_bootstrap_lock(&__kmp_initz_lock); - return; - } - if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) { - __kmp_release_bootstrap_lock(&__kmp_initz_lock); - return; - } - - /* We need this lock to enforce mutex between this reading of - __kmp_threads_capacity and the writing by __kmp_register_root. - Alternatively, we can use a counter of roots that is atomically updated by - __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and - __kmp_internal_end_*. */ - __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); - - /* now we can safely conduct the actual termination */ - __kmp_internal_end(); - - __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); - __kmp_release_bootstrap_lock(&__kmp_initz_lock); - - KA_TRACE(10, ("__kmp_internal_end_library: exit\n")); - -#ifdef DUMP_DEBUG_ON_EXIT - if (__kmp_debug_buf) - __kmp_dump_debug_buffer(); -#endif - -#if KMP_OS_WINDOWS - __kmp_close_console(); -#endif - - __kmp_fini_allocator(); - -} // __kmp_internal_end_library - -void __kmp_internal_end_thread(int gtid_req) { - int i; - - /* if we have already cleaned up, don't try again, it wouldn't be pretty */ - /* this shouldn't be a race condition because __kmp_internal_end() is the - * only place to clear __kmp_serial_init */ - /* we'll check this later too, after we get the lock */ - // 2009-09-06: We do not set g_abort without setting g_done. 
This check looks
- // redundant, because the next check will work in any case.
- if (__kmp_global.g.g_abort) {
- KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"));
- /* TODO abort? */
- return;
- }
- if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
- KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"));
- return;
- }
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- /* find out who we are and what we should do */
- {
- int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
- KA_TRACE(10,
- ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
- if (gtid == KMP_GTID_SHUTDOWN) {
- KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
- "already shutdown\n"));
- return;
- } else if (gtid == KMP_GTID_MONITOR) {
- KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
- "registered, or system shutdown\n"));
- return;
- } else if (gtid == KMP_GTID_DNE) {
- KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
- "shutdown\n"));
- return;
- /* we don't know who we are */
- } else if (KMP_UBER_GTID(gtid)) {
- /* unregister ourselves as an uber thread. gtid is no longer valid */
- if (__kmp_root[gtid]->r.r_active) {
- __kmp_global.g.g_abort = -1;
- TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
- KA_TRACE(10,
- ("__kmp_internal_end_thread: root still active, abort T#%d\n",
- gtid));
- return;
- } else {
- KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
- gtid));
- __kmp_unregister_root_current_thread(gtid);
- }
- } else {
- /* just a worker thread, let's leave */
- KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));
-
- if (gtid >= 0) {
- __kmp_threads[gtid]->th.th_task_team = NULL;
- }
-
- KA_TRACE(10,
- ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
- gtid));
- return;
- }
- }
-#if KMP_DYNAMIC_LIB
- // AC: let's not shut down the Linux* OS dynamic library at the exit of an
- // uber thread; it is better to shut down later, in the library destructor.
- // The reason for this change is a performance problem when a non-OpenMP
- // thread in a loop forks and joins many OpenMP threads. We can save a lot
- // of time keeping worker threads alive until the program shutdown.
- // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966)
- // and Windows(DPD200287443) that occurs when using critical sections from
- // foreign threads.
- KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
- return;
-#endif
- /* synchronize the termination process */
- __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
-
- /* have we already finished */
- if (__kmp_global.g.g_abort) {
- KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"));
- /* TODO abort? */
- __kmp_release_bootstrap_lock(&__kmp_initz_lock);
- return;
- }
- if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
- __kmp_release_bootstrap_lock(&__kmp_initz_lock);
- return;
- }
-
- /* We need this lock to enforce mutex between this reading of
- __kmp_threads_capacity and the writing by __kmp_register_root.
- Alternatively, we can use a counter of roots that is atomically updated by
- __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
- __kmp_internal_end_*. */
-
- /* should we finish the run-time? are all siblings done? 
*/
- __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
-
- for (i = 0; i < __kmp_threads_capacity; ++i) {
- if (KMP_UBER_GTID(i)) {
- KA_TRACE(
- 10,
- ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
- __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
- __kmp_release_bootstrap_lock(&__kmp_initz_lock);
- return;
- }
- }
-
- /* now we can safely conduct the actual termination */
-
- __kmp_internal_end();
-
- __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
- __kmp_release_bootstrap_lock(&__kmp_initz_lock);
-
- KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req));
-
-#ifdef DUMP_DEBUG_ON_EXIT
- if (__kmp_debug_buf)
- __kmp_dump_debug_buffer();
-#endif
-} // __kmp_internal_end_thread
-
-// -----------------------------------------------------------------------------
-// Library registration stuff.
-
-static long __kmp_registration_flag = 0;
-// Random value used to indicate library initialization.
-static char *__kmp_registration_str = NULL;
-// Value to be saved in env var __KMP_REGISTERED_LIB_.
-
-static inline char *__kmp_reg_status_name() {
- /* On RHEL 3u5 if linked statically, getpid() returns different values in
- each thread. If registration and unregistration go in different threads
- (omp_misc_other_root_exit.cpp test case), the name of registered_lib_env
- env var cannot be found, because the name will contain a different pid. */
- return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
-} // __kmp_reg_status_name
-
-void __kmp_register_library_startup(void) {
-
- char *name = __kmp_reg_status_name(); // Name of the environment variable.
- int done = 0;
- union {
- double dtime;
- long ltime;
- } time;
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
- __kmp_initialize_system_tick();
-#endif
- __kmp_read_system_time(&time.dtime);
- __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
- __kmp_registration_str =
- __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
- __kmp_registration_flag, KMP_LIBRARY_FILE);
-
- KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
- __kmp_registration_str));
-
- while (!done) {
-
- char *value = NULL; // Actual value of the environment variable.
-
- // Set environment variable, but do not overwrite it if it already exists.
- __kmp_env_set(name, __kmp_registration_str, 0);
- // Check that the variable was written.
- value = __kmp_env_get(name);
- if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
-
- done = 1; // Ok, environment variable set successfully, exit the loop.
-
- } else {
-
- // Oops. Write failed. Another copy of OpenMP RTL is in memory.
- // Check whether it is alive or dead.
- int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
- char *tail = value;
- char *flag_addr_str = NULL;
- char *flag_val_str = NULL;
- char const *file_name = NULL;
- __kmp_str_split(tail, '-', &flag_addr_str, &tail);
- __kmp_str_split(tail, '-', &flag_val_str, &tail);
- file_name = tail;
- if (tail != NULL) {
- long *flag_addr = 0;
- long flag_val = 0;
- KMP_SSCANF(flag_addr_str, "%p", RCAST(void**, &flag_addr));
- KMP_SSCANF(flag_val_str, "%lx", &flag_val);
- if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
- // First, check whether environment-encoded address is mapped into
- // addr space.
- // If so, dereference it to see if it still has the right value.
- if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
- neighbor = 1;
- } else {
- // If not, then we know the other copy of the library is no longer
- // running. 
- neighbor = 2; - } - } - } - switch (neighbor) { - case 0: // Cannot parse environment variable -- neighbor status unknown. - // Assume it is the incompatible format of future version of the - // library. Assume the other library is alive. - // WARN( ... ); // TODO: Issue a warning. - file_name = "unknown library"; - // Attention! Falling to the next case. That's intentional. - case 1: { // Neighbor is alive. - // Check it is allowed. - char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK"); - if (!__kmp_str_match_true(duplicate_ok)) { - // That's not allowed. Issue fatal error. - __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name), - KMP_HNT(DuplicateLibrary), __kmp_msg_null); - } - KMP_INTERNAL_FREE(duplicate_ok); - __kmp_duplicate_library_ok = 1; - done = 1; // Exit the loop. - } break; - case 2: { // Neighbor is dead. - // Clear the variable and try to register library again. - __kmp_env_unset(name); - } break; - default: { KMP_DEBUG_ASSERT(0); } break; - } - } - KMP_INTERNAL_FREE((void *)value); - } - KMP_INTERNAL_FREE((void *)name); - -} // func __kmp_register_library_startup - -void __kmp_unregister_library(void) { - - char *name = __kmp_reg_status_name(); - char *value = __kmp_env_get(name); - - KMP_DEBUG_ASSERT(__kmp_registration_flag != 0); - KMP_DEBUG_ASSERT(__kmp_registration_str != NULL); - if (value != NULL && strcmp(value, __kmp_registration_str) == 0) { - // Ok, this is our variable. Delete it. - __kmp_env_unset(name); - } - - KMP_INTERNAL_FREE(__kmp_registration_str); - KMP_INTERNAL_FREE(value); - KMP_INTERNAL_FREE(name); - - __kmp_registration_flag = 0; - __kmp_registration_str = NULL; - -} // __kmp_unregister_library - -// End of Library registration stuff. -// ----------------------------------------------------------------------------- - -#if KMP_MIC_SUPPORTED - -static void __kmp_check_mic_type() { - kmp_cpuid_t cpuid_state = {0}; - kmp_cpuid_t *cs_p = &cpuid_state; - __kmp_x86_cpuid(1, 0, cs_p); - // We don't support mic1 at the moment - if ((cs_p->eax & 0xff0) == 0xB10) { - __kmp_mic_type = mic2; - } else if ((cs_p->eax & 0xf0ff0) == 0x50670) { - __kmp_mic_type = mic3; - } else { - __kmp_mic_type = non_mic; - } -} - -#endif /* KMP_MIC_SUPPORTED */ - -static void __kmp_do_serial_initialize(void) { - int i, gtid; - int size; - - KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n")); - - KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4); - KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4); - KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8); - KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8); - KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *)); - -#if OMPT_SUPPORT - ompt_pre_init(); -#endif - - __kmp_validate_locks(); - - /* Initialize internal memory allocator */ - __kmp_init_allocator(); - - /* Register the library startup via an environment variable and check to see - whether another copy of the library is already registered. 
*/ - - __kmp_register_library_startup(); - - /* TODO reinitialization of library */ - if (TCR_4(__kmp_global.g.g_done)) { - KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n")); - } - - __kmp_global.g.g_abort = 0; - TCW_SYNC_4(__kmp_global.g.g_done, FALSE); - -/* initialize the locks */ -#if KMP_USE_ADAPTIVE_LOCKS -#if KMP_DEBUG_ADAPTIVE_LOCKS - __kmp_init_speculative_stats(); -#endif -#endif -#if KMP_STATS_ENABLED - __kmp_stats_init(); -#endif - __kmp_init_lock(&__kmp_global_lock); - __kmp_init_queuing_lock(&__kmp_dispatch_lock); - __kmp_init_lock(&__kmp_debug_lock); - __kmp_init_atomic_lock(&__kmp_atomic_lock); - __kmp_init_atomic_lock(&__kmp_atomic_lock_1i); - __kmp_init_atomic_lock(&__kmp_atomic_lock_2i); - __kmp_init_atomic_lock(&__kmp_atomic_lock_4i); - __kmp_init_atomic_lock(&__kmp_atomic_lock_4r); - __kmp_init_atomic_lock(&__kmp_atomic_lock_8i); - __kmp_init_atomic_lock(&__kmp_atomic_lock_8r); - __kmp_init_atomic_lock(&__kmp_atomic_lock_8c); - __kmp_init_atomic_lock(&__kmp_atomic_lock_10r); - __kmp_init_atomic_lock(&__kmp_atomic_lock_16r); - __kmp_init_atomic_lock(&__kmp_atomic_lock_16c); - __kmp_init_atomic_lock(&__kmp_atomic_lock_20c); - __kmp_init_atomic_lock(&__kmp_atomic_lock_32c); - __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock); - __kmp_init_bootstrap_lock(&__kmp_exit_lock); -#if KMP_USE_MONITOR - __kmp_init_bootstrap_lock(&__kmp_monitor_lock); -#endif - __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock); - - /* conduct initialization and initial setup of configuration */ - - __kmp_runtime_initialize(); - -#if KMP_MIC_SUPPORTED - __kmp_check_mic_type(); -#endif - -// Some global variable initialization moved here from kmp_env_initialize() -#ifdef KMP_DEBUG - kmp_diag = 0; -#endif - __kmp_abort_delay = 0; - - // From __kmp_init_dflt_team_nth() - /* assume the entire machine will be used */ - __kmp_dflt_team_nth_ub = __kmp_xproc; - if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) { - __kmp_dflt_team_nth_ub = KMP_MIN_NTH; - } - if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) { - __kmp_dflt_team_nth_ub = __kmp_sys_max_nth; - } - __kmp_max_nth = __kmp_sys_max_nth; - __kmp_cg_max_nth = __kmp_sys_max_nth; - __kmp_teams_max_nth = __kmp_xproc; // set a "reasonable" default - if (__kmp_teams_max_nth > __kmp_sys_max_nth) { - __kmp_teams_max_nth = __kmp_sys_max_nth; - } - - // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME" - // part - __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME; -#if KMP_USE_MONITOR - __kmp_monitor_wakeups = - KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups); - __kmp_bt_intervals = - KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups); -#endif - // From "KMP_LIBRARY" part of __kmp_env_initialize() - __kmp_library = library_throughput; - // From KMP_SCHEDULE initialization - __kmp_static = kmp_sch_static_balanced; -// AC: do not use analytical here, because it is non-monotonous -//__kmp_guided = kmp_sch_guided_iterative_chunked; -//__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no -// need to repeat assignment -// Barrier initialization. 
Moved here from __kmp_env_initialize() Barrier branch -// bit control and barrier method control parts -#if KMP_FAST_REDUCTION_BARRIER -#define kmp_reduction_barrier_gather_bb ((int)1) -#define kmp_reduction_barrier_release_bb ((int)1) -#define kmp_reduction_barrier_gather_pat bp_hyper_bar -#define kmp_reduction_barrier_release_pat bp_hyper_bar -#endif // KMP_FAST_REDUCTION_BARRIER - for (i = bs_plain_barrier; i < bs_last_barrier; i++) { - __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt; - __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt; - __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt; - __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt; -#if KMP_FAST_REDUCTION_BARRIER - if (i == bs_reduction_barrier) { // tested and confirmed on ALTIX only ( - // lin_64 ): hyper,1 - __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb; - __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb; - __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat; - __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat; - } -#endif // KMP_FAST_REDUCTION_BARRIER - } -#if KMP_FAST_REDUCTION_BARRIER -#undef kmp_reduction_barrier_release_pat -#undef kmp_reduction_barrier_gather_pat -#undef kmp_reduction_barrier_release_bb -#undef kmp_reduction_barrier_gather_bb -#endif // KMP_FAST_REDUCTION_BARRIER -#if KMP_MIC_SUPPORTED - if (__kmp_mic_type == mic2) { // KNC - // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC - __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3; // plain gather - __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] = - 1; // forkjoin release - __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar; - __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar; - } -#if KMP_FAST_REDUCTION_BARRIER - if (__kmp_mic_type == mic2) { // KNC - __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar; - __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar; - } -#endif // KMP_FAST_REDUCTION_BARRIER -#endif // KMP_MIC_SUPPORTED - -// From KMP_CHECKS initialization -#ifdef KMP_DEBUG - __kmp_env_checks = TRUE; /* development versions have the extra checks */ -#else - __kmp_env_checks = FALSE; /* port versions do not have the extra checks */ -#endif - - // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization - __kmp_foreign_tp = TRUE; - - __kmp_global.g.g_dynamic = FALSE; - __kmp_global.g.g_dynamic_mode = dynamic_default; - - __kmp_env_initialize(NULL); - -// Print all messages in message catalog for testing purposes. -#ifdef KMP_DEBUG - char const *val = __kmp_env_get("KMP_DUMP_CATALOG"); - if (__kmp_str_match_true(val)) { - kmp_str_buf_t buffer; - __kmp_str_buf_init(&buffer); - __kmp_i18n_dump_catalog(&buffer); - __kmp_printf("%s", buffer.str); - __kmp_str_buf_free(&buffer); - } - __kmp_env_free(&val); -#endif - - __kmp_threads_capacity = - __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub); - // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part - __kmp_tp_capacity = __kmp_default_tp_capacity( - __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified); - - // If the library is shut down properly, both pools must be NULL. Just in - // case, set them to NULL -- some memory may leak, but subsequent code will - // work even if pools are not freed. 
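// A minimal self-contained sketch (illustrative names and a 64-byte line size
// are assumptions, not the runtime's definitions) of the co-allocation
// pattern used a few lines below: __kmp_threads and __kmp_root are carved out
// of one cache-aligned block, so a single allocation and a single free cover
// both arrays.
#include <cstdlib>

struct info_t;  // stand-ins for kmp_info_t / kmp_root_t
struct root_t;

static bool alloc_thread_tables(size_t capacity, info_t ***threads,
                                root_t ***roots) {
  const size_t cache_line = 64;  // assumed CACHE_LINE value
  size_t bytes =
      (sizeof(info_t *) + sizeof(root_t *)) * capacity + cache_line;
  char *block = static_cast<char *>(std::calloc(1, bytes));  // zeroed slots
  if (!block)
    return false;
  *threads = reinterpret_cast<info_t **>(block);
  *roots = reinterpret_cast<root_t **>(block + sizeof(info_t *) * capacity);
  return true;  // freeing *threads later releases both arrays at once
}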
- KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL); - KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL); - KMP_DEBUG_ASSERT(__kmp_team_pool == NULL); - __kmp_thread_pool = NULL; - __kmp_thread_pool_insert_pt = NULL; - __kmp_team_pool = NULL; - - /* Allocate all of the variable sized records */ - /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are - * expandable */ - /* Since allocation is cache-aligned, just add extra padding at the end */ - size = - (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity + - CACHE_LINE; - __kmp_threads = (kmp_info_t **)__kmp_allocate(size); - __kmp_root = (kmp_root_t **)((char *)__kmp_threads + - sizeof(kmp_info_t *) * __kmp_threads_capacity); - - /* init thread counts */ - KMP_DEBUG_ASSERT(__kmp_all_nth == - 0); // Asserts fail if the library is reinitializing and - KMP_DEBUG_ASSERT(__kmp_nth == 0); // something was wrong in termination. - __kmp_all_nth = 0; - __kmp_nth = 0; - - /* setup the uber master thread and hierarchy */ - gtid = __kmp_register_root(TRUE); - KA_TRACE(10, ("__kmp_do_serial_initialize T#%d\n", gtid)); - KMP_ASSERT(KMP_UBER_GTID(gtid)); - KMP_ASSERT(KMP_INITIAL_GTID(gtid)); - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - __kmp_common_initialize(); - -#if KMP_OS_UNIX - /* invoke the child fork handler */ - __kmp_register_atfork(); -#endif - -#if !KMP_DYNAMIC_LIB - { - /* Invoke the exit handler when the program finishes, only for static - library. For dynamic library, we already have _fini and DllMain. */ - int rc = atexit(__kmp_internal_end_atexit); - if (rc != 0) { - __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc), - __kmp_msg_null); - } - } -#endif - -#if KMP_HANDLE_SIGNALS -#if KMP_OS_UNIX - /* NOTE: make sure that this is called before the user installs their own - signal handlers so that the user handlers are called first. this way they - can return false, not call our handler, avoid terminating the library, and - continue execution where they left off. */ - __kmp_install_signals(FALSE); -#endif /* KMP_OS_UNIX */ -#if KMP_OS_WINDOWS - __kmp_install_signals(TRUE); -#endif /* KMP_OS_WINDOWS */ -#endif - - /* we have finished the serial initialization */ - __kmp_init_counter++; - - __kmp_init_serial = TRUE; - - if (__kmp_settings) { - __kmp_env_print(); - } - -#if OMP_40_ENABLED - if (__kmp_display_env || __kmp_display_env_verbose) { - __kmp_env_print_2(); - } -#endif // OMP_40_ENABLED - -#if OMPT_SUPPORT - ompt_post_init(); -#endif - - KMP_MB(); - - KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n")); -} - -void __kmp_serial_initialize(void) { - if (__kmp_init_serial) { - return; - } - __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); - if (__kmp_init_serial) { - __kmp_release_bootstrap_lock(&__kmp_initz_lock); - return; - } - __kmp_do_serial_initialize(); - __kmp_release_bootstrap_lock(&__kmp_initz_lock); -} - -static void __kmp_do_middle_initialize(void) { - int i, j; - int prev_dflt_team_nth; - - if (!__kmp_init_serial) { - __kmp_do_serial_initialize(); - } - - KA_TRACE(10, ("__kmp_middle_initialize: enter\n")); - - // Save the previous value for the __kmp_dflt_team_nth so that - // we can avoid some reinitialization if it hasn't changed. - prev_dflt_team_nth = __kmp_dflt_team_nth; - -#if KMP_AFFINITY_SUPPORTED - // __kmp_affinity_initialize() will try to set __kmp_ncores to the - // number of cores on the machine. 
- __kmp_affinity_initialize(); - - // Run through the __kmp_threads array and set the affinity mask - // for each root thread that is currently registered with the RTL. - for (i = 0; i < __kmp_threads_capacity; i++) { - if (TCR_PTR(__kmp_threads[i]) != NULL) { - __kmp_affinity_set_init_mask(i, TRUE); - } - } -#endif /* KMP_AFFINITY_SUPPORTED */ - - KMP_ASSERT(__kmp_xproc > 0); - if (__kmp_avail_proc == 0) { - __kmp_avail_proc = __kmp_xproc; - } - - // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3), - // correct them now - j = 0; - while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) { - __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = - __kmp_avail_proc; - j++; - } - - if (__kmp_dflt_team_nth == 0) { -#ifdef KMP_DFLT_NTH_CORES - // Default #threads = #cores - __kmp_dflt_team_nth = __kmp_ncores; - KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = " - "__kmp_ncores (%d)\n", - __kmp_dflt_team_nth)); -#else - // Default #threads = #available OS procs - __kmp_dflt_team_nth = __kmp_avail_proc; - KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = " - "__kmp_avail_proc(%d)\n", - __kmp_dflt_team_nth)); -#endif /* KMP_DFLT_NTH_CORES */ - } - - if (__kmp_dflt_team_nth < KMP_MIN_NTH) { - __kmp_dflt_team_nth = KMP_MIN_NTH; - } - if (__kmp_dflt_team_nth > __kmp_sys_max_nth) { - __kmp_dflt_team_nth = __kmp_sys_max_nth; - } - - // There's no harm in continuing if the following check fails, - // but it indicates an error in the previous logic. - KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub); - - if (__kmp_dflt_team_nth != prev_dflt_team_nth) { - // Run through the __kmp_threads array and set the num threads icv for each - // root thread that is currently registered with the RTL (which has not - // already explicitly set its nthreads-var with a call to - // omp_set_num_threads()). 
- for (i = 0; i < __kmp_threads_capacity; i++) { - kmp_info_t *thread = __kmp_threads[i]; - if (thread == NULL) - continue; - if (thread->th.th_current_task->td_icvs.nproc != 0) - continue; - - set__nproc(__kmp_threads[i], __kmp_dflt_team_nth); - } - } - KA_TRACE( - 20, - ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n", - __kmp_dflt_team_nth)); - -#ifdef KMP_ADJUST_BLOCKTIME - /* Adjust blocktime to zero if necessary now that __kmp_avail_proc is set */ - if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) { - KMP_DEBUG_ASSERT(__kmp_avail_proc > 0); - if (__kmp_nth > __kmp_avail_proc) { - __kmp_zero_bt = TRUE; - } - } -#endif /* KMP_ADJUST_BLOCKTIME */ - - /* we have finished middle initialization */ - TCW_SYNC_4(__kmp_init_middle, TRUE); - - KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n")); -} - -void __kmp_middle_initialize(void) { - if (__kmp_init_middle) { - return; - } - __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); - if (__kmp_init_middle) { - __kmp_release_bootstrap_lock(&__kmp_initz_lock); - return; - } - __kmp_do_middle_initialize(); - __kmp_release_bootstrap_lock(&__kmp_initz_lock); -} - -void __kmp_parallel_initialize(void) { - int gtid = __kmp_entry_gtid(); // this might be a new root - - /* synchronize parallel initialization (for sibling) */ - if (TCR_4(__kmp_init_parallel)) - return; - __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); - if (TCR_4(__kmp_init_parallel)) { - __kmp_release_bootstrap_lock(&__kmp_initz_lock); - return; - } - - /* TODO reinitialization after we have already shut down */ - if (TCR_4(__kmp_global.g.g_done)) { - KA_TRACE( - 10, - ("__kmp_parallel_initialize: attempt to init while shutting down\n")); - __kmp_infinite_loop(); - } - - /* jc: The lock __kmp_initz_lock is already held, so calling - __kmp_serial_initialize would cause a deadlock. So we call - __kmp_do_serial_initialize directly. */ - if (!__kmp_init_middle) { - __kmp_do_middle_initialize(); - } - - /* begin initialization */ - KA_TRACE(10, ("__kmp_parallel_initialize: enter\n")); - KMP_ASSERT(KMP_UBER_GTID(gtid)); - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - // Save the FP control regs. - // Worker threads will set theirs to these values at thread startup. - __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word); - __kmp_store_mxcsr(&__kmp_init_mxcsr); - __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK; -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -#if KMP_OS_UNIX -#if KMP_HANDLE_SIGNALS - /* must be after __kmp_serial_initialize */ - __kmp_install_signals(TRUE); -#endif -#endif - - __kmp_suspend_initialize(); - -#if defined(USE_LOAD_BALANCE) - if (__kmp_global.g.g_dynamic_mode == dynamic_default) { - __kmp_global.g.g_dynamic_mode = dynamic_load_balance; - } -#else - if (__kmp_global.g.g_dynamic_mode == dynamic_default) { - __kmp_global.g.g_dynamic_mode = dynamic_thread_limit; - } -#endif - - if (__kmp_version) { - __kmp_print_version_2(); - } - - /* we have finished parallel initialization */ - TCW_SYNC_4(__kmp_init_parallel, TRUE); - - KMP_MB(); - KA_TRACE(10, ("__kmp_parallel_initialize: exit\n")); - - __kmp_release_bootstrap_lock(&__kmp_initz_lock); -} - -/* ------------------------------------------------------------------------ */ - -void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr, - kmp_team_t *team) { - kmp_disp_t *dispatch; - - KMP_MB(); - - /* none of the threads have encountered any constructs, yet. 
*/ - this_thr->th.th_local.this_construct = 0; -#if KMP_CACHE_MANAGE - KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived); -#endif /* KMP_CACHE_MANAGE */ - dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch); - KMP_DEBUG_ASSERT(dispatch); - KMP_DEBUG_ASSERT(team->t.t_dispatch); - // KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[ - // this_thr->th.th_info.ds.ds_tid ] ); - - dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */ -#if OMP_45_ENABLED - dispatch->th_doacross_buf_idx = - 0; /* reset the doacross dispatch buffer counter */ -#endif - if (__kmp_env_consistency_check) - __kmp_push_parallel(gtid, team->t.t_ident); - - KMP_MB(); /* Flush all pending memory write invalidates. */ -} - -void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr, - kmp_team_t *team) { - if (__kmp_env_consistency_check) - __kmp_pop_parallel(gtid, team->t.t_ident); - - __kmp_finish_implicit_task(this_thr); -} - -int __kmp_invoke_task_func(int gtid) { - int rc; - int tid = __kmp_tid_from_gtid(gtid); - kmp_info_t *this_thr = __kmp_threads[gtid]; - kmp_team_t *team = this_thr->th.th_team; - - __kmp_run_before_invoked_task(gtid, tid, this_thr, team); -#if USE_ITT_BUILD - if (__itt_stack_caller_create_ptr) { - __kmp_itt_stack_callee_enter( - (__itt_caller) - team->t.t_stack_id); // inform ittnotify about entering user's code - } -#endif /* USE_ITT_BUILD */ -#if INCLUDE_SSC_MARKS - SSC_MARK_INVOKING(); -#endif - -#if OMPT_SUPPORT - void *dummy; - void **exit_runtime_p; - ompt_data_t *my_task_data; - ompt_data_t *my_parallel_data; - int ompt_team_size; - - if (ompt_enabled.enabled) { - exit_runtime_p = &( - team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_frame.ptr); - } else { - exit_runtime_p = &dummy; - } - - my_task_data = - &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data); - my_parallel_data = &(team->t.ompt_team_info.parallel_data); - if (ompt_enabled.ompt_callback_implicit_task) { - ompt_team_size = team->t.t_nproc; - ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( - ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size, - __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial? 
- OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid); - } -#endif - - { - KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); - KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); - rc = - __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid, - tid, (int)team->t.t_argc, (void **)team->t.t_argv -#if OMPT_SUPPORT - , - exit_runtime_p -#endif - ); -#if OMPT_SUPPORT - *exit_runtime_p = NULL; -#endif - } - -#if USE_ITT_BUILD - if (__itt_stack_caller_create_ptr) { - __kmp_itt_stack_callee_leave( - (__itt_caller) - team->t.t_stack_id); // inform ittnotify about leaving user's code - } -#endif /* USE_ITT_BUILD */ - __kmp_run_after_invoked_task(gtid, tid, this_thr, team); - - return rc; -} - -#if OMP_40_ENABLED -void __kmp_teams_master(int gtid) { - // This routine is called by all master threads in teams construct - kmp_info_t *thr = __kmp_threads[gtid]; - kmp_team_t *team = thr->th.th_team; - ident_t *loc = team->t.t_ident; - thr->th.th_set_nproc = thr->th.th_teams_size.nth; - KMP_DEBUG_ASSERT(thr->th.th_teams_microtask); - KMP_DEBUG_ASSERT(thr->th.th_set_nproc); - KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid, - __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask)); -// Launch league of teams now, but not let workers execute -// (they hang on fork barrier until next parallel) -#if INCLUDE_SSC_MARKS - SSC_MARK_FORKING(); -#endif - __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc, - (microtask_t)thr->th.th_teams_microtask, // "wrapped" task - VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL); -#if INCLUDE_SSC_MARKS - SSC_MARK_JOINING(); -#endif - - // AC: last parameter "1" eliminates join barrier which won't work because - // worker threads are in a fork barrier waiting for more parallel regions - __kmp_join_call(loc, gtid -#if OMPT_SUPPORT - , - fork_context_intel -#endif - , - 1); -} - -int __kmp_invoke_teams_master(int gtid) { - kmp_info_t *this_thr = __kmp_threads[gtid]; - kmp_team_t *team = this_thr->th.th_team; -#if KMP_DEBUG - if (!__kmp_threads[gtid]->th.th_team->t.t_serialized) - KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn == - (void *)__kmp_teams_master); -#endif - __kmp_run_before_invoked_task(gtid, 0, this_thr, team); - __kmp_teams_master(gtid); - __kmp_run_after_invoked_task(gtid, 0, this_thr, team); - return 1; -} -#endif /* OMP_40_ENABLED */ - -/* this sets the requested number of threads for the next parallel region - encountered by this team. since this should be enclosed in the forkjoin - critical section it should avoid race conditions with assymmetrical nested - parallelism */ - -void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) { - kmp_info_t *thr = __kmp_threads[gtid]; - - if (num_threads > 0) - thr->th.th_set_nproc = num_threads; -} - -#if OMP_40_ENABLED - -/* this sets the requested number of teams for the teams region and/or - the number of threads for the next parallel region encountered */ -void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams, - int num_threads) { - kmp_info_t *thr = __kmp_threads[gtid]; - KMP_DEBUG_ASSERT(num_teams >= 0); - KMP_DEBUG_ASSERT(num_threads >= 0); - - if (num_teams == 0) - num_teams = 1; // default number of teams is 1. - if (num_teams > __kmp_teams_max_nth) { // if too many teams requested? 
- if (!__kmp_reserve_warn) { - __kmp_reserve_warn = 1; - __kmp_msg(kmp_ms_warning, - KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth), - KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null); - } - num_teams = __kmp_teams_max_nth; - } - // Set number of teams (number of threads in the outer "parallel" of the - // teams) - thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams; - - // Remember the number of threads for inner parallel regions - if (num_threads == 0) { - if (!TCR_4(__kmp_init_middle)) - __kmp_middle_initialize(); // get __kmp_avail_proc calculated - num_threads = __kmp_avail_proc / num_teams; - if (num_teams * num_threads > __kmp_teams_max_nth) { - // adjust num_threads w/o warning as it is not user setting - num_threads = __kmp_teams_max_nth / num_teams; - } - } else { - if (num_teams * num_threads > __kmp_teams_max_nth) { - int new_threads = __kmp_teams_max_nth / num_teams; - if (!__kmp_reserve_warn) { // user asked for too many threads - __kmp_reserve_warn = 1; // that conflicts with KMP_TEAMS_THREAD_LIMIT - __kmp_msg(kmp_ms_warning, - KMP_MSG(CantFormThrTeam, num_threads, new_threads), - KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null); - } - num_threads = new_threads; - } - } - thr->th.th_teams_size.nth = num_threads; -} - -// Set the proc_bind var to use in the following parallel region. -void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) { - kmp_info_t *thr = __kmp_threads[gtid]; - thr->th.th_set_proc_bind = proc_bind; -} - -#endif /* OMP_40_ENABLED */ - -/* Launch the worker threads into the microtask. */ - -void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) { - kmp_info_t *this_thr = __kmp_threads[gtid]; - -#ifdef KMP_DEBUG - int f; -#endif /* KMP_DEBUG */ - - KMP_DEBUG_ASSERT(team); - KMP_DEBUG_ASSERT(this_thr->th.th_team == team); - KMP_ASSERT(KMP_MASTER_GTID(gtid)); - KMP_MB(); /* Flush all pending memory write invalidates. */ - - team->t.t_construct = 0; /* no single directives seen yet */ - team->t.t_ordered.dt.t_value = - 0; /* thread 0 enters the ordered section first */ - - /* Reset the identifiers on the dispatch buffer */ - KMP_DEBUG_ASSERT(team->t.t_disp_buffer); - if (team->t.t_max_nproc > 1) { - int i; - for (i = 0; i < __kmp_dispatch_num_buffers; ++i) { - team->t.t_disp_buffer[i].buffer_index = i; -#if OMP_45_ENABLED - team->t.t_disp_buffer[i].doacross_buf_idx = i; -#endif - } - } else { - team->t.t_disp_buffer[0].buffer_index = 0; -#if OMP_45_ENABLED - team->t.t_disp_buffer[0].doacross_buf_idx = 0; -#endif - } - - KMP_MB(); /* Flush all pending memory write invalidates. */ - KMP_ASSERT(this_thr->th.th_team == team); - -#ifdef KMP_DEBUG - for (f = 0; f < team->t.t_nproc; f++) { - KMP_DEBUG_ASSERT(team->t.t_threads[f] && - team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc); - } -#endif /* KMP_DEBUG */ - - /* release the worker threads so they may begin working */ - __kmp_fork_barrier(gtid, 0); -} - -void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) { - kmp_info_t *this_thr = __kmp_threads[gtid]; - - KMP_DEBUG_ASSERT(team); - KMP_DEBUG_ASSERT(this_thr->th.th_team == team); - KMP_ASSERT(KMP_MASTER_GTID(gtid)); - KMP_MB(); /* Flush all pending memory write invalidates. 
*/ - -/* Join barrier after fork */ - -#ifdef KMP_DEBUG - if (__kmp_threads[gtid] && - __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) { - __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid, - __kmp_threads[gtid]); - __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, " - "team->t.t_nproc=%d\n", - gtid, __kmp_threads[gtid]->th.th_team_nproc, team, - team->t.t_nproc); - __kmp_print_structure(); - } - KMP_DEBUG_ASSERT(__kmp_threads[gtid] && - __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc); -#endif /* KMP_DEBUG */ - - __kmp_join_barrier(gtid); /* wait for everyone */ -#if OMPT_SUPPORT - if (ompt_enabled.enabled && - this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) { - int ds_tid = this_thr->th.th_info.ds.ds_tid; - ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr); - this_thr->th.ompt_thread_info.state = ompt_state_overhead; -#if OMPT_OPTIONAL - void *codeptr = NULL; - if (KMP_MASTER_TID(ds_tid) && - (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) || - ompt_callbacks.ompt_callback(ompt_callback_sync_region))) - codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address; - - if (ompt_enabled.ompt_callback_sync_region_wait) { - ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( - ompt_sync_region_barrier, ompt_scope_end, NULL, task_data, codeptr); - } - if (ompt_enabled.ompt_callback_sync_region) { - ompt_callbacks.ompt_callback(ompt_callback_sync_region)( - ompt_sync_region_barrier, ompt_scope_end, NULL, task_data, codeptr); - } -#endif - if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) { - ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( - ompt_scope_end, NULL, task_data, 0, ds_tid, ompt_task_implicit); // TODO: Can this be ompt_task_initial? - } - } -#endif - - KMP_MB(); /* Flush all pending memory write invalidates. */ - KMP_ASSERT(this_thr->th.th_team == team); -} - -/* ------------------------------------------------------------------------ */ - -#ifdef USE_LOAD_BALANCE - -// Return the worker threads actively spinning in the hot team, if we -// are at the outermost level of parallelism. Otherwise, return 0. -static int __kmp_active_hot_team_nproc(kmp_root_t *root) { - int i; - int retval; - kmp_team_t *hot_team; - - if (root->r.r_active) { - return 0; - } - hot_team = root->r.r_hot_team; - if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) { - return hot_team->t.t_nproc - 1; // Don't count master thread - } - - // Skip the master thread - it is accounted for elsewhere. - retval = 0; - for (i = 1; i < hot_team->t.t_nproc; i++) { - if (hot_team->t.t_threads[i]->th.th_active) { - retval++; - } - } - return retval; -} - -// Perform an automatic adjustment to the number of -// threads used by the next parallel region. 
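// A condensed standalone sketch of the sizing rule implemented below
// (parameter names are illustrative; the real routine also falls back to a
// thread-limit computation when reading the system load from /proc fails):
static int load_balance_nproc_sketch(int set_nproc, int avail_proc,
                                     int system_active, int team_curr_active) {
  // Threads this team already accounts for should not count against it, so
  // treat the system load as at least our own contribution.
  if (system_active < team_curr_active)
    system_active = team_curr_active;
  int n = avail_proc - system_active + team_curr_active;  // free procs + ours
  if (n > set_nproc)
    n = set_nproc;  // never exceed the requested team size
  if (n < 1)
    n = 1;          // assumed stand-in for the KMP_MIN_NTH clamp
  return n;
}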
-static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) { - int retval; - int pool_active; - int hot_team_active; - int team_curr_active; - int system_active; - - KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root, - set_nproc)); - KMP_DEBUG_ASSERT(root); - KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0] - ->th.th_current_task->td_icvs.dynamic == TRUE); - KMP_DEBUG_ASSERT(set_nproc > 1); - - if (set_nproc == 1) { - KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n")); - return 1; - } - - // Threads that are active in the thread pool, active in the hot team for this - // particular root (if we are at the outer par level), and the currently - // executing thread (to become the master) are available to add to the new - // team, but are currently contributing to the system load, and must be - // accounted for. - pool_active = __kmp_thread_pool_active_nth; - hot_team_active = __kmp_active_hot_team_nproc(root); - team_curr_active = pool_active + hot_team_active + 1; - - // Check the system load. - system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active); - KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d " - "hot team active = %d\n", - system_active, pool_active, hot_team_active)); - - if (system_active < 0) { - // There was an error reading the necessary info from /proc, so use the - // thread limit algorithm instead. Once we set __kmp_global.g.g_dynamic_mode - // = dynamic_thread_limit, we shouldn't wind up getting back here. - __kmp_global.g.g_dynamic_mode = dynamic_thread_limit; - KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit"); - - // Make this call behave like the thread limit algorithm. - retval = __kmp_avail_proc - __kmp_nth + - (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc); - if (retval > set_nproc) { - retval = set_nproc; - } - if (retval < KMP_MIN_NTH) { - retval = KMP_MIN_NTH; - } - - KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n", - retval)); - return retval; - } - - // There is a slight delay in the load balance algorithm in detecting new - // running procs. The real system load at this instant should be at least as - // large as the #active omp thread that are available to add to the team. - if (system_active < team_curr_active) { - system_active = team_curr_active; - } - retval = __kmp_avail_proc - system_active + team_curr_active; - if (retval > set_nproc) { - retval = set_nproc; - } - if (retval < KMP_MIN_NTH) { - retval = KMP_MIN_NTH; - } - - KB_TRACE(20, ("__kmp_load_balance_nproc: exit. 
retval:%d\n", retval)); - return retval; -} // __kmp_load_balance_nproc() - -#endif /* USE_LOAD_BALANCE */ - -/* ------------------------------------------------------------------------ */ - -/* NOTE: this is called with the __kmp_init_lock held */ -void __kmp_cleanup(void) { - int f; - - KA_TRACE(10, ("__kmp_cleanup: enter\n")); - - if (TCR_4(__kmp_init_parallel)) { -#if KMP_HANDLE_SIGNALS - __kmp_remove_signals(); -#endif - TCW_4(__kmp_init_parallel, FALSE); - } - - if (TCR_4(__kmp_init_middle)) { -#if KMP_AFFINITY_SUPPORTED - __kmp_affinity_uninitialize(); -#endif /* KMP_AFFINITY_SUPPORTED */ - __kmp_cleanup_hierarchy(); - TCW_4(__kmp_init_middle, FALSE); - } - - KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n")); - - if (__kmp_init_serial) { - __kmp_runtime_destroy(); - __kmp_init_serial = FALSE; - } - - __kmp_cleanup_threadprivate_caches(); - - for (f = 0; f < __kmp_threads_capacity; f++) { - if (__kmp_root[f] != NULL) { - __kmp_free(__kmp_root[f]); - __kmp_root[f] = NULL; - } - } - __kmp_free(__kmp_threads); - // __kmp_threads and __kmp_root were allocated at once, as single block, so - // there is no need in freeing __kmp_root. - __kmp_threads = NULL; - __kmp_root = NULL; - __kmp_threads_capacity = 0; - -#if KMP_USE_DYNAMIC_LOCK - __kmp_cleanup_indirect_user_locks(); -#else - __kmp_cleanup_user_locks(); -#endif - -#if KMP_AFFINITY_SUPPORTED - KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file)); - __kmp_cpuinfo_file = NULL; -#endif /* KMP_AFFINITY_SUPPORTED */ - -#if KMP_USE_ADAPTIVE_LOCKS -#if KMP_DEBUG_ADAPTIVE_LOCKS - __kmp_print_speculative_stats(); -#endif -#endif - KMP_INTERNAL_FREE(__kmp_nested_nth.nth); - __kmp_nested_nth.nth = NULL; - __kmp_nested_nth.size = 0; - __kmp_nested_nth.used = 0; - KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types); - __kmp_nested_proc_bind.bind_types = NULL; - __kmp_nested_proc_bind.size = 0; - __kmp_nested_proc_bind.used = 0; -#if OMP_50_ENABLED - if (__kmp_affinity_format) { - KMP_INTERNAL_FREE(__kmp_affinity_format); - __kmp_affinity_format = NULL; - } -#endif - - __kmp_i18n_catclose(); - -#if KMP_USE_HIER_SCHED - __kmp_hier_scheds.deallocate(); -#endif - -#if KMP_STATS_ENABLED - __kmp_stats_fini(); -#endif - - KA_TRACE(10, ("__kmp_cleanup: exit\n")); -} - -/* ------------------------------------------------------------------------ */ - -int __kmp_ignore_mppbeg(void) { - char *env; - - if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) { - if (__kmp_str_match_false(env)) - return FALSE; - } - // By default __kmpc_begin() is no-op. - return TRUE; -} - -int __kmp_ignore_mppend(void) { - char *env; - - if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) { - if (__kmp_str_match_false(env)) - return FALSE; - } - // By default __kmpc_end() is no-op. 
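  // (That is: with KMP_IGNORE_MPPEND unset, or set to anything that does not
  // parse as false, __kmpc_end() stays a no-op and TRUE is returned here.)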
- return TRUE; -} - -void __kmp_internal_begin(void) { - int gtid; - kmp_root_t *root; - - /* this is a very important step as it will register new sibling threads - and assign these new uber threads a new gtid */ - gtid = __kmp_entry_gtid(); - root = __kmp_threads[gtid]->th.th_root; - KMP_ASSERT(KMP_UBER_GTID(gtid)); - - if (root->r.r_begin) - return; - __kmp_acquire_lock(&root->r.r_begin_lock, gtid); - if (root->r.r_begin) { - __kmp_release_lock(&root->r.r_begin_lock, gtid); - return; - } - - root->r.r_begin = TRUE; - - __kmp_release_lock(&root->r.r_begin_lock, gtid); -} - -/* ------------------------------------------------------------------------ */ - -void __kmp_user_set_library(enum library_type arg) { - int gtid; - kmp_root_t *root; - kmp_info_t *thread; - - /* first, make sure we are initialized so we can get our gtid */ - - gtid = __kmp_entry_gtid(); - thread = __kmp_threads[gtid]; - - root = thread->th.th_root; - - KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, - library_serial)); - if (root->r.r_in_parallel) { /* Must be called in serial section of top-level - thread */ - KMP_WARNING(SetLibraryIncorrectCall); - return; - } - - switch (arg) { - case library_serial: - thread->th.th_set_nproc = 0; - set__nproc(thread, 1); - break; - case library_turnaround: - thread->th.th_set_nproc = 0; - set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth - : __kmp_dflt_team_nth_ub); - break; - case library_throughput: - thread->th.th_set_nproc = 0; - set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth - : __kmp_dflt_team_nth_ub); - break; - default: - KMP_FATAL(UnknownLibraryType, arg); - } - - __kmp_aux_set_library(arg); -} - -void __kmp_aux_set_stacksize(size_t arg) { - if (!__kmp_init_serial) - __kmp_serial_initialize(); - -#if KMP_OS_DARWIN - if (arg & (0x1000 - 1)) { - arg &= ~(0x1000 - 1); - if (arg + 0x1000) /* check for overflow if we round up */ - arg += 0x1000; - } -#endif - __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); - - /* only change the default stacksize before the first parallel region */ - if (!TCR_4(__kmp_init_parallel)) { - size_t value = arg; /* argument is in bytes */ - - if (value < __kmp_sys_min_stksize) - value = __kmp_sys_min_stksize; - else if (value > KMP_MAX_STKSIZE) - value = KMP_MAX_STKSIZE; - - __kmp_stksize = value; - - __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */ - } - - __kmp_release_bootstrap_lock(&__kmp_initz_lock); -} - -/* set the behaviour of the runtime library */ -/* TODO this can cause some odd behaviour with sibling parallelism... 
*/ -void __kmp_aux_set_library(enum library_type arg) { - __kmp_library = arg; - - switch (__kmp_library) { - case library_serial: { - KMP_INFORM(LibraryIsSerial); - (void)__kmp_change_library(TRUE); - } break; - case library_turnaround: - (void)__kmp_change_library(TRUE); - break; - case library_throughput: - (void)__kmp_change_library(FALSE); - break; - default: - KMP_FATAL(UnknownLibraryType, arg); - } -} - -/* Getting team information common for all team API */ -// Returns NULL if not in teams construct -static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) { - kmp_info_t *thr = __kmp_entry_thread(); - teams_serialized = 0; - if (thr->th.th_teams_microtask) { - kmp_team_t *team = thr->th.th_team; - int tlevel = thr->th.th_teams_level; // the level of the teams construct - int ii = team->t.t_level; - teams_serialized = team->t.t_serialized; - int level = tlevel + 1; - KMP_DEBUG_ASSERT(ii >= tlevel); - while (ii > level) { - for (teams_serialized = team->t.t_serialized; - (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) { - } - if (team->t.t_serialized && (!teams_serialized)) { - team = team->t.t_parent; - continue; - } - if (ii > level) { - team = team->t.t_parent; - ii--; - } - } - return team; - } - return NULL; -} - -int __kmp_aux_get_team_num() { - int serialized; - kmp_team_t *team = __kmp_aux_get_team_info(serialized); - if (team) { - if (serialized > 1) { - return 0; // teams region is serialized ( 1 team of 1 thread ). - } else { - return team->t.t_master_tid; - } - } - return 0; -} - -int __kmp_aux_get_num_teams() { - int serialized; - kmp_team_t *team = __kmp_aux_get_team_info(serialized); - if (team) { - if (serialized > 1) { - return 1; - } else { - return team->t.t_parent->t.t_nproc; - } - } - return 1; -} - -/* ------------------------------------------------------------------------ */ - -#if OMP_50_ENABLED -/* - * Affinity Format Parser - * - * Field is in form of: %[[[0].]size]type - * % and type are required (%% means print a literal '%') - * type is either single char or long name surrounded by {}, - * e.g., N or {num_threads} - * 0 => leading zeros - * . => right justified when size is specified - * by default output is left justified - * size is the *minimum* field length - * All other characters are printed as is - * - * Available field types: - * L {thread_level} - omp_get_level() - * n {thread_num} - omp_get_thread_num() - * h {host} - name of host machine - * P {process_id} - process id (integer) - * T {thread_identifier} - native thread identifier (integer) - * N {num_threads} - omp_get_num_threads() - * A {ancestor_tnum} - omp_get_ancestor_thread_num(omp_get_level()-1) - * a {thread_affinity} - comma separated list of integers or integer ranges - * (values of affinity mask) - * - * Implementation-specific field types can be added - * If a type is unknown, print "undefined" -*/ - -// Structure holding the short name, long name, and corresponding data type -// for snprintf. A table of these will represent the entire valid keyword -// field types. 
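// A self-contained sketch of how one field spec maps onto an snprintf format
// per the grammar above, mirroring the flag handling in the parser below:
// "%0.4n" (zero-padded, right-justified, width 4, thread_num) becomes "%04d",
// while the left-justified default "%4n" becomes "%-4d".
#include <string>

static std::string field_to_printf(bool pad_zeros, bool right_justify,
                                   const std::string &width, char data_type) {
  std::string fmt = "%";
  if (!right_justify)
    fmt += '-';      // left-justified is the default per the grammar
  if (pad_zeros)
    fmt += '0';
  fmt += width;      // minimum field width; may be empty
  fmt += data_type;  // 'd' or 's', taken from the field table
  return fmt;
}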
-typedef struct kmp_affinity_format_field_t { - char short_name; // from spec e.g., L -> thread level - const char *long_name; // from spec thread_level -> thread level - char field_format; // data type for snprintf (typically 'd' or 's' - // for integer or string) -} kmp_affinity_format_field_t; - -static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = { -#if KMP_AFFINITY_SUPPORTED - {'A', "thread_affinity", 's'}, -#endif - {'t', "team_num", 'd'}, - {'T', "num_teams", 'd'}, - {'L', "nesting_level", 'd'}, - {'n', "thread_num", 'd'}, - {'N', "num_threads", 'd'}, - {'a', "ancestor_tnum", 'd'}, - {'H', "host", 's'}, - {'P', "process_id", 'd'}, - {'i', "native_thread_id", 'd'}}; - -// Return the number of characters it takes to hold field -static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th, - const char **ptr, - kmp_str_buf_t *field_buffer) { - int rc, format_index, field_value; - const char *width_left, *width_right; - bool pad_zeros, right_justify, parse_long_name, found_valid_name; - static const int FORMAT_SIZE = 20; - char format[FORMAT_SIZE] = {0}; - char absolute_short_name = 0; - - KMP_DEBUG_ASSERT(gtid >= 0); - KMP_DEBUG_ASSERT(th); - KMP_DEBUG_ASSERT(**ptr == '%'); - KMP_DEBUG_ASSERT(field_buffer); - - __kmp_str_buf_clear(field_buffer); - - // Skip the initial % - (*ptr)++; - - // Check for %% first - if (**ptr == '%') { - __kmp_str_buf_cat(field_buffer, "%", 1); - (*ptr)++; // skip over the second % - return 1; - } - - // Parse field modifiers if they are present - pad_zeros = false; - if (**ptr == '0') { - pad_zeros = true; - (*ptr)++; // skip over 0 - } - right_justify = false; - if (**ptr == '.') { - right_justify = true; - (*ptr)++; // skip over . - } - // Parse width of field: [width_left, width_right) - width_left = width_right = NULL; - if (**ptr >= '0' && **ptr <= '9') { - width_left = *ptr; - SKIP_DIGITS(*ptr); - width_right = *ptr; - } - - // Create the format for KMP_SNPRINTF based on flags parsed above - format_index = 0; - format[format_index++] = '%'; - if (!right_justify) - format[format_index++] = '-'; - if (pad_zeros) - format[format_index++] = '0'; - if (width_left && width_right) { - int i = 0; - // Only allow 8 digit number widths. 
- // This also prevents overflowing format variable - while (i < 8 && width_left < width_right) { - format[format_index++] = *width_left; - width_left++; - i++; - } - } - - // Parse a name (long or short) - // Canonicalize the name into absolute_short_name - found_valid_name = false; - parse_long_name = (**ptr == '{'); - if (parse_long_name) - (*ptr)++; // skip initial left brace - for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) / - sizeof(__kmp_affinity_format_table[0]); - ++i) { - char short_name = __kmp_affinity_format_table[i].short_name; - const char *long_name = __kmp_affinity_format_table[i].long_name; - char field_format = __kmp_affinity_format_table[i].field_format; - if (parse_long_name) { - int length = KMP_STRLEN(long_name); - if (strncmp(*ptr, long_name, length) == 0) { - found_valid_name = true; - (*ptr) += length; // skip the long name - } - } else if (**ptr == short_name) { - found_valid_name = true; - (*ptr)++; // skip the short name - } - if (found_valid_name) { - format[format_index++] = field_format; - format[format_index++] = '\0'; - absolute_short_name = short_name; - break; - } - } - if (parse_long_name) { - if (**ptr != '}') { - absolute_short_name = 0; - } else { - (*ptr)++; // skip over the right brace - } - } - - // Attempt to fill the buffer with the requested - // value using snprintf within __kmp_str_buf_print() - switch (absolute_short_name) { - case 't': - rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num()); - break; - case 'T': - rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams()); - break; - case 'L': - rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level); - break; - case 'n': - rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid)); - break; - case 'H': { - static const int BUFFER_SIZE = 256; - char buf[BUFFER_SIZE]; - __kmp_expand_host_name(buf, BUFFER_SIZE); - rc = __kmp_str_buf_print(field_buffer, format, buf); - } break; - case 'P': - rc = __kmp_str_buf_print(field_buffer, format, getpid()); - break; - case 'i': - rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid()); - break; - case 'N': - rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc); - break; - case 'a': - field_value = - __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1); - rc = __kmp_str_buf_print(field_buffer, format, field_value); - break; -#if KMP_AFFINITY_SUPPORTED - case 'A': { - kmp_str_buf_t buf; - __kmp_str_buf_init(&buf); - __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask); - rc = __kmp_str_buf_print(field_buffer, format, buf.str); - __kmp_str_buf_free(&buf); - } break; -#endif - default: - // According to spec, If an implementation does not have info for field - // type, then "undefined" is printed - rc = __kmp_str_buf_print(field_buffer, "%s", "undefined"); - // Skip the field - if (parse_long_name) { - SKIP_TOKEN(*ptr); - if (**ptr == '}') - (*ptr)++; - } else { - (*ptr)++; - } - } - - KMP_ASSERT(format_index <= FORMAT_SIZE); - return rc; -} - -/* - * Return number of characters needed to hold the affinity string - * (not including null byte character) - * The resultant string is printed to buffer, which the caller can then - * handle afterwards -*/ -size_t __kmp_aux_capture_affinity(int gtid, const char *format, - kmp_str_buf_t *buffer) { - const char *parse_ptr; - size_t retval; - const kmp_info_t *th; - kmp_str_buf_t field; - - KMP_DEBUG_ASSERT(buffer); - KMP_DEBUG_ASSERT(gtid >= 0); - - __kmp_str_buf_init(&field); - 
__kmp_str_buf_clear(buffer); - - th = __kmp_threads[gtid]; - retval = 0; - - // If format is NULL or zero-length string, then we use - // affinity-format-var ICV - parse_ptr = format; - if (parse_ptr == NULL || *parse_ptr == '\0') { - parse_ptr = __kmp_affinity_format; - } - KMP_DEBUG_ASSERT(parse_ptr); - - while (*parse_ptr != '\0') { - // Parse a field - if (*parse_ptr == '%') { - // Put field in the buffer - int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field); - __kmp_str_buf_catbuf(buffer, &field); - retval += rc; - } else { - // Put literal character in buffer - __kmp_str_buf_cat(buffer, parse_ptr, 1); - retval++; - parse_ptr++; - } - } - __kmp_str_buf_free(&field); - return retval; -} - -// Displays the affinity string to stdout -void __kmp_aux_display_affinity(int gtid, const char *format) { - kmp_str_buf_t buf; - __kmp_str_buf_init(&buf); - __kmp_aux_capture_affinity(gtid, format, &buf); - __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str); - __kmp_str_buf_free(&buf); -} -#endif // OMP_50_ENABLED - -/* ------------------------------------------------------------------------ */ - -void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) { - int blocktime = arg; /* argument is in milliseconds */ -#if KMP_USE_MONITOR - int bt_intervals; -#endif - int bt_set; - - __kmp_save_internal_controls(thread); - - /* Normalize and set blocktime for the teams */ - if (blocktime < KMP_MIN_BLOCKTIME) - blocktime = KMP_MIN_BLOCKTIME; - else if (blocktime > KMP_MAX_BLOCKTIME) - blocktime = KMP_MAX_BLOCKTIME; - - set__blocktime_team(thread->th.th_team, tid, blocktime); - set__blocktime_team(thread->th.th_serial_team, 0, blocktime); - -#if KMP_USE_MONITOR - /* Calculate and set blocktime intervals for the teams */ - bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups); - - set__bt_intervals_team(thread->th.th_team, tid, bt_intervals); - set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals); -#endif - - /* Set whether blocktime has been set to "TRUE" */ - bt_set = TRUE; - - set__bt_set_team(thread->th.th_team, tid, bt_set); - set__bt_set_team(thread->th.th_serial_team, 0, bt_set); -#if KMP_USE_MONITOR - KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, " - "bt_intervals=%d, monitor_updates=%d\n", - __kmp_gtid_from_tid(tid, thread->th.th_team), - thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, - __kmp_monitor_wakeups)); -#else - KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n", - __kmp_gtid_from_tid(tid, thread->th.th_team), - thread->th.th_team->t.t_id, tid, blocktime)); -#endif -} - -void __kmp_aux_set_defaults(char const *str, int len) { - if (!__kmp_init_serial) { - __kmp_serial_initialize(); - } - __kmp_env_initialize(str); - - if (__kmp_settings -#if OMP_40_ENABLED - || __kmp_display_env || __kmp_display_env_verbose -#endif // OMP_40_ENABLED - ) { - __kmp_env_print(); - } -} // __kmp_aux_set_defaults - -/* ------------------------------------------------------------------------ */ -/* internal fast reduction routines */ - -PACKED_REDUCTION_METHOD_T -__kmp_determine_reduction_method( - ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, - void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), - kmp_critical_name *lck) { - - // Default reduction method: critical construct ( lck != NULL, like in current - // PAROPT ) - // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method - // can be selected by RTL - // If loc->flags contains 
KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method - // can be selected by RTL - // Finally, it's up to OpenMP RTL to make a decision on which method to select - // among generated by PAROPT. - - PACKED_REDUCTION_METHOD_T retval; - - int team_size; - - KMP_DEBUG_ASSERT(loc); // it would be nice to test ( loc != 0 ) - KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 ) - -#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \ - ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)) -#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func)) - - retval = critical_reduce_block; - - // another choice of getting a team size (with 1 dynamic deference) is slower - team_size = __kmp_get_team_num_threads(global_tid); - if (team_size == 1) { - - retval = empty_reduce_block; - - } else { - - int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED; - -#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 - -#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ - KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD - - int teamsize_cutoff = 4; - -#if KMP_MIC_SUPPORTED - if (__kmp_mic_type != non_mic) { - teamsize_cutoff = 8; - } -#endif - int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED; - if (tree_available) { - if (team_size <= teamsize_cutoff) { - if (atomic_available) { - retval = atomic_reduce_block; - } - } else { - retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER; - } - } else if (atomic_available) { - retval = atomic_reduce_block; - } -#else -#error "Unknown or unsupported OS" -#endif // KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || - // KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD - -#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS - -#if KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_HURD - - // basic tuning - - if (atomic_available) { - if (num_vars <= 2) { // && ( team_size <= 8 ) due to false-sharing ??? 
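      // Basic tuning: with at most two reduction variables, per-variable
      // atomics are presumably cheaper than one critical section; the
      // commented-out team_size bound hints at a false-sharing concern.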
- retval = atomic_reduce_block; - } - } // otherwise: use critical section - -#elif KMP_OS_DARWIN - - int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED; - if (atomic_available && (num_vars <= 3)) { - retval = atomic_reduce_block; - } else if (tree_available) { - if ((reduce_size > (9 * sizeof(kmp_real64))) && - (reduce_size < (2000 * sizeof(kmp_real64)))) { - retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER; - } - } // otherwise: use critical section - -#else -#error "Unknown or unsupported OS" -#endif - -#else -#error "Unknown or unsupported architecture" -#endif - } - - // KMP_FORCE_REDUCTION - - // If the team is serialized (team_size == 1), ignore the forced reduction - // method and stay with the unsynchronized method (empty_reduce_block) - if (__kmp_force_reduction_method != reduction_method_not_defined && - team_size != 1) { - - PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block; - - int atomic_available, tree_available; - - switch ((forced_retval = __kmp_force_reduction_method)) { - case critical_reduce_block: - KMP_ASSERT(lck); // lck should be != 0 - break; - - case atomic_reduce_block: - atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED; - if (!atomic_available) { - KMP_WARNING(RedMethodNotSupported, "atomic"); - forced_retval = critical_reduce_block; - } - break; - - case tree_reduce_block: - tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED; - if (!tree_available) { - KMP_WARNING(RedMethodNotSupported, "tree"); - forced_retval = critical_reduce_block; - } else { -#if KMP_FAST_REDUCTION_BARRIER - forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER; -#endif - } - break; - - default: - KMP_ASSERT(0); // "unsupported method specified" - } - - retval = forced_retval; - } - - KA_TRACE(10, ("reduction method selected=%08x\n", retval)); - -#undef FAST_REDUCTION_TREE_METHOD_GENERATED -#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED - - return (retval); -} - -// this function is for testing set/get/determine reduce method -kmp_int32 __kmp_get_reduce_method(void) { - return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8); -} Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_runtime.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/test-touch.c =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/test-touch.c (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/test-touch.c (nonexistent) @@ -1,31 +0,0 @@ -// test-touch.c // - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-extern double omp_get_wtime();
-extern int omp_get_num_threads();
-extern int omp_get_max_threads();
-#ifdef __cplusplus
-}
-#endif
-
-int main() {
-  omp_get_wtime();
-  omp_get_num_threads();
-  omp_get_max_threads();
-  return 0;
-}
-
-// end of file //
Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/test-touch.c
___________________________________________________________________
Deleted: svn:eol-style
## -1 +0,0 ##
-native
\ No newline at end of property
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Deleted: svn:mime-type
## -1 +0,0 ##
-text/plain
\ No newline at end of property
Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/extractExternal.cpp
===================================================================
--- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/extractExternal.cpp	(revision 348960)
+++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/extractExternal.cpp	(nonexistent)
@@ -1,484 +0,0 @@
-/*
- * extractExternal.cpp
- */
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include <stdlib.h>
-#include <iostream>
-#include <strstream>
-#include <fstream>
-#include <string>
-#include <set>
-#include <map>
-
-/* Given a set of n object files h ('external' object files) and a set of m
-   object files o ('internal' object files),
-   1. Determines r, the subset of h that o depends on, directly or indirectly
-   2. Removes the files in h - r from the file system
-   3. For each external symbol defined in some file in r, rename it in r U o
-      by prefixing it with "__kmp_external_"
-   Usage:
-   hide.exe
-
-   Thus, the prefixed symbols become hidden in the sense that they now have a
-   special prefix.
-*/
-
-using namespace std;
-
-void stop(char *errorMsg) {
-  printf("%s\n", errorMsg);
-  exit(1);
-}
-
-// an entry in the symbol table of a .OBJ file
-class Symbol {
-public:
-  __int64 name;
-  unsigned value;
-  unsigned short sectionNum, type;
-  char storageClass, nAux;
-};
-
-class _rstream : public istrstream {
-private:
-  const char *buf;
-
-protected:
-  _rstream(pair<const char *, streamsize> p)
-      : istrstream(p.first, p.second), buf(p.first) {}
-  ~_rstream() { delete[] buf; }
-};
-
-// A stream encapsulating the content of a file or the content of a string,
-// overriding the >> operator to read various integer types in binary form,
-// as well as a symbol table entry.
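// A self-contained sketch of the raw-read idiom the class below relies on:
// fill the integer's own storage directly from the stream, in host byte
// order (the .OBJ files handled here are little-endian COFF, an assumption
// the tool shares with its target platform).
#include <istream>

template <class T> static bool read_pod(std::istream &in, T &x) {
  in.read(reinterpret_cast<char *>(&x), sizeof(T));
  return static_cast<bool>(in);  // false on error or short read
}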
-class rstream : public _rstream {
-private:
-  template <class T> inline rstream &doRead(T &x) {
-    read((char *)&x, sizeof(T));
-    return *this;
-  }
-  static pair<const char *, streamsize> getBuf(const char *fileName) {
-    ifstream raw(fileName, ios::binary | ios::in);
-    if (!raw.is_open())
-      stop("rstream.getBuf: Error opening file");
-    raw.seekg(0, ios::end);
-    streampos fileSize = raw.tellg();
-    if (fileSize < 0)
-      stop("rstream.getBuf: Error reading file");
-    char *buf = new char[fileSize];
-    raw.seekg(0, ios::beg);
-    raw.read(buf, fileSize);
-    return pair<const char *, streamsize>(buf, fileSize);
-  }
-
-public:
-  // construct from a string
-  rstream(const char *buf, streamsize size)
-      : _rstream(pair<const char *, streamsize>(buf, size)) {}
-  // construct from a file whose content is fully read once to initialize the
-  // content of this stream
-  rstream(const char *fileName) : _rstream(getBuf(fileName)) {}
-  rstream &operator>>(int &x) { return doRead(x); }
-  rstream &operator>>(unsigned &x) { return doRead(x); }
-  rstream &operator>>(short &x) { return doRead(x); }
-  rstream &operator>>(unsigned short &x) { return doRead(x); }
-  rstream &operator>>(Symbol &e) {
-    read((char *)&e, 18);
-    return *this;
-  }
-};
-
-// string table in a .OBJ file
-class StringTable {
-private:
-  map<string, unsigned> directory;
-  size_t length;
-  char *data;
-
-  // make <directory> from <length> bytes in <data>
-  void makeDirectory(void) {
-    unsigned i = 4;
-    while (i < length) {
-      string s = string(data + i);
-      directory.insert(make_pair(s, i));
-      i += s.size() + 1;
-    }
-  }
-  // initialize <length> and <data> with contents specified by the arguments
-  void init(const char *_data) {
-    unsigned _length = *(unsigned *)_data;
-
-    if (_length < sizeof(unsigned) || _length != *(unsigned *)_data)
-      stop("StringTable.init: Invalid symbol table");
-    if (_data[_length - 1]) {
-      // to prevent runaway strings, make sure the data ends with a zero
-      data = new char[length = _length + 1];
-      data[_length] = 0;
-    } else {
-      data = new char[length = _length];
-    }
-    *(unsigned *)data = length;
-    KMP_MEMCPY(data + sizeof(unsigned), _data + sizeof(unsigned),
-               length - sizeof(unsigned));
-    makeDirectory();
-  }
-
-public:
-  StringTable(rstream &f) {
-    // Construct string table by reading from f.
-    streampos s;
-    unsigned strSize;
-    char *strData;
-
-    s = f.tellg();
-    f >> strSize;
-    if (strSize < sizeof(unsigned))
-      stop("StringTable: Invalid string table");
-    strData = new char[strSize];
-    *(unsigned *)strData = strSize;
-    // read the raw data into <strData>
-    f.read(strData + sizeof(unsigned), strSize - sizeof(unsigned));
-    s = f.tellg() - s;
-    if (s < strSize)
-      stop("StringTable: Unexpected EOF");
-    init(strData);
-    delete[] strData;
-  }
-  StringTable(const set<string> &strings) {
-    // Construct string table from given strings.
-    char *p;
-    set<string>::const_iterator it;
-    size_t s;
-
-    // count required size for data
-    for (length = sizeof(unsigned), it = strings.begin(); it != strings.end();
-         ++it) {
-      size_t l = (*it).size();
-
-      if (l > (unsigned)0xFFFFFFFF)
-        stop("StringTable: String too long");
-      if (l > 8) {
-        length += l + 1;
-        if (length > (unsigned)0xFFFFFFFF)
-          stop("StringTable: Symbol table too long");
-      }
-    }
-    data = new char[length];
-    *(unsigned *)data = length;
-    // populate data and directory
-    for (p = data + sizeof(unsigned), it = strings.begin(); it != strings.end();
-         ++it) {
-      const string &str = *it;
-      size_t l = str.size();
-      if (l > 8) {
-        directory.insert(make_pair(str, p - data));
-        KMP_MEMCPY(p, str.c_str(), l);
-        p[l] = 0;
-        p += l + 1;
-      }
-    }
-  }
-  ~StringTable() { delete[] data; }
-  // Returns encoding for given string based on this string table.
Error if
-  // string length is greater than 8 but string is not in the string table
-  // -- returns 0.
-  __int64 encode(const string &str) {
-    __int64 r;
-
-    if (str.size() <= 8) {
-      // encoded directly
-      ((char *)&r)[7] = 0;
-      KMP_STRNCPY_S((char *)&r, sizeof(r), str.c_str(), 8);
-      return r;
-    } else {
-      // represented as index into table
-      map<string, unsigned>::const_iterator it = directory.find(str);
-      if (it == directory.end())
-        stop("StringTable::encode: String not found in string table");
-      ((unsigned *)&r)[0] = 0;
-      ((unsigned *)&r)[1] = (*it).second;
-      return r;
-    }
-  }
-  // Returns string represented by x based on this string table. Error if x
-  // references an invalid position in the table--returns the empty string.
-  string decode(__int64 x) const {
-    if (*(unsigned *)&x == 0) {
-      // represented as index into table
-      unsigned &p = ((unsigned *)&x)[1];
-      if (p >= length)
-        stop("StringTable::decode: Invalid string table lookup");
-      return string(data + p);
-    } else {
-      // encoded directly
-      char *p = (char *)&x;
-      int i;
-
-      for (i = 0; i < 8 && p[i]; ++i)
-        ;
-      return string(p, i);
-    }
-  }
-  void write(ostream &os) { os.write(data, length); }
-};
-
-// for the named object file, determines the set of defined symbols and the set
-// of undefined external symbols and writes them to <defined> and <undefined>
-// respectively
-void computeExternalSymbols(const char *fileName, set<string> *defined,
-                            set<string> *undefined) {
-  streampos fileSize;
-  size_t strTabStart;
-  unsigned symTabStart, symNEntries;
-  rstream f(fileName);
-
-  f.seekg(0, ios::end);
-  fileSize = f.tellg();
-
-  f.seekg(8);
-  f >> symTabStart >> symNEntries;
-  // seek to the string table
-  f.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
-  if (f.eof()) {
-    printf("computeExternalSymbols: fileName='%s', fileSize = %lu, symTabStart "
-           "= %u, symNEntries = %u\n",
-           fileName, (unsigned long)fileSize, symTabStart, symNEntries);
-    stop("computeExternalSymbols: Unexpected EOF 1");
-  }
-  StringTable stringTable(f); // read the string table
-  if (f.tellg() != fileSize)
-    stop("computeExternalSymbols: Unexpected data after string table");
-
-  f.clear();
-  f.seekg(symTabStart); // seek to the symbol table
-
-  defined->clear();
-  undefined->clear();
-  for (int i = 0; i < symNEntries; ++i) {
-    // process each entry
-    Symbol e;
-
-    if (f.eof())
-      stop("computeExternalSymbols: Unexpected EOF 2");
-    f >> e;
-    if (f.fail())
-      stop("computeExternalSymbols: File read error");
-    if (e.nAux) { // auxiliary entry: skip
-      f.seekg(e.nAux * 18, ios::cur);
-      i += e.nAux;
-    }
-    // if symbol is extern and defined in the current file, insert it
-    if (e.storageClass == 2)
-      if (e.sectionNum)
-        defined->insert(stringTable.decode(e.name));
-      else
-        undefined->insert(stringTable.decode(e.name));
-  }
-}
-
-// For each occurrence of an external symbol in the object file named by
-// <fileName> that is a member of <hide>, renames it by prefixing
-// with "__kmp_external_", writing back the file in-place
-void hideSymbols(char *fileName, const set<string> &hide) {
-  static const string prefix("__kmp_external_");
-  set<string> strings; // set of all occurring symbols, appropriately prefixed
-  streampos fileSize;
-  size_t strTabStart;
-  unsigned symTabStart, symNEntries;
-  int i;
-  rstream in(fileName);
-
-  in.seekg(0, ios::end);
-  fileSize = in.tellg();
-
-  in.seekg(8);
-  in >> symTabStart >> symNEntries;
-  in.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
-  if (in.eof())
-    stop("hideSymbols: Unexpected EOF");
-  StringTable stringTableOld(in); // read original string table
-
-  if (in.tellg() != fileSize)
- stop("hideSymbols: Unexpected data after string table"); - - // compute set of occurring strings with prefix added - for (i = 0; i < symNEntries; ++i) { - Symbol e; - - in.seekg(symTabStart + i * 18); - if (in.eof()) - stop("hideSymbols: Unexpected EOF"); - in >> e; - if (in.fail()) - stop("hideSymbols: File read error"); - if (e.nAux) - i += e.nAux; - const string &s = stringTableOld.decode(e.name); - // if symbol is extern and found in , prefix and insert into strings, - // otherwise, just insert into strings without prefix - strings.insert( - (e.storageClass == 2 && hide.find(s) != hide.end()) ? prefix + s : s); - } - - ofstream out(fileName, ios::trunc | ios::out | ios::binary); - if (!out.is_open()) - stop("hideSymbols: Error opening output file"); - - // make new string table from string set - StringTable stringTableNew = StringTable(strings); - - // copy input file to output file up to just before the symbol table - in.seekg(0); - char *buf = new char[symTabStart]; - in.read(buf, symTabStart); - out.write(buf, symTabStart); - delete[] buf; - - // copy input symbol table to output symbol table with name translation - for (i = 0; i < symNEntries; ++i) { - Symbol e; - - in.seekg(symTabStart + i * 18); - if (in.eof()) - stop("hideSymbols: Unexpected EOF"); - in >> e; - if (in.fail()) - stop("hideSymbols: File read error"); - const string &s = stringTableOld.decode(e.name); - out.seekp(symTabStart + i * 18); - e.name = stringTableNew.encode( - (e.storageClass == 2 && hide.find(s) != hide.end()) ? prefix + s : s); - out.write((char *)&e, 18); - if (out.fail()) - stop("hideSymbols: File write error"); - if (e.nAux) { - // copy auxiliary symbol table entries - int nAux = e.nAux; - for (int j = 1; j <= nAux; ++j) { - in >> e; - out.seekp(symTabStart + (i + j) * 18); - out.write((char *)&e, 18); - } - i += nAux; - } - } - // output string table - stringTableNew.write(out); -} - -// returns true iff and have no common element -template bool isDisjoint(const set &a, const set &b) { - set::const_iterator ita, itb; - - for (ita = a.begin(), itb = b.begin(); ita != a.end() && itb != b.end();) { - const T &ta = *ita, &tb = *itb; - if (ta < tb) - ++ita; - else if (tb < ta) - ++itb; - else - return false; - } - return true; -} - -// PRE: and are arrays with elements where -// >= . The first elements correspond to the -// external object files and the rest correspond to the internal object files. -// POST: file x is said to depend on file y if undefined[x] and defined[y] are -// not disjoint. Returns the transitive closure of the set of internal object -// files, as a set of file indexes, under the 'depends on' relation, minus the -// set of internal object files. 
-set *findRequiredExternal(int nExternal, int nTotal, set *defined, - set *undefined) { - set *required = new set; - set fresh[2]; - int i, cur = 0; - bool changed; - - for (i = nTotal - 1; i >= nExternal; --i) - fresh[cur].insert(i); - do { - changed = false; - for (set::iterator it = fresh[cur].begin(); it != fresh[cur].end(); - ++it) { - set &s = undefined[*it]; - - for (i = 0; i < nExternal; ++i) { - if (required->find(i) == required->end()) { - if (!isDisjoint(defined[i], s)) { - // found a new qualifying element - required->insert(i); - fresh[1 - cur].insert(i); - changed = true; - } - } - } - } - fresh[cur].clear(); - cur = 1 - cur; - } while (changed); - return required; -} - -int main(int argc, char **argv) { - int nExternal, nInternal, i; - set *defined, *undefined; - set::iterator it; - - if (argc < 3) - stop("Please specify a positive integer followed by a list of object " - "filenames"); - nExternal = atoi(argv[1]); - if (nExternal <= 0) - stop("Please specify a positive integer followed by a list of object " - "filenames"); - if (nExternal + 2 > argc) - stop("Too few external objects"); - nInternal = argc - nExternal - 2; - defined = new set[argc - 2]; - undefined = new set[argc - 2]; - - // determine the set of defined and undefined external symbols - for (i = 2; i < argc; ++i) - computeExternalSymbols(argv[i], defined + i - 2, undefined + i - 2); - - // determine the set of required external files - set *requiredExternal = - findRequiredExternal(nExternal, argc - 2, defined, undefined); - set hide; - - // determine the set of symbols to hide--namely defined external symbols of - // the required external files - for (it = requiredExternal->begin(); it != requiredExternal->end(); ++it) { - int idx = *it; - set::iterator it2; - // We have to insert one element at a time instead of inserting a range - // because the insert member function taking a range doesn't exist on - // Windows* OS, at least at the time of this writing. 
- for (it2 = defined[idx].begin(); it2 != defined[idx].end(); ++it2) - hide.insert(*it2); - } - - // process the external files--removing those that are not required and hiding - // the appropriate symbols in the others - for (i = 0; i < nExternal; ++i) - if (requiredExternal->find(i) != requiredExternal->end()) - hideSymbols(argv[2 + i], hide); - else - remove(argv[2 + i]); - // hide the appropriate symbols in the internal files - for (i = nExternal + 2; i < argc; ++i) - hideSymbols(argv[i], hide); - return 0; -} Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/extractExternal.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_dispatch_hier.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_dispatch_hier.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_dispatch_hier.h (nonexistent) @@ -1,1090 +0,0 @@ -#ifndef KMP_DISPATCH_HIER_H -#define KMP_DISPATCH_HIER_H -#include "kmp.h" -#include "kmp_dispatch.h" - -// Layer type for scheduling hierarchy -enum kmp_hier_layer_e { - LAYER_THREAD = -1, - LAYER_L1, - LAYER_L2, - LAYER_L3, - LAYER_NUMA, - LAYER_LOOP, - LAYER_LAST -}; - -// Convert hierarchy type (LAYER_L1, LAYER_L2, etc.) to C-style string -static inline const char *__kmp_get_hier_str(kmp_hier_layer_e type) { - switch (type) { - case kmp_hier_layer_e::LAYER_THREAD: - return "THREAD"; - case kmp_hier_layer_e::LAYER_L1: - return "L1"; - case kmp_hier_layer_e::LAYER_L2: - return "L2"; - case kmp_hier_layer_e::LAYER_L3: - return "L3"; - case kmp_hier_layer_e::LAYER_NUMA: - return "NUMA"; - case kmp_hier_layer_e::LAYER_LOOP: - return "WHOLE_LOOP"; - case kmp_hier_layer_e::LAYER_LAST: - return "LAST"; - } - KMP_ASSERT(0); - // Appease compilers, should never get here - return "ERROR"; -} - -// Structure to store values parsed from OMP_SCHEDULE for scheduling hierarchy -typedef struct kmp_hier_sched_env_t { - int size; - int capacity; - enum sched_type *scheds; - kmp_int32 *small_chunks; - kmp_int64 *large_chunks; - kmp_hier_layer_e *layers; - // Append a level of the hierarchy - void append(enum sched_type sched, kmp_int32 chunk, kmp_hier_layer_e layer) { - if (capacity == 0) { - scheds = (enum sched_type *)__kmp_allocate(sizeof(enum sched_type) * - kmp_hier_layer_e::LAYER_LAST); - small_chunks = (kmp_int32 *)__kmp_allocate(sizeof(kmp_int32) * - kmp_hier_layer_e::LAYER_LAST); - large_chunks = (kmp_int64 *)__kmp_allocate(sizeof(kmp_int64) * - kmp_hier_layer_e::LAYER_LAST); - layers = (kmp_hier_layer_e *)__kmp_allocate(sizeof(kmp_hier_layer_e) * - kmp_hier_layer_e::LAYER_LAST); - capacity = kmp_hier_layer_e::LAYER_LAST; - } - int current_size = size; - KMP_DEBUG_ASSERT(current_size < kmp_hier_layer_e::LAYER_LAST); - scheds[current_size] = sched; - layers[current_size] = layer; - small_chunks[current_size] = chunk; - large_chunks[current_size] = (kmp_int64)chunk; - size++; - } - // Sort the hierarchy using selection sort, size will always be small - // (less than LAYER_LAST) so it is not necessary to use an nlog(n) algorithm - void sort() { - if (size <= 1) - return; - for (int i = 0; i < size; ++i) { - int switch_index = i; - for 
(int j = i + 1; j < size; ++j) { - if (layers[j] < layers[switch_index]) - switch_index = j; - } - if (switch_index != i) { - kmp_hier_layer_e temp1 = layers[i]; - enum sched_type temp2 = scheds[i]; - kmp_int32 temp3 = small_chunks[i]; - kmp_int64 temp4 = large_chunks[i]; - layers[i] = layers[switch_index]; - scheds[i] = scheds[switch_index]; - small_chunks[i] = small_chunks[switch_index]; - large_chunks[i] = large_chunks[switch_index]; - layers[switch_index] = temp1; - scheds[switch_index] = temp2; - small_chunks[switch_index] = temp3; - large_chunks[switch_index] = temp4; - } - } - } - // Free all memory - void deallocate() { - if (capacity > 0) { - __kmp_free(scheds); - __kmp_free(layers); - __kmp_free(small_chunks); - __kmp_free(large_chunks); - scheds = NULL; - layers = NULL; - small_chunks = NULL; - large_chunks = NULL; - } - size = 0; - capacity = 0; - } -} kmp_hier_sched_env_t; - -extern int __kmp_dispatch_hand_threading; -extern kmp_hier_sched_env_t __kmp_hier_scheds; - -// Sizes of layer arrays bounded by max number of detected L1s, L2s, etc. -extern int __kmp_hier_max_units[kmp_hier_layer_e::LAYER_LAST + 1]; -extern int __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_LAST + 1]; - -extern int __kmp_dispatch_get_index(int tid, kmp_hier_layer_e type); -extern int __kmp_dispatch_get_id(int gtid, kmp_hier_layer_e type); -extern int __kmp_dispatch_get_t1_per_t2(kmp_hier_layer_e t1, - kmp_hier_layer_e t2); -extern void __kmp_dispatch_free_hierarchies(kmp_team_t *team); - -template struct kmp_hier_shared_bdata_t { - typedef typename traits_t::signed_t ST; - volatile kmp_uint64 val[2]; - kmp_int32 status[2]; - T lb[2]; - T ub[2]; - ST st[2]; - dispatch_shared_info_template sh[2]; - void zero() { - val[0] = val[1] = 0; - status[0] = status[1] = 0; - lb[0] = lb[1] = 0; - ub[0] = ub[1] = 0; - st[0] = st[1] = 0; - sh[0].u.s.iteration = sh[1].u.s.iteration = 0; - } - void set_next_hand_thread(T nlb, T nub, ST nst, kmp_int32 nstatus, - kmp_uint64 index) { - lb[1 - index] = nlb; - ub[1 - index] = nub; - st[1 - index] = nst; - status[1 - index] = nstatus; - } - void set_next(T nlb, T nub, ST nst, kmp_int32 nstatus, kmp_uint64 index) { - lb[1 - index] = nlb; - ub[1 - index] = nub; - st[1 - index] = nst; - status[1 - index] = nstatus; - sh[1 - index].u.s.iteration = 0; - } - - kmp_int32 get_next_status(kmp_uint64 index) const { - return status[1 - index]; - } - T get_next_lb(kmp_uint64 index) const { return lb[1 - index]; } - T get_next_ub(kmp_uint64 index) const { return ub[1 - index]; } - ST get_next_st(kmp_uint64 index) const { return st[1 - index]; } - dispatch_shared_info_template volatile *get_next_sh(kmp_uint64 index) { - return &(sh[1 - index]); - } - - kmp_int32 get_curr_status(kmp_uint64 index) const { return status[index]; } - T get_curr_lb(kmp_uint64 index) const { return lb[index]; } - T get_curr_ub(kmp_uint64 index) const { return ub[index]; } - ST get_curr_st(kmp_uint64 index) const { return st[index]; } - dispatch_shared_info_template volatile *get_curr_sh(kmp_uint64 index) { - return &(sh[index]); - } -}; - -/* - * In the barrier implementations, num_active is the number of threads that are - * attached to the kmp_hier_top_unit_t structure in the scheduling hierarchy. - * bdata is the shared barrier data that resides on the kmp_hier_top_unit_t - * structure. tdata is the thread private data that resides on the thread - * data structure. 
- * - * The reset_shared() method is used to initialize the barrier data on the - * kmp_hier_top_unit_t hierarchy structure - * - * The reset_private() method is used to initialize the barrier data on the - * thread's private dispatch buffer structure - * - * The barrier() method takes an id, which is that thread's id for the - * kmp_hier_top_unit_t structure, and implements the barrier. All threads wait - * inside barrier() until all fellow threads who are attached to that - * kmp_hier_top_unit_t structure have arrived. - */ - -// Core barrier implementation -// Can be used in a unit with between 2 to 8 threads -template class core_barrier_impl { - static inline kmp_uint64 get_wait_val(int num_active) { - kmp_uint64 wait_val; - switch (num_active) { - case 2: - wait_val = 0x0101LL; - break; - case 3: - wait_val = 0x010101LL; - break; - case 4: - wait_val = 0x01010101LL; - break; - case 5: - wait_val = 0x0101010101LL; - break; - case 6: - wait_val = 0x010101010101LL; - break; - case 7: - wait_val = 0x01010101010101LL; - break; - case 8: - wait_val = 0x0101010101010101LL; - break; - default: - // don't use the core_barrier_impl for more than 8 threads - KMP_ASSERT(0); - } - return wait_val; - } - -public: - static void reset_private(kmp_int32 num_active, - kmp_hier_private_bdata_t *tdata); - static void reset_shared(kmp_int32 num_active, - kmp_hier_shared_bdata_t *bdata); - static void barrier(kmp_int32 id, kmp_hier_shared_bdata_t *bdata, - kmp_hier_private_bdata_t *tdata); -}; - -template -void core_barrier_impl::reset_private(kmp_int32 num_active, - kmp_hier_private_bdata_t *tdata) { - tdata->num_active = num_active; - tdata->index = 0; - tdata->wait_val[0] = tdata->wait_val[1] = get_wait_val(num_active); -} -template -void core_barrier_impl::reset_shared(kmp_int32 num_active, - kmp_hier_shared_bdata_t *bdata) { - bdata->val[0] = bdata->val[1] = 0LL; - bdata->status[0] = bdata->status[1] = 0LL; -} -template -void core_barrier_impl::barrier(kmp_int32 id, - kmp_hier_shared_bdata_t *bdata, - kmp_hier_private_bdata_t *tdata) { - kmp_uint64 current_index = tdata->index; - kmp_uint64 next_index = 1 - current_index; - kmp_uint64 current_wait_value = tdata->wait_val[current_index]; - kmp_uint64 next_wait_value = - (current_wait_value ? 0 : get_wait_val(tdata->num_active)); - KD_TRACE(10, ("core_barrier_impl::barrier(): T#%d current_index:%llu " - "next_index:%llu curr_wait:%llu next_wait:%llu\n", - __kmp_get_gtid(), current_index, next_index, current_wait_value, - next_wait_value)); - char v = (current_wait_value ? 
0x1 : 0x0); - (RCAST(volatile char *, &(bdata->val[current_index])))[id] = v; - __kmp_wait_yield(&(bdata->val[current_index]), current_wait_value, - __kmp_eq USE_ITT_BUILD_ARG(NULL)); - tdata->wait_val[current_index] = next_wait_value; - tdata->index = next_index; -} - -// Counter barrier implementation -// Can be used in a unit with arbitrary number of active threads -template class counter_barrier_impl { -public: - static void reset_private(kmp_int32 num_active, - kmp_hier_private_bdata_t *tdata); - static void reset_shared(kmp_int32 num_active, - kmp_hier_shared_bdata_t *bdata); - static void barrier(kmp_int32 id, kmp_hier_shared_bdata_t *bdata, - kmp_hier_private_bdata_t *tdata); -}; - -template -void counter_barrier_impl::reset_private(kmp_int32 num_active, - kmp_hier_private_bdata_t *tdata) { - tdata->num_active = num_active; - tdata->index = 0; - tdata->wait_val[0] = tdata->wait_val[1] = (kmp_uint64)num_active; -} -template -void counter_barrier_impl::reset_shared(kmp_int32 num_active, - kmp_hier_shared_bdata_t *bdata) { - bdata->val[0] = bdata->val[1] = 0LL; - bdata->status[0] = bdata->status[1] = 0LL; -} -template -void counter_barrier_impl::barrier(kmp_int32 id, - kmp_hier_shared_bdata_t *bdata, - kmp_hier_private_bdata_t *tdata) { - volatile kmp_int64 *val; - kmp_uint64 current_index = tdata->index; - kmp_uint64 next_index = 1 - current_index; - kmp_uint64 current_wait_value = tdata->wait_val[current_index]; - kmp_uint64 next_wait_value = current_wait_value + tdata->num_active; - - KD_TRACE(10, ("counter_barrier_impl::barrier(): T#%d current_index:%llu " - "next_index:%llu curr_wait:%llu next_wait:%llu\n", - __kmp_get_gtid(), current_index, next_index, current_wait_value, - next_wait_value)); - val = RCAST(volatile kmp_int64 *, &(bdata->val[current_index])); - KMP_TEST_THEN_INC64(val); - __kmp_wait_yield(&(bdata->val[current_index]), current_wait_value, - __kmp_ge USE_ITT_BUILD_ARG(NULL)); - tdata->wait_val[current_index] = next_wait_value; - tdata->index = next_index; -} - -// Data associated with topology unit within a layer -// For example, one kmp_hier_top_unit_t corresponds to one L1 cache -template struct kmp_hier_top_unit_t { - typedef typename traits_t::signed_t ST; - typedef typename traits_t::unsigned_t UT; - kmp_int32 active; // number of topology units that communicate with this unit - // chunk information (lower/upper bound, stride, etc.) 
- dispatch_private_info_template hier_pr; - kmp_hier_top_unit_t *hier_parent; // pointer to parent unit - kmp_hier_shared_bdata_t hier_barrier; // shared barrier data for this unit - - kmp_int32 get_hier_id() const { return hier_pr.hier_id; } - void reset_shared_barrier() { - KMP_DEBUG_ASSERT(active > 0); - if (active == 1) - return; - hier_barrier.zero(); - if (active >= 2 && active <= 8) { - core_barrier_impl::reset_shared(active, &hier_barrier); - } else { - counter_barrier_impl::reset_shared(active, &hier_barrier); - } - } - void reset_private_barrier(kmp_hier_private_bdata_t *tdata) { - KMP_DEBUG_ASSERT(tdata); - KMP_DEBUG_ASSERT(active > 0); - if (active == 1) - return; - if (active >= 2 && active <= 8) { - core_barrier_impl::reset_private(active, tdata); - } else { - counter_barrier_impl::reset_private(active, tdata); - } - } - void barrier(kmp_int32 id, kmp_hier_private_bdata_t *tdata) { - KMP_DEBUG_ASSERT(tdata); - KMP_DEBUG_ASSERT(active > 0); - KMP_DEBUG_ASSERT(id >= 0 && id < active); - if (active == 1) { - tdata->index = 1 - tdata->index; - return; - } - if (active >= 2 && active <= 8) { - core_barrier_impl::barrier(id, &hier_barrier, tdata); - } else { - counter_barrier_impl::barrier(id, &hier_barrier, tdata); - } - } - - kmp_int32 get_next_status(kmp_uint64 index) const { - return hier_barrier.get_next_status(index); - } - T get_next_lb(kmp_uint64 index) const { - return hier_barrier.get_next_lb(index); - } - T get_next_ub(kmp_uint64 index) const { - return hier_barrier.get_next_ub(index); - } - ST get_next_st(kmp_uint64 index) const { - return hier_barrier.get_next_st(index); - } - dispatch_shared_info_template volatile *get_next_sh(kmp_uint64 index) { - return hier_barrier.get_next_sh(index); - } - - kmp_int32 get_curr_status(kmp_uint64 index) const { - return hier_barrier.get_curr_status(index); - } - T get_curr_lb(kmp_uint64 index) const { - return hier_barrier.get_curr_lb(index); - } - T get_curr_ub(kmp_uint64 index) const { - return hier_barrier.get_curr_ub(index); - } - ST get_curr_st(kmp_uint64 index) const { - return hier_barrier.get_curr_st(index); - } - dispatch_shared_info_template volatile *get_curr_sh(kmp_uint64 index) { - return hier_barrier.get_curr_sh(index); - } - - void set_next_hand_thread(T lb, T ub, ST st, kmp_int32 status, - kmp_uint64 index) { - hier_barrier.set_next_hand_thread(lb, ub, st, status, index); - } - void set_next(T lb, T ub, ST st, kmp_int32 status, kmp_uint64 index) { - hier_barrier.set_next(lb, ub, st, status, index); - } - dispatch_private_info_template *get_my_pr() { return &hier_pr; } - kmp_hier_top_unit_t *get_parent() { return hier_parent; } - dispatch_private_info_template *get_parent_pr() { - return &(hier_parent->hier_pr); - } - - kmp_int32 is_active() const { return active; } - kmp_int32 get_num_active() const { return active; } - void print() { - KD_TRACE( - 10, - (" kmp_hier_top_unit_t: active:%d pr:%p lb:%d ub:%d st:%d tc:%d\n", - active, &hier_pr, hier_pr.u.p.lb, hier_pr.u.p.ub, hier_pr.u.p.st, - hier_pr.u.p.tc)); - } -}; - -// Information regarding a single layer within the scheduling hierarchy -template struct kmp_hier_layer_info_t { - int num_active; // number of threads active in this level - kmp_hier_layer_e type; // LAYER_L1, LAYER_L2, etc. - enum sched_type sched; // static, dynamic, guided, etc. 
- typename traits_t::signed_t chunk; // chunk size associated with schedule - int length; // length of the kmp_hier_top_unit_t array - - // Print this layer's information - void print() { - const char *t = __kmp_get_hier_str(type); - KD_TRACE( - 10, - (" kmp_hier_layer_info_t: num_active:%d type:%s sched:%d chunk:%d " - "length:%d\n", - num_active, t, sched, chunk, length)); - } -}; - -/* - * Structure to implement entire hierarchy - * - * The hierarchy is kept as an array of arrays to represent the different - * layers. Layer 0 is the lowest layer to layer num_layers - 1 which is the - * highest layer. - * Example: - * [ 2 ] -> [ L3 | L3 ] - * [ 1 ] -> [ L2 | L2 | L2 | L2 ] - * [ 0 ] -> [ L1 | L1 | L1 | L1 | L1 | L1 | L1 | L1 ] - * There is also an array of layer_info_t which has information regarding - * each layer - */ -template struct kmp_hier_t { -public: - typedef typename traits_t::unsigned_t UT; - typedef typename traits_t::signed_t ST; - -private: - int next_recurse(ident_t *loc, int gtid, kmp_hier_top_unit_t *current, - kmp_int32 *p_last, T *p_lb, T *p_ub, ST *p_st, - kmp_int32 previous_id, int hier_level) { - int status; - kmp_info_t *th = __kmp_threads[gtid]; - auto parent = current->get_parent(); - bool last_layer = (hier_level == get_num_layers() - 1); - KMP_DEBUG_ASSERT(th); - kmp_hier_private_bdata_t *tdata = &(th->th.th_hier_bar_data[hier_level]); - KMP_DEBUG_ASSERT(current); - KMP_DEBUG_ASSERT(hier_level >= 0); - KMP_DEBUG_ASSERT(hier_level < get_num_layers()); - KMP_DEBUG_ASSERT(tdata); - KMP_DEBUG_ASSERT(parent || last_layer); - - KD_TRACE( - 1, ("kmp_hier_t.next_recurse(): T#%d (%d) called\n", gtid, hier_level)); - - T hier_id = (T)current->get_hier_id(); - // Attempt to grab next iteration range for this level - if (previous_id == 0) { - KD_TRACE(1, ("kmp_hier_t.next_recurse(): T#%d (%d) is master of unit\n", - gtid, hier_level)); - kmp_int32 contains_last; - T my_lb, my_ub; - ST my_st; - T nproc; - dispatch_shared_info_template volatile *my_sh; - dispatch_private_info_template *my_pr; - if (last_layer) { - // last layer below the very top uses the single shared buffer - // from the team struct. 
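// (In other words: the topmost hierarchy layer carves chunks directly out of
// the team's dispatch buffer, while every lower layer re-chunks out of the
// double-buffered kmp_hier_shared_bdata_t of its parent unit, selected by the
// parent level's barrier index.)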
- KD_TRACE(10, - ("kmp_hier_t.next_recurse(): T#%d (%d) using top level sh\n", - gtid, hier_level)); - my_sh = reinterpret_cast volatile *>( - th->th.th_dispatch->th_dispatch_sh_current); - nproc = (T)get_top_level_nproc(); - } else { - // middle layers use the shared buffer inside the kmp_hier_top_unit_t - // structure - KD_TRACE(10, ("kmp_hier_t.next_recurse(): T#%d (%d) using hier sh\n", - gtid, hier_level)); - my_sh = - parent->get_curr_sh(th->th.th_hier_bar_data[hier_level + 1].index); - nproc = (T)parent->get_num_active(); - } - my_pr = current->get_my_pr(); - KMP_DEBUG_ASSERT(my_sh); - KMP_DEBUG_ASSERT(my_pr); - enum sched_type schedule = get_sched(hier_level); - ST chunk = (ST)get_chunk(hier_level); - status = __kmp_dispatch_next_algorithm(gtid, my_pr, my_sh, - &contains_last, &my_lb, &my_ub, - &my_st, nproc, hier_id); - KD_TRACE( - 10, - ("kmp_hier_t.next_recurse(): T#%d (%d) next_pr_sh() returned %d\n", - gtid, hier_level, status)); - // When no iterations are found (status == 0) and this is not the last - // layer, attempt to go up the hierarchy for more iterations - if (status == 0 && !last_layer) { - status = next_recurse(loc, gtid, parent, &contains_last, &my_lb, &my_ub, - &my_st, hier_id, hier_level + 1); - KD_TRACE( - 10, - ("kmp_hier_t.next_recurse(): T#%d (%d) hier_next() returned %d\n", - gtid, hier_level, status)); - if (status == 1) { - kmp_hier_private_bdata_t *upper_tdata = - &(th->th.th_hier_bar_data[hier_level + 1]); - my_sh = parent->get_curr_sh(upper_tdata->index); - KD_TRACE(10, ("kmp_hier_t.next_recurse(): T#%d (%d) about to init\n", - gtid, hier_level)); - __kmp_dispatch_init_algorithm(loc, gtid, my_pr, schedule, - parent->get_curr_lb(upper_tdata->index), - parent->get_curr_ub(upper_tdata->index), - parent->get_curr_st(upper_tdata->index), -#if USE_ITT_BUILD - NULL, -#endif - chunk, nproc, hier_id); - status = __kmp_dispatch_next_algorithm( - gtid, my_pr, my_sh, &contains_last, &my_lb, &my_ub, &my_st, nproc, - hier_id); - if (!status) { - KD_TRACE(10, ("kmp_hier_t.next_recurse(): T#%d (%d) status not 1 " - "setting to 2!\n", - gtid, hier_level)); - status = 2; - } - } - } - current->set_next(my_lb, my_ub, my_st, status, tdata->index); - // Propagate whether a unit holds the actual global last iteration - // The contains_last attribute is sent downwards from the top to the - // bottom of the hierarchy via the contains_last flag inside the - // private dispatch buffers in the hierarchy's middle layers - if (contains_last) { - // If the next_algorithm() method returns 1 for p_last and it is the - // last layer or our parent contains the last serial chunk, then the - // chunk must contain the last serial iteration. 
- if (last_layer || parent->hier_pr.flags.contains_last) { - KD_TRACE(10, ("kmp_hier_t.next_recurse(): T#%d (%d) Setting this pr " - "to contain last.\n", - gtid, hier_level)); - current->hier_pr.flags.contains_last = contains_last; - } - if (!current->hier_pr.flags.contains_last) - contains_last = FALSE; - } - if (p_last) - *p_last = contains_last; - } // if master thread of this unit - if (hier_level > 0 || !__kmp_dispatch_hand_threading) { - KD_TRACE(10, - ("kmp_hier_t.next_recurse(): T#%d (%d) going into barrier.\n", - gtid, hier_level)); - current->barrier(previous_id, tdata); - KD_TRACE(10, - ("kmp_hier_t.next_recurse(): T#%d (%d) released and exit %d\n", - gtid, hier_level, current->get_curr_status(tdata->index))); - } else { - KMP_DEBUG_ASSERT(previous_id == 0); - return status; - } - return current->get_curr_status(tdata->index); - } - -public: - int top_level_nproc; - int num_layers; - bool valid; - int type_size; - kmp_hier_layer_info_t *info; - kmp_hier_top_unit_t **layers; - // Deallocate all memory from this hierarchy - void deallocate() { - for (int i = 0; i < num_layers; ++i) - if (layers[i] != NULL) { - __kmp_free(layers[i]); - } - if (layers != NULL) { - __kmp_free(layers); - layers = NULL; - } - if (info != NULL) { - __kmp_free(info); - info = NULL; - } - num_layers = 0; - valid = false; - } - // Returns true if reallocation is needed else false - bool need_to_reallocate(int n, const kmp_hier_layer_e *new_layers, - const enum sched_type *new_scheds, - const ST *new_chunks) const { - if (!valid || layers == NULL || info == NULL || - traits_t::type_size != type_size || n != num_layers) - return true; - for (int i = 0; i < n; ++i) { - if (info[i].type != new_layers[i]) - return true; - if (info[i].sched != new_scheds[i]) - return true; - if (info[i].chunk != new_chunks[i]) - return true; - } - return false; - } - // A single thread should call this function while the other threads wait - // create a new scheduling hierarchy consisting of new_layers, new_scheds - // and new_chunks. These should come pre-sorted according to - // kmp_hier_layer_e value. 
This function will try to avoid reallocation - // if it can - void allocate_hier(int n, const kmp_hier_layer_e *new_layers, - const enum sched_type *new_scheds, const ST *new_chunks) { - top_level_nproc = 0; - if (!need_to_reallocate(n, new_layers, new_scheds, new_chunks)) { - KD_TRACE( - 10, - ("kmp_hier_t::allocate_hier: T#0 do not need to reallocate\n")); - for (int i = 0; i < n; ++i) { - info[i].num_active = 0; - for (int j = 0; j < get_length(i); ++j) - layers[i][j].active = 0; - } - return; - } - KD_TRACE(10, ("kmp_hier_t::allocate_hier: T#0 full alloc\n")); - deallocate(); - type_size = traits_t::type_size; - num_layers = n; - info = (kmp_hier_layer_info_t *)__kmp_allocate( - sizeof(kmp_hier_layer_info_t) * n); - layers = (kmp_hier_top_unit_t **)__kmp_allocate( - sizeof(kmp_hier_top_unit_t *) * n); - for (int i = 0; i < n; ++i) { - int max = 0; - kmp_hier_layer_e layer = new_layers[i]; - info[i].num_active = 0; - info[i].type = layer; - info[i].sched = new_scheds[i]; - info[i].chunk = new_chunks[i]; - max = __kmp_hier_max_units[layer + 1]; - if (max == 0) { - valid = false; - KMP_WARNING(HierSchedInvalid, __kmp_get_hier_str(layer)); - deallocate(); - return; - } - info[i].length = max; - layers[i] = (kmp_hier_top_unit_t *)__kmp_allocate( - sizeof(kmp_hier_top_unit_t) * max); - for (int j = 0; j < max; ++j) { - layers[i][j].active = 0; - } - } - valid = true; - } - // loc - source file location - // gtid - global thread identifier - // pr - this thread's private dispatch buffer (corresponding with gtid) - // p_last (return value) - pointer to flag indicating this set of iterations - // contains last - // iteration - // p_lb (return value) - lower bound for this chunk of iterations - // p_ub (return value) - upper bound for this chunk of iterations - // p_st (return value) - stride for this chunk of iterations - // - // Returns 1 if there are more iterations to perform, 0 otherwise - int next(ident_t *loc, int gtid, dispatch_private_info_template *pr, - kmp_int32 *p_last, T *p_lb, T *p_ub, ST *p_st) { - int status; - kmp_int32 contains_last = 0; - kmp_info_t *th = __kmp_threads[gtid]; - kmp_hier_private_bdata_t *tdata = &(th->th.th_hier_bar_data[0]); - auto parent = pr->get_parent(); - KMP_DEBUG_ASSERT(parent); - KMP_DEBUG_ASSERT(th); - KMP_DEBUG_ASSERT(tdata); - KMP_DEBUG_ASSERT(parent); - T nproc = (T)parent->get_num_active(); - T unit_id = (T)pr->get_hier_id(); - KD_TRACE( - 10, - ("kmp_hier_t.next(): T#%d THREAD LEVEL nproc:%d unit_id:%d called\n", - gtid, nproc, unit_id)); - // Handthreading implementation - // Each iteration is performed by all threads on last unit (typically - // cores/tiles) - // e.g., threads 0,1,2,3 all execute iteration 0 - // threads 0,1,2,3 all execute iteration 1 - // threads 4,5,6,7 all execute iteration 2 - // threads 4,5,6,7 all execute iteration 3 - // ... etc. - if (__kmp_dispatch_hand_threading) { - KD_TRACE(10, - ("kmp_hier_t.next(): T#%d THREAD LEVEL using hand threading\n", - gtid)); - if (unit_id == 0) { - // For hand threading, the sh buffer on the lowest level is only ever - // modified and read by the master thread on that level. Because of - // this, we can always use the first sh buffer. 
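// (Contrast this with the non-hand-threaded path further below, which calls
// parent->get_curr_sh(tdata->index) and therefore alternates between sh[0]
// and sh[1] as the barrier index flips.)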
- auto sh = &(parent->hier_barrier.sh[0]); - KMP_DEBUG_ASSERT(sh); - status = __kmp_dispatch_next_algorithm( - gtid, pr, sh, &contains_last, p_lb, p_ub, p_st, nproc, unit_id); - if (!status) { - bool done = false; - while (!done) { - done = true; - status = next_recurse(loc, gtid, parent, &contains_last, p_lb, p_ub, - p_st, unit_id, 0); - if (status == 1) { - __kmp_dispatch_init_algorithm(loc, gtid, pr, pr->schedule, - parent->get_next_lb(tdata->index), - parent->get_next_ub(tdata->index), - parent->get_next_st(tdata->index), -#if USE_ITT_BUILD - NULL, -#endif - pr->u.p.parm1, nproc, unit_id); - sh->u.s.iteration = 0; - status = __kmp_dispatch_next_algorithm( - gtid, pr, sh, &contains_last, p_lb, p_ub, p_st, nproc, - unit_id); - if (!status) { - KD_TRACE(10, - ("kmp_hier_t.next(): T#%d THREAD LEVEL status == 0 " - "after next_pr_sh()" - "trying again.\n", - gtid)); - done = false; - } - } else if (status == 2) { - KD_TRACE(10, ("kmp_hier_t.next(): T#%d THREAD LEVEL status == 2 " - "trying again.\n", - gtid)); - done = false; - } - } - } - parent->set_next_hand_thread(*p_lb, *p_ub, *p_st, status, tdata->index); - } // if master thread of lowest unit level - parent->barrier(pr->get_hier_id(), tdata); - if (unit_id != 0) { - *p_lb = parent->get_curr_lb(tdata->index); - *p_ub = parent->get_curr_ub(tdata->index); - *p_st = parent->get_curr_st(tdata->index); - status = parent->get_curr_status(tdata->index); - } - } else { - // Normal implementation - // Each thread grabs an iteration chunk and executes it (no cooperation) - auto sh = parent->get_curr_sh(tdata->index); - KMP_DEBUG_ASSERT(sh); - status = __kmp_dispatch_next_algorithm( - gtid, pr, sh, &contains_last, p_lb, p_ub, p_st, nproc, unit_id); - KD_TRACE(10, - ("kmp_hier_t.next(): T#%d THREAD LEVEL next_algorithm status:%d " - "contains_last:%d p_lb:%d p_ub:%d p_st:%d\n", - gtid, status, contains_last, *p_lb, *p_ub, *p_st)); - if (!status) { - bool done = false; - while (!done) { - done = true; - status = next_recurse(loc, gtid, parent, &contains_last, p_lb, p_ub, - p_st, unit_id, 0); - if (status == 1) { - sh = parent->get_curr_sh(tdata->index); - __kmp_dispatch_init_algorithm(loc, gtid, pr, pr->schedule, - parent->get_curr_lb(tdata->index), - parent->get_curr_ub(tdata->index), - parent->get_curr_st(tdata->index), -#if USE_ITT_BUILD - NULL, -#endif - pr->u.p.parm1, nproc, unit_id); - status = __kmp_dispatch_next_algorithm( - gtid, pr, sh, &contains_last, p_lb, p_ub, p_st, nproc, unit_id); - if (!status) { - KD_TRACE(10, ("kmp_hier_t.next(): T#%d THREAD LEVEL status == 0 " - "after next_pr_sh()" - "trying again.\n", - gtid)); - done = false; - } - } else if (status == 2) { - KD_TRACE(10, ("kmp_hier_t.next(): T#%d THREAD LEVEL status == 2 " - "trying again.\n", - gtid)); - done = false; - } - } - } - } - if (contains_last && !parent->hier_pr.flags.contains_last) { - KD_TRACE(10, ("kmp_hier_t.next(): T#%d THREAD LEVEL resetting " - "contains_last to FALSE\n", - gtid)); - contains_last = FALSE; - } - if (p_last) - *p_last = contains_last; - KD_TRACE(10, ("kmp_hier_t.next(): T#%d THREAD LEVEL exit status %d\n", gtid, - status)); - return status; - } - // These functions probe the layer info structure - // Returns the type of topology unit given level - kmp_hier_layer_e get_type(int level) const { - KMP_DEBUG_ASSERT(level >= 0); - KMP_DEBUG_ASSERT(level < num_layers); - return info[level].type; - } - // Returns the schedule type at given level - enum sched_type get_sched(int level) const { - KMP_DEBUG_ASSERT(level >= 0); - KMP_DEBUG_ASSERT(level 
< num_layers); - return info[level].sched; - } - // Returns the chunk size at given level - ST get_chunk(int level) const { - KMP_DEBUG_ASSERT(level >= 0); - KMP_DEBUG_ASSERT(level < num_layers); - return info[level].chunk; - } - // Returns the number of active threads at given level - int get_num_active(int level) const { - KMP_DEBUG_ASSERT(level >= 0); - KMP_DEBUG_ASSERT(level < num_layers); - return info[level].num_active; - } - // Returns the length of topology unit array at given level - int get_length(int level) const { - KMP_DEBUG_ASSERT(level >= 0); - KMP_DEBUG_ASSERT(level < num_layers); - return info[level].length; - } - // Returns the topology unit given the level and index - kmp_hier_top_unit_t *get_unit(int level, int index) { - KMP_DEBUG_ASSERT(level >= 0); - KMP_DEBUG_ASSERT(level < num_layers); - KMP_DEBUG_ASSERT(index >= 0); - KMP_DEBUG_ASSERT(index < get_length(level)); - return &(layers[level][index]); - } - // Returns the number of layers in the hierarchy - int get_num_layers() const { return num_layers; } - // Returns the number of threads in the top layer - // This is necessary because we don't store a topology unit as - // the very top level and the scheduling algorithms need this information - int get_top_level_nproc() const { return top_level_nproc; } - // Return whether this hierarchy is valid or not - bool is_valid() const { return valid; } - // Print the hierarchy - void print() { - KD_TRACE(10, ("kmp_hier_t:\n")); - for (int i = num_layers - 1; i >= 0; --i) { - KD_TRACE(10, ("Info[%d] = ", i)); - info[i].print(); - } - for (int i = num_layers - 1; i >= 0; --i) { - KD_TRACE(10, ("Layer[%d] =\n", i)); - for (int j = 0; j < info[i].length; ++j) { - layers[i][j].print(); - } - } - } -}; - -template -void __kmp_dispatch_init_hierarchy(ident_t *loc, int n, - kmp_hier_layer_e *new_layers, - enum sched_type *new_scheds, - typename traits_t::signed_t *new_chunks, - T lb, T ub, - typename traits_t::signed_t st) { - typedef typename traits_t::signed_t ST; - typedef typename traits_t::unsigned_t UT; - int tid, gtid, num_hw_threads, num_threads_per_layer1, active; - int my_buffer_index; - kmp_info_t *th; - kmp_team_t *team; - dispatch_private_info_template *pr; - dispatch_shared_info_template volatile *sh; - gtid = __kmp_entry_gtid(); - tid = __kmp_tid_from_gtid(gtid); -#ifdef KMP_DEBUG - KD_TRACE(10, ("__kmp_dispatch_init_hierarchy: T#%d called: %d layer(s)\n", - gtid, n)); - for (int i = 0; i < n; ++i) { - const char *layer = __kmp_get_hier_str(new_layers[i]); - KD_TRACE(10, ("__kmp_dispatch_init_hierarchy: T#%d: new_layers[%d] = %s, " - "new_scheds[%d] = %d, new_chunks[%d] = %u\n", - gtid, i, layer, i, (int)new_scheds[i], i, new_chunks[i])); - } -#endif // KMP_DEBUG - KMP_DEBUG_ASSERT(n > 0); - KMP_DEBUG_ASSERT(new_layers); - KMP_DEBUG_ASSERT(new_scheds); - KMP_DEBUG_ASSERT(new_chunks); - if (!TCR_4(__kmp_init_parallel)) - __kmp_parallel_initialize(); - th = __kmp_threads[gtid]; - team = th->th.th_team; - active = !team->t.t_serialized; - th->th.th_ident = loc; - num_hw_threads = __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1]; - if (!active) { - KD_TRACE(10, ("__kmp_dispatch_init_hierarchy: T#%d not active parallel. 
" - "Using normal dispatch functions.\n", - gtid)); - pr = reinterpret_cast *>( - th->th.th_dispatch->th_disp_buffer); - KMP_DEBUG_ASSERT(pr); - pr->flags.use_hier = FALSE; - pr->flags.contains_last = FALSE; - return; - } - KMP_DEBUG_ASSERT(th->th.th_dispatch == - &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]); - - my_buffer_index = th->th.th_dispatch->th_disp_index; - pr = reinterpret_cast *>( - &th->th.th_dispatch - ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]); - sh = reinterpret_cast volatile *>( - &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]); - KMP_DEBUG_ASSERT(pr); - KMP_DEBUG_ASSERT(sh); - pr->flags.use_hier = TRUE; - pr->u.p.tc = 0; - // Have master allocate the hierarchy - if (__kmp_tid_from_gtid(gtid) == 0) { - KD_TRACE(10, ("__kmp_dispatch_init_hierarchy: T#%d pr:%p sh:%p allocating " - "hierarchy\n", - gtid, pr, sh)); - if (sh->hier == NULL) { - sh->hier = (kmp_hier_t *)__kmp_allocate(sizeof(kmp_hier_t)); - } - sh->hier->allocate_hier(n, new_layers, new_scheds, new_chunks); - sh->u.s.iteration = 0; - } - __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); - // Check to make sure the hierarchy is valid - kmp_hier_t *hier = sh->hier; - if (!sh->hier->is_valid()) { - pr->flags.use_hier = FALSE; - return; - } - // Have threads allocate their thread-private barrier data if it hasn't - // already been allocated - if (th->th.th_hier_bar_data == NULL) { - th->th.th_hier_bar_data = (kmp_hier_private_bdata_t *)__kmp_allocate( - sizeof(kmp_hier_private_bdata_t) * kmp_hier_layer_e::LAYER_LAST); - } - // Have threads "register" themselves by modifiying the active count for each - // level they are involved in. The active count will act as nthreads for that - // level regarding the scheduling algorithms - for (int i = 0; i < n; ++i) { - int index = __kmp_dispatch_get_index(tid, hier->get_type(i)); - kmp_hier_top_unit_t *my_unit = hier->get_unit(i, index); - // Setup the thread's private dispatch buffer's hierarchy pointers - if (i == 0) - pr->hier_parent = my_unit; - // If this unit is already active, then increment active count and wait - if (my_unit->is_active()) { - KD_TRACE(10, ("__kmp_dispatch_init_hierarchy: T#%d my_unit (%p) " - "is already active (%d)\n", - gtid, my_unit, my_unit->active)); - KMP_TEST_THEN_INC32(&(my_unit->active)); - break; - } - // Flag that this unit is active - if (KMP_COMPARE_AND_STORE_ACQ32(&(my_unit->active), 0, 1)) { - // Do not setup parent pointer for top level unit since it has no parent - if (i < n - 1) { - // Setup middle layer pointers to parents - my_unit->get_my_pr()->hier_id = - index % __kmp_dispatch_get_t1_per_t2(hier->get_type(i), - hier->get_type(i + 1)); - int parent_index = __kmp_dispatch_get_index(tid, hier->get_type(i + 1)); - my_unit->hier_parent = hier->get_unit(i + 1, parent_index); - } else { - // Setup top layer information (no parent pointers are set) - my_unit->get_my_pr()->hier_id = - index % __kmp_dispatch_get_t1_per_t2(hier->get_type(i), - kmp_hier_layer_e::LAYER_LOOP); - KMP_TEST_THEN_INC32(&(hier->top_level_nproc)); - my_unit->hier_parent = nullptr; - } - // Set trip count to 0 so that next() operation will initially climb up - // the hierarchy to get more iterations (early exit in next() for tc == 0) - my_unit->get_my_pr()->u.p.tc = 0; - // Increment this layer's number of active units - KMP_TEST_THEN_INC32(&(hier->info[i].num_active)); - KD_TRACE(10, ("__kmp_dispatch_init_hierarchy: T#%d my_unit (%p) " - "incrementing num_active\n", - gtid, my_unit)); - } else { - 
KMP_TEST_THEN_INC32(&(my_unit->active)); - break; - } - } - // Set this thread's id - num_threads_per_layer1 = __kmp_dispatch_get_t1_per_t2( - kmp_hier_layer_e::LAYER_THREAD, hier->get_type(0)); - pr->hier_id = tid % num_threads_per_layer1; - // For oversubscribed threads, increment their index within the lowest unit - // This is done to prevent having two or more threads with id 0, id 1, etc. - if (tid >= num_hw_threads) - pr->hier_id += ((tid / num_hw_threads) * num_threads_per_layer1); - KD_TRACE( - 10, ("__kmp_dispatch_init_hierarchy: T#%d setting lowest hier_id to %d\n", - gtid, pr->hier_id)); - - pr->flags.contains_last = FALSE; - __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); - - // Now that the number of active threads at each level is determined, - // the barrier data for each unit can be initialized and the last layer's - // loop information can be initialized. - int prev_id = pr->get_hier_id(); - for (int i = 0; i < n; ++i) { - if (prev_id != 0) - break; - int index = __kmp_dispatch_get_index(tid, hier->get_type(i)); - kmp_hier_top_unit_t *my_unit = hier->get_unit(i, index); - // Only master threads of this unit within the hierarchy do initialization - KD_TRACE(10, ("__kmp_dispatch_init_hierarchy: T#%d (%d) prev_id is 0\n", - gtid, i)); - my_unit->reset_shared_barrier(); - my_unit->hier_pr.flags.contains_last = FALSE; - // Last layer, initialize the private buffers with entire loop information - // Now the next next_algorithim() call will get the first chunk of - // iterations properly - if (i == n - 1) { - __kmp_dispatch_init_algorithm( - loc, gtid, my_unit->get_my_pr(), hier->get_sched(i), lb, ub, st, -#if USE_ITT_BUILD - NULL, -#endif - hier->get_chunk(i), hier->get_num_active(i), my_unit->get_hier_id()); - } - prev_id = my_unit->get_hier_id(); - } - // Initialize each layer of the thread's private barrier data - kmp_hier_top_unit_t *unit = pr->hier_parent; - for (int i = 0; i < n && unit; ++i, unit = unit->get_parent()) { - kmp_hier_private_bdata_t *tdata = &(th->th.th_hier_bar_data[i]); - unit->reset_private_barrier(tdata); - } - __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); - -#ifdef KMP_DEBUG - if (__kmp_tid_from_gtid(gtid) == 0) { - for (int i = 0; i < n; ++i) { - KD_TRACE(10, - ("__kmp_dispatch_init_hierarchy: T#%d active count[%d] = %d\n", - gtid, i, hier->get_num_active(i))); - } - hier->print(); - } - __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); -#endif // KMP_DEBUG -} -#endif Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_dispatch_hier.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_ftn_entry.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_ftn_entry.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_ftn_entry.h (nonexistent) @@ -1,1446 +0,0 @@ -/* - * kmp_ftn_entry.h -- Fortran entry linkage support for OpenMP. - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. 
See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#ifndef FTN_STDCALL -#error The support file kmp_ftn_entry.h should not be compiled by itself. -#endif - -#ifdef KMP_STUB -#include "kmp_stub.h" -#endif - -#include "kmp_i18n.h" - -#if OMP_50_ENABLED -// For affinity format functions -#include "kmp_io.h" -#include "kmp_str.h" -#endif - -#if OMPT_SUPPORT -#include "ompt-specific.h" -#endif - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -/* For compatibility with the Gnu/MS Open MP codegen, omp_set_num_threads(), - * omp_set_nested(), and omp_set_dynamic() [in lowercase on MS, and w/o - * a trailing underscore on Linux* OS] take call by value integer arguments. - * + omp_set_max_active_levels() - * + omp_set_schedule() - * - * For backward compatibility with 9.1 and previous Intel compiler, these - * entry points take call by reference integer arguments. */ -#ifdef KMP_GOMP_COMPAT -#if (KMP_FTN_ENTRIES == KMP_FTN_PLAIN) || (KMP_FTN_ENTRIES == KMP_FTN_UPPER) -#define PASS_ARGS_BY_VALUE 1 -#endif -#endif -#if KMP_OS_WINDOWS -#if (KMP_FTN_ENTRIES == KMP_FTN_PLAIN) || (KMP_FTN_ENTRIES == KMP_FTN_APPEND) -#define PASS_ARGS_BY_VALUE 1 -#endif -#endif - -// This macro helps to reduce code duplication. -#ifdef PASS_ARGS_BY_VALUE -#define KMP_DEREF -#else -#define KMP_DEREF * -#endif - -void FTN_STDCALL FTN_SET_STACKSIZE(int KMP_DEREF arg) { -#ifdef KMP_STUB - __kmps_set_stacksize(KMP_DEREF arg); -#else - // __kmp_aux_set_stacksize initializes the library if needed - __kmp_aux_set_stacksize((size_t)KMP_DEREF arg); -#endif -} - -void FTN_STDCALL FTN_SET_STACKSIZE_S(size_t KMP_DEREF arg) { -#ifdef KMP_STUB - __kmps_set_stacksize(KMP_DEREF arg); -#else - // __kmp_aux_set_stacksize initializes the library if needed - __kmp_aux_set_stacksize(KMP_DEREF arg); -#endif -} - -int FTN_STDCALL FTN_GET_STACKSIZE(void) { -#ifdef KMP_STUB - return __kmps_get_stacksize(); -#else - if (!__kmp_init_serial) { - __kmp_serial_initialize(); - } - return (int)__kmp_stksize; -#endif -} - -size_t FTN_STDCALL FTN_GET_STACKSIZE_S(void) { -#ifdef KMP_STUB - return __kmps_get_stacksize(); -#else - if (!__kmp_init_serial) { - __kmp_serial_initialize(); - } - return __kmp_stksize; -#endif -} - -void FTN_STDCALL FTN_SET_BLOCKTIME(int KMP_DEREF arg) { -#ifdef KMP_STUB - __kmps_set_blocktime(KMP_DEREF arg); -#else - int gtid, tid; - kmp_info_t *thread; - - gtid = __kmp_entry_gtid(); - tid = __kmp_tid_from_gtid(gtid); - thread = __kmp_thread_from_gtid(gtid); - - __kmp_aux_set_blocktime(KMP_DEREF arg, thread, tid); -#endif -} - -int FTN_STDCALL FTN_GET_BLOCKTIME(void) { -#ifdef KMP_STUB - return __kmps_get_blocktime(); -#else - int gtid, tid; - kmp_info_t *thread; - kmp_team_p *team; - - gtid = __kmp_entry_gtid(); - tid = __kmp_tid_from_gtid(gtid); - thread = __kmp_thread_from_gtid(gtid); - team = __kmp_threads[gtid]->th.th_team; - - /* These must match the settings used in __kmp_wait_sleep() */ - if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) { - KF_TRACE(10, ("kmp_get_blocktime: T#%d(%d:%d), blocktime=%d\n", gtid, - team->t.t_id, tid, KMP_MAX_BLOCKTIME)); - return KMP_MAX_BLOCKTIME; - } -#ifdef KMP_ADJUST_BLOCKTIME - else if (__kmp_zero_bt && !get__bt_set(team, tid)) { - KF_TRACE(10, ("kmp_get_blocktime: T#%d(%d:%d), blocktime=%d\n", gtid, - team->t.t_id, tid, 0)); - return 0; - } -#endif /* KMP_ADJUST_BLOCKTIME */ - else { - KF_TRACE(10, ("kmp_get_blocktime: T#%d(%d:%d), blocktime=%d\n", gtid, - team->t.t_id, tid, get__blocktime(team, tid))); - return 
get__blocktime(team, tid); - } -#endif -} - -void FTN_STDCALL FTN_SET_LIBRARY_SERIAL(void) { -#ifdef KMP_STUB - __kmps_set_library(library_serial); -#else - // __kmp_user_set_library initializes the library if needed - __kmp_user_set_library(library_serial); -#endif -} - -void FTN_STDCALL FTN_SET_LIBRARY_TURNAROUND(void) { -#ifdef KMP_STUB - __kmps_set_library(library_turnaround); -#else - // __kmp_user_set_library initializes the library if needed - __kmp_user_set_library(library_turnaround); -#endif -} - -void FTN_STDCALL FTN_SET_LIBRARY_THROUGHPUT(void) { -#ifdef KMP_STUB - __kmps_set_library(library_throughput); -#else - // __kmp_user_set_library initializes the library if needed - __kmp_user_set_library(library_throughput); -#endif -} - -void FTN_STDCALL FTN_SET_LIBRARY(int KMP_DEREF arg) { -#ifdef KMP_STUB - __kmps_set_library(KMP_DEREF arg); -#else - enum library_type lib; - lib = (enum library_type)KMP_DEREF arg; - // __kmp_user_set_library initializes the library if needed - __kmp_user_set_library(lib); -#endif -} - -int FTN_STDCALL FTN_GET_LIBRARY(void) { -#ifdef KMP_STUB - return __kmps_get_library(); -#else - if (!__kmp_init_serial) { - __kmp_serial_initialize(); - } - return ((int)__kmp_library); -#endif -} - -void FTN_STDCALL FTN_SET_DISP_NUM_BUFFERS(int KMP_DEREF arg) { -#ifdef KMP_STUB - ; // empty routine -#else - // ignore after initialization because some teams have already - // allocated dispatch buffers - if (__kmp_init_serial == 0 && (KMP_DEREF arg) > 0) - __kmp_dispatch_num_buffers = KMP_DEREF arg; -#endif -} - -int FTN_STDCALL FTN_SET_AFFINITY(void **mask) { -#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return -1; -#else - if (!TCR_4(__kmp_init_middle)) { - __kmp_middle_initialize(); - } - return __kmp_aux_set_affinity(mask); -#endif -} - -int FTN_STDCALL FTN_GET_AFFINITY(void **mask) { -#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return -1; -#else - if (!TCR_4(__kmp_init_middle)) { - __kmp_middle_initialize(); - } - return __kmp_aux_get_affinity(mask); -#endif -} - -int FTN_STDCALL FTN_GET_AFFINITY_MAX_PROC(void) { -#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return 0; -#else - // We really only NEED serial initialization here. - if (!TCR_4(__kmp_init_middle)) { - __kmp_middle_initialize(); - } - return __kmp_aux_get_affinity_max_proc(); -#endif -} - -void FTN_STDCALL FTN_CREATE_AFFINITY_MASK(void **mask) { -#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - *mask = NULL; -#else - // We really only NEED serial initialization here. - kmp_affin_mask_t *mask_internals; - if (!TCR_4(__kmp_init_middle)) { - __kmp_middle_initialize(); - } - mask_internals = __kmp_affinity_dispatch->allocate_mask(); - KMP_CPU_ZERO(mask_internals); - *mask = mask_internals; -#endif -} - -void FTN_STDCALL FTN_DESTROY_AFFINITY_MASK(void **mask) { -#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED -// Nothing -#else - // We really only NEED serial initialization here. 
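Taken together, the entry points above surface the runtime's kmp_* affinity-mask API (set/get the calling thread's mask, query the maximum proc number, and create, populate, and destroy masks). A hedged usage sketch from application code, assuming the kmp_* prototypes that this runtime's omp.h exports and my reading of the wrappers above that 0 means success; error handling is elided:

#include <cstdio>
#include <omp.h>

int main() {
  kmp_affinity_mask_t mask;
  kmp_create_affinity_mask(&mask);
  // Ask to pin the calling thread to logical CPU 0, if the platform and
  // runtime support affinity at all.
  if (kmp_set_affinity_mask_proc(0, &mask) == 0 &&
      kmp_set_affinity(&mask) == 0)
    std::printf("bound to CPU 0 of %d\n", kmp_get_affinity_max_proc());
  kmp_destroy_affinity_mask(&mask);
  return 0;
}

On stub builds or platforms without affinity support, the wrappers above return -1 (or a null mask), so a caller should treat failure as "affinity unavailable" rather than a hard error.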
- kmp_affin_mask_t *mask_internals; - if (!TCR_4(__kmp_init_middle)) { - __kmp_middle_initialize(); - } - if (__kmp_env_consistency_check) { - if (*mask == NULL) { - KMP_FATAL(AffinityInvalidMask, "kmp_destroy_affinity_mask"); - } - } - mask_internals = (kmp_affin_mask_t *)(*mask); - __kmp_affinity_dispatch->deallocate_mask(mask_internals); - *mask = NULL; -#endif -} - -int FTN_STDCALL FTN_SET_AFFINITY_MASK_PROC(int KMP_DEREF proc, void **mask) { -#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return -1; -#else - if (!TCR_4(__kmp_init_middle)) { - __kmp_middle_initialize(); - } - return __kmp_aux_set_affinity_mask_proc(KMP_DEREF proc, mask); -#endif -} - -int FTN_STDCALL FTN_UNSET_AFFINITY_MASK_PROC(int KMP_DEREF proc, void **mask) { -#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return -1; -#else - if (!TCR_4(__kmp_init_middle)) { - __kmp_middle_initialize(); - } - return __kmp_aux_unset_affinity_mask_proc(KMP_DEREF proc, mask); -#endif -} - -int FTN_STDCALL FTN_GET_AFFINITY_MASK_PROC(int KMP_DEREF proc, void **mask) { -#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return -1; -#else - if (!TCR_4(__kmp_init_middle)) { - __kmp_middle_initialize(); - } - return __kmp_aux_get_affinity_mask_proc(KMP_DEREF proc, mask); -#endif -} - -/* ------------------------------------------------------------------------ */ - -/* sets the requested number of threads for the next parallel region */ -void FTN_STDCALL KMP_EXPAND_NAME(FTN_SET_NUM_THREADS)(int KMP_DEREF arg) { -#ifdef KMP_STUB -// Nothing. -#else - __kmp_set_num_threads(KMP_DEREF arg, __kmp_entry_gtid()); -#endif -} - -/* returns the number of threads in current team */ -int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_THREADS)(void) { -#ifdef KMP_STUB - return 1; -#else - // __kmpc_bound_num_threads initializes the library if needed - return __kmpc_bound_num_threads(NULL); -#endif -} - -int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_MAX_THREADS)(void) { -#ifdef KMP_STUB - return 1; -#else - int gtid; - kmp_info_t *thread; - if (!TCR_4(__kmp_init_middle)) { - __kmp_middle_initialize(); - } - gtid = __kmp_entry_gtid(); - thread = __kmp_threads[gtid]; - // return thread -> th.th_team -> t.t_current_task[ - // thread->th.th_info.ds.ds_tid ] -> icvs.nproc; - return thread->th.th_current_task->td_icvs.nproc; -#endif -} - -#if OMP_50_ENABLED -int FTN_STDCALL FTN_CONTROL_TOOL(int command, int modifier, void *arg) { -#if defined(KMP_STUB) || !OMPT_SUPPORT - return -2; -#else - OMPT_STORE_RETURN_ADDRESS(__kmp_entry_gtid()); - if (!TCR_4(__kmp_init_middle)) { - return -2; - } - kmp_info_t *this_thr = __kmp_threads[__kmp_entry_gtid()]; - ompt_task_info_t *parent_task_info = OMPT_CUR_TASK_INFO(this_thr); - parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - int ret = __kmp_control_tool(command, modifier, arg); - parent_task_info->frame.enter_frame.ptr = 0; - return ret; -#endif -} - -/* OpenMP 5.0 Memory Management support */ -void FTN_STDCALL FTN_SET_DEFAULT_ALLOCATOR(const omp_allocator_t *allocator) { -#ifndef KMP_STUB - __kmpc_set_default_allocator(__kmp_entry_gtid(), allocator); -#endif -} -const omp_allocator_t *FTN_STDCALL FTN_GET_DEFAULT_ALLOCATOR(void) { -#ifdef KMP_STUB - return NULL; -#else - return __kmpc_get_default_allocator(__kmp_entry_gtid()); -#endif -} -void *FTN_STDCALL FTN_ALLOC(size_t size, const omp_allocator_t *allocator) { -#ifdef KMP_STUB - return malloc(size); -#else - return __kmpc_alloc(__kmp_entry_gtid(), size, allocator); -#endif -} -void FTN_STDCALL FTN_FREE(void *ptr, const omp_allocator_t *allocator) { 
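/* [Editor's note: illustrative sketch, not part of the original file.]
   FTN_ALLOC and FTN_FREE back the OpenMP 5.0 omp_alloc()/omp_free()
   entry points. Caller-side usage under this 8.0-era interface, where
   the allocator is passed explicitly (later specs switch to
   omp_allocator_handle_t):

     #include <omp.h>

     void demo(void) {
       // Request 1 KiB from the predefined high-bandwidth allocator.
       double *p = (double *)omp_alloc(1024, omp_high_bw_mem_alloc);
       if (p) {
         p[0] = 42.0;
         omp_free(p, omp_high_bw_mem_alloc); // free with the same allocator
       }
     }
*/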
-#ifdef KMP_STUB - free(ptr); -#else - __kmpc_free(__kmp_entry_gtid(), ptr, allocator); -#endif -} - -/* OpenMP 5.0 affinity format support */ - -#ifndef KMP_STUB -static void __kmp_fortran_strncpy_truncate(char *buffer, size_t buf_size, - char const *csrc, size_t csrc_size) { - size_t capped_src_size = csrc_size; - if (csrc_size >= buf_size) { - capped_src_size = buf_size - 1; - } - KMP_STRNCPY_S(buffer, buf_size, csrc, capped_src_size); - if (csrc_size >= buf_size) { - KMP_DEBUG_ASSERT(buffer[buf_size - 1] == '\0'); - buffer[buf_size - 1] = csrc[buf_size - 1]; - } else { - for (size_t i = csrc_size; i < buf_size; ++i) - buffer[i] = ' '; - } -} - -// Convert a Fortran string to a C string by adding null byte -class ConvertedString { - char *buf; - kmp_info_t *th; - -public: - ConvertedString(char const *fortran_str, size_t size) { - th = __kmp_get_thread(); - buf = (char *)__kmp_thread_malloc(th, size + 1); - KMP_STRNCPY_S(buf, size + 1, fortran_str, size); - buf[size] = '\0'; - } - ~ConvertedString() { __kmp_thread_free(th, buf); } - const char *get() const { return buf; } -}; -#endif // KMP_STUB - -/* - * Set the value of the affinity-format-var ICV on the current device to the - * format specified in the argument. -*/ -void FTN_STDCALL FTN_SET_AFFINITY_FORMAT(char const *format, size_t size) { -#ifdef KMP_STUB - return; -#else - if (!__kmp_init_serial) { - __kmp_serial_initialize(); - } - ConvertedString cformat(format, size); - // Since the __kmp_affinity_format variable is a C string, do not - // use the fortran strncpy function - __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, - cformat.get(), KMP_STRLEN(cformat.get())); -#endif -} - -/* - * Returns the number of characters required to hold the entire affinity format - * specification (not including null byte character) and writes the value of the - * affinity-format-var ICV on the current device to buffer. If the return value - * is larger than size, the affinity format specification is truncated. -*/ -size_t FTN_STDCALL FTN_GET_AFFINITY_FORMAT(char *buffer, size_t size) { -#ifdef KMP_STUB - return 0; -#else - size_t format_size; - if (!__kmp_init_serial) { - __kmp_serial_initialize(); - } - format_size = KMP_STRLEN(__kmp_affinity_format); - if (buffer && size) { - __kmp_fortran_strncpy_truncate(buffer, size, __kmp_affinity_format, - format_size); - } - return format_size; -#endif -} - -/* - * Prints the thread affinity information of the current thread in the format - * specified by the format argument. If the format is NULL or a zero-length - * string, the value of the affinity-format-var ICV is used. -*/ -void FTN_STDCALL FTN_DISPLAY_AFFINITY(char const *format, size_t size) { -#ifdef KMP_STUB - return; -#else - int gtid; - if (!TCR_4(__kmp_init_middle)) { - __kmp_middle_initialize(); - } - gtid = __kmp_get_gtid(); - ConvertedString cformat(format, size); - __kmp_aux_display_affinity(gtid, cformat.get()); -#endif -} - -/* - * Returns the number of characters required to hold the entire affinity format - * specification (not including null byte) and prints the thread affinity - * information of the current thread into the character string buffer with the - * size of size in the format specified by the format argument. If the format is - * NULL or a zero-length string, the value of the affinity-format-var ICV is - * used. The buffer must be allocated prior to calling the routine. If the - * return value is larger than size, the affinity format specification is - * truncated. 
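 * [Editor's worked example, added for clarity; not in the original
 * comment.] With __kmp_fortran_strncpy_truncate() above, if the fully
 * expanded affinity string has 13 characters and buf_size is 8, the call
 * returns 13 and all 8 buffer characters are filled from the source
 * (Fortran strings carry no terminating null; shorter results are
 * blank-padded instead), so a caller detects truncation by checking
 * whether the return value exceeds buf_size.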
-*/ -size_t FTN_STDCALL FTN_CAPTURE_AFFINITY(char *buffer, char const *format, - size_t buf_size, size_t for_size) { -#if defined(KMP_STUB) - return 0; -#else - int gtid; - size_t num_required; - kmp_str_buf_t capture_buf; - if (!TCR_4(__kmp_init_middle)) { - __kmp_middle_initialize(); - } - gtid = __kmp_get_gtid(); - __kmp_str_buf_init(&capture_buf); - ConvertedString cformat(format, for_size); - num_required = __kmp_aux_capture_affinity(gtid, cformat.get(), &capture_buf); - if (buffer && buf_size) { - __kmp_fortran_strncpy_truncate(buffer, buf_size, capture_buf.str, - capture_buf.used); - } - __kmp_str_buf_free(&capture_buf); - return num_required; -#endif -} -#endif /* OMP_50_ENABLED */ - -int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_THREAD_NUM)(void) { -#ifdef KMP_STUB - return 0; -#else - int gtid; - -#if KMP_OS_DARWIN || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ - KMP_OS_HURD - gtid = __kmp_entry_gtid(); -#elif KMP_OS_WINDOWS - if (!__kmp_init_parallel || - (gtid = (int)((kmp_intptr_t)TlsGetValue(__kmp_gtid_threadprivate_key))) == - 0) { - // Either library isn't initialized or thread is not registered - // 0 is the correct TID in this case - return 0; - } - --gtid; // We keep (gtid+1) in TLS -#elif KMP_OS_LINUX -#ifdef KMP_TDATA_GTID - if (__kmp_gtid_mode >= 3) { - if ((gtid = __kmp_gtid) == KMP_GTID_DNE) { - return 0; - } - } else { -#endif - if (!__kmp_init_parallel || - (gtid = (kmp_intptr_t)( - pthread_getspecific(__kmp_gtid_threadprivate_key))) == 0) { - return 0; - } - --gtid; -#ifdef KMP_TDATA_GTID - } -#endif -#else -#error Unknown or unsupported OS -#endif - - return __kmp_tid_from_gtid(gtid); -#endif -} - -int FTN_STDCALL FTN_GET_NUM_KNOWN_THREADS(void) { -#ifdef KMP_STUB - return 1; -#else - if (!__kmp_init_serial) { - __kmp_serial_initialize(); - } - /* NOTE: this is not synchronized, so it can change at any moment */ - /* NOTE: this number also includes threads preallocated in hot-teams */ - return TCR_4(__kmp_nth); -#endif -} - -int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_PROCS)(void) { -#ifdef KMP_STUB - return 1; -#else - if (!TCR_4(__kmp_init_middle)) { - __kmp_middle_initialize(); - } - return __kmp_avail_proc; -#endif -} - -void FTN_STDCALL KMP_EXPAND_NAME(FTN_SET_NESTED)(int KMP_DEREF flag) { -#ifdef KMP_STUB - __kmps_set_nested(KMP_DEREF flag); -#else - kmp_info_t *thread; - /* For the thread-private internal controls implementation */ - thread = __kmp_entry_thread(); - __kmp_save_internal_controls(thread); - set__nested(thread, ((KMP_DEREF flag) ? TRUE : FALSE)); -#endif -} - -int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NESTED)(void) { -#ifdef KMP_STUB - return __kmps_get_nested(); -#else - kmp_info_t *thread; - thread = __kmp_entry_thread(); - return get__nested(thread); -#endif -} - -void FTN_STDCALL KMP_EXPAND_NAME(FTN_SET_DYNAMIC)(int KMP_DEREF flag) { -#ifdef KMP_STUB - __kmps_set_dynamic(KMP_DEREF flag ? TRUE : FALSE); -#else - kmp_info_t *thread; - /* For the thread-private implementation of the internal controls */ - thread = __kmp_entry_thread(); - // !!! What if a foreign thread calls it? - __kmp_save_internal_controls(thread); - set__dynamic(thread, KMP_DEREF flag ?
TRUE : FALSE); -#endif -} - -int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_DYNAMIC)(void) { -#ifdef KMP_STUB - return __kmps_get_dynamic(); -#else - kmp_info_t *thread; - thread = __kmp_entry_thread(); - return get__dynamic(thread); -#endif -} - -int FTN_STDCALL KMP_EXPAND_NAME(FTN_IN_PARALLEL)(void) { -#ifdef KMP_STUB - return 0; -#else - kmp_info_t *th = __kmp_entry_thread(); -#if OMP_40_ENABLED - if (th->th.th_teams_microtask) { - // AC: r_in_parallel does not work inside teams construct where real - // parallel is inactive, but all threads have same root, so setting it in - // one team affects other teams. - // The solution is to use per-team nesting level - return (th->th.th_team->t.t_active_level ? 1 : 0); - } else -#endif /* OMP_40_ENABLED */ - return (th->th.th_root->r.r_in_parallel ? FTN_TRUE : FTN_FALSE); -#endif -} - -void FTN_STDCALL KMP_EXPAND_NAME(FTN_SET_SCHEDULE)(kmp_sched_t KMP_DEREF kind, - int KMP_DEREF modifier) { -#ifdef KMP_STUB - __kmps_set_schedule(KMP_DEREF kind, KMP_DEREF modifier); -#else - /* TO DO: For the per-task implementation of the internal controls */ - __kmp_set_schedule(__kmp_entry_gtid(), KMP_DEREF kind, KMP_DEREF modifier); -#endif -} - -void FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_SCHEDULE)(kmp_sched_t *kind, - int *modifier) { -#ifdef KMP_STUB - __kmps_get_schedule(kind, modifier); -#else - /* TO DO: For the per-task implementation of the internal controls */ - __kmp_get_schedule(__kmp_entry_gtid(), kind, modifier); -#endif -} - -void FTN_STDCALL KMP_EXPAND_NAME(FTN_SET_MAX_ACTIVE_LEVELS)(int KMP_DEREF arg) { -#ifdef KMP_STUB -// Nothing. -#else - /* TO DO: We want per-task implementation of this internal control */ - __kmp_set_max_active_levels(__kmp_entry_gtid(), KMP_DEREF arg); -#endif -} - -int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_MAX_ACTIVE_LEVELS)(void) { -#ifdef KMP_STUB - return 0; -#else - /* TO DO: We want per-task implementation of this internal control */ - return __kmp_get_max_active_levels(__kmp_entry_gtid()); -#endif -} - -int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_ACTIVE_LEVEL)(void) { -#ifdef KMP_STUB - return 0; // returns 0 if it is called from the sequential part of the program -#else - /* TO DO: For the per-task implementation of the internal controls */ - return __kmp_entry_thread()->th.th_team->t.t_active_level; -#endif -} - -int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_LEVEL)(void) { -#ifdef KMP_STUB - return 0; // returns 0 if it is called from the sequential part of the program -#else - /* TO DO: For the per-task implementation of the internal controls */ - return __kmp_entry_thread()->th.th_team->t.t_level; -#endif -} - -int FTN_STDCALL - KMP_EXPAND_NAME(FTN_GET_ANCESTOR_THREAD_NUM)(int KMP_DEREF level) { -#ifdef KMP_STUB - return (KMP_DEREF level) ? (-1) : (0); -#else - return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), KMP_DEREF level); -#endif -} - -int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_TEAM_SIZE)(int KMP_DEREF level) { -#ifdef KMP_STUB - return (KMP_DEREF level) ? (-1) : (1); -#else - return __kmp_get_team_size(__kmp_entry_gtid(), KMP_DEREF level); -#endif -} - -int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_THREAD_LIMIT)(void) { -#ifdef KMP_STUB - return 1; // TO DO: clarify whether it returns 1 or 0? -#else - if (!__kmp_init_serial) { - __kmp_serial_initialize(); - } - /* global ICV */ - return __kmp_cg_max_nth; -#endif -} - -int FTN_STDCALL KMP_EXPAND_NAME(FTN_IN_FINAL)(void) { -#ifdef KMP_STUB - return 0; // TO DO: clarify whether it returns 1 or 0? 
-#else - if (!TCR_4(__kmp_init_parallel)) { - return 0; - } - return __kmp_entry_thread()->th.th_current_task->td_flags.final; -#endif -} - -#if OMP_40_ENABLED - -kmp_proc_bind_t FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PROC_BIND)(void) { -#ifdef KMP_STUB - return __kmps_get_proc_bind(); -#else - return get__proc_bind(__kmp_entry_thread()); -#endif -} - -#if OMP_45_ENABLED -int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_PLACES)(void) { -#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return 0; -#else - if (!TCR_4(__kmp_init_middle)) { - __kmp_middle_initialize(); - } - if (!KMP_AFFINITY_CAPABLE()) - return 0; - return __kmp_affinity_num_masks; -#endif -} - -int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PLACE_NUM_PROCS)(int place_num) { -#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return 0; -#else - int i; - int retval = 0; - if (!TCR_4(__kmp_init_middle)) { - __kmp_middle_initialize(); - } - if (!KMP_AFFINITY_CAPABLE()) - return 0; - if (place_num < 0 || place_num >= (int)__kmp_affinity_num_masks) - return 0; - kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, place_num); - KMP_CPU_SET_ITERATE(i, mask) { - if ((!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) || - (!KMP_CPU_ISSET(i, mask))) { - continue; - } - ++retval; - } - return retval; -#endif -} - -void FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PLACE_PROC_IDS)(int place_num, - int *ids) { -#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED -// Nothing. -#else - int i, j; - if (!TCR_4(__kmp_init_middle)) { - __kmp_middle_initialize(); - } - if (!KMP_AFFINITY_CAPABLE()) - return; - if (place_num < 0 || place_num >= (int)__kmp_affinity_num_masks) - return; - kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, place_num); - j = 0; - KMP_CPU_SET_ITERATE(i, mask) { - if ((!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) || - (!KMP_CPU_ISSET(i, mask))) { - continue; - } - ids[j++] = i; - } -#endif -} - -int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PLACE_NUM)(void) { -#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return -1; -#else - int gtid; - kmp_info_t *thread; - if (!TCR_4(__kmp_init_middle)) { - __kmp_middle_initialize(); - } - if (!KMP_AFFINITY_CAPABLE()) - return -1; - gtid = __kmp_entry_gtid(); - thread = __kmp_thread_from_gtid(gtid); - if (thread->th.th_current_place < 0) - return -1; - return thread->th.th_current_place; -#endif -} - -int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PARTITION_NUM_PLACES)(void) { -#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return 0; -#else - int gtid, num_places, first_place, last_place; - kmp_info_t *thread; - if (!TCR_4(__kmp_init_middle)) { - __kmp_middle_initialize(); - } - if (!KMP_AFFINITY_CAPABLE()) - return 0; - gtid = __kmp_entry_gtid(); - thread = __kmp_thread_from_gtid(gtid); - first_place = thread->th.th_first_place; - last_place = thread->th.th_last_place; - if (first_place < 0 || last_place < 0) - return 0; - if (first_place <= last_place) - num_places = last_place - first_place + 1; - else - num_places = __kmp_affinity_num_masks - first_place + last_place + 1; - return num_places; -#endif -} - -void - FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PARTITION_PLACE_NUMS)(int *place_nums) { -#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED -// Nothing. 
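/* [Editor's worked example, not part of the original file.] The
   wrap-around arithmetic in omp_get_partition_num_places() above: with
   __kmp_affinity_num_masks = 8, first_place = 6 and last_place = 2, the
   partition is places 6, 7, 0, 1, 2, so num_places = 8 - 6 + 2 + 1 = 5.
   Note that the enumeration below walks min(first, last) through
   max(first, last), so in this wrapped case it reports the contiguous
   range 2..6 rather than the wrapped set. */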
-#else - int i, gtid, place_num, first_place, last_place, start, end; - kmp_info_t *thread; - if (!TCR_4(__kmp_init_middle)) { - __kmp_middle_initialize(); - } - if (!KMP_AFFINITY_CAPABLE()) - return; - gtid = __kmp_entry_gtid(); - thread = __kmp_thread_from_gtid(gtid); - first_place = thread->th.th_first_place; - last_place = thread->th.th_last_place; - if (first_place < 0 || last_place < 0) - return; - if (first_place <= last_place) { - start = first_place; - end = last_place; - } else { - start = last_place; - end = first_place; - } - for (i = 0, place_num = start; place_num <= end; ++place_num, ++i) { - place_nums[i] = place_num; - } -#endif -} -#endif - -int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_TEAMS)(void) { -#ifdef KMP_STUB - return 1; -#else - return __kmp_aux_get_num_teams(); -#endif -} - -int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_TEAM_NUM)(void) { -#ifdef KMP_STUB - return 0; -#else - return __kmp_aux_get_team_num(); -#endif -} - -int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_DEFAULT_DEVICE)(void) { -#if KMP_MIC || KMP_OS_DARWIN || defined(KMP_STUB) - return 0; -#else - return __kmp_entry_thread()->th.th_current_task->td_icvs.default_device; -#endif -} - -void FTN_STDCALL KMP_EXPAND_NAME(FTN_SET_DEFAULT_DEVICE)(int KMP_DEREF arg) { -#if KMP_MIC || KMP_OS_DARWIN || defined(KMP_STUB) -// Nothing. -#else - __kmp_entry_thread()->th.th_current_task->td_icvs.default_device = - KMP_DEREF arg; -#endif -} - -// Get number of NON-HOST devices. -// libomptarget, if loaded, provides this function in api.cpp. -int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_DEVICES)(void) KMP_WEAK_ATTRIBUTE; -int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_DEVICES)(void) { -#if KMP_MIC || KMP_OS_DARWIN || KMP_OS_WINDOWS || defined(KMP_STUB) - return 0; -#else - int (*fptr)(); - if ((*(void **)(&fptr) = dlsym(RTLD_DEFAULT, "_Offload_number_of_devices"))) { - return (*fptr)(); - } else if ((*(void **)(&fptr) = dlsym(RTLD_NEXT, "omp_get_num_devices"))) { - return (*fptr)(); - } else { // liboffload & libomptarget don't exist - return 0; - } -#endif // KMP_MIC || KMP_OS_DARWIN || KMP_OS_WINDOWS || defined(KMP_STUB) -} - -// This function always returns true when called on the host device. -// Compiler/libomptarget should handle the case when it is called inside a target region.
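/* [Editor's note: illustrative sketch, not part of the original file.]
   The dlsym() chain in omp_get_num_devices() above is a generic
   "optional provider" pattern: prefer a symbol exported by an
   already-loaded library, then fall back to a default. The same shape
   in isolation (POSIX; link with -ldl; RTLD_NEXT needs _GNU_SOURCE on
   glibc; the provider_count name is hypothetical):

     #define _GNU_SOURCE
     #include <dlfcn.h>

     typedef int (*count_fn_t)(void);

     static int device_count(void) {
       count_fn_t fn = (count_fn_t)dlsym(RTLD_DEFAULT, "provider_count");
       if (!fn)
         fn = (count_fn_t)dlsym(RTLD_NEXT, "provider_count");
       return fn ? fn() : 0; // no provider loaded: report zero devices
     }
*/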
-int FTN_STDCALL KMP_EXPAND_NAME(FTN_IS_INITIAL_DEVICE)(void) KMP_WEAK_ATTRIBUTE; -int FTN_STDCALL KMP_EXPAND_NAME(FTN_IS_INITIAL_DEVICE)(void) { - return 1; // This is the host -} - -#endif // OMP_40_ENABLED - -#if OMP_45_ENABLED -// OpenMP 4.5 entries - -// libomptarget, if loaded, provides this function -int FTN_STDCALL FTN_GET_INITIAL_DEVICE(void) KMP_WEAK_ATTRIBUTE; -int FTN_STDCALL FTN_GET_INITIAL_DEVICE(void) { -#if KMP_MIC || KMP_OS_DARWIN || KMP_OS_WINDOWS || defined(KMP_STUB) - return KMP_HOST_DEVICE; -#else - int (*fptr)(); - if ((*(void **)(&fptr) = dlsym(RTLD_NEXT, "omp_get_initial_device"))) { - return (*fptr)(); - } else { // liboffload & libomptarget don't exist - return KMP_HOST_DEVICE; - } -#endif -} - -#if defined(KMP_STUB) -// Entries for stubs library -// As all *target* functions are C-only, parameters are always passed by value -void *FTN_STDCALL FTN_TARGET_ALLOC(size_t size, int device_num) { return 0; } - -void FTN_STDCALL FTN_TARGET_FREE(void *device_ptr, int device_num) {} - -int FTN_STDCALL FTN_TARGET_IS_PRESENT(void *ptr, int device_num) { return 0; } - -int FTN_STDCALL FTN_TARGET_MEMCPY(void *dst, void *src, size_t length, - size_t dst_offset, size_t src_offset, - int dst_device, int src_device) { - return -1; -} - -int FTN_STDCALL FTN_TARGET_MEMCPY_RECT( - void *dst, void *src, size_t element_size, int num_dims, - const size_t *volume, const size_t *dst_offsets, const size_t *src_offsets, - const size_t *dst_dimensions, const size_t *src_dimensions, int dst_device, - int src_device) { - return -1; -} - -int FTN_STDCALL FTN_TARGET_ASSOCIATE_PTR(void *host_ptr, void *device_ptr, - size_t size, size_t device_offset, - int device_num) { - return -1; -} - -int FTN_STDCALL FTN_TARGET_DISASSOCIATE_PTR(void *host_ptr, int device_num) { - return -1; -} -#endif // defined(KMP_STUB) -#endif // OMP_45_ENABLED - -#ifdef KMP_STUB -typedef enum { UNINIT = -1, UNLOCKED, LOCKED } kmp_stub_lock_t; -#endif /* KMP_STUB */ - -#if KMP_USE_DYNAMIC_LOCK -void FTN_STDCALL FTN_INIT_LOCK_WITH_HINT(void **user_lock, - uintptr_t KMP_DEREF hint) { -#ifdef KMP_STUB - *((kmp_stub_lock_t *)user_lock) = UNLOCKED; -#else - int gtid = __kmp_entry_gtid(); -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - __kmpc_init_lock_with_hint(NULL, gtid, user_lock, KMP_DEREF hint); -#endif -} - -void FTN_STDCALL FTN_INIT_NEST_LOCK_WITH_HINT(void **user_lock, - uintptr_t KMP_DEREF hint) { -#ifdef KMP_STUB - *((kmp_stub_lock_t *)user_lock) = UNLOCKED; -#else - int gtid = __kmp_entry_gtid(); -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - __kmpc_init_nest_lock_with_hint(NULL, gtid, user_lock, KMP_DEREF hint); -#endif -} -#endif - -/* initialize the lock */ -void FTN_STDCALL KMP_EXPAND_NAME(FTN_INIT_LOCK)(void **user_lock) { -#ifdef KMP_STUB - *((kmp_stub_lock_t *)user_lock) = UNLOCKED; -#else - int gtid = __kmp_entry_gtid(); -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - __kmpc_init_lock(NULL, gtid, user_lock); -#endif -} - -/* initialize the lock */ -void FTN_STDCALL KMP_EXPAND_NAME(FTN_INIT_NEST_LOCK)(void **user_lock) { -#ifdef KMP_STUB - *((kmp_stub_lock_t *)user_lock) = UNLOCKED; -#else - int gtid = __kmp_entry_gtid(); -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - __kmpc_init_nest_lock(NULL, gtid, user_lock); -#endif -} - -void FTN_STDCALL KMP_EXPAND_NAME(FTN_DESTROY_LOCK)(void **user_lock) { -#ifdef KMP_STUB - *((kmp_stub_lock_t *)user_lock) = UNINIT; -#else - int gtid = 
__kmp_entry_gtid(); -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - __kmpc_destroy_lock(NULL, gtid, user_lock); -#endif -} - -void FTN_STDCALL KMP_EXPAND_NAME(FTN_DESTROY_NEST_LOCK)(void **user_lock) { -#ifdef KMP_STUB - *((kmp_stub_lock_t *)user_lock) = UNINIT; -#else - int gtid = __kmp_entry_gtid(); -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - __kmpc_destroy_nest_lock(NULL, gtid, user_lock); -#endif -} - -void FTN_STDCALL KMP_EXPAND_NAME(FTN_SET_LOCK)(void **user_lock) { -#ifdef KMP_STUB - if (*((kmp_stub_lock_t *)user_lock) == UNINIT) { - // TODO: Issue an error. - } - if (*((kmp_stub_lock_t *)user_lock) != UNLOCKED) { - // TODO: Issue an error. - } - *((kmp_stub_lock_t *)user_lock) = LOCKED; -#else - int gtid = __kmp_entry_gtid(); -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - __kmpc_set_lock(NULL, gtid, user_lock); -#endif -} - -void FTN_STDCALL KMP_EXPAND_NAME(FTN_SET_NEST_LOCK)(void **user_lock) { -#ifdef KMP_STUB - if (*((kmp_stub_lock_t *)user_lock) == UNINIT) { - // TODO: Issue an error. - } - (*((int *)user_lock))++; -#else - int gtid = __kmp_entry_gtid(); -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - __kmpc_set_nest_lock(NULL, gtid, user_lock); -#endif -} - -void FTN_STDCALL KMP_EXPAND_NAME(FTN_UNSET_LOCK)(void **user_lock) { -#ifdef KMP_STUB - if (*((kmp_stub_lock_t *)user_lock) == UNINIT) { - // TODO: Issue an error. - } - if (*((kmp_stub_lock_t *)user_lock) == UNLOCKED) { - // TODO: Issue an error. - } - *((kmp_stub_lock_t *)user_lock) = UNLOCKED; -#else - int gtid = __kmp_entry_gtid(); -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - __kmpc_unset_lock(NULL, gtid, user_lock); -#endif -} - -void FTN_STDCALL KMP_EXPAND_NAME(FTN_UNSET_NEST_LOCK)(void **user_lock) { -#ifdef KMP_STUB - if (*((kmp_stub_lock_t *)user_lock) == UNINIT) { - // TODO: Issue an error. - } - if (*((kmp_stub_lock_t *)user_lock) == UNLOCKED) { - // TODO: Issue an error. - } - (*((int *)user_lock))--; -#else - int gtid = __kmp_entry_gtid(); -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - __kmpc_unset_nest_lock(NULL, gtid, user_lock); -#endif -} - -int FTN_STDCALL KMP_EXPAND_NAME(FTN_TEST_LOCK)(void **user_lock) { -#ifdef KMP_STUB - if (*((kmp_stub_lock_t *)user_lock) == UNINIT) { - // TODO: Issue an error. - } - if (*((kmp_stub_lock_t *)user_lock) == LOCKED) { - return 0; - } - *((kmp_stub_lock_t *)user_lock) = LOCKED; - return 1; -#else - int gtid = __kmp_entry_gtid(); -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - return __kmpc_test_lock(NULL, gtid, user_lock); -#endif -} - -int FTN_STDCALL KMP_EXPAND_NAME(FTN_TEST_NEST_LOCK)(void **user_lock) { -#ifdef KMP_STUB - if (*((kmp_stub_lock_t *)user_lock) == UNINIT) { - // TODO: Issue an error. - } - return ++(*((int *)user_lock)); -#else - int gtid = __kmp_entry_gtid(); -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - return __kmpc_test_nest_lock(NULL, gtid, user_lock); -#endif -} - -double FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_WTIME)(void) { -#ifdef KMP_STUB - return __kmps_get_wtime(); -#else - double data; -#if !KMP_OS_LINUX - // We don't need library initialization to get the time on Linux* OS. 
The - // routine can be used to measure library initialization time on Linux* OS now - if (!__kmp_init_serial) { - __kmp_serial_initialize(); - } -#endif - __kmp_elapsed(&data); - return data; -#endif -} - -double FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_WTICK)(void) { -#ifdef KMP_STUB - return __kmps_get_wtick(); -#else - double data; - if (!__kmp_init_serial) { - __kmp_serial_initialize(); - } - __kmp_elapsed_tick(&data); - return data; -#endif -} - -/* ------------------------------------------------------------------------ */ - -void *FTN_STDCALL FTN_MALLOC(size_t KMP_DEREF size) { - // kmpc_malloc initializes the library if needed - return kmpc_malloc(KMP_DEREF size); -} - -void *FTN_STDCALL FTN_ALIGNED_MALLOC(size_t KMP_DEREF size, - size_t KMP_DEREF alignment) { - // kmpc_aligned_malloc initializes the library if needed - return kmpc_aligned_malloc(KMP_DEREF size, KMP_DEREF alignment); -} - -void *FTN_STDCALL FTN_CALLOC(size_t KMP_DEREF nelem, size_t KMP_DEREF elsize) { - // kmpc_calloc initializes the library if needed - return kmpc_calloc(KMP_DEREF nelem, KMP_DEREF elsize); -} - -void *FTN_STDCALL FTN_REALLOC(void *KMP_DEREF ptr, size_t KMP_DEREF size) { - // kmpc_realloc initializes the library if needed - return kmpc_realloc(KMP_DEREF ptr, KMP_DEREF size); -} - -void FTN_STDCALL FTN_KFREE(void *KMP_DEREF ptr) { - // does nothing if the library is not initialized - kmpc_free(KMP_DEREF ptr); -} - -void FTN_STDCALL FTN_SET_WARNINGS_ON(void) { -#ifndef KMP_STUB - __kmp_generate_warnings = kmp_warnings_explicit; -#endif -} - -void FTN_STDCALL FTN_SET_WARNINGS_OFF(void) { -#ifndef KMP_STUB - __kmp_generate_warnings = FALSE; -#endif -} - -void FTN_STDCALL FTN_SET_DEFAULTS(char const *str -#ifndef PASS_ARGS_BY_VALUE - , - int len -#endif - ) { -#ifndef KMP_STUB -#ifdef PASS_ARGS_BY_VALUE - int len = (int)KMP_STRLEN(str); -#endif - __kmp_aux_set_defaults(str, len); -#endif -} - -/* ------------------------------------------------------------------------ */ - -#if OMP_40_ENABLED -/* returns the status of cancellation */ -int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_CANCELLATION)(void) { -#ifdef KMP_STUB - return 0 /* false */; -#else - // initialize the library if needed - if (!__kmp_init_serial) { - __kmp_serial_initialize(); - } - return __kmp_omp_cancellation; -#endif -} - -int FTN_STDCALL FTN_GET_CANCELLATION_STATUS(int cancel_kind) { -#ifdef KMP_STUB - return 0 /* false */; -#else - return __kmp_get_cancellation_status(cancel_kind); -#endif -} - -#endif // OMP_40_ENABLED - -#if OMP_45_ENABLED -/* returns the maximum allowed task priority */ -int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_MAX_TASK_PRIORITY)(void) { -#ifdef KMP_STUB - return 0; -#else - if (!__kmp_init_serial) { - __kmp_serial_initialize(); - } - return __kmp_max_task_priority; -#endif -} -#endif - -#if OMP_50_ENABLED -// This function will be defined in libomptarget. When libomptarget is not -// loaded, we assume we are on the host and return KMP_HOST_DEVICE. -// Compiler/libomptarget will handle this if called inside target. -int FTN_STDCALL FTN_GET_DEVICE_NUM(void) KMP_WEAK_ATTRIBUTE; -int FTN_STDCALL FTN_GET_DEVICE_NUM(void) { return KMP_HOST_DEVICE; } -#endif // OMP_50_ENABLED - -// GCC compatibility (versioned symbols) -#ifdef KMP_USE_VERSION_SYMBOLS - -/* These following sections create versioned symbols for the - omp_* routines. The KMP_VERSION_SYMBOL macro expands the API name and - then maps it to a versioned symbol. - libgomp ``versions'' its symbols (OMP_1.0, OMP_2.0, OMP_3.0, ...) 
while also - retaining the default version which libomp uses: VERSION (defined in - exports_so.txt). If you want to see the versioned symbols for libgomp.so.1 - then just type: - - objdump -T /path/to/libgomp.so.1 | grep omp_ - - Example: - Step 1) Create __kmp_api_omp_set_num_threads_10_alias which is alias of - __kmp_api_omp_set_num_threads - Step 2) Set __kmp_api_omp_set_num_threads_10_alias to version: - omp_set_num_threads@OMP_1.0 - Step 2B) Set __kmp_api_omp_set_num_threads to default version: - omp_set_num_threads@@VERSION -*/ - -// OMP_1.0 versioned symbols -KMP_VERSION_SYMBOL(FTN_SET_NUM_THREADS, 10, "OMP_1.0"); -KMP_VERSION_SYMBOL(FTN_GET_NUM_THREADS, 10, "OMP_1.0"); -KMP_VERSION_SYMBOL(FTN_GET_MAX_THREADS, 10, "OMP_1.0"); -KMP_VERSION_SYMBOL(FTN_GET_THREAD_NUM, 10, "OMP_1.0"); -KMP_VERSION_SYMBOL(FTN_GET_NUM_PROCS, 10, "OMP_1.0"); -KMP_VERSION_SYMBOL(FTN_IN_PARALLEL, 10, "OMP_1.0"); -KMP_VERSION_SYMBOL(FTN_SET_DYNAMIC, 10, "OMP_1.0"); -KMP_VERSION_SYMBOL(FTN_GET_DYNAMIC, 10, "OMP_1.0"); -KMP_VERSION_SYMBOL(FTN_SET_NESTED, 10, "OMP_1.0"); -KMP_VERSION_SYMBOL(FTN_GET_NESTED, 10, "OMP_1.0"); -KMP_VERSION_SYMBOL(FTN_INIT_LOCK, 10, "OMP_1.0"); -KMP_VERSION_SYMBOL(FTN_INIT_NEST_LOCK, 10, "OMP_1.0"); -KMP_VERSION_SYMBOL(FTN_DESTROY_LOCK, 10, "OMP_1.0"); -KMP_VERSION_SYMBOL(FTN_DESTROY_NEST_LOCK, 10, "OMP_1.0"); -KMP_VERSION_SYMBOL(FTN_SET_LOCK, 10, "OMP_1.0"); -KMP_VERSION_SYMBOL(FTN_SET_NEST_LOCK, 10, "OMP_1.0"); -KMP_VERSION_SYMBOL(FTN_UNSET_LOCK, 10, "OMP_1.0"); -KMP_VERSION_SYMBOL(FTN_UNSET_NEST_LOCK, 10, "OMP_1.0"); -KMP_VERSION_SYMBOL(FTN_TEST_LOCK, 10, "OMP_1.0"); -KMP_VERSION_SYMBOL(FTN_TEST_NEST_LOCK, 10, "OMP_1.0"); - -// OMP_2.0 versioned symbols -KMP_VERSION_SYMBOL(FTN_GET_WTICK, 20, "OMP_2.0"); -KMP_VERSION_SYMBOL(FTN_GET_WTIME, 20, "OMP_2.0"); - -// OMP_3.0 versioned symbols -KMP_VERSION_SYMBOL(FTN_SET_SCHEDULE, 30, "OMP_3.0"); -KMP_VERSION_SYMBOL(FTN_GET_SCHEDULE, 30, "OMP_3.0"); -KMP_VERSION_SYMBOL(FTN_GET_THREAD_LIMIT, 30, "OMP_3.0"); -KMP_VERSION_SYMBOL(FTN_SET_MAX_ACTIVE_LEVELS, 30, "OMP_3.0"); -KMP_VERSION_SYMBOL(FTN_GET_MAX_ACTIVE_LEVELS, 30, "OMP_3.0"); -KMP_VERSION_SYMBOL(FTN_GET_ANCESTOR_THREAD_NUM, 30, "OMP_3.0"); -KMP_VERSION_SYMBOL(FTN_GET_LEVEL, 30, "OMP_3.0"); -KMP_VERSION_SYMBOL(FTN_GET_TEAM_SIZE, 30, "OMP_3.0"); -KMP_VERSION_SYMBOL(FTN_GET_ACTIVE_LEVEL, 30, "OMP_3.0"); - -// the lock routines have a 1.0 and 3.0 version -KMP_VERSION_SYMBOL(FTN_INIT_LOCK, 30, "OMP_3.0"); -KMP_VERSION_SYMBOL(FTN_INIT_NEST_LOCK, 30, "OMP_3.0"); -KMP_VERSION_SYMBOL(FTN_DESTROY_LOCK, 30, "OMP_3.0"); -KMP_VERSION_SYMBOL(FTN_DESTROY_NEST_LOCK, 30, "OMP_3.0"); -KMP_VERSION_SYMBOL(FTN_SET_LOCK, 30, "OMP_3.0"); -KMP_VERSION_SYMBOL(FTN_SET_NEST_LOCK, 30, "OMP_3.0"); -KMP_VERSION_SYMBOL(FTN_UNSET_LOCK, 30, "OMP_3.0"); -KMP_VERSION_SYMBOL(FTN_UNSET_NEST_LOCK, 30, "OMP_3.0"); -KMP_VERSION_SYMBOL(FTN_TEST_LOCK, 30, "OMP_3.0"); -KMP_VERSION_SYMBOL(FTN_TEST_NEST_LOCK, 30, "OMP_3.0"); - -// OMP_3.1 versioned symbol -KMP_VERSION_SYMBOL(FTN_IN_FINAL, 31, "OMP_3.1"); - -#if OMP_40_ENABLED -// OMP_4.0 versioned symbols -KMP_VERSION_SYMBOL(FTN_GET_PROC_BIND, 40, "OMP_4.0"); -KMP_VERSION_SYMBOL(FTN_GET_NUM_TEAMS, 40, "OMP_4.0"); -KMP_VERSION_SYMBOL(FTN_GET_TEAM_NUM, 40, "OMP_4.0"); -KMP_VERSION_SYMBOL(FTN_GET_CANCELLATION, 40, "OMP_4.0"); -KMP_VERSION_SYMBOL(FTN_GET_DEFAULT_DEVICE, 40, "OMP_4.0"); -KMP_VERSION_SYMBOL(FTN_SET_DEFAULT_DEVICE, 40, "OMP_4.0"); -KMP_VERSION_SYMBOL(FTN_IS_INITIAL_DEVICE, 40, "OMP_4.0"); -KMP_VERSION_SYMBOL(FTN_GET_NUM_DEVICES, 40, "OMP_4.0"); -#endif /* OMP_40_ENABLED 
*/ - -#if OMP_45_ENABLED -// OMP_4.5 versioned symbols -KMP_VERSION_SYMBOL(FTN_GET_MAX_TASK_PRIORITY, 45, "OMP_4.5"); -KMP_VERSION_SYMBOL(FTN_GET_NUM_PLACES, 45, "OMP_4.5"); -KMP_VERSION_SYMBOL(FTN_GET_PLACE_NUM_PROCS, 45, "OMP_4.5"); -KMP_VERSION_SYMBOL(FTN_GET_PLACE_PROC_IDS, 45, "OMP_4.5"); -KMP_VERSION_SYMBOL(FTN_GET_PLACE_NUM, 45, "OMP_4.5"); -KMP_VERSION_SYMBOL(FTN_GET_PARTITION_NUM_PLACES, 45, "OMP_4.5"); -KMP_VERSION_SYMBOL(FTN_GET_PARTITION_PLACE_NUMS, 45, "OMP_4.5"); -// KMP_VERSION_SYMBOL(FTN_GET_INITIAL_DEVICE, 45, "OMP_4.5"); -#endif - -#if OMP_50_ENABLED -// OMP_5.0 versioned symbols -// KMP_VERSION_SYMBOL(FTN_GET_DEVICE_NUM, 50, "OMP_5.0"); -#endif - -#endif // KMP_USE_VERSION_SYMBOLS - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -// end of file // Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_ftn_entry.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_i18n.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_i18n.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_i18n.cpp (nonexistent) @@ -1,872 +0,0 @@ -/* - * kmp_i18n.cpp - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#include "kmp_i18n.h" - -#include "kmp.h" -#include "kmp_debug.h" -#include "kmp_io.h" // __kmp_printf. -#include "kmp_lock.h" -#include "kmp_os.h" - -#include <errno.h> -#include <locale.h> -#include <stdarg.h> -#include <stdio.h> -#include <string.h> - -#include "kmp_environment.h" -#include "kmp_i18n_default.inc" -#include "kmp_str.h" - -#undef KMP_I18N_OK - -#define get_section(id) ((id) >> 16) -#define get_number(id) ((id)&0xFFFF) - -kmp_msg_t __kmp_msg_null = {kmp_mt_dummy, 0, NULL, 0}; -static char const *no_message_available = "(No message available)"; - -static void __kmp_msg(kmp_msg_severity_t severity, kmp_msg_t message, - va_list ap); - -enum kmp_i18n_cat_status { - KMP_I18N_CLOSED, // Not yet opened or closed. - KMP_I18N_OPENED, // Opened successfully, ready to use. - KMP_I18N_ABSENT // Opening failed, message catalog should not be used. -}; // enum kmp_i18n_cat_status -typedef enum kmp_i18n_cat_status kmp_i18n_cat_status_t; -static volatile kmp_i18n_cat_status_t status = KMP_I18N_CLOSED; - -/* Message catalog is opened at first usage, so we have to synchronize opening - to avoid race and multiple openings. - - Closing does not require synchronization, because catalog is closed very late - at library shutdown, when no other threads are alive. */ - -static void __kmp_i18n_do_catopen(); -static kmp_bootstrap_lock_t lock = KMP_BOOTSTRAP_LOCK_INITIALIZER(lock); -// The `lock' variable could be placed inside the __kmp_i18n_catopen function -// because it is used only by that function. But we are afraid a (buggy) -// compiler may treat it wrongly. So we put it outside of the function just in -// case.
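/* [Editor's note: illustrative sketch, not part of the original file.]
   __kmp_i18n_catopen() below is classic double-checked locking: test
   the status flag, take the bootstrap lock, then test again, so the
   common already-decided path stays lock-free and only one thread ever
   runs the real open. The same shape in miniature (lock primitives are
   placeholders):

     static volatile int initialized = 0;

     void ensure_init(void) {
       if (!initialized) {   // unlocked fast path
         acquire_lock();
         if (!initialized) { // re-check under the lock
           do_expensive_init();
           initialized = 1;  // publish last
         }
         release_lock();
       }
     }
*/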
- -void __kmp_i18n_catopen() { - if (status == KMP_I18N_CLOSED) { - __kmp_acquire_bootstrap_lock(&lock); - if (status == KMP_I18N_CLOSED) { - __kmp_i18n_do_catopen(); - } - __kmp_release_bootstrap_lock(&lock); - } -} // func __kmp_i18n_catopen - -/* Linux* OS and OS X* part */ -#if KMP_OS_UNIX -#define KMP_I18N_OK - -#include <nl_types.h> - -#define KMP_I18N_NULLCAT ((nl_catd)(-1)) -static nl_catd cat = KMP_I18N_NULLCAT; // !!! Shall it be volatile? -static char const *name = - (KMP_VERSION_MAJOR == 4 ? "libguide.cat" : "libomp.cat"); - -/* Useful links: -http://www.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html#tag_08_02 -http://www.opengroup.org/onlinepubs/000095399/functions/catopen.html -http://www.opengroup.org/onlinepubs/000095399/functions/setlocale.html -*/ - -void __kmp_i18n_do_catopen() { - int english = 0; - char *lang = __kmp_env_get("LANG"); - // TODO: What about LC_ALL or LC_MESSAGES? - - KMP_DEBUG_ASSERT(status == KMP_I18N_CLOSED); - KMP_DEBUG_ASSERT(cat == KMP_I18N_NULLCAT); - - english = lang == NULL || // In all these cases English language is used. - strcmp(lang, "") == 0 || strcmp(lang, " ") == 0 || - // Workaround for Fortran RTL bug DPD200137873 "Fortran runtime - // resets LANG env var to space if it is not set". - strcmp(lang, "C") == 0 || strcmp(lang, "POSIX") == 0; - - if (!english) { // English language is not yet detected, let us continue. - // Format of LANG is: [language[_territory][.codeset][@modifier]] - // Strip all parts except language. - char *tail = NULL; - __kmp_str_split(lang, '@', &lang, &tail); - __kmp_str_split(lang, '.', &lang, &tail); - __kmp_str_split(lang, '_', &lang, &tail); - english = (strcmp(lang, "en") == 0); - } - - KMP_INTERNAL_FREE(lang); - - // Do not try to open the English catalog because internal messages are an - // exact copy of the messages in the English catalog. - if (english) { - status = KMP_I18N_ABSENT; // mark catalog as absent so it will not - // be re-opened. - return; - } - - cat = catopen(name, 0); - // TODO: Why do we pass 0 in flags? - status = (cat == KMP_I18N_NULLCAT ? KMP_I18N_ABSENT : KMP_I18N_OPENED); - - if (status == KMP_I18N_ABSENT) { - if (__kmp_generate_warnings > kmp_warnings_low) { - // AC: only issue warning in case explicitly asked to - int error = errno; // Save errno immediately. - char *nlspath = __kmp_env_get("NLSPATH"); - char *lang = __kmp_env_get("LANG"); - - // Infinite recursion will not occur -- status is KMP_I18N_ABSENT now, so - // __kmp_i18n_catgets() will not try to open catalog, but will return - // default message. - kmp_msg_t err_code = KMP_ERR(error); - __kmp_msg(kmp_ms_warning, KMP_MSG(CantOpenMessageCatalog, name), err_code, - KMP_HNT(CheckEnvVar, "NLSPATH", nlspath), - KMP_HNT(CheckEnvVar, "LANG", lang), __kmp_msg_null); - if (__kmp_generate_warnings == kmp_warnings_off) { - __kmp_str_free(&err_code.str); - } - - KMP_INFORM(WillUseDefaultMessages); - KMP_INTERNAL_FREE(nlspath); - KMP_INTERNAL_FREE(lang); - } - } else { // status == KMP_I18N_OPENED - int section = get_section(kmp_i18n_prp_Version); - int number = get_number(kmp_i18n_prp_Version); - char const *expected = __kmp_i18n_default_table.sect[section].str[number]; - // Expected version of the catalog. - kmp_str_buf_t version; // Actual version of the catalog. - __kmp_str_buf_init(&version); - __kmp_str_buf_print(&version, "%s", catgets(cat, section, number, NULL)); - - // String returned by catgets is invalid after closing catalog, so copy it. - if (strcmp(version.str, expected) != 0) { - __kmp_i18n_catclose(); // Close bad catalog.
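/* [Editor's worked example, not part of the original file.] The LANG
   parsing earlier in this function reduces a locale string to its bare
   language with successive __kmp_str_split() calls:
   "en_US.UTF-8@euro" -> "en_US.UTF-8" (drop @modifier)
                      -> "en_US"      (drop .codeset)
                      -> "en"         (drop _territory),
   which then matches the English short-circuit above. */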
- status = KMP_I18N_ABSENT; // And mark it as absent. - if (__kmp_generate_warnings > kmp_warnings_low) { - // AC: only issue warning in case explicitly asked to - // And now print a warning using default messages. - char const *name = "NLSPATH"; - char const *nlspath = __kmp_env_get(name); - __kmp_msg(kmp_ms_warning, - KMP_MSG(WrongMessageCatalog, name, version.str, expected), - KMP_HNT(CheckEnvVar, name, nlspath), __kmp_msg_null); - KMP_INFORM(WillUseDefaultMessages); - KMP_INTERNAL_FREE(CCAST(char *, nlspath)); - } // __kmp_generate_warnings - } - __kmp_str_buf_free(&version); - } -} // func __kmp_i18n_do_catopen - -void __kmp_i18n_catclose() { - if (status == KMP_I18N_OPENED) { - KMP_DEBUG_ASSERT(cat != KMP_I18N_NULLCAT); - catclose(cat); - cat = KMP_I18N_NULLCAT; - } - status = KMP_I18N_CLOSED; -} // func __kmp_i18n_catclose - -char const *__kmp_i18n_catgets(kmp_i18n_id_t id) { - - int section = get_section(id); - int number = get_number(id); - char const *message = NULL; - - if (1 <= section && section <= __kmp_i18n_default_table.size) { - if (1 <= number && number <= __kmp_i18n_default_table.sect[section].size) { - if (status == KMP_I18N_CLOSED) { - __kmp_i18n_catopen(); - } - if (status == KMP_I18N_OPENED) { - message = catgets(cat, section, number, - __kmp_i18n_default_table.sect[section].str[number]); - } - if (message == NULL) { - message = __kmp_i18n_default_table.sect[section].str[number]; - } - } - } - if (message == NULL) { - message = no_message_available; - } - return message; - -} // func __kmp_i18n_catgets - -#endif // KMP_OS_UNIX - -/* Windows* OS part. */ - -#if KMP_OS_WINDOWS -#define KMP_I18N_OK - -#include "kmp_environment.h" -#include <windows.h> - -#define KMP_I18N_NULLCAT NULL -static HMODULE cat = KMP_I18N_NULLCAT; // !!! Shall it be volatile? -static char const *name = - (KMP_VERSION_MAJOR == 4 ? "libguide40ui.dll" : "libompui.dll"); - -static kmp_i18n_table_t table = {0, NULL}; -// Messages formatted by FormatMessage() should be freed, but the catgets() -// interface assumes the user will not free messages. So we cache all the retrieved -// messages in the table, which are freed at catclose(). -static UINT const default_code_page = CP_OEMCP; -static UINT code_page = default_code_page; - -static char const *___catgets(kmp_i18n_id_t id); -static UINT get_code_page(); -static void kmp_i18n_table_free(kmp_i18n_table_t *table); - -static UINT get_code_page() { - - UINT cp = default_code_page; - char const *value = __kmp_env_get("KMP_CODEPAGE"); - if (value != NULL) { - if (_stricmp(value, "ANSI") == 0) { - cp = CP_ACP; - } else if (_stricmp(value, "OEM") == 0) { - cp = CP_OEMCP; - } else if (_stricmp(value, "UTF-8") == 0 || _stricmp(value, "UTF8") == 0) { - cp = CP_UTF8; - } else if (_stricmp(value, "UTF-7") == 0 || _stricmp(value, "UTF7") == 0) { - cp = CP_UTF7; - } else { - // !!! TODO: Issue a warning? - } - } - KMP_INTERNAL_FREE((void *)value); - return cp; - -} // func get_code_page - -static void kmp_i18n_table_free(kmp_i18n_table_t *table) { - int s; - int m; - for (s = 0; s < table->size; ++s) { - for (m = 0; m < table->sect[s].size; ++m) { - // Free message. - KMP_INTERNAL_FREE((void *)table->sect[s].str[m]); - table->sect[s].str[m] = NULL; - } - table->sect[s].size = 0; - // Free section itself.
- KMP_INTERNAL_FREE((void *)table->sect[s].str); - table->sect[s].str = NULL; - } - table->size = 0; - KMP_INTERNAL_FREE((void *)table->sect); - table->sect = NULL; -} // kmp_i18n_table_free - -void __kmp_i18n_do_catopen() { - - LCID locale_id = GetThreadLocale(); - WORD lang_id = LANGIDFROMLCID(locale_id); - WORD primary_lang_id = PRIMARYLANGID(lang_id); - kmp_str_buf_t path; - - KMP_DEBUG_ASSERT(status == KMP_I18N_CLOSED); - KMP_DEBUG_ASSERT(cat == KMP_I18N_NULLCAT); - - __kmp_str_buf_init(&path); - - // Do not try to open the English catalog because internal messages are an exact copy - // of the messages in the English catalog. - if (primary_lang_id == LANG_ENGLISH) { - status = KMP_I18N_ABSENT; // mark catalog as absent so it will not - // be re-opened. - goto end; - } - - // Construct resource DLL name. - /* Simple LoadLibrary( name ) is not suitable due to security issue (see - http://www.microsoft.com/technet/security/advisory/2269637.mspx). We have - to specify full path to the message catalog. */ - { - // Get handle of our DLL first. - HMODULE handle; - BOOL brc = GetModuleHandleEx( - GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | - GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, - reinterpret_cast<LPCSTR>(&__kmp_i18n_do_catopen), &handle); - if (!brc) { // Error occurred. - status = KMP_I18N_ABSENT; // mark catalog as absent so it will not be - // re-opened. - goto end; - // TODO: Enable multiple messages (KMP_MSG) to be passed to __kmp_msg; and - // print a proper warning. - } - - // Now get the path to our DLL. - for (;;) { - DWORD drc = GetModuleFileName(handle, path.str, path.size); - if (drc == 0) { // Error occurred. - status = KMP_I18N_ABSENT; - goto end; - } - if (drc < path.size) { - path.used = drc; - break; - } - __kmp_str_buf_reserve(&path, path.size * 2); - } - - // Now construct the name of message catalog. - kmp_str_fname fname; - __kmp_str_fname_init(&fname, path.str); - __kmp_str_buf_clear(&path); - __kmp_str_buf_print(&path, "%s%lu/%s", fname.dir, - (unsigned long)(locale_id), name); - __kmp_str_fname_free(&fname); - } - - // For security reasons, use LoadLibraryEx() and load message catalog as a - // data file. - cat = LoadLibraryEx(path.str, NULL, LOAD_LIBRARY_AS_DATAFILE); - status = (cat == KMP_I18N_NULLCAT ? KMP_I18N_ABSENT : KMP_I18N_OPENED); - - if (status == KMP_I18N_ABSENT) { - if (__kmp_generate_warnings > kmp_warnings_low) { - // AC: only issue warning in case explicitly asked to - DWORD error = GetLastError(); - // Infinite recursion will not occur -- status is KMP_I18N_ABSENT now, so - // __kmp_i18n_catgets() will not try to open catalog but will return - // default message. - /* If a message catalog for another architecture is found (e.g. the OpenMP RTL for - the IA-32 architecture opens libompui.dll for Intel(R) 64), Windows* OS - returns error 193 (ERROR_BAD_EXE_FORMAT). However, FormatMessage fails - to return a message for this error, so the user will see: - - OMP: Warning #2: Cannot open message catalog "1041\libompui.dll": - OMP: System error #193: (No system error message available) - OMP: Info #3: Default messages will be used. - - Issue a hint in this case so the cause of the trouble is more understandable. */ - kmp_msg_t err_code = KMP_SYSERRCODE(error); - __kmp_msg(kmp_ms_warning, KMP_MSG(CantOpenMessageCatalog, path.str), - err_code, (error == ERROR_BAD_EXE_FORMAT ?
KMP_HNT(BadExeFormat, path.str, KMP_ARCH_STR) - : __kmp_msg_null), - __kmp_msg_null); - if (__kmp_generate_warnings == kmp_warnings_off) { - __kmp_str_free(&err_code.str); - } - KMP_INFORM(WillUseDefaultMessages); - } - } else { // status == KMP_I18N_OPENED - - int section = get_section(kmp_i18n_prp_Version); - int number = get_number(kmp_i18n_prp_Version); - char const *expected = __kmp_i18n_default_table.sect[section].str[number]; - kmp_str_buf_t version; // Actual version of the catalog. - __kmp_str_buf_init(&version); - __kmp_str_buf_print(&version, "%s", ___catgets(kmp_i18n_prp_Version)); - // String returned by catgets is invalid after closing catalog, so copy it. - if (strcmp(version.str, expected) != 0) { - // Close bad catalog. - __kmp_i18n_catclose(); - status = KMP_I18N_ABSENT; // And mark it as absent. - if (__kmp_generate_warnings > kmp_warnings_low) { - // And now print a warning using default messages. - __kmp_msg(kmp_ms_warning, - KMP_MSG(WrongMessageCatalog, path.str, version.str, expected), - __kmp_msg_null); - KMP_INFORM(WillUseDefaultMessages); - } // __kmp_generate_warnings - } - __kmp_str_buf_free(&version); - } - code_page = get_code_page(); - -end: - __kmp_str_buf_free(&path); - return; -} // func __kmp_i18n_do_catopen - -void __kmp_i18n_catclose() { - if (status == KMP_I18N_OPENED) { - KMP_DEBUG_ASSERT(cat != KMP_I18N_NULLCAT); - kmp_i18n_table_free(&table); - FreeLibrary(cat); - cat = KMP_I18N_NULLCAT; - } - code_page = default_code_page; - status = KMP_I18N_CLOSED; -} // func __kmp_i18n_catclose - -/* We use FormatMessage() to get strings from catalog, get system error - messages, etc. FormatMessage() tends to return Windows* OS-style - end-of-lines, "\r\n". When string is printed, printf() also replaces all the - occurrences of "\n" with "\r\n" (again!), so sequences like "\r\r\r\n" - appear in output. It is not too good. - - Additional mess comes from message catalog: Our catalog source en_US.mc file - (generated by message-converter.pl) contains only "\n" characters, but - en_US_msg_1033.bin file (produced by mc.exe) may contain "\r\n" or just "\n". - This mess goes from en_US_msg_1033.bin file to message catalog, - libompui.dll. For example, message - - Error - - (there is "\n" at the end) is compiled by mc.exe to "Error\r\n", while - - OMP: Error %1!d!: %2!s!\n - - (there is "\n" at the end as well) is compiled to "OMP: Error %1!d!: - %2!s!\r\n\n". - - Thus, stripping all "\r" normalizes string and returns it to canonical form, - so printf() will produce correct end-of-line sequences. - - ___strip_crs() serves for this purpose: it removes all the occurrences of - "\r" in-place and returns new length of string. */ -static int ___strip_crs(char *str) { - int in = 0; // Input character index. - int out = 0; // Output character index. - for (;;) { - if (str[in] != '\r') { - str[out] = str[in]; - ++out; - } - if (str[in] == 0) { - break; - } - ++in; - } - return out - 1; -} // func __strip_crs - -static char const *___catgets(kmp_i18n_id_t id) { - - char *result = NULL; - PVOID addr = NULL; - wchar_t *wmsg = NULL; - DWORD wlen = 0; - char *msg = NULL; - int len = 0; - int rc; - - KMP_DEBUG_ASSERT(cat != KMP_I18N_NULLCAT); - wlen = // wlen does *not* include terminating null. - FormatMessageW(FORMAT_MESSAGE_ALLOCATE_BUFFER | - FORMAT_MESSAGE_FROM_HMODULE | - FORMAT_MESSAGE_IGNORE_INSERTS, - cat, id, - 0, // LangId - (LPWSTR)&addr, - 0, // Size in elements, not in bytes. 
- NULL); - if (wlen <= 0) { - goto end; - } - wmsg = (wchar_t *)addr; // Warning: wmsg may be not nul-terminated! - - // Calculate length of multibyte message. - // Since wlen does not include terminating null, len does not include it also. - len = WideCharToMultiByte(code_page, - 0, // Flags. - wmsg, wlen, // Wide buffer and size. - NULL, 0, // Buffer and size. - NULL, NULL // Default char and used default char. - ); - if (len <= 0) { - goto end; - } - - // Allocate memory. - msg = (char *)KMP_INTERNAL_MALLOC(len + 1); - - // Convert wide message to multibyte one. - rc = WideCharToMultiByte(code_page, - 0, // Flags. - wmsg, wlen, // Wide buffer and size. - msg, len, // Buffer and size. - NULL, NULL // Default char and used default char. - ); - if (rc <= 0 || rc > len) { - goto end; - } - KMP_DEBUG_ASSERT(rc == len); - len = rc; - msg[len] = 0; // Put terminating null to the end. - - // Stripping all "\r" before stripping last end-of-line simplifies the task. - len = ___strip_crs(msg); - - // Every message in catalog is terminated with "\n". Strip it. - if (len >= 1 && msg[len - 1] == '\n') { - --len; - msg[len] = 0; - } - - // Everything looks ok. - result = msg; - msg = NULL; - -end: - - if (msg != NULL) { - KMP_INTERNAL_FREE(msg); - } - if (wmsg != NULL) { - LocalFree(wmsg); - } - - return result; - -} // ___catgets - -char const *__kmp_i18n_catgets(kmp_i18n_id_t id) { - - int section = get_section(id); - int number = get_number(id); - char const *message = NULL; - - if (1 <= section && section <= __kmp_i18n_default_table.size) { - if (1 <= number && number <= __kmp_i18n_default_table.sect[section].size) { - if (status == KMP_I18N_CLOSED) { - __kmp_i18n_catopen(); - } - if (cat != KMP_I18N_NULLCAT) { - if (table.size == 0) { - table.sect = (kmp_i18n_section_t *)KMP_INTERNAL_CALLOC( - (__kmp_i18n_default_table.size + 2), sizeof(kmp_i18n_section_t)); - table.size = __kmp_i18n_default_table.size; - } - if (table.sect[section].size == 0) { - table.sect[section].str = (const char **)KMP_INTERNAL_CALLOC( - __kmp_i18n_default_table.sect[section].size + 2, - sizeof(char const *)); - table.sect[section].size = - __kmp_i18n_default_table.sect[section].size; - } - if (table.sect[section].str[number] == NULL) { - table.sect[section].str[number] = ___catgets(id); - } - message = table.sect[section].str[number]; - } - if (message == NULL) { - // Catalog is not opened or message is not found, return default - // message. - message = __kmp_i18n_default_table.sect[section].str[number]; - } - } - } - if (message == NULL) { - message = no_message_available; - } - return message; - -} // func __kmp_i18n_catgets - -#endif // KMP_OS_WINDOWS - -// ----------------------------------------------------------------------------- - -#ifndef KMP_I18N_OK -#error I18n support is not implemented for this OS. 
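/* [Editor's note, not part of the original file.] ___catgets() above
   uses the standard two-call WideCharToMultiByte() idiom: call once
   with a NULL output buffer to learn the required length, then allocate
   and convert for real. In isolation:

     int len = WideCharToMultiByte(cp, 0, wmsg, wlen, NULL, 0, NULL, NULL);
     char *msg = (char *)malloc(len + 1);
     WideCharToMultiByte(cp, 0, wmsg, wlen, msg, len, NULL, NULL);
     msg[len] = '\0'; // wlen excluded the null, so append it here
*/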
-#endif // KMP_I18N_OK - -// ----------------------------------------------------------------------------- - -void __kmp_i18n_dump_catalog(kmp_str_buf_t *buffer) { - - struct kmp_i18n_id_range_t { - kmp_i18n_id_t first; - kmp_i18n_id_t last; - }; // struct kmp_i18n_id_range_t - - static struct kmp_i18n_id_range_t ranges[] = { - {kmp_i18n_prp_first, kmp_i18n_prp_last}, - {kmp_i18n_str_first, kmp_i18n_str_last}, - {kmp_i18n_fmt_first, kmp_i18n_fmt_last}, - {kmp_i18n_msg_first, kmp_i18n_msg_last}, - {kmp_i18n_hnt_first, kmp_i18n_hnt_last}}; // ranges - - int num_of_ranges = sizeof(ranges) / sizeof(struct kmp_i18n_id_range_t); - int range; - kmp_i18n_id_t id; - - for (range = 0; range < num_of_ranges; ++range) { - __kmp_str_buf_print(buffer, "*** Set #%d ***\n", range + 1); - for (id = (kmp_i18n_id_t)(ranges[range].first + 1); id < ranges[range].last; - id = (kmp_i18n_id_t)(id + 1)) { - __kmp_str_buf_print(buffer, "%d: <<%s>>\n", id, __kmp_i18n_catgets(id)); - } - } - - __kmp_printf("%s", buffer->str); - -} // __kmp_i18n_dump_catalog - -// ----------------------------------------------------------------------------- -kmp_msg_t __kmp_msg_format(unsigned id_arg, ...) { - - kmp_msg_t msg; - va_list args; - kmp_str_buf_t buffer; - __kmp_str_buf_init(&buffer); - - va_start(args, id_arg); - - // We use unsigned for the ID argument and explicitly cast it here to the - // right enumerator because variadic functions are not compatible with - // default promotions. - kmp_i18n_id_t id = (kmp_i18n_id_t)id_arg; - -#if KMP_OS_UNIX - // On Linux* OS and OS X*, printf() family functions process parameter - // numbers, for example: "%2$s %1$s". - __kmp_str_buf_vprint(&buffer, __kmp_i18n_catgets(id), args); -#elif KMP_OS_WINDOWS - // On Windows, printf() family functions do not recognize GNU style - // parameter numbers, so we have to use FormatMessage() instead. It recognizes - // parameter numbers, e.g.: "%2!s! %1!s!". - { - LPTSTR str = NULL; - int len; - FormatMessage(FORMAT_MESSAGE_FROM_STRING | FORMAT_MESSAGE_ALLOCATE_BUFFER, - __kmp_i18n_catgets(id), 0, 0, (LPTSTR)(&str), 0, &args); - len = ___strip_crs(str); - __kmp_str_buf_cat(&buffer, str, len); - LocalFree(str); - } -#else -#error -#endif - va_end(args); - __kmp_str_buf_detach(&buffer); - - msg.type = (kmp_msg_type_t)(id >> 16); - msg.num = id & 0xFFFF; - msg.str = buffer.str; - msg.len = buffer.used; - - return msg; - -} // __kmp_msg_format - -// ----------------------------------------------------------------------------- -static char *sys_error(int err) { - - char *message = NULL; - -#if KMP_OS_WINDOWS - - LPVOID buffer = NULL; - int len; - DWORD rc; - rc = FormatMessage( - FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, - MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // Default language. - (LPTSTR)&buffer, 0, NULL); - if (rc > 0) { - // Message formatted. Copy it (so we can free it later with normal free()). - message = __kmp_str_format("%s", (char *)buffer); - len = ___strip_crs(message); // Delete carriage returns if any. - // Strip trailing newlines. - while (len > 0 && message[len - 1] == '\n') { - --len; - } - message[len] = 0; - } else { - // FormatMessage() failed to format the system error message. GetLastError() - // would give us an error code, which we would convert to a message... this is a - // dangerous recursion, which cannot clarify the original error, so we will not - // even start it.
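/* [Editor's worked example, not part of the original file.] The
   ___strip_crs() helper above rewrites in place and returns the new
   length excluding the terminating null: "OMP: Error\r\n\n" becomes
   "OMP: Error\n\n" with length 12, after which the caller above trims
   the trailing '\n' characters. */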
- } - if (buffer != NULL) { - LocalFree(buffer); - } - -#else // Non-Windows* OS: Linux* OS or OS X* - -/* There are 2 incompatible versions of strerror_r: - - char * strerror_r( int, char *, size_t ); // GNU version - int strerror_r( int, char *, size_t ); // XSI version -*/ - -#if (defined(__GLIBC__) && defined(_GNU_SOURCE)) || \ - (defined(__BIONIC__) && defined(_GNU_SOURCE) && \ - __ANDROID_API__ >= __ANDROID_API_M__) - // GNU version of strerror_r. - - char buffer[2048]; - char *const err_msg = strerror_r(err, buffer, sizeof(buffer)); - // Do not eliminate this assignment to temporary variable, otherwise compiler - // would not issue warning if strerror_r() returns `int' instead of expected - // `char *'. - message = __kmp_str_format("%s", err_msg); - -#else // OS X*, FreeBSD* etc. - // XSI version of strerror_r. - int size = 2048; - char *buffer = (char *)KMP_INTERNAL_MALLOC(size); - int rc; - if (buffer == NULL) { - KMP_FATAL(MemoryAllocFailed); - } - rc = strerror_r(err, buffer, size); - if (rc == -1) { - rc = errno; // XSI version sets errno. - } - while (rc == ERANGE) { // ERANGE means the buffer is too small. - KMP_INTERNAL_FREE(buffer); - size *= 2; - buffer = (char *)KMP_INTERNAL_MALLOC(size); - if (buffer == NULL) { - KMP_FATAL(MemoryAllocFailed); - } - rc = strerror_r(err, buffer, size); - if (rc == -1) { - rc = errno; // XSI version sets errno. - } - } - if (rc == 0) { - message = buffer; - } else { // Buffer is unused. Free it. - KMP_INTERNAL_FREE(buffer); - } - -#endif - -#endif /* KMP_OS_WINDOWS */ - - if (message == NULL) { - // TODO: I18n this message. - message = __kmp_str_format("%s", "(No system error message available)"); - } - return message; -} // sys_error - -// ----------------------------------------------------------------------------- -kmp_msg_t __kmp_msg_error_code(int code) { - - kmp_msg_t msg; - msg.type = kmp_mt_syserr; - msg.num = code; - msg.str = sys_error(code); - msg.len = KMP_STRLEN(msg.str); - return msg; - -} // __kmp_msg_error_code - -// ----------------------------------------------------------------------------- -kmp_msg_t __kmp_msg_error_mesg(char const *mesg) { - - kmp_msg_t msg; - msg.type = kmp_mt_syserr; - msg.num = 0; - msg.str = __kmp_str_format("%s", mesg); - msg.len = KMP_STRLEN(msg.str); - return msg; - -} // __kmp_msg_error_mesg - -// ----------------------------------------------------------------------------- -void __kmp_msg(kmp_msg_severity_t severity, kmp_msg_t message, va_list args) { - kmp_i18n_id_t format; // format identifier - kmp_msg_t fmsg; // formatted message - kmp_str_buf_t buffer; - - if (severity != kmp_ms_fatal && __kmp_generate_warnings == kmp_warnings_off) - return; // no reason to form a string in order to not print it - - __kmp_str_buf_init(&buffer); - - // Format the primary message. - switch (severity) { - case kmp_ms_inform: { - format = kmp_i18n_fmt_Info; - } break; - case kmp_ms_warning: { - format = kmp_i18n_fmt_Warning; - } break; - case kmp_ms_fatal: { - format = kmp_i18n_fmt_Fatal; - } break; - default: { KMP_DEBUG_ASSERT(0); } - } - fmsg = __kmp_msg_format(format, message.num, message.str); - __kmp_str_free(&message.str); - __kmp_str_buf_cat(&buffer, fmsg.str, fmsg.len); - __kmp_str_free(&fmsg.str); - - // Format other messages. 
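/* [Editor's note, not part of the original file.] The loop below
   consumes a __kmp_msg_null-terminated list of kmp_msg_t values, so
   every variadic caller must end with that sentinel, e.g.:

     __kmp_msg(kmp_ms_warning, KMP_MSG(CantOpenMessageCatalog, name),
               KMP_ERR(error), KMP_HNT(CheckEnvVar, "NLSPATH", nlspath),
               __kmp_msg_null); // sentinel: kmp_mt_dummy with NULL str
*/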
- for (;;) { - message = va_arg(args, kmp_msg_t); - if (message.type == kmp_mt_dummy && message.str == NULL) { - break; - } - switch (message.type) { - case kmp_mt_hint: { - format = kmp_i18n_fmt_Hint; - // we cannot skip %1$ and only use %2$ to print the message without the - // number - fmsg = __kmp_msg_format(format, message.str); - } break; - case kmp_mt_syserr: { - format = kmp_i18n_fmt_SysErr; - fmsg = __kmp_msg_format(format, message.num, message.str); - } break; - default: { KMP_DEBUG_ASSERT(0); } - } - __kmp_str_free(&message.str); - __kmp_str_buf_cat(&buffer, fmsg.str, fmsg.len); - __kmp_str_free(&fmsg.str); - } - - // Print formatted messages. - // This lock prevents multiple fatal errors on the same problem. - // __kmp_acquire_bootstrap_lock( & lock ); // GEH - This lock causing tests - // to hang on OS X*. - __kmp_printf("%s", buffer.str); - __kmp_str_buf_free(&buffer); - - // __kmp_release_bootstrap_lock( & lock ); // GEH - this lock causing tests - // to hang on OS X*. - -} // __kmp_msg - -void __kmp_msg(kmp_msg_severity_t severity, kmp_msg_t message, ...) { - va_list args; - va_start(args, message); - __kmp_msg(severity, message, args); - va_end(args); -} - -void __kmp_fatal(kmp_msg_t message, ...) { - va_list args; - va_start(args, message); - __kmp_msg(kmp_ms_fatal, message, args); - va_end(args); -#if KMP_OS_WINDOWS - // Delay to give message a chance to appear before reaping - __kmp_thread_sleep(500); -#endif - __kmp_abort_process(); -} // __kmp_fatal - -// end of file // Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_i18n.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/tsan_annotations.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/tsan_annotations.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/tsan_annotations.h (nonexistent) @@ -1,170 +0,0 @@ -/*! \file */ -/* - * tsan_annotations.h -- ThreadSanitizer annotations to support data - * race detection in OpenMP programs. - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - -#ifndef TSAN_ANNOTATIONS_H -#define TSAN_ANNOTATIONS_H - -#include "kmp_config.h" - -/* types as used in tsan/rtl/tsan_interface_ann.cc */ -typedef unsigned long uptr; -typedef signed long sptr; - -#ifdef __cplusplus -extern "C" { -#endif - -/* Declaration of all annotation functions in tsan/rtl/tsan_interface_ann.cc */ -void AnnotateHappensBefore(const char *f, int l, uptr addr); -void AnnotateHappensAfter(const char *f, int l, uptr addr); -void AnnotateCondVarSignal(const char *f, int l, uptr cv); -void AnnotateCondVarSignalAll(const char *f, int l, uptr cv); -void AnnotateMutexIsNotPHB(const char *f, int l, uptr mu); -void AnnotateCondVarWait(const char *f, int l, uptr cv, uptr lock); -void AnnotateRWLockCreate(const char *f, int l, uptr m); -void AnnotateRWLockCreateStatic(const char *f, int l, uptr m); -void AnnotateRWLockDestroy(const char *f, int l, uptr m); -void AnnotateRWLockAcquired(const char *f, int l, uptr m, uptr is_w); -void AnnotateRWLockReleased(const char *f, int l, uptr m, uptr is_w); -void AnnotateTraceMemory(const char *f, int l, uptr mem); -void AnnotateFlushState(const char *f, int l); -void AnnotateNewMemory(const char *f, int l, uptr mem, uptr size); -void AnnotateNoOp(const char *f, int l, uptr mem); -void AnnotateFlushExpectedRaces(const char *f, int l); -void AnnotateEnableRaceDetection(const char *f, int l, int enable); -void AnnotateMutexIsUsedAsCondVar(const char *f, int l, uptr mu); -void AnnotatePCQGet(const char *f, int l, uptr pcq); -void AnnotatePCQPut(const char *f, int l, uptr pcq); -void AnnotatePCQDestroy(const char *f, int l, uptr pcq); -void AnnotatePCQCreate(const char *f, int l, uptr pcq); -void AnnotateExpectRace(const char *f, int l, uptr mem, char *desc); -void AnnotateBenignRaceSized(const char *f, int l, uptr mem, uptr size, - char *desc); -void AnnotateBenignRace(const char *f, int l, uptr mem, char *desc); -void AnnotateIgnoreReadsBegin(const char *f, int l); -void AnnotateIgnoreReadsEnd(const char *f, int l); -void AnnotateIgnoreWritesBegin(const char *f, int l); -void AnnotateIgnoreWritesEnd(const char *f, int l); -void AnnotateIgnoreSyncBegin(const char *f, int l); -void AnnotateIgnoreSyncEnd(const char *f, int l); -void AnnotatePublishMemoryRange(const char *f, int l, uptr addr, uptr size); -void AnnotateUnpublishMemoryRange(const char *f, int l, uptr addr, uptr size); -void AnnotateThreadName(const char *f, int l, char *name); -void WTFAnnotateHappensBefore(const char *f, int l, uptr addr); -void WTFAnnotateHappensAfter(const char *f, int l, uptr addr); -void WTFAnnotateBenignRaceSized(const char *f, int l, uptr mem, uptr sz, - char *desc); -int RunningOnValgrind(); -double ValgrindSlowdown(void); -const char *ThreadSanitizerQuery(const char *query); -void AnnotateMemoryIsInitialized(const char *f, int l, uptr mem, uptr sz); - -#ifdef __cplusplus -} -#endif - -#ifdef TSAN_SUPPORT -#define ANNOTATE_HAPPENS_AFTER(addr) \ - AnnotateHappensAfter(__FILE__, __LINE__, (uptr)addr) -#define ANNOTATE_HAPPENS_BEFORE(addr) \ - AnnotateHappensBefore(__FILE__, __LINE__, (uptr)addr) -#define ANNOTATE_IGNORE_WRITES_BEGIN() \ - AnnotateIgnoreWritesBegin(__FILE__, __LINE__) -#define ANNOTATE_IGNORE_WRITES_END() AnnotateIgnoreWritesEnd(__FILE__, __LINE__) -#define ANNOTATE_RWLOCK_CREATE(lck) \ - AnnotateRWLockCreate(__FILE__, __LINE__, (uptr)lck) -#define ANNOTATE_RWLOCK_RELEASED(lck) \ - AnnotateRWLockAcquired(__FILE__, __LINE__, (uptr)lck, 1) -#define 
ANNOTATE_RWLOCK_ACQUIRED(lck) \ - AnnotateRWLockReleased(__FILE__, __LINE__, (uptr)lck, 1) -#define ANNOTATE_BARRIER_BEGIN(addr) \ - AnnotateHappensBefore(__FILE__, __LINE__, (uptr)addr) -#define ANNOTATE_BARRIER_END(addr) \ - AnnotateHappensAfter(__FILE__, __LINE__, (uptr)addr) -#define ANNOTATE_REDUCE_AFTER(addr) \ - AnnotateHappensAfter(__FILE__, __LINE__, (uptr)addr) -#define ANNOTATE_REDUCE_BEFORE(addr) \ - AnnotateHappensBefore(__FILE__, __LINE__, (uptr)addr) -#else -#define ANNOTATE_HAPPENS_AFTER(addr) -#define ANNOTATE_HAPPENS_BEFORE(addr) -#define ANNOTATE_IGNORE_WRITES_BEGIN() -#define ANNOTATE_IGNORE_WRITES_END() -#define ANNOTATE_RWLOCK_CREATE(lck) -#define ANNOTATE_RWLOCK_RELEASED(lck) -#define ANNOTATE_RWLOCK_ACQUIRED(lck) -#define ANNOTATE_BARRIER_BEGIN(addr) -#define ANNOTATE_BARRIER_END(addr) -#define ANNOTATE_REDUCE_AFTER(addr) -#define ANNOTATE_REDUCE_BEFORE(addr) -#endif - -#define ANNOTATE_QUEUING -#define ANNOTATE_TICKET -#define ANNOTATE_FUTEX -#define ANNOTATE_TAS -#define ANNOTATE_DRDPA - -#ifdef ANNOTATE_QUEUING -#define ANNOTATE_QUEUING_CREATE(lck) -#define ANNOTATE_QUEUING_RELEASED(lck) ANNOTATE_HAPPENS_BEFORE(lck) -#define ANNOTATE_QUEUING_ACQUIRED(lck) ANNOTATE_HAPPENS_AFTER(lck) -#else -#define ANNOTATE_QUEUING_CREATE(lck) -#define ANNOTATE_QUEUING_RELEASED(lck) -#define ANNOTATE_QUEUING_ACQUIRED(lck) -#endif - -#ifdef ANNOTATE_TICKET -#define ANNOTATE_TICKET_CREATE(lck) -#define ANNOTATE_TICKET_RELEASED(lck) ANNOTATE_HAPPENS_BEFORE(lck) -#define ANNOTATE_TICKET_ACQUIRED(lck) ANNOTATE_HAPPENS_AFTER(lck) -#else -#define ANNOTATE_TICKET_CREATE(lck) -#define ANNOTATE_TICKET_RELEASED(lck) -#define ANNOTATE_TICKET_ACQUIRED(lck) -#endif - -#ifdef ANNOTATE_FUTEX -#define ANNOTATE_FUTEX_CREATE(lck) -#define ANNOTATE_FUTEX_RELEASED(lck) ANNOTATE_HAPPENS_BEFORE(lck) -#define ANNOTATE_FUTEX_ACQUIRED(lck) ANNOTATE_HAPPENS_AFTER(lck) -#else -#define ANNOTATE_FUTEX_CREATE(lck) -#define ANNOTATE_FUTEX_RELEASED(lck) -#define ANNOTATE_FUTEX_ACQUIRED(lck) -#endif - -#ifdef ANNOTATE_TAS -#define ANNOTATE_TAS_CREATE(lck) -#define ANNOTATE_TAS_RELEASED(lck) ANNOTATE_HAPPENS_BEFORE(lck) -#define ANNOTATE_TAS_ACQUIRED(lck) ANNOTATE_HAPPENS_AFTER(lck) -#else -#define ANNOTATE_TAS_CREATE(lck) -#define ANNOTATE_TAS_RELEASED(lck) -#define ANNOTATE_TAS_ACQUIRED(lck) -#endif - -#ifdef ANNOTATE_DRDPA -#define ANNOTATE_DRDPA_CREATE(lck) -#define ANNOTATE_DRDPA_RELEASED(lck) ANNOTATE_HAPPENS_BEFORE(lck) -#define ANNOTATE_DRDPA_ACQUIRED(lck) ANNOTATE_HAPPENS_AFTER(lck) -#else -#define ANNOTATE_DRDPA_CREATE(lck) -#define ANNOTATE_DRDPA_RELEASED(lck) -#define ANNOTATE_DRDPA_ACQUIRED(lck) -#endif - -#endif Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/tsan_annotations.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_stats.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_stats.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_stats.h (nonexistent) @@ -1,1002 +0,0 @@ -#ifndef KMP_STATS_H -#define KMP_STATS_H - -/** @file kmp_stats.h - * Functions for collecting statistics. 
- */
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "kmp_config.h"
-#include "kmp_debug.h"
-
-#if KMP_STATS_ENABLED
-/* Statistics accumulator.
-   Accumulates number of samples and computes min, max, mean, standard deviation
-   on the fly.
-
-   Online variance calculation algorithm from
-   http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm
- */
-
-#include "kmp_stats_timing.h"
-#include <limits>
-#include <math.h>
-#include <new> // placement new
-#include <stdint.h>
-#include <string>
-#include <vector>
-
-/* Enable developer statistics here if you want them. They are more detailed
-   than is useful for application characterisation and are intended for the
-   runtime library developer. */
-#define KMP_DEVELOPER_STATS 0
-
-/* Enable/Disable histogram output */
-#define KMP_STATS_HIST 0
-
-/*!
- * @ingroup STATS_GATHERING
- * \brief flags to describe the statistic (timer or counter)
- *
- */
-enum stats_flags_e {
-  noTotal = 1 << 0, //!< do not show a TOTAL_aggregation for this statistic
-  onlyInMaster = 1 << 1, //!< statistic is valid only for master
-  noUnits = 1 << 2, //!< statistic doesn't need units printed next to it
-  notInMaster = 1 << 3, //!< statistic is valid only for non-master threads
-  logEvent = 1 << 4 //!< statistic can be logged on the event timeline when
-  //! KMP_STATS_EVENTS is on (valid only for timers)
-};
-
-/*!
- * @ingroup STATS_GATHERING
- * \brief the states which a thread can be in
- *
- */
-enum stats_state_e {
-  IDLE,
-  SERIAL_REGION,
-  FORK_JOIN_BARRIER,
-  PLAIN_BARRIER,
-  TASKWAIT,
-  TASKYIELD,
-  TASKGROUP,
-  IMPLICIT_TASK,
-  EXPLICIT_TASK
-};
-
-/*!
- * \brief Add new counters under KMP_FOREACH_COUNTER() macro in kmp_stats.h
- *
- * @param macro a user defined macro that takes three arguments -
- * macro(COUNTER_NAME, flags, arg)
- * @param arg a user defined argument to send to the user defined macro
- *
- * \details A counter counts the occurrence of some event. Each thread
- * accumulates its own count; at the end of execution the counts are aggregated
- * treating each thread as a separate measurement. (Unless onlyInMaster is set,
- * in which case there's only a single measurement). The min,mean,max are
- * therefore the values for the threads. Adding the counter here and then
- * putting a KMP_COUNT_BLOCK(name) at the point you want to count is all you
- * need to do. All of the tables and printing is generated from this macro.
- * Format is "macro(name, flags, arg)" - * - * @ingroup STATS_GATHERING - */ -// clang-format off -#define KMP_FOREACH_COUNTER(macro, arg) \ - macro(OMP_PARALLEL,stats_flags_e::onlyInMaster|stats_flags_e::noTotal,arg) \ - macro(OMP_NESTED_PARALLEL, 0, arg) \ - macro(OMP_LOOP_STATIC, 0, arg) \ - macro(OMP_LOOP_STATIC_STEAL, 0, arg) \ - macro(OMP_LOOP_DYNAMIC, 0, arg) \ - macro(OMP_DISTRIBUTE, 0, arg) \ - macro(OMP_BARRIER, 0, arg) \ - macro(OMP_CRITICAL, 0, arg) \ - macro(OMP_SINGLE, 0, arg) \ - macro(OMP_MASTER, 0, arg) \ - macro(OMP_TEAMS, 0, arg) \ - macro(OMP_set_lock, 0, arg) \ - macro(OMP_test_lock, 0, arg) \ - macro(REDUCE_wait, 0, arg) \ - macro(REDUCE_nowait, 0, arg) \ - macro(OMP_TASKYIELD, 0, arg) \ - macro(OMP_TASKLOOP, 0, arg) \ - macro(TASK_executed, 0, arg) \ - macro(TASK_cancelled, 0, arg) \ - macro(TASK_stolen, 0, arg) -// clang-format on - -/*! - * \brief Add new timers under KMP_FOREACH_TIMER() macro in kmp_stats.h - * - * @param macro a user defined macro that takes three arguments - - * macro(TIMER_NAME, flags, arg) - * @param arg a user defined argument to send to the user defined macro - * - * \details A timer collects multiple samples of some count in each thread and - * then finally aggregates all of the samples from all of the threads. For most - * timers the printing code also provides an aggregation over the thread totals. - * These are printed as TOTAL_foo. The count is normally a time (in ticks), - * hence the name "timer". (But can be any value, so we use this for "number of - * arguments passed to fork" as well). For timers the threads are not - * significant, it's the individual observations that count, so the statistics - * are at that level. Format is "macro(name, flags, arg)" - * - * @ingroup STATS_GATHERING2 - */ -// clang-format off -#define KMP_FOREACH_TIMER(macro, arg) \ - macro (OMP_worker_thread_life, stats_flags_e::logEvent, arg) \ - macro (OMP_parallel, stats_flags_e::logEvent, arg) \ - macro (OMP_parallel_overhead, stats_flags_e::logEvent, arg) \ - macro (OMP_loop_static, 0, arg) \ - macro (OMP_loop_static_scheduling, 0, arg) \ - macro (OMP_loop_dynamic, 0, arg) \ - macro (OMP_loop_dynamic_scheduling, 0, arg) \ - macro (OMP_critical, 0, arg) \ - macro (OMP_critical_wait, 0, arg) \ - macro (OMP_single, 0, arg) \ - macro (OMP_master, 0, arg) \ - macro (OMP_task_immediate, 0, arg) \ - macro (OMP_task_taskwait, 0, arg) \ - macro (OMP_task_taskyield, 0, arg) \ - macro (OMP_task_taskgroup, 0, arg) \ - macro (OMP_task_join_bar, 0, arg) \ - macro (OMP_task_plain_bar, 0, arg) \ - macro (OMP_taskloop_scheduling, 0, arg) \ - macro (OMP_plain_barrier, stats_flags_e::logEvent, arg) \ - macro (OMP_idle, stats_flags_e::logEvent, arg) \ - macro (OMP_fork_barrier, stats_flags_e::logEvent, arg) \ - macro (OMP_join_barrier, stats_flags_e::logEvent, arg) \ - macro (OMP_serial, stats_flags_e::logEvent, arg) \ - macro (OMP_set_numthreads, stats_flags_e::noUnits | stats_flags_e::noTotal, \ - arg) \ - macro (OMP_PARALLEL_args, stats_flags_e::noUnits | stats_flags_e::noTotal, \ - arg) \ - macro (OMP_loop_static_iterations, \ - stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ - macro (OMP_loop_dynamic_iterations, \ - stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ - KMP_FOREACH_DEVELOPER_TIMER(macro, arg) -// clang-format on - -// OMP_worker_thread_life -- Time from thread becoming an OpenMP thread (either -// initializing OpenMP or being created by a master) -// until the thread is destroyed -// OMP_parallel -- Time thread spends executing work directly 
-// within a #pragma omp parallel
-// OMP_parallel_overhead        -- Time thread spends setting up a parallel region
-// OMP_loop_static              -- Time thread spends executing loop iterations from
-//                                 a statically scheduled loop
-// OMP_loop_static_scheduling   -- Time thread spends scheduling loop iterations
-//                                 from a statically scheduled loop
-// OMP_loop_dynamic             -- Time thread spends executing loop iterations from
-//                                 a dynamically scheduled loop
-// OMP_loop_dynamic_scheduling  -- Time thread spends scheduling loop iterations
-//                                 from a dynamically scheduled loop
-// OMP_critical                 -- Time thread spends executing critical section
-// OMP_critical_wait            -- Time thread spends waiting to enter
-//                                 a critical section
-// OMP_single                   -- Time spent executing a "single" region
-// OMP_master                   -- Time spent executing a "master" region
-// OMP_task_immediate           -- Time spent executing non-deferred tasks
-// OMP_task_taskwait            -- Time spent executing tasks inside a taskwait
-//                                 construct
-// OMP_task_taskyield           -- Time spent executing tasks inside a taskyield
-//                                 construct
-// OMP_task_taskgroup           -- Time spent executing tasks inside a taskgroup
-//                                 construct
-// OMP_task_join_bar            -- Time spent executing tasks inside a join barrier
-// OMP_task_plain_bar           -- Time spent executing tasks inside a barrier
-//                                 construct
-// OMP_taskloop_scheduling      -- Time spent scheduling tasks inside a taskloop
-//                                 construct
-// OMP_plain_barrier            -- Time spent in a #pragma omp barrier construct or
-//                                 inside implicit barrier at end of worksharing
-//                                 construct
-// OMP_idle                     -- Time worker threads spend waiting for next
-//                                 parallel region
-// OMP_fork_barrier             -- Time spent in the fork barrier surrounding a
-//                                 parallel region
-// OMP_join_barrier             -- Time spent in the join barrier surrounding a
-//                                 parallel region
-// OMP_serial                   -- Time thread zero spends executing serial code
-// OMP_set_numthreads           -- Values passed to omp_set_num_threads
-// OMP_PARALLEL_args            -- Number of arguments passed to a parallel region
-// OMP_loop_static_iterations   -- Number of iterations thread is assigned for
-//                                 statically scheduled loops
-// OMP_loop_dynamic_iterations  -- Number of iterations thread is assigned for
-//                                 dynamically scheduled loops
-
-#if (KMP_DEVELOPER_STATS)
-// Timers which are of interest to runtime library developers, not end users.
-// These have to be explicitly enabled in addition to the other stats.
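// The KMP_FOREACH_* lists above are classic X-macros: one list, many
// expansions. A minimal self-contained sketch of the technique (all names
// below are hypothetical and not part of the runtime):
//
//   #define FOREACH_COLOR(macro, arg) \
//     macro(RED, 0, arg) macro(GREEN, 1, arg) macro(BLUE, 2, arg)
//
//   #define MAKE_ENUM(name, flags, prefix) prefix##name,
//   enum color_e { FOREACH_COLOR(MAKE_ENUM, COLOR_) COLOR_LAST };
//
//   #define MAKE_NAME(name, flags, arg) #name,
//   const char *color_names[] = {FOREACH_COLOR(MAKE_NAME, 0)};
//
// Expanding the same list with different per-item macros keeps the enum and
// its name table in sync automatically, exactly as the ENUMERATE macro
// further below does for the timer and counter lists.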
- -// KMP_fork_barrier -- time in __kmp_fork_barrier -// KMP_join_barrier -- time in __kmp_join_barrier -// KMP_barrier -- time in __kmp_barrier -// KMP_end_split_barrier -- time in __kmp_end_split_barrier -// KMP_setup_icv_copy -- time in __kmp_setup_icv_copy -// KMP_icv_copy -- start/stop timer for any ICV copying -// KMP_linear_gather -- time in __kmp_linear_barrier_gather -// KMP_linear_release -- time in __kmp_linear_barrier_release -// KMP_tree_gather -- time in __kmp_tree_barrier_gather -// KMP_tree_release -- time in __kmp_tree_barrier_release -// KMP_hyper_gather -- time in __kmp_hyper_barrier_gather -// KMP_hyper_release -- time in __kmp_hyper_barrier_release -// clang-format off -#define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \ - macro(KMP_fork_call, 0, arg) \ - macro(KMP_join_call, 0, arg) \ - macro(KMP_end_split_barrier, 0, arg) \ - macro(KMP_hier_gather, 0, arg) \ - macro(KMP_hier_release, 0, arg) \ - macro(KMP_hyper_gather, 0, arg) \ - macro(KMP_hyper_release, 0, arg) \ - macro(KMP_linear_gather, 0, arg) \ - macro(KMP_linear_release, 0, arg) \ - macro(KMP_tree_gather, 0, arg) \ - macro(KMP_tree_release, 0, arg) \ - macro(USER_resume, 0, arg) \ - macro(USER_suspend, 0, arg) \ - macro(KMP_allocate_team, 0, arg) \ - macro(KMP_setup_icv_copy, 0, arg) \ - macro(USER_icv_copy, 0, arg) \ - macro (FOR_static_steal_stolen, \ - stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ - macro (FOR_static_steal_chunks, \ - stats_flags_e::noUnits | stats_flags_e::noTotal, arg) -#else -#define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) -#endif -// clang-format on - -/*! - * \brief Add new explicit timers under KMP_FOREACH_EXPLICIT_TIMER() macro. - * - * @param macro a user defined macro that takes three arguments - - * macro(TIMER_NAME, flags, arg) - * @param arg a user defined argument to send to the user defined macro - * - * \warning YOU MUST HAVE THE SAME NAMED TIMER UNDER KMP_FOREACH_TIMER() OR ELSE - * BAD THINGS WILL HAPPEN! - * - * \details Explicit timers are ones where we need to allocate a timer itself - * (as well as the accumulated timing statistics). We allocate these on a - * per-thread basis, and explicitly start and stop them. Block timers just - * allocate the timer itself on the stack, and use the destructor to notice - * block exit; they don't need to be defined here. The name here should be the - * same as that of a timer above. - * - * @ingroup STATS_GATHERING -*/ -#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) KMP_FOREACH_TIMER(macro, arg) - -#define ENUMERATE(name, ignore, prefix) prefix##name, -enum timer_e { KMP_FOREACH_TIMER(ENUMERATE, TIMER_) TIMER_LAST }; - -enum explicit_timer_e { - KMP_FOREACH_EXPLICIT_TIMER(ENUMERATE, EXPLICIT_TIMER_) EXPLICIT_TIMER_LAST -}; - -enum counter_e { KMP_FOREACH_COUNTER(ENUMERATE, COUNTER_) COUNTER_LAST }; -#undef ENUMERATE - -/* - * A logarithmic histogram. It accumulates the number of values in each power of - * ten bin. So 1<=x<10, 10<=x<100, ... - * Mostly useful where we have some big outliers and want to see information - * about them. - */ -class logHistogram { - enum { - numBins = 31, /* Number of powers of 10. If this changes you need to change - * the initializer for binMax */ - - /* - * If you want to use this to analyse values that may be less than 1, (for - * instance times in s), then the logOffset gives you negative powers. - * In our case here, we're just looking at times in ticks, or counts, so we - * can never see values with magnitude < 1 (other than zero), so we can set - * it to 0. 
As above, change the initializer if you change this.
-     */
-    logOffset = 0
-  };
-  uint32_t KMP_ALIGN_CACHE zeroCount;
-  struct {
-    uint32_t count;
-    double total;
-  } bins[numBins];
-
-  static double binMax[numBins];
-
-#ifdef KMP_DEBUG
-  uint64_t _total;
-
-  void check() const {
-    uint64_t t = zeroCount;
-    for (int i = 0; i < numBins; i++)
-      t += bins[i].count;
-    KMP_DEBUG_ASSERT(t == _total);
-  }
-#else
-  void check() const {}
-#endif
-
-public:
-  logHistogram() { reset(); }
-
-  logHistogram(logHistogram const &o) {
-    for (int i = 0; i < numBins; i++)
-      bins[i] = o.bins[i];
-#ifdef KMP_DEBUG
-    _total = o._total;
-#endif
-  }
-
-  void reset() {
-    zeroCount = 0;
-    for (int i = 0; i < numBins; i++) {
-      bins[i].count = 0;
-      bins[i].total = 0;
-    }
-
-#ifdef KMP_DEBUG
-    _total = 0;
-#endif
-  }
-  uint32_t count(int b) const { return bins[b + logOffset].count; }
-  double total(int b) const { return bins[b + logOffset].total; }
-  static uint32_t findBin(double sample);
-
-  logHistogram &operator+=(logHistogram const &o) {
-    zeroCount += o.zeroCount;
-    for (int i = 0; i < numBins; i++) {
-      bins[i].count += o.bins[i].count;
-      bins[i].total += o.bins[i].total;
-    }
-#ifdef KMP_DEBUG
-    _total += o._total;
-    check();
-#endif
-
-    return *this;
-  }
-
-  void addSample(double sample);
-  int minBin() const;
-  int maxBin() const;
-
-  std::string format(char) const;
-};
-
-class statistic {
-  double KMP_ALIGN_CACHE minVal;
-  double maxVal;
-  double meanVal;
-  double m2;
-  uint64_t sampleCount;
-  double offset;
-  bool collectingHist;
-  logHistogram hist;
-
-public:
-  statistic(bool doHist = bool(KMP_STATS_HIST)) {
-    reset();
-    collectingHist = doHist;
-  }
-  statistic(statistic const &o)
-      : minVal(o.minVal), maxVal(o.maxVal), meanVal(o.meanVal), m2(o.m2),
-        sampleCount(o.sampleCount), offset(o.offset),
-        collectingHist(o.collectingHist), hist(o.hist) {}
-  statistic(double minv, double maxv, double meanv, uint64_t sc, double sd)
-      : minVal(minv), maxVal(maxv), meanVal(meanv), m2(sd * sd * sc),
-        sampleCount(sc), offset(0.0), collectingHist(false) {}
-  bool haveHist() const { return collectingHist; }
-  double getMin() const { return minVal; }
-  double getMean() const { return meanVal; }
-  double getMax() const { return maxVal; }
-  uint64_t getCount() const { return sampleCount; }
-  double getSD() const { return sqrt(m2 / sampleCount); }
-  double getTotal() const { return sampleCount * meanVal; }
-  logHistogram const *getHist() const { return &hist; }
-  void setOffset(double d) { offset = d; }
-
-  void reset() {
-    minVal = std::numeric_limits<double>::max();
-    maxVal = -minVal;
-    meanVal = 0.0;
-    m2 = 0.0;
-    sampleCount = 0;
-    offset = 0.0;
-    hist.reset();
-  }
-  void addSample(double sample);
-  void scale(double factor);
-  void scaleDown(double f) { scale(1. / f); }
-  void forceCount(uint64_t count) { sampleCount = count; }
-  statistic &operator+=(statistic const &other);
-
-  std::string format(char unit, bool total = false) const;
-  std::string formatHist(char unit) const { return hist.format(unit); }
-};
-
-struct statInfo {
-  const char *name;
-  uint32_t flags;
-};
-
-class timeStat : public statistic {
-  static statInfo timerInfo[];
-
-public:
-  timeStat() : statistic() {}
-  static const char *name(timer_e e) { return timerInfo[e].name; }
-  static bool noTotal(timer_e e) {
-    return timerInfo[e].flags & stats_flags_e::noTotal;
-  }
-  static bool masterOnly(timer_e e) {
-    return timerInfo[e].flags & stats_flags_e::onlyInMaster;
-  }
-  static bool workerOnly(timer_e e) {
-    return timerInfo[e].flags & stats_flags_e::notInMaster;
-  }
-  static bool noUnits(timer_e e) {
-    return timerInfo[e].flags & stats_flags_e::noUnits;
-  }
-  static bool logEvent(timer_e e) {
-    return timerInfo[e].flags & stats_flags_e::logEvent;
-  }
-  static void clearEventFlags() {
-    for (int i = 0; i < TIMER_LAST; i++) {
-      timerInfo[i].flags &= (~(stats_flags_e::logEvent));
-    }
-  }
-};
-
-// Where we need to explicitly start and end the timer, this version can be
-// used. Since these timers normally aren't nicely scoped, and so don't have a
-// good place to live on the stack of the thread, they're more work to use.
-class explicitTimer {
-  timeStat *stat;
-  timer_e timerEnumValue;
-  tsc_tick_count startTime;
-  tsc_tick_count pauseStartTime;
-  tsc_tick_count::tsc_interval_t totalPauseTime;
-
-public:
-  explicitTimer(timeStat *s, timer_e te)
-      : stat(s), timerEnumValue(te), startTime(), pauseStartTime(0),
-        totalPauseTime() {}
-
-  // void setStat(timeStat *s) { stat = s; }
-  void start(tsc_tick_count tick);
-  void pause(tsc_tick_count tick) { pauseStartTime = tick; }
-  void resume(tsc_tick_count tick) {
-    totalPauseTime += (tick - pauseStartTime);
-  }
-  void stop(tsc_tick_count tick, kmp_stats_list *stats_ptr = nullptr);
-  void reset() {
-    startTime = 0;
-    pauseStartTime = 0;
-    totalPauseTime = 0;
-  }
-  timer_e get_type() const { return timerEnumValue; }
-};
-
-// Where you need to partition a thread's clock ticks into separate states,
-// e.g., a partitionedTimers class with two timers, EXECUTING_TASK and
-// DOING_NOTHING, would maintain these invariants:
-// time(EXECUTING_TASK) + time(DOING_NOTHING) = total time thread is alive
-// No clock tick in the EXECUTING_TASK is a member of DOING_NOTHING and vice
-// versa
-class partitionedTimers {
-private:
-  std::vector<explicitTimer> timer_stack;
-
-public:
-  partitionedTimers();
-  void init(explicitTimer timer);
-  void exchange(explicitTimer timer);
-  void push(explicitTimer timer);
-  void pop();
-  void windup();
-};
-
-// Special wrapper around the partitioned timers to aid timing code blocks.
-// It avoids the need for an explicit end; leaving the scope suffices.
-class blockPartitionedTimer {
-  partitionedTimers *part_timers;
-
-public:
-  blockPartitionedTimer(partitionedTimers *pt, explicitTimer timer)
-      : part_timers(pt) {
-    part_timers->push(timer);
-  }
-  ~blockPartitionedTimer() { part_timers->pop(); }
-};
-
-// Special wrapper around the thread state to aid in keeping state in code
-// blocks. It avoids the need for an explicit end; leaving the scope suffices.
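// A hedged usage sketch of these RAII scope guards (assuming the per-thread
// stats node in __kmp_stats_thread_ptr and the convenience macros defined
// later in this header; illustration only):
//
//   {
//     KMP_TIME_PARTITIONED_BLOCK(OMP_single);     // pushes an explicitTimer
//     KMP_SET_THREAD_STATE_BLOCK(SERIAL_REGION);  // saves and sets the state
//     ... timed work ...
//   } // destructors run here: the timer is popped, the old state restored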
-class blockThreadState { - stats_state_e *state_pointer; - stats_state_e old_state; - -public: - blockThreadState(stats_state_e *thread_state_pointer, stats_state_e new_state) - : state_pointer(thread_state_pointer), old_state(*thread_state_pointer) { - *state_pointer = new_state; - } - ~blockThreadState() { *state_pointer = old_state; } -}; - -// If all you want is a count, then you can use this... -// The individual per-thread counts will be aggregated into a statistic at -// program exit. -class counter { - uint64_t value; - static const statInfo counterInfo[]; - -public: - counter() : value(0) {} - void increment() { value++; } - uint64_t getValue() const { return value; } - void reset() { value = 0; } - static const char *name(counter_e e) { return counterInfo[e].name; } - static bool masterOnly(counter_e e) { - return counterInfo[e].flags & stats_flags_e::onlyInMaster; - } -}; - -/* **************************************************************** - Class to implement an event - - There are four components to an event: start time, stop time - nest_level, and timer_name. - The start and stop time should be obvious (recorded in clock ticks). - The nest_level relates to the bar width in the timeline graph. - The timer_name is used to determine which timer event triggered this event. - - the interface to this class is through four read-only operations: - 1) getStart() -- returns the start time as 64 bit integer - 2) getStop() -- returns the stop time as 64 bit integer - 3) getNestLevel() -- returns the nest level of the event - 4) getTimerName() -- returns the timer name that triggered event - - *MORE ON NEST_LEVEL* - The nest level is used in the bar graph that represents the timeline. - Its main purpose is for showing how events are nested inside eachother. - For example, say events, A, B, and C are recorded. If the timeline - looks like this: - -Begin -------------------------------------------------------------> Time - | | | | | | - A B C C B A - start start start end end end - - Then A, B, C will have a nest level of 1, 2, 3 respectively. - These values are then used to calculate the barwidth so you can - see that inside A, B has occurred, and inside B, C has occurred. - Currently, this is shown with A's bar width being larger than B's - bar width, and B's bar width being larger than C's bar width. - -**************************************************************** */ -class kmp_stats_event { - uint64_t start; - uint64_t stop; - int nest_level; - timer_e timer_name; - -public: - kmp_stats_event() - : start(0), stop(0), nest_level(0), timer_name(TIMER_LAST) {} - kmp_stats_event(uint64_t strt, uint64_t stp, int nst, timer_e nme) - : start(strt), stop(stp), nest_level(nst), timer_name(nme) {} - inline uint64_t getStart() const { return start; } - inline uint64_t getStop() const { return stop; } - inline int getNestLevel() const { return nest_level; } - inline timer_e getTimerName() const { return timer_name; } -}; - -/* **************************************************************** - Class to implement a dynamically expandable array of events - - --------------------------------------------------------- - | event 1 | event 2 | event 3 | event 4 | ... | event N | - --------------------------------------------------------- - - An event is pushed onto the back of this array at every - explicitTimer->stop() call. The event records the thread #, - start time, stop time, and nest level related to the bar width. 
- - The event vector starts at size INIT_SIZE and grows (doubles in size) - if needed. An implication of this behavior is that log(N) - reallocations are needed (where N is number of events). If you want - to avoid reallocations, then set INIT_SIZE to a large value. - - the interface to this class is through six operations: - 1) reset() -- sets the internal_size back to 0 but does not deallocate any - memory - 2) size() -- returns the number of valid elements in the vector - 3) push_back(start, stop, nest, timer_name) -- pushes an event onto - the back of the array - 4) deallocate() -- frees all memory associated with the vector - 5) sort() -- sorts the vector by start time - 6) operator[index] or at(index) -- returns event reference at that index -**************************************************************** */ -class kmp_stats_event_vector { - kmp_stats_event *events; - int internal_size; - int allocated_size; - static const int INIT_SIZE = 1024; - -public: - kmp_stats_event_vector() { - events = - (kmp_stats_event *)__kmp_allocate(sizeof(kmp_stats_event) * INIT_SIZE); - internal_size = 0; - allocated_size = INIT_SIZE; - } - ~kmp_stats_event_vector() {} - inline void reset() { internal_size = 0; } - inline int size() const { return internal_size; } - void push_back(uint64_t start_time, uint64_t stop_time, int nest_level, - timer_e name) { - int i; - if (internal_size == allocated_size) { - kmp_stats_event *tmp = (kmp_stats_event *)__kmp_allocate( - sizeof(kmp_stats_event) * allocated_size * 2); - for (i = 0; i < internal_size; i++) - tmp[i] = events[i]; - __kmp_free(events); - events = tmp; - allocated_size *= 2; - } - events[internal_size] = - kmp_stats_event(start_time, stop_time, nest_level, name); - internal_size++; - return; - } - void deallocate(); - void sort(); - const kmp_stats_event &operator[](int index) const { return events[index]; } - kmp_stats_event &operator[](int index) { return events[index]; } - const kmp_stats_event &at(int index) const { return events[index]; } - kmp_stats_event &at(int index) { return events[index]; } -}; - -/* **************************************************************** - Class to implement a doubly-linked, circular, statistics list - - |---| ---> |---| ---> |---| ---> |---| ---> ... next - | | | | | | | | - |---| <--- |---| <--- |---| <--- |---| <--- ... prev - Sentinel first second third - Node node node node - - The Sentinel Node is the user handle on the list. - The first node corresponds to thread 0's statistics. - The second node corresponds to thread 1's statistics and so on... - - Each node has a _timers, _counters, and _explicitTimers array to hold that - thread's statistics. The _explicitTimers point to the correct _timer and - update its statistics at every stop() call. The explicitTimers' pointers are - set up in the constructor. Each node also has an event vector to hold that - thread's timing events. The event vector expands as necessary and records - the start-stop times for each timer. - - The nestLevel variable is for plotting events and is related - to the bar width in the timeline graph. - - Every thread will have a thread local pointer to its node in - the list. The sentinel node is used by the master thread to - store "dummy" statistics before __kmp_create_worker() is called. 
-**************************************************************** */ -class kmp_stats_list { - int gtid; - timeStat _timers[TIMER_LAST + 1]; - counter _counters[COUNTER_LAST + 1]; - explicitTimer thread_life_timer; - partitionedTimers _partitionedTimers; - int _nestLevel; // one per thread - kmp_stats_event_vector _event_vector; - kmp_stats_list *next; - kmp_stats_list *prev; - stats_state_e state; - int thread_is_idle_flag; - -public: - kmp_stats_list() - : thread_life_timer(&_timers[TIMER_OMP_worker_thread_life], - TIMER_OMP_worker_thread_life), - _nestLevel(0), _event_vector(), next(this), prev(this), state(IDLE), - thread_is_idle_flag(0) {} - ~kmp_stats_list() {} - inline timeStat *getTimer(timer_e idx) { return &_timers[idx]; } - inline counter *getCounter(counter_e idx) { return &_counters[idx]; } - inline partitionedTimers *getPartitionedTimers() { - return &_partitionedTimers; - } - inline timeStat *getTimers() { return _timers; } - inline counter *getCounters() { return _counters; } - inline kmp_stats_event_vector &getEventVector() { return _event_vector; } - inline void startLife() { thread_life_timer.start(tsc_tick_count::now()); } - inline void endLife() { thread_life_timer.stop(tsc_tick_count::now(), this); } - inline void resetEventVector() { _event_vector.reset(); } - inline void incrementNestValue() { _nestLevel++; } - inline int getNestValue() { return _nestLevel; } - inline void decrementNestValue() { _nestLevel--; } - inline int getGtid() const { return gtid; } - inline void setGtid(int newgtid) { gtid = newgtid; } - inline void setState(stats_state_e newstate) { state = newstate; } - inline stats_state_e getState() const { return state; } - inline stats_state_e *getStatePointer() { return &state; } - inline bool isIdle() { return thread_is_idle_flag == 1; } - inline void setIdleFlag() { thread_is_idle_flag = 1; } - inline void resetIdleFlag() { thread_is_idle_flag = 0; } - kmp_stats_list *push_back(int gtid); // returns newly created list node - inline void push_event(uint64_t start_time, uint64_t stop_time, - int nest_level, timer_e name) { - _event_vector.push_back(start_time, stop_time, nest_level, name); - } - void deallocate(); - class iterator; - kmp_stats_list::iterator begin(); - kmp_stats_list::iterator end(); - int size(); - class iterator { - kmp_stats_list *ptr; - friend kmp_stats_list::iterator kmp_stats_list::begin(); - friend kmp_stats_list::iterator kmp_stats_list::end(); - - public: - iterator(); - ~iterator(); - iterator operator++(); - iterator operator++(int dummy); - iterator operator--(); - iterator operator--(int dummy); - bool operator!=(const iterator &rhs); - bool operator==(const iterator &rhs); - kmp_stats_list *operator*() const; // dereference operator - }; -}; - -/* **************************************************************** - Class to encapsulate all output functions and the environment variables - - This module holds filenames for various outputs (normal stats, events, plot - file), as well as coloring information for the plot file. - - The filenames and flags variables are read from environment variables. - These are read once by the constructor of the global variable - __kmp_stats_output which calls init(). - - During this init() call, event flags for the timeStat::timerInfo[] global - array are cleared if KMP_STATS_EVENTS is not true (on, 1, yes). - - The only interface function that is public is outputStats(heading). 
This - function should print out everything it needs to, either to files or stderr, - depending on the environment variables described below - - ENVIRONMENT VARIABLES: - KMP_STATS_FILE -- if set, all statistics (not events) will be printed to this - file, otherwise, print to stderr - KMP_STATS_THREADS -- if set to "on", then will print per thread statistics to - either KMP_STATS_FILE or stderr - KMP_STATS_PLOT_FILE -- if set, print the ploticus plot file to this filename, - otherwise, the plot file is sent to "events.plt" - KMP_STATS_EVENTS -- if set to "on", then log events, otherwise, don't log - events - KMP_STATS_EVENTS_FILE -- if set, all events are outputted to this file, - otherwise, output is sent to "events.dat" -**************************************************************** */ -class kmp_stats_output_module { - -public: - struct rgb_color { - float r; - float g; - float b; - }; - -private: - std::string outputFileName; - static const char *eventsFileName; - static const char *plotFileName; - static int printPerThreadFlag; - static int printPerThreadEventsFlag; - static const rgb_color globalColorArray[]; - static rgb_color timerColorInfo[]; - - void init(); - static void setupEventColors(); - static void printPloticusFile(); - static void printHeaderInfo(FILE *statsOut); - static void printTimerStats(FILE *statsOut, statistic const *theStats, - statistic const *totalStats); - static void printCounterStats(FILE *statsOut, statistic const *theStats); - static void printCounters(FILE *statsOut, counter const *theCounters); - static void printEvents(FILE *eventsOut, kmp_stats_event_vector *theEvents, - int gtid); - static rgb_color getEventColor(timer_e e) { return timerColorInfo[e]; } - static void windupExplicitTimers(); - bool eventPrintingEnabled() const { return printPerThreadEventsFlag; } - -public: - kmp_stats_output_module() { init(); } - void outputStats(const char *heading); -}; - -#ifdef __cplusplus -extern "C" { -#endif -void __kmp_stats_init(); -void __kmp_stats_fini(); -void __kmp_reset_stats(); -void __kmp_output_stats(const char *); -void __kmp_accumulate_stats_at_exit(void); -// thread local pointer to stats node within list -extern KMP_THREAD_LOCAL kmp_stats_list *__kmp_stats_thread_ptr; -// head to stats list. -extern kmp_stats_list *__kmp_stats_list; -// lock for __kmp_stats_list -extern kmp_tas_lock_t __kmp_stats_lock; -// reference start time -extern tsc_tick_count __kmp_stats_start_time; -// interface to output -extern kmp_stats_output_module __kmp_stats_output; - -#ifdef __cplusplus -} -#endif - -// Simple, standard interfaces that drop out completely if stats aren't enabled - -/*! - * \brief Adds value to specified timer (name). - * - * @param name timer name as specified under the KMP_FOREACH_TIMER() macro - * @param value double precision sample value to add to statistics for the timer - * - * \details Use KMP_COUNT_VALUE(name, value) macro to add a particular value to - * a timer statistics. - * - * @ingroup STATS_GATHERING -*/ -#define KMP_COUNT_VALUE(name, value) \ - __kmp_stats_thread_ptr->getTimer(TIMER_##name)->addSample(value) - -/*! - * \brief Increments specified counter (name). - * - * @param name counter name as specified under the KMP_FOREACH_COUNTER() macro - * - * \details Use KMP_COUNT_BLOCK(name, value) macro to increment a statistics - * counter for the executing thread. - * - * @ingroup STATS_GATHERING -*/ -#define KMP_COUNT_BLOCK(name) \ - __kmp_stats_thread_ptr->getCounter(COUNTER_##name)->increment() - -/*! 
- * \brief Outputs the current thread statistics and resets them.
- *
- * @param heading_string heading put above the final stats output
- *
- * \details Explicitly stops all timers and outputs all stats. Environment
- * variable, `OMPTB_STATSFILE=filename`, can be used to output the stats to a
- * filename instead of stderr. Environment variable,
- * `OMPTB_STATSTHREADS=true|undefined`, can be used to output thread specific
- * stats. For now the `OMPTB_STATSTHREADS` environment variable can either be
- * defined with any value, which will print out thread specific stats, or it can
- * be undefined (not specified in the environment) and thread specific stats
- * won't be printed. It should be noted that all statistics are reset when this
- * macro is called.
- *
- * @ingroup STATS_GATHERING
-*/
-#define KMP_OUTPUT_STATS(heading_string) __kmp_output_stats(heading_string)
-
-/*!
- * \brief Initializes the partitioned timers to begin with name.
- *
- * @param name timer which you want this thread to begin with
- *
- * @ingroup STATS_GATHERING
-*/
-#define KMP_INIT_PARTITIONED_TIMERS(name)                                      \
-  __kmp_stats_thread_ptr->getPartitionedTimers()->init(explicitTimer(         \
-      __kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name))
-
-#define KMP_TIME_PARTITIONED_BLOCK(name)                                       \
-  blockPartitionedTimer __PBLOCKTIME__(                                        \
-      __kmp_stats_thread_ptr->getPartitionedTimers(),                          \
-      explicitTimer(__kmp_stats_thread_ptr->getTimer(TIMER_##name),            \
-                    TIMER_##name))
-
-#define KMP_PUSH_PARTITIONED_TIMER(name)                                       \
-  __kmp_stats_thread_ptr->getPartitionedTimers()->push(explicitTimer(          \
-      __kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name))
-
-#define KMP_POP_PARTITIONED_TIMER()                                            \
-  __kmp_stats_thread_ptr->getPartitionedTimers()->pop()
-
-#define KMP_EXCHANGE_PARTITIONED_TIMER(name)                                   \
-  __kmp_stats_thread_ptr->getPartitionedTimers()->exchange(explicitTimer(      \
-      __kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name))
-
-#define KMP_SET_THREAD_STATE(state_name)                                       \
-  __kmp_stats_thread_ptr->setState(state_name)
-
-#define KMP_GET_THREAD_STATE() __kmp_stats_thread_ptr->getState()
-
-#define KMP_SET_THREAD_STATE_BLOCK(state_name)                                 \
-  blockThreadState __BTHREADSTATE__(__kmp_stats_thread_ptr->getStatePointer(), \
-                                    state_name)
-
-/*!
- * \brief resets all stats (counters to 0, timers to 0 elapsed ticks)
- *
- * \details Reset all stats for all threads.
- * - * @ingroup STATS_GATHERING -*/ -#define KMP_RESET_STATS() __kmp_reset_stats() - -#if (KMP_DEVELOPER_STATS) -#define KMP_TIME_DEVELOPER_BLOCK(n) KMP_TIME_BLOCK(n) -#define KMP_COUNT_DEVELOPER_VALUE(n, v) KMP_COUNT_VALUE(n, v) -#define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n) -#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) KMP_START_EXPLICIT_TIMER(n) -#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) KMP_STOP_EXPLICIT_TIMER(n) -#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) KMP_TIME_PARTITIONED_BLOCK(n) -#else -// Null definitions -#define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0) -#define KMP_COUNT_DEVELOPER_VALUE(n, v) ((void)0) -#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0) -#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) -#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) -#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0) -#endif - -#else // KMP_STATS_ENABLED - -// Null definitions -#define KMP_TIME_BLOCK(n) ((void)0) -#define KMP_COUNT_VALUE(n, v) ((void)0) -#define KMP_COUNT_BLOCK(n) ((void)0) -#define KMP_START_EXPLICIT_TIMER(n) ((void)0) -#define KMP_STOP_EXPLICIT_TIMER(n) ((void)0) - -#define KMP_OUTPUT_STATS(heading_string) ((void)0) -#define KMP_RESET_STATS() ((void)0) - -#define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0) -#define KMP_COUNT_DEVELOPER_VALUE(n, v) ((void)0) -#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0) -#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) -#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) -#define KMP_INIT_PARTITIONED_TIMERS(name) ((void)0) -#define KMP_TIME_PARTITIONED_BLOCK(name) ((void)0) -#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0) -#define KMP_PUSH_PARTITIONED_TIMER(name) ((void)0) -#define KMP_POP_PARTITIONED_TIMER() ((void)0) -#define KMP_SET_THREAD_STATE(state_name) ((void)0) -#define KMP_GET_THREAD_STATE() ((void)0) -#define KMP_SET_THREAD_STATE_BLOCK(state_name) ((void)0) -#endif // KMP_STATS_ENABLED - -#endif // KMP_STATS_H Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_stats.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/libomp.rc.var =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/libomp.rc.var (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/libomp.rc.var (nonexistent) @@ -1,70 +0,0 @@ -// libomp.rc.var - -// -////===----------------------------------------------------------------------===// -//// -//// The LLVM Compiler Infrastructure -//// -//// This file is dual licensed under the MIT and the University of Illinois Open -//// Source Licenses. See LICENSE.txt for details. -//// -////===----------------------------------------------------------------------===// -// - -#include "winresrc.h" -#include "kmp_config.h" - -LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US // English (U.S.) 
resources -#pragma code_page(1252) - -VS_VERSION_INFO VERSIONINFO - // Parts of FILEVERSION and PRODUCTVERSION are 16-bit fields, entire build date yyyymmdd - // does not fit into one version part, so we need to split it into yyyy and mmdd: - FILEVERSION @LIBOMP_VERSION_MAJOR@,@LIBOMP_VERSION_MINOR@,@LIBOMP_VERSION_BUILD_YEAR@,@LIBOMP_VERSION_BUILD_MONTH_DAY@ - PRODUCTVERSION @LIBOMP_VERSION_MAJOR@,@LIBOMP_VERSION_MINOR@,@LIBOMP_VERSION_BUILD_YEAR@,@LIBOMP_VERSION_BUILD_MONTH_DAY@ - FILEFLAGSMASK VS_FFI_FILEFLAGSMASK - FILEFLAGS 0 -#if KMP_DEBUG - | VS_FF_DEBUG -#endif -#if @LIBOMP_VERSION_BUILD@ == 0 - | VS_FF_PRIVATEBUILD | VS_FF_PRERELEASE -#endif - FILEOS VOS_NT_WINDOWS32 // Windows* Server* 2003, XP*, 2000, or NT* - FILETYPE VFT_DLL - BEGIN - BLOCK "StringFileInfo" - BEGIN - BLOCK "040904b0" // U.S. English, Unicode (0x04b0 == 1200) - BEGIN - - // FileDescription and LegalCopyright should be short. - VALUE "FileDescription", "LLVM* OpenMP* Runtime Library\0" - // Following values may be relatively long. - VALUE "CompanyName", "LLVM\0" - // VALUE "LegalTrademarks", "\0" // Not used for now. - VALUE "ProductName", "LLVM* OpenMP* Runtime Library\0" - VALUE "ProductVersion", "@LIBOMP_VERSION_MAJOR@.@LIBOMP_VERSION_MINOR@\0" - VALUE "FileVersion", "@LIBOMP_VERSION_BUILD@\0" - VALUE "InternalName", "@LIBOMP_LIB_FILE@\0" - VALUE "OriginalFilename", "@LIBOMP_LIB_FILE@\0" - VALUE "Comments", - "LLVM* OpenMP* @LIBOMP_LEGAL_TYPE@ Library " - "version @LIBOMP_VERSION_MAJOR@.@LIBOMP_VERSION_MINOR@.@LIBOMP_VERSION_BUILD@ " - "for @LIBOMP_LEGAL_ARCH@ architecture built on @LIBOMP_BUILD_DATE@.\0" -#if @LIBOMP_VERSION_BUILD@ == 0 - VALUE "PrivateBuild", - "This is a development build.\0" -#endif - // VALUE "SpecialBuild", "\0" // Not used for now. - - END - END - BLOCK "VarFileInfo" - BEGIN - VALUE "Translation", 1033, 1200 - // 1033 -- U.S. English, 1200 -- Unicode - END - END - -// end of file // Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_config.h.cmake =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_config.h.cmake (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_config.h.cmake (nonexistent) @@ -1,117 +0,0 @@ -/* - * kmp_config.h -- Feature macros - */ -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// -#ifndef KMP_CONFIG_H -#define KMP_CONFIG_H - -#include "kmp_platform.h" - -// cmakedefine01 MACRO will define MACRO as either 0 or 1 -// cmakedefine MACRO 1 will define MACRO as 1 or leave undefined -#cmakedefine01 DEBUG_BUILD -#cmakedefine01 RELWITHDEBINFO_BUILD -#cmakedefine01 LIBOMP_USE_ITT_NOTIFY -#define USE_ITT_NOTIFY LIBOMP_USE_ITT_NOTIFY -#if ! 
LIBOMP_USE_ITT_NOTIFY -# define INTEL_NO_ITTNOTIFY_API -#endif -#cmakedefine01 LIBOMP_USE_VERSION_SYMBOLS -#if LIBOMP_USE_VERSION_SYMBOLS -# define KMP_USE_VERSION_SYMBOLS -#endif -#cmakedefine01 LIBOMP_HAVE_WEAK_ATTRIBUTE -#define KMP_HAVE_WEAK_ATTRIBUTE LIBOMP_HAVE_WEAK_ATTRIBUTE -#cmakedefine01 LIBOMP_HAVE_PSAPI -#define KMP_HAVE_PSAPI LIBOMP_HAVE_PSAPI -#cmakedefine01 LIBOMP_STATS -#define KMP_STATS_ENABLED LIBOMP_STATS -#cmakedefine01 LIBOMP_HAVE_X86INTRIN_H -#define KMP_HAVE_X86INTRIN_H LIBOMP_HAVE_X86INTRIN_H -#cmakedefine01 LIBOMP_HAVE___BUILTIN_READCYCLECOUNTER -#define KMP_HAVE___BUILTIN_READCYCLECOUNTER LIBOMP_HAVE___BUILTIN_READCYCLECOUNTER -#cmakedefine01 LIBOMP_HAVE___RDTSC -#define KMP_HAVE___RDTSC LIBOMP_HAVE___RDTSC -#cmakedefine01 LIBOMP_USE_DEBUGGER -#define USE_DEBUGGER LIBOMP_USE_DEBUGGER -#cmakedefine01 LIBOMP_OMPT_DEBUG -#define OMPT_DEBUG LIBOMP_OMPT_DEBUG -#cmakedefine01 LIBOMP_OMPT_SUPPORT -#define OMPT_SUPPORT LIBOMP_OMPT_SUPPORT -#cmakedefine01 LIBOMP_OMPT_OPTIONAL -#define OMPT_OPTIONAL LIBOMP_OMPT_OPTIONAL -#cmakedefine01 LIBOMP_USE_ADAPTIVE_LOCKS -#define KMP_USE_ADAPTIVE_LOCKS LIBOMP_USE_ADAPTIVE_LOCKS -#define KMP_DEBUG_ADAPTIVE_LOCKS 0 -#cmakedefine01 LIBOMP_USE_INTERNODE_ALIGNMENT -#define KMP_USE_INTERNODE_ALIGNMENT LIBOMP_USE_INTERNODE_ALIGNMENT -#cmakedefine01 LIBOMP_ENABLE_ASSERTIONS -#define KMP_USE_ASSERT LIBOMP_ENABLE_ASSERTIONS -#cmakedefine01 LIBOMP_USE_HIER_SCHED -#define KMP_USE_HIER_SCHED LIBOMP_USE_HIER_SCHED -#cmakedefine01 STUBS_LIBRARY -#cmakedefine01 LIBOMP_USE_HWLOC -#define KMP_USE_HWLOC LIBOMP_USE_HWLOC -#cmakedefine01 LIBOMP_ENABLE_SHARED -#define KMP_DYNAMIC_LIB LIBOMP_ENABLE_SHARED -#define KMP_ARCH_STR "@LIBOMP_LEGAL_ARCH@" -#define KMP_LIBRARY_FILE "@LIBOMP_LIB_FILE@" -#define KMP_VERSION_MAJOR @LIBOMP_VERSION_MAJOR@ -#define KMP_VERSION_MINOR @LIBOMP_VERSION_MINOR@ -#define LIBOMP_OMP_VERSION @LIBOMP_OMP_VERSION@ -#define OMP_50_ENABLED (LIBOMP_OMP_VERSION >= 50) -#define OMP_45_ENABLED (LIBOMP_OMP_VERSION >= 45) -#define OMP_40_ENABLED (LIBOMP_OMP_VERSION >= 40) -#define OMP_30_ENABLED (LIBOMP_OMP_VERSION >= 30) -#cmakedefine01 LIBOMP_TSAN_SUPPORT -#if LIBOMP_TSAN_SUPPORT -#define TSAN_SUPPORT -#endif -#cmakedefine01 MSVC -#define KMP_MSVC_COMPAT MSVC - -// Configured cache line based on architecture -#if KMP_ARCH_PPC64 -# define CACHE_LINE 128 -#else -# define CACHE_LINE 64 -#endif - -#if ! KMP_32_BIT_ARCH -# define BUILD_I8 1 -#endif - -#define KMP_NESTED_HOT_TEAMS 1 -#define KMP_ADJUST_BLOCKTIME 1 -#define BUILD_PARALLEL_ORDERED 1 -#define KMP_ASM_INTRINS 1 -#define USE_ITT_BUILD LIBOMP_USE_ITT_NOTIFY -#define INTEL_ITTNOTIFY_PREFIX __kmp_itt_ -#if ! KMP_MIC -# define USE_LOAD_BALANCE 1 -#endif -#if ! 
(KMP_OS_WINDOWS || KMP_OS_DARWIN)
-# define KMP_TDATA_GTID 1
-#endif
-#if STUBS_LIBRARY
-# define KMP_STUB 1
-#endif
-#if DEBUG_BUILD || RELWITHDEBINFO_BUILD
-# define KMP_DEBUG 1
-#endif
-
-#if KMP_OS_WINDOWS
-# define KMP_WIN_CDECL
-#else
-# define BUILD_TV
-# define KMP_GOMP_COMPAT
-#endif
-
-#endif // KMP_CONFIG_H
Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_wait_release.h
===================================================================
--- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_wait_release.h (revision 348960)
+++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_wait_release.h (nonexistent)
@@ -1,905 +0,0 @@
-/*
- * kmp_wait_release.h -- Wait/Release implementation
- */
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef KMP_WAIT_RELEASE_H
-#define KMP_WAIT_RELEASE_H
-
-#include "kmp.h"
-#include "kmp_itt.h"
-#include "kmp_stats.h"
-#if OMPT_SUPPORT
-#include "ompt-specific.h"
-#endif
-
-/*!
-@defgroup WAIT_RELEASE Wait/Release operations
-
-The definitions and functions here implement the lowest level thread
-synchronizations of suspending a thread and waking it. They are used to build
-higher level operations such as barriers and fork/join.
-*/
-
-/*!
-@ingroup WAIT_RELEASE
-@{
-*/
-
-/*!
- * The flag_type describes the storage used for the flag.
- */
-enum flag_type {
-  flag32, /**< 32 bit flags */
-  flag64, /**< 64 bit flags */
-  flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */
-};
-
-/*!
- * Base class for wait/release volatile flag
- */
-template <typename P> class kmp_flag_native {
-  volatile P *loc;
-  flag_type t;
-
-public:
-  typedef P flag_t;
-  kmp_flag_native(volatile P *p, flag_type ft) : loc(p), t(ft) {}
-  volatile P *get() { return loc; }
-  void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); }
-  void set(volatile P *new_loc) { loc = new_loc; }
-  flag_type get_type() { return t; }
-  P load() { return *loc; }
-  void store(P val) { *loc = val; }
-};
-
-/*!
- * Base class for wait/release atomic flag
- */
-template <typename P> class kmp_flag {
-  std::atomic<P>
-      *loc; /**< Pointer to the flag storage that is modified by another thread
-             */
-  flag_type t; /**< "Type" of the flag in loc */
-public:
-  typedef P flag_t;
-  kmp_flag(std::atomic<P> *p, flag_type ft) : loc(p), t(ft) {}
-  /*!
-   * @result the pointer to the actual flag
-   */
-  std::atomic<P> *get() { return loc; }
-  /*!
-   * @result void* pointer to the actual flag
-   */
-  void *get_void_p() { return RCAST(void *, loc); }
-  /*!
-   * @param new_loc in set loc to point at new_loc
-   */
-  void set(std::atomic<P>
*new_loc) { loc = new_loc; } - /*! - * @result the flag_type - */ - flag_type get_type() { return t; } - /*! - * @result flag value - */ - P load() { return loc->load(std::memory_order_acquire); } - /*! - * @param val the new flag value to be stored - */ - void store(P val) { loc->store(val, std::memory_order_release); } - // Derived classes must provide the following: - /* - kmp_info_t * get_waiter(kmp_uint32 i); - kmp_uint32 get_num_waiters(); - bool done_check(); - bool done_check_val(P old_loc); - bool notdone_check(); - P internal_release(); - void suspend(int th_gtid); - void resume(int th_gtid); - P set_sleeping(); - P unset_sleeping(); - bool is_sleeping(); - bool is_any_sleeping(); - bool is_sleeping_val(P old_loc); - int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, - int *thread_finished - USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 - is_constrained); - */ -}; - -#if OMPT_SUPPORT -OMPT_NOINLINE -static void __ompt_implicit_task_end(kmp_info_t *this_thr, - ompt_state_t ompt_state, - ompt_data_t *tId) { - int ds_tid = this_thr->th.th_info.ds.ds_tid; - if (ompt_state == ompt_state_wait_barrier_implicit) { - this_thr->th.ompt_thread_info.state = ompt_state_overhead; -#if OMPT_OPTIONAL - void *codeptr = NULL; - if (ompt_enabled.ompt_callback_sync_region_wait) { - ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( - ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr); - } - if (ompt_enabled.ompt_callback_sync_region) { - ompt_callbacks.ompt_callback(ompt_callback_sync_region)( - ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr); - } -#endif - if (!KMP_MASTER_TID(ds_tid)) { - if (ompt_enabled.ompt_callback_implicit_task) { - ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( - ompt_scope_end, NULL, tId, 0, ds_tid, ompt_task_implicit); - } - // return to idle state - this_thr->th.ompt_thread_info.state = ompt_state_idle; - } else { - this_thr->th.ompt_thread_info.state = ompt_state_overhead; - } - } -} -#endif - -/* Spin wait loop that first does pause, then yield, then sleep. A thread that - calls __kmp_wait_* must make certain that another thread calls __kmp_release - to wake it back up to prevent deadlocks! - - NOTE: We may not belong to a team at this point. */ -template -static inline void -__kmp_wait_template(kmp_info_t *this_thr, - C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) { -#if USE_ITT_BUILD && USE_ITT_NOTIFY - volatile void *spin = flag->get(); -#endif - kmp_uint32 spins; - int th_gtid; - int tasks_completed = FALSE; - int oversubscribed; -#if !KMP_USE_MONITOR - kmp_uint64 poll_count; - kmp_uint64 hibernate_goal; -#else - kmp_uint32 hibernate; -#endif - - KMP_FSYNC_SPIN_INIT(spin, NULL); - if (flag->done_check()) { - KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin)); - return; - } - th_gtid = this_thr->th.th_info.ds.ds_gtid; -#if KMP_OS_UNIX - if (final_spin) - KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true); -#endif - KA_TRACE(20, - ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag)); -#if KMP_STATS_ENABLED - stats_state_e thread_state = KMP_GET_THREAD_STATE(); -#endif - -/* OMPT Behavior: -THIS function is called from - __kmp_barrier (2 times) (implicit or explicit barrier in parallel regions) - these have join / fork behavior - - In these cases, we don't change the state or trigger events in THIS -function. 
- Events are triggered in the calling code (__kmp_barrier): - - state := ompt_state_overhead - barrier-begin - barrier-wait-begin - state := ompt_state_wait_barrier - call join-barrier-implementation (finally arrive here) - {} - call fork-barrier-implementation (finally arrive here) - {} - state := ompt_state_overhead - barrier-wait-end - barrier-end - state := ompt_state_work_parallel - - - __kmp_fork_barrier (after thread creation, before executing implicit task) - call fork-barrier-implementation (finally arrive here) - {} // worker arrive here with state = ompt_state_idle - - - __kmp_join_barrier (implicit barrier at end of parallel region) - state := ompt_state_barrier_implicit - barrier-begin - barrier-wait-begin - call join-barrier-implementation (finally arrive here -final_spin=FALSE) - { - } - __kmp_fork_barrier (implicit barrier at end of parallel region) - call fork-barrier-implementation (finally arrive here final_spin=TRUE) - - Worker after task-team is finished: - barrier-wait-end - barrier-end - implicit-task-end - idle-begin - state := ompt_state_idle - - Before leaving, if state = ompt_state_idle - idle-end - state := ompt_state_overhead -*/ -#if OMPT_SUPPORT - ompt_state_t ompt_entry_state; - ompt_data_t *tId; - if (ompt_enabled.enabled) { - ompt_entry_state = this_thr->th.ompt_thread_info.state; - if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit || - KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) { - ompt_lw_taskteam_t *team = - this_thr->th.th_team->t.ompt_serialized_team_info; - if (team) { - tId = &(team->ompt_task_info.task_data); - } else { - tId = OMPT_CUR_TASK_DATA(this_thr); - } - } else { - tId = &(this_thr->th.ompt_thread_info.task_data); - } - if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec || - this_thr->th.th_task_team == NULL)) { - // implicit task is done. Either no taskqueue, or task-team finished - __ompt_implicit_task_end(this_thr, ompt_entry_state, tId); - } - } -#endif - - // Setup for waiting - KMP_INIT_YIELD(spins); - - if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { -#if KMP_USE_MONITOR -// The worker threads cannot rely on the team struct existing at this point. -// Use the bt values cached in the thread struct instead. -#ifdef KMP_ADJUST_BLOCKTIME - if (__kmp_zero_bt && !this_thr->th.th_team_bt_set) - // Force immediate suspend if not set by user and more threads than - // available procs - hibernate = 0; - else - hibernate = this_thr->th.th_team_bt_intervals; -#else - hibernate = this_thr->th.th_team_bt_intervals; -#endif /* KMP_ADJUST_BLOCKTIME */ - - /* If the blocktime is nonzero, we want to make sure that we spin wait for - the entirety of the specified #intervals, plus up to one interval more. - This increment make certain that this thread doesn't go to sleep too - soon. */ - if (hibernate != 0) - hibernate++; - - // Add in the current time value. 
- hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value); - KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n", - th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate, - hibernate - __kmp_global.g.g_time.dt.t_value)); -#else - hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals; - poll_count = 0; -#endif // KMP_USE_MONITOR - } - - oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc); - KMP_MB(); - - // Main wait spin loop - while (flag->notdone_check()) { - int in_pool; - kmp_task_team_t *task_team = NULL; - if (__kmp_tasking_mode != tskm_immediate_exec) { - task_team = this_thr->th.th_task_team; - /* If the thread's task team pointer is NULL, it means one of 3 things: - 1) A newly-created thread is first being released by - __kmp_fork_barrier(), and its task team has not been set up yet. - 2) All tasks have been executed to completion. - 3) Tasking is off for this region. This could be because we are in a - serialized region (perhaps the outer one), or else tasking was manually - disabled (KMP_TASKING=0). */ - if (task_team != NULL) { - if (TCR_SYNC_4(task_team->tt.tt_active)) { - if (KMP_TASKING_ENABLED(task_team)) - flag->execute_tasks( - this_thr, th_gtid, final_spin, - &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0); - else - this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; - } else { - KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)); -#if OMPT_SUPPORT - // task-team is done now, other cases should be catched above - if (final_spin && ompt_enabled.enabled) - __ompt_implicit_task_end(this_thr, ompt_entry_state, tId); -#endif - this_thr->th.th_task_team = NULL; - this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; - } - } else { - this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; - } // if - } // if - - KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin)); - if (TCR_4(__kmp_global.g.g_done)) { - if (__kmp_global.g.g_abort) - __kmp_abort_thread(); - break; - } - - // If we are oversubscribed, or have waited a bit (and - // KMP_LIBRARY=throughput), then yield - // TODO: Should it be number of cores instead of thread contexts? Like: - // KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores); - // Need performance improvement data to make the change... - if (oversubscribed) { - KMP_YIELD(1); - } else { - KMP_YIELD_SPIN(spins); - } - // Check if this thread was transferred from a team - // to the thread pool (or vice-versa) while spinning. - in_pool = !!TCR_4(this_thr->th.th_in_pool); - if (in_pool != !!this_thr->th.th_active_in_pool) { - if (in_pool) { // Recently transferred from team to pool - KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth); - this_thr->th.th_active_in_pool = TRUE; - /* Here, we cannot assert that: - KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) <= - __kmp_thread_pool_nth); - __kmp_thread_pool_nth is inc/dec'd by the master thread while the - fork/join lock is held, whereas __kmp_thread_pool_active_nth is - inc/dec'd asynchronously by the workers. The two can get out of sync - for brief periods of time. 
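A self-contained sketch of the pause/yield/sleep escalation implemented by the wait loop above, using plain C++11 primitives in place of the runtime's flag and suspend machinery; every name here is illustrative and not part of libomp:

#include <atomic>
#include <chrono>
#include <condition_variable>
#include <mutex>
#include <thread>

// Illustrative only: spin first, then yield, then block once the blocktime
// deadline passes; the same shape as the __kmp_wait_template loop above.
static std::atomic<bool> released{false};
static std::mutex mu;
static std::condition_variable cv;

void wait_with_blocktime(std::chrono::microseconds blocktime) {
  const auto deadline = std::chrono::steady_clock::now() + blocktime;
  int spins = 0;
  while (!released.load(std::memory_order_acquire)) {
    if (++spins < 1000)
      continue;                       // stage 1: cheap busy-wait (pause)
    if (std::chrono::steady_clock::now() < deadline) {
      std::this_thread::yield();      // stage 2: stay runnable (KMP_YIELD)
      continue;
    }
    // stage 3: deadline reached, actually sleep (cf. flag->suspend()).
    std::unique_lock<std::mutex> lk(mu);
    cv.wait(lk, [] { return released.load(std::memory_order_acquire); });
  }
}

void release_waiter() {
  {
    std::lock_guard<std::mutex> lk(mu);  // take the mutex to avoid a lost wakeup
    released.store(true, std::memory_order_release);
  }
  cv.notify_all();                       // cf. flag->resume()
}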
*/ - } else { // Recently transferred from pool to team - KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth); - KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0); - this_thr->th.th_active_in_pool = FALSE; - } - } - -#if KMP_STATS_ENABLED - // Check if thread has been signalled to idle state - // This indicates that the logical "join-barrier" has finished - if (this_thr->th.th_stats->isIdle() && - KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) { - KMP_SET_THREAD_STATE(IDLE); - KMP_PUSH_PARTITIONED_TIMER(OMP_idle); - } -#endif - - // Don't suspend if KMP_BLOCKTIME is set to "infinite" - if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) - continue; - - // Don't suspend if there is a likelihood of new tasks being spawned. - if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks)) - continue; - -#if KMP_USE_MONITOR - // If we have waited a bit more, fall asleep - if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate) - continue; -#else - if (KMP_BLOCKING(hibernate_goal, poll_count++)) - continue; -#endif - - KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid)); -#if KMP_OS_UNIX - if (final_spin) - KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false); -#endif - flag->suspend(th_gtid); -#if KMP_OS_UNIX - if (final_spin) - KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true); -#endif - - if (TCR_4(__kmp_global.g.g_done)) { - if (__kmp_global.g.g_abort) - __kmp_abort_thread(); - break; - } else if (__kmp_tasking_mode != tskm_immediate_exec && - this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) { - this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP; - } - // TODO: If thread is done with work and times out, disband/free - } - -#if OMPT_SUPPORT - ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state; - if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) { -#if OMPT_OPTIONAL - if (final_spin) { - __ompt_implicit_task_end(this_thr, ompt_exit_state, tId); - ompt_exit_state = this_thr->th.ompt_thread_info.state; - } -#endif - if (ompt_exit_state == ompt_state_idle) { - this_thr->th.ompt_thread_info.state = ompt_state_overhead; - } - } -#endif -#if KMP_STATS_ENABLED - // If we were put into idle state, pop that off the state stack - if (KMP_GET_THREAD_STATE() == IDLE) { - KMP_POP_PARTITIONED_TIMER(); - KMP_SET_THREAD_STATE(thread_state); - this_thr->th.th_stats->resetIdleFlag(); - } -#endif - -#if KMP_OS_UNIX - if (final_spin) - KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false); -#endif - KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin)); -} - -/* Release any threads specified as waiting on the flag by releasing the flag - and resume the waiting thread if indicated by the sleep bit(s). A thread that - calls __kmp_wait_template must call this function to wake up the potentially - sleeping thread and prevent deadlocks! */ -template static inline void __kmp_release_template(C *flag) { -#ifdef KMP_DEBUG - int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1; -#endif - KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get())); - KMP_DEBUG_ASSERT(flag->get()); - KMP_FSYNC_RELEASING(flag->get_void_p()); - - flag->internal_release(); - - KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(), - flag->load())); - - if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { - // Only need to check sleep stuff if infinite block time not set. - // Are *any* threads waiting on flag sleeping? 
- if (flag->is_any_sleeping()) { - for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) { - // if sleeping waiter exists at i, sets current_waiter to i inside flag - kmp_info_t *waiter = flag->get_waiter(i); - if (waiter) { - int wait_gtid = waiter->th.th_info.ds.ds_gtid; - // Wake up thread if needed - KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep " - "flag(%p) set\n", - gtid, wait_gtid, flag->get())); - flag->resume(wait_gtid); // unsets flag's current_waiter when done - } - } - } - } -} - -template struct flag_traits {}; - -template <> struct flag_traits { - typedef kmp_uint32 flag_t; - static const flag_type t = flag32; - static inline flag_t tcr(flag_t f) { return TCR_4(f); } - static inline flag_t test_then_add4(volatile flag_t *f) { - return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f)); - } - static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { - return KMP_TEST_THEN_OR32(f, v); - } - static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { - return KMP_TEST_THEN_AND32(f, v); - } -}; - -template <> struct flag_traits { - typedef kmp_uint64 flag_t; - static const flag_type t = flag64; - static inline flag_t tcr(flag_t f) { return TCR_8(f); } - static inline flag_t test_then_add4(volatile flag_t *f) { - return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f)); - } - static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { - return KMP_TEST_THEN_OR64(f, v); - } - static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { - return KMP_TEST_THEN_AND64(f, v); - } -}; - -// Basic flag that does not use C11 Atomics -template -class kmp_basic_flag_native : public kmp_flag_native { - typedef flag_traits traits_type; - FlagType checker; /**< Value to compare flag to to check if flag has been - released. */ - kmp_info_t - *waiting_threads[1]; /**< Array of threads sleeping on this thread. */ - kmp_uint32 - num_waiting_threads; /**< Number of threads sleeping on this thread. */ -public: - kmp_basic_flag_native(volatile FlagType *p) - : kmp_flag_native(p, traits_type::t), num_waiting_threads(0) {} - kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr) - : kmp_flag_native(p, traits_type::t), num_waiting_threads(1) { - waiting_threads[0] = thr; - } - kmp_basic_flag_native(volatile FlagType *p, FlagType c) - : kmp_flag_native(p, traits_type::t), checker(c), - num_waiting_threads(0) {} - /*! - * param i in index into waiting_threads - * @result the thread that is waiting at index i - */ - kmp_info_t *get_waiter(kmp_uint32 i) { - KMP_DEBUG_ASSERT(i < num_waiting_threads); - return waiting_threads[i]; - } - /*! - * @result num_waiting_threads - */ - kmp_uint32 get_num_waiters() { return num_waiting_threads; } - /*! - * @param thr in the thread which is now waiting - * - * Insert a waiting thread at index 0. - */ - void set_waiter(kmp_info_t *thr) { - waiting_threads[0] = thr; - num_waiting_threads = 1; - } - /*! - * @result true if the flag object has been released. - */ - bool done_check() { return traits_type::tcr(*(this->get())) == checker; } - /*! - * @param old_loc in old value of flag - * @result true if the flag's old value indicates it was released. - */ - bool done_check_val(FlagType old_loc) { return old_loc == checker; } - /*! - * @result true if the flag object is not yet released. - * Used in __kmp_wait_template like: - * @code - * while (flag.notdone_check()) { pause(); } - * @endcode - */ - bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; } - /*! 
- * @result Actual flag value before release was applied. - * Trigger all waiting threads to run by modifying flag to release state. - */ - void internal_release() { - (void)traits_type::test_then_add4((volatile FlagType *)this->get()); - } - /*! - * @result Actual flag value before sleep bit(s) set. - * Notes that there is at least one thread sleeping on the flag by setting - * sleep bit(s). - */ - FlagType set_sleeping() { - return traits_type::test_then_or((volatile FlagType *)this->get(), - KMP_BARRIER_SLEEP_STATE); - } - /*! - * @result Actual flag value before sleep bit(s) cleared. - * Notes that there are no longer threads sleeping on the flag by clearing - * sleep bit(s). - */ - FlagType unset_sleeping() { - return traits_type::test_then_and((volatile FlagType *)this->get(), - ~KMP_BARRIER_SLEEP_STATE); - } - /*! - * @param old_loc in old value of flag - * Test whether there are threads sleeping on the flag's old value in old_loc. - */ - bool is_sleeping_val(FlagType old_loc) { - return old_loc & KMP_BARRIER_SLEEP_STATE; - } - /*! - * Test whether there are threads sleeping on the flag. - */ - bool is_sleeping() { return is_sleeping_val(*(this->get())); } - bool is_any_sleeping() { return is_sleeping_val(*(this->get())); } - kmp_uint8 *get_stolen() { return NULL; } - enum barrier_type get_bt() { return bs_last_barrier; } -}; - -template class kmp_basic_flag : public kmp_flag { - typedef flag_traits traits_type; - FlagType checker; /**< Value to compare flag to to check if flag has been - released. */ - kmp_info_t - *waiting_threads[1]; /**< Array of threads sleeping on this thread. */ - kmp_uint32 - num_waiting_threads; /**< Number of threads sleeping on this thread. */ -public: - kmp_basic_flag(std::atomic *p) - : kmp_flag(p, traits_type::t), num_waiting_threads(0) {} - kmp_basic_flag(std::atomic *p, kmp_info_t *thr) - : kmp_flag(p, traits_type::t), num_waiting_threads(1) { - waiting_threads[0] = thr; - } - kmp_basic_flag(std::atomic *p, FlagType c) - : kmp_flag(p, traits_type::t), checker(c), - num_waiting_threads(0) {} - /*! - * param i in index into waiting_threads - * @result the thread that is waiting at index i - */ - kmp_info_t *get_waiter(kmp_uint32 i) { - KMP_DEBUG_ASSERT(i < num_waiting_threads); - return waiting_threads[i]; - } - /*! - * @result num_waiting_threads - */ - kmp_uint32 get_num_waiters() { return num_waiting_threads; } - /*! - * @param thr in the thread which is now waiting - * - * Insert a waiting thread at index 0. - */ - void set_waiter(kmp_info_t *thr) { - waiting_threads[0] = thr; - num_waiting_threads = 1; - } - /*! - * @result true if the flag object has been released. - */ - bool done_check() { return this->load() == checker; } - /*! - * @param old_loc in old value of flag - * @result true if the flag's old value indicates it was released. - */ - bool done_check_val(FlagType old_loc) { return old_loc == checker; } - /*! - * @result true if the flag object is not yet released. - * Used in __kmp_wait_template like: - * @code - * while (flag.notdone_check()) { pause(); } - * @endcode - */ - bool notdone_check() { return this->load() != checker; } - /*! - * @result Actual flag value before release was applied. - * Trigger all waiting threads to run by modifying flag to release state. - */ - void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); } - /*! - * @result Actual flag value before sleep bit(s) set. - * Notes that there is at least one thread sleeping on the flag by setting - * sleep bit(s). 
- */ - FlagType set_sleeping() { - return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE); - } - /*! - * @result Actual flag value before sleep bit(s) cleared. - * Notes that there are no longer threads sleeping on the flag by clearing - * sleep bit(s). - */ - FlagType unset_sleeping() { - return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE); - } - /*! - * @param old_loc in old value of flag - * Test whether there are threads sleeping on the flag's old value in old_loc. - */ - bool is_sleeping_val(FlagType old_loc) { - return old_loc & KMP_BARRIER_SLEEP_STATE; - } - /*! - * Test whether there are threads sleeping on the flag. - */ - bool is_sleeping() { return is_sleeping_val(this->load()); } - bool is_any_sleeping() { return is_sleeping_val(this->load()); } - kmp_uint8 *get_stolen() { return NULL; } - enum barrier_type get_bt() { return bs_last_barrier; } -}; - -class kmp_flag_32 : public kmp_basic_flag { -public: - kmp_flag_32(std::atomic *p) : kmp_basic_flag(p) {} - kmp_flag_32(std::atomic *p, kmp_info_t *thr) - : kmp_basic_flag(p, thr) {} - kmp_flag_32(std::atomic *p, kmp_uint32 c) - : kmp_basic_flag(p, c) {} - void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); } - void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); } - int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, - int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), - kmp_int32 is_constrained) { - return __kmp_execute_tasks_32( - this_thr, gtid, this, final_spin, - thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); - } - void wait(kmp_info_t *this_thr, - int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) { - if (final_spin) - __kmp_wait_template( - this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj)); - else - __kmp_wait_template( - this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj)); - } - void release() { __kmp_release_template(this); } - flag_type get_ptr_type() { return flag32; } -}; - -class kmp_flag_64 : public kmp_basic_flag_native { -public: - kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag_native(p) {} - kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr) - : kmp_basic_flag_native(p, thr) {} - kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c) - : kmp_basic_flag_native(p, c) {} - void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); } - void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); } - int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, - int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), - kmp_int32 is_constrained) { - return __kmp_execute_tasks_64( - this_thr, gtid, this, final_spin, - thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); - } - void wait(kmp_info_t *this_thr, - int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) { - if (final_spin) - __kmp_wait_template( - this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj)); - else - __kmp_wait_template( - this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj)); - } - void release() { __kmp_release_template(this); } - flag_type get_ptr_type() { return flag64; } -}; - -// Hierarchical 64-bit on-core barrier instantiation -class kmp_flag_oncore : public kmp_flag_native { - kmp_uint64 checker; - kmp_info_t *waiting_threads[1]; - kmp_uint32 num_waiting_threads; - kmp_uint32 - offset; /**< Portion of flag that is of interest for an operation. */ - bool flag_switch; /**< Indicates a switch in flag location. */ - enum barrier_type bt; /**< Barrier type. */ - kmp_info_t *this_thr; /**< Thread that may be redirected to different flag - location. 
*/ -#if USE_ITT_BUILD - void * - itt_sync_obj; /**< ITT object that must be passed to new flag location. */ -#endif - unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) { - return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset]; - } - -public: - kmp_flag_oncore(volatile kmp_uint64 *p) - : kmp_flag_native(p, flag_oncore), num_waiting_threads(0), - flag_switch(false) {} - kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx) - : kmp_flag_native(p, flag_oncore), num_waiting_threads(0), - offset(idx), flag_switch(false) {} - kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx, - enum barrier_type bar_t, - kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt)) - : kmp_flag_native(p, flag_oncore), checker(c), - num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t), - this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {} - kmp_info_t *get_waiter(kmp_uint32 i) { - KMP_DEBUG_ASSERT(i < num_waiting_threads); - return waiting_threads[i]; - } - kmp_uint32 get_num_waiters() { return num_waiting_threads; } - void set_waiter(kmp_info_t *thr) { - waiting_threads[0] = thr; - num_waiting_threads = 1; - } - bool done_check_val(kmp_uint64 old_loc) { - return byteref(&old_loc, offset) == checker; - } - bool done_check() { return done_check_val(*get()); } - bool notdone_check() { - // Calculate flag_switch - if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG) - flag_switch = true; - if (byteref(get(), offset) != 1 && !flag_switch) - return true; - else if (flag_switch) { - this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING; - kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go, - (kmp_uint64)KMP_BARRIER_STATE_BUMP); - __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); - } - return false; - } - void internal_release() { - // Other threads can write their own bytes simultaneously. - if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) { - byteref(get(), offset) = 1; - } else { - kmp_uint64 mask = 0; - byteref(&mask, offset) = 1; - KMP_TEST_THEN_OR64(get(), mask); - } - } - kmp_uint64 set_sleeping() { - return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE); - } - kmp_uint64 unset_sleeping() { - return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE); - } - bool is_sleeping_val(kmp_uint64 old_loc) { - return old_loc & KMP_BARRIER_SLEEP_STATE; - } - bool is_sleeping() { return is_sleeping_val(*get()); } - bool is_any_sleeping() { return is_sleeping_val(*get()); } - void wait(kmp_info_t *this_thr, int final_spin) { - if (final_spin) - __kmp_wait_template( - this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj)); - else - __kmp_wait_template( - this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj)); - } - void release() { __kmp_release_template(this); } - void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); } - void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); } - int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, - int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), - kmp_int32 is_constrained) { - return __kmp_execute_tasks_oncore( - this_thr, gtid, this, final_spin, - thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); - } - kmp_uint8 *get_stolen() { return NULL; } - enum barrier_type get_bt() { return bt; } - flag_type get_ptr_type() { return flag_oncore; } -}; - -// Used to wake up threads, volatile void* flag is usually the th_sleep_loc -// associated with int gtid. 
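The oncore flag packs one "go" byte per waiter into a single kmp_uint64 so a whole group of threads can be released with one 64-bit store or atomic OR. A minimal sketch of that byte-per-slot scheme, with hypothetical names rather than the runtime's API:

#include <atomic>
#include <cstdint>

// Illustrative only: byte 'offset' within one 64-bit word is a waiter slot,
// mirroring kmp_flag_oncore::byteref() and internal_release().
static std::atomic<std::uint64_t> oncore_word{0};

bool slot_released(unsigned offset) {       // cf. done_check_val()
  std::uint64_t v = oncore_word.load(std::memory_order_acquire);
  return ((v >> (8 * offset)) & 0xffu) == 1u;
}

void release_slot(unsigned offset) {        // cf. internal_release()
  // Other slots may be written concurrently, hence the atomic OR in the
  // non-MAX_BLOCKTIME case above, when sleeping threads are possible.
  oncore_word.fetch_or(std::uint64_t{1} << (8 * offset),
                       std::memory_order_release);
}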
-static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) { - if (!flag) - return; - - switch (RCAST(kmp_flag_64 *, CCAST(void *, flag))->get_type()) { - case flag32: - __kmp_resume_32(gtid, NULL); - break; - case flag64: - __kmp_resume_64(gtid, NULL); - break; - case flag_oncore: - __kmp_resume_oncore(gtid, NULL); - break; - } -} - -/*! -@} -*/ - -#endif // KMP_WAIT_RELEASE_H Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_wait_release.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/ompt-event-specific.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/ompt-event-specific.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/ompt-event-specific.h (nonexistent) @@ -1,112 +0,0 @@ -/****************************************************************************** - * File: ompt-event-specific.h - * - * Description: - * - * specify which of the OMPT events are implemented by this runtime system - * and the level of their implementation by a runtime system. - *****************************************************************************/ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#ifndef __OMPT_EVENT_SPECIFIC_H__ -#define __OMPT_EVENT_SPECIFIC_H__ - -#define _ompt_tokenpaste_helper(x, y) x##y -#define _ompt_tokenpaste(x, y) _ompt_tokenpaste_helper(x, y) -#define ompt_event_implementation_status(e) _ompt_tokenpaste(e, _implemented) - -/*---------------------------------------------------------------------------- - | Specify whether an event may occur or not, and whether event callbacks - | never, sometimes, or always occur. - | - | The values for these constants are defined in section 6.1.2 of - | the OMPT TR. They are exposed to tools through ompt_set_callback. 
- +--------------------------------------------------------------------------*/ - -#define ompt_event_UNIMPLEMENTED ompt_set_never -#define ompt_event_MAY_CONVENIENT ompt_set_sometimes -#define ompt_event_MAY_ALWAYS ompt_set_always - -#if OMPT_OPTIONAL -#define ompt_event_MAY_ALWAYS_OPTIONAL ompt_event_MAY_ALWAYS -#else -#define ompt_event_MAY_ALWAYS_OPTIONAL ompt_event_UNIMPLEMENTED -#endif - -/*---------------------------------------------------------------------------- - | Mandatory Events - +--------------------------------------------------------------------------*/ - -#define ompt_callback_thread_begin_implemented ompt_event_MAY_ALWAYS -#define ompt_callback_thread_end_implemented ompt_event_MAY_ALWAYS - -#define ompt_callback_parallel_begin_implemented ompt_event_MAY_ALWAYS -#define ompt_callback_parallel_end_implemented ompt_event_MAY_ALWAYS - -#define ompt_callback_task_create_implemented ompt_event_MAY_ALWAYS -#define ompt_callback_task_schedule_implemented ompt_event_MAY_ALWAYS - -#define ompt_callback_implicit_task_implemented ompt_event_MAY_ALWAYS - -#define ompt_callback_target_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_target_data_op_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_target_submit_implemented ompt_event_UNIMPLEMENTED - -#define ompt_callback_control_tool_implemented ompt_event_MAY_ALWAYS - -#define ompt_callback_device_initialize_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_device_finalize_implemented ompt_event_UNIMPLEMENTED - -#define ompt_callback_device_load_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_device_unload_implemented ompt_event_UNIMPLEMENTED - -/*---------------------------------------------------------------------------- - | Optional Events - +--------------------------------------------------------------------------*/ - -#define ompt_callback_sync_region_wait_implemented \ - ompt_event_MAY_ALWAYS_OPTIONAL - -#define ompt_callback_mutex_released_implemented ompt_event_MAY_ALWAYS_OPTIONAL - -#if OMP_40_ENABLED -#define ompt_callback_dependences_implemented \ - ompt_event_MAY_ALWAYS_OPTIONAL -#define ompt_callback_task_dependence_implemented ompt_event_MAY_ALWAYS_OPTIONAL -#else -#define ompt_callback_dependences_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_task_dependence_implemented ompt_event_UNIMPLEMENTED -#endif /* OMP_40_ENABLED */ - -#define ompt_callback_work_implemented ompt_event_MAY_ALWAYS_OPTIONAL - -#define ompt_callback_master_implemented ompt_event_MAY_ALWAYS_OPTIONAL - -#define ompt_callback_target_map_implemented ompt_event_UNIMPLEMENTED - -#define ompt_callback_sync_region_implemented ompt_event_MAY_ALWAYS_OPTIONAL - -#define ompt_callback_lock_init_implemented ompt_event_MAY_ALWAYS_OPTIONAL -#define ompt_callback_lock_destroy_implemented ompt_event_MAY_ALWAYS_OPTIONAL - -#define ompt_callback_mutex_acquire_implemented ompt_event_MAY_ALWAYS_OPTIONAL -#define ompt_callback_mutex_acquired_implemented ompt_event_MAY_ALWAYS_OPTIONAL -#define ompt_callback_nest_lock_implemented ompt_event_MAY_ALWAYS_OPTIONAL - -#define ompt_callback_flush_implemented ompt_event_MAY_ALWAYS_OPTIONAL - -#define ompt_callback_cancel_implemented ompt_event_MAY_ALWAYS_OPTIONAL - -#define ompt_callback_reduction_implemented ompt_event_UNIMPLEMENTED - -#define ompt_callback_dispatch_implemented ompt_event_UNIMPLEMENTED - -#endif Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/ompt-event-specific.h 
___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_wrapper_getpid.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_wrapper_getpid.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_wrapper_getpid.h (nonexistent) @@ -1,73 +0,0 @@ -/* - * kmp_wrapper_getpid.h -- getpid() declaration. - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#ifndef KMP_WRAPPER_GETPID_H -#define KMP_WRAPPER_GETPID_H - -#if KMP_OS_UNIX - -// On Unix-like systems (Linux* OS and OS X*) getpid() is declared in standard -// headers. -#include -#include -#include -#if KMP_OS_DARWIN -// OS X -#define __kmp_gettid() syscall(SYS_thread_selfid) -#elif KMP_OS_NETBSD -#include -#define __kmp_gettid() _lwp_self() -#elif defined(SYS_gettid) -// Hopefully other Unix systems define SYS_gettid syscall for getting os thread -// id -#define __kmp_gettid() syscall(SYS_gettid) -#else -#warning No gettid found, use getpid instead -#define __kmp_gettid() getpid() -#endif - -#elif KMP_OS_WINDOWS - -// On Windows* OS _getpid() returns int (not pid_t) and is declared in -// "process.h". -#include -// Let us simulate Unix. -#if KMP_MSVC_COMPAT -typedef int pid_t; -#endif -#define getpid _getpid -#define __kmp_gettid() GetCurrentThreadId() - -#else - -#error Unknown or unsupported OS. - -#endif - -/* TODO: All the libomp source code uses pid_t type for storing the result of - getpid(), it is good. But often it printed as "%d", that is not good, because - it ignores pid_t definition (may pid_t be longer that int?). It seems all pid - prints should be rewritten as: - - printf( "%" KMP_UINT64_SPEC, (kmp_uint64) pid ); - - or (at least) as - - printf( "%" KMP_UINT32_SPEC, (kmp_uint32) pid ); - - (kmp_uint32, kmp_uint64, KMP_UINT64_SPEC, and KMP_UNIT32_SPEC are defined in - "kmp_os.h".) 
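A minimal sketch of the print style this TODO proposes, using the standard <cinttypes> macro PRIu64 as a stand-in for KMP_UINT64_SPEC; illustrative only:

#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <sys/types.h>
#include <unistd.h>

// Illustrative only: widen pid_t explicitly instead of assuming "%d" fits.
void print_pid() {
  pid_t pid = getpid();
  std::printf("pid = %" PRIu64 "\n", (std::uint64_t)pid);
}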
*/ - -#endif // KMP_WRAPPER_GETPID_H - -// end of file // Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_wrapper_getpid.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_lock.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_lock.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_lock.h (nonexistent) @@ -1,1297 +0,0 @@ -/* - * kmp_lock.h -- lock header file - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#ifndef KMP_LOCK_H -#define KMP_LOCK_H - -#include // CHAR_BIT -#include // offsetof - -#include "kmp_debug.h" -#include "kmp_os.h" - -#ifdef __cplusplus -#include - -extern "C" { -#endif // __cplusplus - -// ---------------------------------------------------------------------------- -// Have to copy these definitions from kmp.h because kmp.h cannot be included -// due to circular dependencies. Will undef these at end of file. - -#define KMP_PAD(type, sz) \ - (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1)) -#define KMP_GTID_DNE (-2) - -// Forward declaration of ident and ident_t - -struct ident; -typedef struct ident ident_t; - -// End of copied code. -// ---------------------------------------------------------------------------- - -// We need to know the size of the area we can assume that the compiler(s) -// allocated for obects of type omp_lock_t and omp_nest_lock_t. The Intel -// compiler always allocates a pointer-sized area, as does visual studio. -// -// gcc however, only allocates 4 bytes for regular locks, even on 64-bit -// intel archs. It allocates at least 8 bytes for nested lock (more on -// recent versions), but we are bounded by the pointer-sized chunks that -// the Intel compiler allocates. - -#if KMP_OS_LINUX && defined(KMP_GOMP_COMPAT) -#define OMP_LOCK_T_SIZE sizeof(int) -#define OMP_NEST_LOCK_T_SIZE sizeof(void *) -#else -#define OMP_LOCK_T_SIZE sizeof(void *) -#define OMP_NEST_LOCK_T_SIZE sizeof(void *) -#endif - -// The Intel compiler allocates a 32-byte chunk for a critical section. -// Both gcc and visual studio only allocate enough space for a pointer. -// Sometimes we know that the space was allocated by the Intel compiler. -#define OMP_CRITICAL_SIZE sizeof(void *) -#define INTEL_CRITICAL_SIZE 32 - -// lock flags -typedef kmp_uint32 kmp_lock_flags_t; - -#define kmp_lf_critical_section 1 - -// When a lock table is used, the indices are of kmp_lock_index_t -typedef kmp_uint32 kmp_lock_index_t; - -// When memory allocated for locks are on the lock pool (free list), -// it is treated as structs of this type. 
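A sketch of why the pool type below is overlaid into each lock union: a freed lock's own storage doubles as its free-list node. Names here are hypothetical:

// Illustrative only: free-list linkage through the lock's own storage,
// in the spirit of kmp_lock_pool below.
union demo_lock {
  int poll;                    // live lock state (cf. kmp_base_tas_lock)
  struct {
    union demo_lock *next;     // free-list linkage (cf. kmp_lock_pool::next)
  } pool;
};

static union demo_lock *demo_free_list = nullptr;

void demo_free(union demo_lock *lk) {
  lk->pool.next = demo_free_list;  // storage is reused as a list node
  demo_free_list = lk;
}

union demo_lock *demo_alloc() {
  union demo_lock *lk = demo_free_list;
  if (lk)
    demo_free_list = lk->pool.next;
  return lk;                       // caller allocates fresh memory when null
}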
-struct kmp_lock_pool { - union kmp_user_lock *next; - kmp_lock_index_t index; -}; - -typedef struct kmp_lock_pool kmp_lock_pool_t; - -extern void __kmp_validate_locks(void); - -// ---------------------------------------------------------------------------- -// There are 5 lock implementations: -// 1. Test and set locks. -// 2. futex locks (Linux* OS on x86 and -// Intel(R) Many Integrated Core Architecture) -// 3. Ticket (Lamport bakery) locks. -// 4. Queuing locks (with separate spin fields). -// 5. DRPA (Dynamically Reconfigurable Distributed Polling Area) locks -// -// and 3 lock purposes: -// 1. Bootstrap locks -- Used for a few locks available at library -// startup-shutdown time. -// These do not require non-negative global thread ID's. -// 2. Internal RTL locks -- Used everywhere else in the RTL -// 3. User locks (includes critical sections) -// ---------------------------------------------------------------------------- - -// ============================================================================ -// Lock implementations. -// -// Test and set locks. -// -// Non-nested test and set locks differ from the other lock kinds (except -// futex) in that we use the memory allocated by the compiler for the lock, -// rather than a pointer to it. -// -// On lin32, lin_32e, and win_32, the space allocated may be as small as 4 -// bytes, so we have to use a lock table for nested locks, and avoid accessing -// the depth_locked field for non-nested locks. -// -// Information normally available to the tools, such as lock location, lock -// usage (normal lock vs. critical section), etc. is not available with test and -// set locks. -// ---------------------------------------------------------------------------- - -struct kmp_base_tas_lock { - // KMP_LOCK_FREE(tas) => unlocked; locked: (gtid+1) of owning thread - std::atomic poll; - kmp_int32 depth_locked; // depth locked, for nested locks only -}; - -typedef struct kmp_base_tas_lock kmp_base_tas_lock_t; - -union kmp_tas_lock { - kmp_base_tas_lock_t lk; - kmp_lock_pool_t pool; // make certain struct is large enough - double lk_align; // use worst case alignment; no cache line padding -}; - -typedef union kmp_tas_lock kmp_tas_lock_t; - -// Static initializer for test and set lock variables. Usage: -// kmp_tas_lock_t xlock = KMP_TAS_LOCK_INITIALIZER( xlock ); -#define KMP_TAS_LOCK_INITIALIZER(lock) \ - { \ - { ATOMIC_VAR_INIT(KMP_LOCK_FREE(tas)), 0 } \ - } - -extern int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid); -extern int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid); -extern int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid); -extern void __kmp_init_tas_lock(kmp_tas_lock_t *lck); -extern void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck); - -extern int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid); -extern int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid); -extern int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid); -extern void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck); -extern void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck); - -#define KMP_LOCK_RELEASED 1 -#define KMP_LOCK_STILL_HELD 0 -#define KMP_LOCK_ACQUIRED_FIRST 1 -#define KMP_LOCK_ACQUIRED_NEXT 0 -#ifndef KMP_USE_FUTEX -#define KMP_USE_FUTEX \ - (KMP_OS_LINUX && !KMP_OS_CNK && \ - (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)) -#endif -#if KMP_USE_FUTEX - -// ---------------------------------------------------------------------------- -// futex locks. 
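A sketch of the test-and-set protocol described above, where poll == 0 means free and poll == gtid + 1 names the owner; C++11 atomics and illustrative names only:

#include <atomic>

// Illustrative only: acquire by CAS from the free value to gtid + 1,
// release by storing the free value back, as in kmp_base_tas_lock::poll.
static std::atomic<int> poll_word{0};

void tas_acquire(int gtid) {
  int expected = 0;
  while (!poll_word.compare_exchange_weak(expected, gtid + 1,
                                          std::memory_order_acquire))
    expected = 0;  // CAS overwrote expected with the owner; reset and retry
}

void tas_release() {
  poll_word.store(0, std::memory_order_release);  // back to the free value
}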
futex locks are only available on Linux* OS. -// -// Like non-nested test and set lock, non-nested futex locks use the memory -// allocated by the compiler for the lock, rather than a pointer to it. -// -// Information normally available to the tools, such as lock location, lock -// usage (normal lock vs. critical section), etc. is not available with test and -// set locks. With non-nested futex locks, the lock owner is not even available. -// ---------------------------------------------------------------------------- - -struct kmp_base_futex_lock { - volatile kmp_int32 poll; // KMP_LOCK_FREE(futex) => unlocked - // 2*(gtid+1) of owning thread, 0 if unlocked - // locked: (gtid+1) of owning thread - kmp_int32 depth_locked; // depth locked, for nested locks only -}; - -typedef struct kmp_base_futex_lock kmp_base_futex_lock_t; - -union kmp_futex_lock { - kmp_base_futex_lock_t lk; - kmp_lock_pool_t pool; // make certain struct is large enough - double lk_align; // use worst case alignment - // no cache line padding -}; - -typedef union kmp_futex_lock kmp_futex_lock_t; - -// Static initializer for futex lock variables. Usage: -// kmp_futex_lock_t xlock = KMP_FUTEX_LOCK_INITIALIZER( xlock ); -#define KMP_FUTEX_LOCK_INITIALIZER(lock) \ - { \ - { KMP_LOCK_FREE(futex), 0 } \ - } - -extern int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid); -extern int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid); -extern int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid); -extern void __kmp_init_futex_lock(kmp_futex_lock_t *lck); -extern void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck); - -extern int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, - kmp_int32 gtid); -extern int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid); -extern int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, - kmp_int32 gtid); -extern void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck); -extern void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck); - -#endif // KMP_USE_FUTEX - -// ---------------------------------------------------------------------------- -// Ticket locks. - -#ifdef __cplusplus - -#ifdef _MSC_VER -// MSVC won't allow use of std::atomic<> in a union since it has non-trivial -// copy constructor. - -struct kmp_base_ticket_lock { - // `initialized' must be the first entry in the lock data structure! - std::atomic_bool initialized; - volatile union kmp_ticket_lock *self; // points to the lock union - ident_t const *location; // Source code location of omp_init_lock(). - std::atomic_uint - next_ticket; // ticket number to give to next thread which acquires - std::atomic_uint now_serving; // ticket number for thread which holds the lock - std::atomic_int owner_id; // (gtid+1) of owning thread, 0 if unlocked - std::atomic_int depth_locked; // depth locked, for nested locks only - kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock -}; -#else -struct kmp_base_ticket_lock { - // `initialized' must be the first entry in the lock data structure! - std::atomic initialized; - volatile union kmp_ticket_lock *self; // points to the lock union - ident_t const *location; // Source code location of omp_init_lock(). 
- std::atomic - next_ticket; // ticket number to give to next thread which acquires - std::atomic - now_serving; // ticket number for thread which holds the lock - std::atomic owner_id; // (gtid+1) of owning thread, 0 if unlocked - std::atomic depth_locked; // depth locked, for nested locks only - kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock -}; -#endif - -#else // __cplusplus - -struct kmp_base_ticket_lock; - -#endif // !__cplusplus - -typedef struct kmp_base_ticket_lock kmp_base_ticket_lock_t; - -union KMP_ALIGN_CACHE kmp_ticket_lock { - kmp_base_ticket_lock_t - lk; // This field must be first to allow static initializing. - kmp_lock_pool_t pool; - double lk_align; // use worst case alignment - char lk_pad[KMP_PAD(kmp_base_ticket_lock_t, CACHE_LINE)]; -}; - -typedef union kmp_ticket_lock kmp_ticket_lock_t; - -// Static initializer for simple ticket lock variables. Usage: -// kmp_ticket_lock_t xlock = KMP_TICKET_LOCK_INITIALIZER( xlock ); -// Note the macro argument. It is important to make var properly initialized. -#define KMP_TICKET_LOCK_INITIALIZER(lock) \ - { \ - { \ - ATOMIC_VAR_INIT(true) \ - , &(lock), NULL, ATOMIC_VAR_INIT(0U), ATOMIC_VAR_INIT(0U), \ - ATOMIC_VAR_INIT(0), ATOMIC_VAR_INIT(-1) \ - } \ - } - -extern int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid); -extern int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid); -extern int __kmp_test_ticket_lock_with_cheks(kmp_ticket_lock_t *lck, - kmp_int32 gtid); -extern int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid); -extern void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck); -extern void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck); - -extern int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, - kmp_int32 gtid); -extern int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, - kmp_int32 gtid); -extern int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, - kmp_int32 gtid); -extern void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck); -extern void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck); - -// ---------------------------------------------------------------------------- -// Queuing locks. - -#if KMP_USE_ADAPTIVE_LOCKS - -struct kmp_adaptive_lock_info; - -typedef struct kmp_adaptive_lock_info kmp_adaptive_lock_info_t; - -#if KMP_DEBUG_ADAPTIVE_LOCKS - -struct kmp_adaptive_lock_statistics { - /* So we can get stats from locks that haven't been destroyed. */ - kmp_adaptive_lock_info_t *next; - kmp_adaptive_lock_info_t *prev; - - /* Other statistics */ - kmp_uint32 successfulSpeculations; - kmp_uint32 hardFailedSpeculations; - kmp_uint32 softFailedSpeculations; - kmp_uint32 nonSpeculativeAcquires; - kmp_uint32 nonSpeculativeAcquireAttempts; - kmp_uint32 lemmingYields; -}; - -typedef struct kmp_adaptive_lock_statistics kmp_adaptive_lock_statistics_t; - -extern void __kmp_print_speculative_stats(); -extern void __kmp_init_speculative_stats(); - -#endif // KMP_DEBUG_ADAPTIVE_LOCKS - -struct kmp_adaptive_lock_info { - /* Values used for adaptivity. - Although these are accessed from multiple threads we don't access them - atomically, because if we miss updates it probably doesn't matter much. (It - just affects our decision about whether to try speculation on the lock). */ - kmp_uint32 volatile badness; - kmp_uint32 volatile acquire_attempts; - /* Parameters of the lock. 
*/ - kmp_uint32 max_badness; - kmp_uint32 max_soft_retries; - -#if KMP_DEBUG_ADAPTIVE_LOCKS - kmp_adaptive_lock_statistics_t volatile stats; -#endif -}; - -#endif // KMP_USE_ADAPTIVE_LOCKS - -struct kmp_base_queuing_lock { - - // `initialized' must be the first entry in the lock data structure! - volatile union kmp_queuing_lock - *initialized; // Points to the lock union if in initialized state. - - ident_t const *location; // Source code location of omp_init_lock(). - - KMP_ALIGN(8) // tail_id must be 8-byte aligned! - - volatile kmp_int32 - tail_id; // (gtid+1) of thread at tail of wait queue, 0 if empty - // Must be no padding here since head/tail used in 8-byte CAS - volatile kmp_int32 - head_id; // (gtid+1) of thread at head of wait queue, 0 if empty - // Decl order assumes little endian - // bakery-style lock - volatile kmp_uint32 - next_ticket; // ticket number to give to next thread which acquires - volatile kmp_uint32 - now_serving; // ticket number for thread which holds the lock - volatile kmp_int32 owner_id; // (gtid+1) of owning thread, 0 if unlocked - kmp_int32 depth_locked; // depth locked, for nested locks only - - kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock -}; - -typedef struct kmp_base_queuing_lock kmp_base_queuing_lock_t; - -KMP_BUILD_ASSERT(offsetof(kmp_base_queuing_lock_t, tail_id) % 8 == 0); - -union KMP_ALIGN_CACHE kmp_queuing_lock { - kmp_base_queuing_lock_t - lk; // This field must be first to allow static initializing. - kmp_lock_pool_t pool; - double lk_align; // use worst case alignment - char lk_pad[KMP_PAD(kmp_base_queuing_lock_t, CACHE_LINE)]; -}; - -typedef union kmp_queuing_lock kmp_queuing_lock_t; - -extern int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid); -extern int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid); -extern int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid); -extern void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck); -extern void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck); - -extern int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, - kmp_int32 gtid); -extern int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, - kmp_int32 gtid); -extern int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, - kmp_int32 gtid); -extern void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck); -extern void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck); - -#if KMP_USE_ADAPTIVE_LOCKS - -// ---------------------------------------------------------------------------- -// Adaptive locks. -struct kmp_base_adaptive_lock { - kmp_base_queuing_lock qlk; - KMP_ALIGN(CACHE_LINE) - kmp_adaptive_lock_info_t - adaptive; // Information for the speculative adaptive lock -}; - -typedef struct kmp_base_adaptive_lock kmp_base_adaptive_lock_t; - -union KMP_ALIGN_CACHE kmp_adaptive_lock { - kmp_base_adaptive_lock_t lk; - kmp_lock_pool_t pool; - double lk_align; - char lk_pad[KMP_PAD(kmp_base_adaptive_lock_t, CACHE_LINE)]; -}; -typedef union kmp_adaptive_lock kmp_adaptive_lock_t; - -#define GET_QLK_PTR(l) ((kmp_queuing_lock_t *)&(l)->lk.qlk) - -#endif // KMP_USE_ADAPTIVE_LOCKS - -// ---------------------------------------------------------------------------- -// DRDPA ticket locks. -struct kmp_base_drdpa_lock { - // All of the fields on the first cache line are only written when - // initializing or reconfiguring the lock. 
These are relatively rare - // operations, so data from the first cache line will usually stay resident in - // the cache of each thread trying to acquire the lock. - // - // initialized must be the first entry in the lock data structure! - KMP_ALIGN_CACHE - - volatile union kmp_drdpa_lock - *initialized; // points to the lock union if in initialized state - ident_t const *location; // Source code location of omp_init_lock(). - std::atomic *> polls; - std::atomic mask; // is 2**num_polls-1 for mod op - kmp_uint64 cleanup_ticket; // thread with cleanup ticket - std::atomic *old_polls; // will deallocate old_polls - kmp_uint32 num_polls; // must be power of 2 - - // next_ticket it needs to exist in a separate cache line, as it is - // invalidated every time a thread takes a new ticket. - KMP_ALIGN_CACHE - - std::atomic next_ticket; - - // now_serving is used to store our ticket value while we hold the lock. It - // has a slightly different meaning in the DRDPA ticket locks (where it is - // written by the acquiring thread) than it does in the simple ticket locks - // (where it is written by the releasing thread). - // - // Since now_serving is only read an written in the critical section, - // it is non-volatile, but it needs to exist on a separate cache line, - // as it is invalidated at every lock acquire. - // - // Likewise, the vars used for nested locks (owner_id and depth_locked) are - // only written by the thread owning the lock, so they are put in this cache - // line. owner_id is read by other threads, so it must be declared volatile. - KMP_ALIGN_CACHE - kmp_uint64 now_serving; // doesn't have to be volatile - volatile kmp_uint32 owner_id; // (gtid+1) of owning thread, 0 if unlocked - kmp_int32 depth_locked; // depth locked - kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock -}; - -typedef struct kmp_base_drdpa_lock kmp_base_drdpa_lock_t; - -union KMP_ALIGN_CACHE kmp_drdpa_lock { - kmp_base_drdpa_lock_t - lk; // This field must be first to allow static initializing. */ - kmp_lock_pool_t pool; - double lk_align; // use worst case alignment - char lk_pad[KMP_PAD(kmp_base_drdpa_lock_t, CACHE_LINE)]; -}; - -typedef union kmp_drdpa_lock kmp_drdpa_lock_t; - -extern int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid); -extern int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid); -extern int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid); -extern void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck); -extern void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck); - -extern int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, - kmp_int32 gtid); -extern int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid); -extern int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, - kmp_int32 gtid); -extern void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck); -extern void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck); - -// ============================================================================ -// Lock purposes. -// ============================================================================ - -// Bootstrap locks. -// -// Bootstrap locks -- very few locks used at library initialization time. -// Bootstrap locks are currently implemented as ticket locks. -// They could also be implemented as test and set lock, but cannot be -// implemented with other lock kinds as they require gtids which are not -// available at initialization time. 
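Bootstrap locks (and, below, internal RTL locks) are ticket locks; here is a minimal sketch of the ticket discipline they rely on, not the actual kmp_ticket_lock layout:

#include <atomic>
#include <thread>

// Illustrative only: take a ticket, then wait until now_serving reaches it.
// FIFO ordering falls out of the monotonically increasing ticket counter.
struct demo_ticket_lock {
  std::atomic<unsigned> next_ticket{0};
  std::atomic<unsigned> now_serving{0};
};

void ticket_acquire(demo_ticket_lock *lk) {
  unsigned mine = lk->next_ticket.fetch_add(1, std::memory_order_relaxed);
  while (lk->now_serving.load(std::memory_order_acquire) != mine)
    std::this_thread::yield();
}

void ticket_release(demo_ticket_lock *lk) {
  lk->now_serving.fetch_add(1, std::memory_order_release);
}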
- -typedef kmp_ticket_lock_t kmp_bootstrap_lock_t; - -#define KMP_BOOTSTRAP_LOCK_INITIALIZER(lock) KMP_TICKET_LOCK_INITIALIZER((lock)) -#define KMP_BOOTSTRAP_LOCK_INIT(lock) \ - kmp_bootstrap_lock_t lock = KMP_TICKET_LOCK_INITIALIZER(lock) - -static inline int __kmp_acquire_bootstrap_lock(kmp_bootstrap_lock_t *lck) { - return __kmp_acquire_ticket_lock(lck, KMP_GTID_DNE); -} - -static inline int __kmp_test_bootstrap_lock(kmp_bootstrap_lock_t *lck) { - return __kmp_test_ticket_lock(lck, KMP_GTID_DNE); -} - -static inline void __kmp_release_bootstrap_lock(kmp_bootstrap_lock_t *lck) { - __kmp_release_ticket_lock(lck, KMP_GTID_DNE); -} - -static inline void __kmp_init_bootstrap_lock(kmp_bootstrap_lock_t *lck) { - __kmp_init_ticket_lock(lck); -} - -static inline void __kmp_destroy_bootstrap_lock(kmp_bootstrap_lock_t *lck) { - __kmp_destroy_ticket_lock(lck); -} - -// Internal RTL locks. -// -// Internal RTL locks are also implemented as ticket locks, for now. -// -// FIXME - We should go through and figure out which lock kind works best for -// each internal lock, and use the type declaration and function calls for -// that explicit lock kind (and get rid of this section). - -typedef kmp_ticket_lock_t kmp_lock_t; - -#define KMP_LOCK_INIT(lock) kmp_lock_t lock = KMP_TICKET_LOCK_INITIALIZER(lock) - -static inline int __kmp_acquire_lock(kmp_lock_t *lck, kmp_int32 gtid) { - return __kmp_acquire_ticket_lock(lck, gtid); -} - -static inline int __kmp_test_lock(kmp_lock_t *lck, kmp_int32 gtid) { - return __kmp_test_ticket_lock(lck, gtid); -} - -static inline void __kmp_release_lock(kmp_lock_t *lck, kmp_int32 gtid) { - __kmp_release_ticket_lock(lck, gtid); -} - -static inline void __kmp_init_lock(kmp_lock_t *lck) { - __kmp_init_ticket_lock(lck); -} - -static inline void __kmp_destroy_lock(kmp_lock_t *lck) { - __kmp_destroy_ticket_lock(lck); -} - -// User locks. -// -// Do not allocate objects of type union kmp_user_lock!!! This will waste space -// unless __kmp_user_lock_kind == lk_drdpa. Instead, check the value of -// __kmp_user_lock_kind and allocate objects of the type of the appropriate -// union member, and cast their addresses to kmp_user_lock_p. 
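A sketch of the allocation pattern the comment above prescribes: size the allocation by the kind selected at runtime instead of by the worst-case union. The demo_* names are hypothetical:

#include <cstdlib>

// Illustrative only: allocate only the selected variant and hand back a
// generic pointer, cf. __kmp_user_lock_kind and kmp_user_lock_p.
enum demo_lock_kind { demo_lk_tas, demo_lk_ticket };
struct demo_tas    { int poll; int depth_locked; };
struct demo_ticket { unsigned next_ticket, now_serving; };
typedef void *demo_user_lock_p;

demo_user_lock_p demo_user_lock_allocate(demo_lock_kind kind) {
  std::size_t sz =
      (kind == demo_lk_tas) ? sizeof(demo_tas) : sizeof(demo_ticket);
  return std::malloc(sz);  // paying for the variant, not the largest member
}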
- -enum kmp_lock_kind { - lk_default = 0, - lk_tas, -#if KMP_USE_FUTEX - lk_futex, -#endif -#if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX - lk_hle, - lk_rtm, -#endif - lk_ticket, - lk_queuing, - lk_drdpa, -#if KMP_USE_ADAPTIVE_LOCKS - lk_adaptive -#endif // KMP_USE_ADAPTIVE_LOCKS -}; - -typedef enum kmp_lock_kind kmp_lock_kind_t; - -extern kmp_lock_kind_t __kmp_user_lock_kind; - -union kmp_user_lock { - kmp_tas_lock_t tas; -#if KMP_USE_FUTEX - kmp_futex_lock_t futex; -#endif - kmp_ticket_lock_t ticket; - kmp_queuing_lock_t queuing; - kmp_drdpa_lock_t drdpa; -#if KMP_USE_ADAPTIVE_LOCKS - kmp_adaptive_lock_t adaptive; -#endif // KMP_USE_ADAPTIVE_LOCKS - kmp_lock_pool_t pool; -}; - -typedef union kmp_user_lock *kmp_user_lock_p; - -#if !KMP_USE_DYNAMIC_LOCK - -extern size_t __kmp_base_user_lock_size; -extern size_t __kmp_user_lock_size; - -extern kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck); - -static inline kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck) { - KMP_DEBUG_ASSERT(__kmp_get_user_lock_owner_ != NULL); - return (*__kmp_get_user_lock_owner_)(lck); -} - -extern int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck, - kmp_int32 gtid); - -#if KMP_OS_LINUX && \ - (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - -#define __kmp_acquire_user_lock_with_checks(lck, gtid) \ - if (__kmp_user_lock_kind == lk_tas) { \ - if (__kmp_env_consistency_check) { \ - char const *const func = "omp_set_lock"; \ - if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && \ - lck->tas.lk.depth_locked != -1) { \ - KMP_FATAL(LockNestableUsedAsSimple, func); \ - } \ - if ((gtid >= 0) && (lck->tas.lk.poll - 1 == gtid)) { \ - KMP_FATAL(LockIsAlreadyOwned, func); \ - } \ - } \ - if (lck->tas.lk.poll != 0 || \ - !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \ - kmp_uint32 spins; \ - KMP_FSYNC_PREPARE(lck); \ - KMP_INIT_YIELD(spins); \ - if (TCR_4(__kmp_nth) > \ - (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ - KMP_YIELD(TRUE); \ - } else { \ - KMP_YIELD_SPIN(spins); \ - } \ - while (lck->tas.lk.poll != 0 || !__kmp_atomic_compare_store_acq( \ - &lck->tas.lk.poll, 0, gtid + 1)) { \ - if (TCR_4(__kmp_nth) > \ - (__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc)) { \ - KMP_YIELD(TRUE); \ - } else { \ - KMP_YIELD_SPIN(spins); \ - } \ - } \ - } \ - KMP_FSYNC_ACQUIRED(lck); \ - } else { \ - KMP_DEBUG_ASSERT(__kmp_acquire_user_lock_with_checks_ != NULL); \ - (*__kmp_acquire_user_lock_with_checks_)(lck, gtid); \ - } - -#else -static inline int __kmp_acquire_user_lock_with_checks(kmp_user_lock_p lck, - kmp_int32 gtid) { - KMP_DEBUG_ASSERT(__kmp_acquire_user_lock_with_checks_ != NULL); - return (*__kmp_acquire_user_lock_with_checks_)(lck, gtid); -} -#endif - -extern int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck, - kmp_int32 gtid); - -#if KMP_OS_LINUX && \ - (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - -#include "kmp_i18n.h" /* AC: KMP_FATAL definition */ -extern int __kmp_env_consistency_check; /* AC: copy from kmp.h here */ -static inline int __kmp_test_user_lock_with_checks(kmp_user_lock_p lck, - kmp_int32 gtid) { - if (__kmp_user_lock_kind == lk_tas) { - if (__kmp_env_consistency_check) { - char const *const func = "omp_test_lock"; - if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && - lck->tas.lk.depth_locked != -1) { - KMP_FATAL(LockNestableUsedAsSimple, func); - } - } - return ((lck->tas.lk.poll == 0) && - __kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)); - } else { - KMP_DEBUG_ASSERT(__kmp_test_user_lock_with_checks_ != NULL); - return (*__kmp_test_user_lock_with_checks_)(lck, gtid); - } -} -#else -static inline int __kmp_test_user_lock_with_checks(kmp_user_lock_p lck, - kmp_int32 gtid) { - KMP_DEBUG_ASSERT(__kmp_test_user_lock_with_checks_ != NULL); - return (*__kmp_test_user_lock_with_checks_)(lck, gtid); -} -#endif - -extern int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck, - kmp_int32 gtid); - -static inline void __kmp_release_user_lock_with_checks(kmp_user_lock_p lck, - kmp_int32 gtid) { - KMP_DEBUG_ASSERT(__kmp_release_user_lock_with_checks_ != NULL); - (*__kmp_release_user_lock_with_checks_)(lck, gtid); -} - -extern void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck); - -static inline void __kmp_init_user_lock_with_checks(kmp_user_lock_p lck) { - KMP_DEBUG_ASSERT(__kmp_init_user_lock_with_checks_ != NULL); - (*__kmp_init_user_lock_with_checks_)(lck); -} - -// We need a non-checking version of destroy lock for when the RTL is -// doing the cleanup as it can't always tell if the lock is nested or not. 
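The nested acquire paths above bump depth_locked when the owning thread re-enters and report KMP_LOCK_ACQUIRED_FIRST or KMP_LOCK_ACQUIRED_NEXT accordingly. A compact sketch of that depth accounting, with illustrative names and the underlying acquisition omitted:

// Illustrative only: nested-lock depth bookkeeping.
struct demo_nest_lock {
  int owner_gtid = -1;  // -1 means unowned
  int depth = 0;
};

int nest_acquire(demo_nest_lock *lk, int gtid) {
  if (lk->owner_gtid == gtid) {
    ++lk->depth;
    return 0;           // cf. KMP_LOCK_ACQUIRED_NEXT
  }
  // ... acquire the underlying lock here (omitted) ...
  lk->owner_gtid = gtid;
  lk->depth = 1;
  return 1;             // cf. KMP_LOCK_ACQUIRED_FIRST
}

int nest_release(demo_nest_lock *lk) {
  if (--lk->depth == 0) {
    lk->owner_gtid = -1;  // the last release actually unlocks
    return 1;             // cf. KMP_LOCK_RELEASED
  }
  return 0;               // cf. KMP_LOCK_STILL_HELD
}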
-extern void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck); - -static inline void __kmp_destroy_user_lock(kmp_user_lock_p lck) { - KMP_DEBUG_ASSERT(__kmp_destroy_user_lock_ != NULL); - (*__kmp_destroy_user_lock_)(lck); -} - -extern void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck); - -static inline void __kmp_destroy_user_lock_with_checks(kmp_user_lock_p lck) { - KMP_DEBUG_ASSERT(__kmp_destroy_user_lock_with_checks_ != NULL); - (*__kmp_destroy_user_lock_with_checks_)(lck); -} - -extern int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck, - kmp_int32 gtid); - -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) - -#define __kmp_acquire_nested_user_lock_with_checks(lck, gtid, depth) \ - if (__kmp_user_lock_kind == lk_tas) { \ - if (__kmp_env_consistency_check) { \ - char const *const func = "omp_set_nest_lock"; \ - if ((sizeof(kmp_tas_lock_t) <= OMP_NEST_LOCK_T_SIZE) && \ - lck->tas.lk.depth_locked == -1) { \ - KMP_FATAL(LockSimpleUsedAsNestable, func); \ - } \ - } \ - if (lck->tas.lk.poll - 1 == gtid) { \ - lck->tas.lk.depth_locked += 1; \ - *depth = KMP_LOCK_ACQUIRED_NEXT; \ - } else { \ - if ((lck->tas.lk.poll != 0) || \ - !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \ - kmp_uint32 spins; \ - KMP_FSYNC_PREPARE(lck); \ - KMP_INIT_YIELD(spins); \ - if (TCR_4(__kmp_nth) > \ - (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ - KMP_YIELD(TRUE); \ - } else { \ - KMP_YIELD_SPIN(spins); \ - } \ - while ( \ - (lck->tas.lk.poll != 0) || \ - !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \ - if (TCR_4(__kmp_nth) > \ - (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ - KMP_YIELD(TRUE); \ - } else { \ - KMP_YIELD_SPIN(spins); \ - } \ - } \ - } \ - lck->tas.lk.depth_locked = 1; \ - *depth = KMP_LOCK_ACQUIRED_FIRST; \ - } \ - KMP_FSYNC_ACQUIRED(lck); \ - } else { \ - KMP_DEBUG_ASSERT(__kmp_acquire_nested_user_lock_with_checks_ != NULL); \ - *depth = (*__kmp_acquire_nested_user_lock_with_checks_)(lck, gtid); \ - } - -#else -static inline void -__kmp_acquire_nested_user_lock_with_checks(kmp_user_lock_p lck, kmp_int32 gtid, - int *depth) { - KMP_DEBUG_ASSERT(__kmp_acquire_nested_user_lock_with_checks_ != NULL); - *depth = (*__kmp_acquire_nested_user_lock_with_checks_)(lck, gtid); -} -#endif - -extern int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck, - kmp_int32 gtid); - -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) -static inline int __kmp_test_nested_user_lock_with_checks(kmp_user_lock_p lck, - kmp_int32 gtid) { - if (__kmp_user_lock_kind == lk_tas) { - int retval; - if (__kmp_env_consistency_check) { - char const *const func = "omp_test_nest_lock"; - if ((sizeof(kmp_tas_lock_t) <= OMP_NEST_LOCK_T_SIZE) && - lck->tas.lk.depth_locked == -1) { - KMP_FATAL(LockSimpleUsedAsNestable, func); - } - } - KMP_DEBUG_ASSERT(gtid >= 0); - if (lck->tas.lk.poll - 1 == - gtid) { /* __kmp_get_tas_lock_owner( lck ) == gtid */ - return ++lck->tas.lk.depth_locked; /* same owner, depth increased */ - } - retval = ((lck->tas.lk.poll == 0) && - __kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)); - if (retval) { - KMP_MB(); - lck->tas.lk.depth_locked = 1; - } - return retval; - } else { - KMP_DEBUG_ASSERT(__kmp_test_nested_user_lock_with_checks_ != NULL); - return (*__kmp_test_nested_user_lock_with_checks_)(lck, gtid); - } -} -#else -static inline int __kmp_test_nested_user_lock_with_checks(kmp_user_lock_p lck, - kmp_int32 gtid) { - KMP_DEBUG_ASSERT(__kmp_test_nested_user_lock_with_checks_ != 
NULL);
-  return (*__kmp_test_nested_user_lock_with_checks_)(lck, gtid);
-}
-#endif
-
-extern int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
-                                                          kmp_int32 gtid);
-
-static inline int
-__kmp_release_nested_user_lock_with_checks(kmp_user_lock_p lck,
-                                           kmp_int32 gtid) {
-  KMP_DEBUG_ASSERT(__kmp_release_nested_user_lock_with_checks_ != NULL);
-  return (*__kmp_release_nested_user_lock_with_checks_)(lck, gtid);
-}
-
-extern void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck);
-
-static inline void
-__kmp_init_nested_user_lock_with_checks(kmp_user_lock_p lck) {
-  KMP_DEBUG_ASSERT(__kmp_init_nested_user_lock_with_checks_ != NULL);
-  (*__kmp_init_nested_user_lock_with_checks_)(lck);
-}
-
-extern void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck);
-
-static inline void
-__kmp_destroy_nested_user_lock_with_checks(kmp_user_lock_p lck) {
-  KMP_DEBUG_ASSERT(__kmp_destroy_nested_user_lock_with_checks_ != NULL);
-  (*__kmp_destroy_nested_user_lock_with_checks_)(lck);
-}
-
-// user lock functions which do not necessarily exist for all lock kinds.
-//
-// The "set" functions usually have wrapper routines that check for a NULL set
-// function pointer and call it if non-NULL.
-//
-// In some cases, it makes sense to have a "get" wrapper function check for a
-// NULL get function pointer and return NULL / invalid value / error code if
-// the function pointer is NULL.
-//
-// In other cases, the calling code really should differentiate between an
-// unimplemented function and one that is implemented but returning NULL /
-// invalid value. If this is the case, no get function wrapper exists.
-
-extern int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck);
-
-// no set function; fields set during local allocation
-
-extern const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck);
-
-static inline const ident_t *__kmp_get_user_lock_location(kmp_user_lock_p lck) {
-  if (__kmp_get_user_lock_location_ != NULL) {
-    return (*__kmp_get_user_lock_location_)(lck);
-  } else {
-    return NULL;
-  }
-}
-
-extern void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
-                                             const ident_t *loc);
-
-static inline void __kmp_set_user_lock_location(kmp_user_lock_p lck,
-                                                const ident_t *loc) {
-  if (__kmp_set_user_lock_location_ != NULL) {
-    (*__kmp_set_user_lock_location_)(lck, loc);
-  }
-}
-
-extern kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck);
-
-extern void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
-                                          kmp_lock_flags_t flags);
-
-static inline void __kmp_set_user_lock_flags(kmp_user_lock_p lck,
-                                             kmp_lock_flags_t flags) {
-  if (__kmp_set_user_lock_flags_ != NULL) {
-    (*__kmp_set_user_lock_flags_)(lck, flags);
-  }
-}
-
-// The function which sets up all of the vtbl pointers for kmp_user_lock_t.
-extern void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind);
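[Editor's note] The NULL-checked function-pointer wrapper pattern described in the comment above generalizes beyond locks. A minimal stand-alone sketch, with invented names (`get_location_`, `get_location`) that are not part of this header:

#include <cstdio>

// Hypothetical stand-in for a "get" vtbl slot that some kinds leave unset.
typedef const char *(*get_location_fn)(int lock_id);

static get_location_fn get_location_ = nullptr; // may stay NULL

// Wrapper: returns NULL instead of crashing when the slot is unimplemented.
static const char *get_location(int lock_id) {
  return (get_location_ != nullptr) ? (*get_location_)(lock_id) : nullptr;
}

int main() {
  const char *loc = get_location(0);
  std::printf("%s\n", loc ? loc : "(no location)");
  return 0;
}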
-
-// Macros for binding user lock functions.
-#define KMP_BIND_USER_LOCK_TEMPLATE(nest, kind, suffix) \
-  { \
-    __kmp_acquire##nest##user_lock_with_checks_ = (int (*)( \
-        kmp_user_lock_p, kmp_int32))__kmp_acquire##nest##kind##_##suffix; \
-    __kmp_release##nest##user_lock_with_checks_ = (int (*)( \
-        kmp_user_lock_p, kmp_int32))__kmp_release##nest##kind##_##suffix; \
-    __kmp_test##nest##user_lock_with_checks_ = (int (*)( \
-        kmp_user_lock_p, kmp_int32))__kmp_test##nest##kind##_##suffix; \
-    __kmp_init##nest##user_lock_with_checks_ = \
-        (void (*)(kmp_user_lock_p))__kmp_init##nest##kind##_##suffix; \
-    __kmp_destroy##nest##user_lock_with_checks_ = \
-        (void (*)(kmp_user_lock_p))__kmp_destroy##nest##kind##_##suffix; \
-  }
-
-#define KMP_BIND_USER_LOCK(kind) KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock)
-#define KMP_BIND_USER_LOCK_WITH_CHECKS(kind) \
-  KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock_with_checks)
-#define KMP_BIND_NESTED_USER_LOCK(kind) \
-  KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock)
-#define KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(kind) \
-  KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock_with_checks)
-
-// User lock table & lock allocation
-/* On 64-bit Linux* OS (and OS X*) the GNU compiler allocates only 4 bytes of
-   memory for a lock variable, which is not enough to store a pointer, so we
-   have to use lock indexes instead of pointers and maintain a lock table to
-   map indexes to pointers.
-
-
-   Note: The first element of the table is not a pointer to a lock! It is a
-   pointer to the previously allocated table (or NULL if it is the first
-   table).
-
-   Usage:
-
-   if ( OMP_LOCK_T_SIZE < sizeof( <lock> ) ) { // or OMP_NEST_LOCK_T_SIZE
-     Lock table is fully utilized. User locks are indexes, so the table is
-     used on every user lock operation.
-     Note: it may be the case (lin_32) that we don't need to use a lock
-     table for regular locks, but do need the table for nested locks.
-   }
-   else {
-     Lock table is initialized but not actually used.
-   }
-*/
-
-struct kmp_lock_table {
-  kmp_lock_index_t used; // Number of used elements
-  kmp_lock_index_t allocated; // Number of allocated elements
-  kmp_user_lock_p *table; // Lock table.
-};
-
-typedef struct kmp_lock_table kmp_lock_table_t;
-
-extern kmp_lock_table_t __kmp_user_lock_table;
-extern kmp_user_lock_p __kmp_lock_pool;
-
-struct kmp_block_of_locks {
-  struct kmp_block_of_locks *next_block;
-  void *locks;
-};
-
-typedef struct kmp_block_of_locks kmp_block_of_locks_t;
-
-extern kmp_block_of_locks_t *__kmp_lock_blocks;
-extern int __kmp_num_locks_in_block;
-
-extern kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock,
-                                                kmp_int32 gtid,
-                                                kmp_lock_flags_t flags);
-extern void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
-                                 kmp_user_lock_p lck);
-extern kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock,
-                                              char const *func);
-extern void __kmp_cleanup_user_locks();
-
-#define KMP_CHECK_USER_LOCK_INIT() \
-  { \
-    if (!TCR_4(__kmp_init_user_locks)) { \
-      __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); \
-      if (!TCR_4(__kmp_init_user_locks)) { \
-        TCW_4(__kmp_init_user_locks, TRUE); \
-      } \
-      __kmp_release_bootstrap_lock(&__kmp_initz_lock); \
-    } \
-  }
-
-#endif // KMP_USE_DYNAMIC_LOCK
-
-#undef KMP_PAD
-#undef KMP_GTID_DNE
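[Editor's note] A minimal sketch of the index-based lock table described in the comment above, with simplified types and invented names (`Lock`, `init_lock`, `lookup`): when the user's lock word cannot hold a pointer, a 1-based index into a side table is stored instead.

#include <cstdint>
#include <vector>

struct Lock { int owner = -1; };
static std::vector<Lock *> table; // index -> heap-allocated lock

static void init_lock(uint32_t *user_word) {
  table.push_back(new Lock());
  *user_word = (uint32_t)table.size(); // store a 1-based index, not a pointer
}

static Lock *lookup(uint32_t user_word) { return table[user_word - 1]; }

int main() {
  uint32_t word = 0;
  init_lock(&word);
  lookup(word)->owner = 42; // operate through the index, not a raw pointer
  delete lookup(word);      // sketch-level cleanup
  return 0;
}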
-
-#if KMP_USE_DYNAMIC_LOCK
-// KMP_USE_DYNAMIC_LOCK enables dynamic dispatch of lock functions without
-// breaking the current compatibility. Essential functionality of this new code
-// is dynamic dispatch, but it also implements (or enables implementation of)
-// hinted user lock and critical section which will be part of OMP 4.5 soon.
-//
-// Lock type can be decided at creation time (i.e., lock initialization), and
-// every subsequent lock function call on the created lock object requires type
-// extraction and a call through a jump table using the extracted type. This
-// type information is stored in two different ways depending on the size of
-// the lock object, and we differentiate lock types by this size requirement -
-// direct and indirect locks.
-//
-// Direct locks:
-// A direct lock object fits into the space created by the compiler for an
-// omp_lock_t object, and the TAS/Futex locks fall into this category. We use
-// the low byte of the lock object as the storage for the lock type, and an
-// appropriate bit operation is required to access the data meaningful to the
-// lock algorithms. Also, to differentiate a direct lock from an indirect lock,
-// 1 is written to the LSB of the lock object. The newly introduced "hle" lock
-// is also a direct lock.
-//
-// Indirect locks:
-// An indirect lock object requires more space than the compiler-generated
-// space, so it is allocated from the heap. Depending on the size of the
-// compiler-generated space for the lock (i.e., size of omp_lock_t), this
-// omp_lock_t object stores either the address of the heap-allocated indirect
-// lock (void * fits in the object) or an index to the indirect lock table
-// entry that holds the address. The Ticket/Queuing/DRDPA/Adaptive locks fall
-// into this category, and the newly introduced "rtm" lock is also an indirect
-// lock, implemented on top of the Queuing lock. When the omp_lock_t object
-// holds an index (not a lock address), 0 is written to the LSB to
-// differentiate the lock from a direct lock, and the remaining part is the
-// actual index into the indirect lock table.
-
-#include <stdint.h> // for uintptr_t
-
-// Shortcuts
-#define KMP_USE_INLINED_TAS \
-  (KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)) && 1
-#define KMP_USE_INLINED_FUTEX KMP_USE_FUTEX && 0
-
-// List of lock definitions; all nested locks are indirect locks.
-// hle lock is xchg lock prefixed with XACQUIRE/XRELEASE.
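[Editor's note] A stand-alone sketch of the LSB tagging scheme just described, with made-up values (not code from this header): a set LSB marks a direct lock whose low byte carries the type tag; a clear LSB marks an indirect lock whose remaining bits index the lock table.

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t direct_word = (0x2au << 8) | 0x05; // tag 5 in low byte, LSB = 1
  uint32_t indirect_word = 7u << 1;           // table index 7, LSB = 0

  if (direct_word & 1)
    std::printf("direct lock, tag = %u\n", direct_word & 0xffu);
  if (!(indirect_word & 1))
    std::printf("indirect lock, table index = %u\n", indirect_word >> 1);
  return 0;
}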
-#if KMP_USE_TSX
-#if KMP_USE_FUTEX
-#define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a) m(hle, a)
-#define KMP_FOREACH_I_LOCK(m, a) \
-  m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \
-      m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) \
-          m(nested_queuing, a) m(nested_drdpa, a)
-#else
-#define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(hle, a)
-#define KMP_FOREACH_I_LOCK(m, a) \
-  m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \
-      m(nested_tas, a) m(nested_ticket, a) m(nested_queuing, a) \
-          m(nested_drdpa, a)
-#endif // KMP_USE_FUTEX
-#define KMP_LAST_D_LOCK lockseq_hle
-#else
-#if KMP_USE_FUTEX
-#define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a)
-#define KMP_FOREACH_I_LOCK(m, a) \
-  m(ticket, a) m(queuing, a) m(drdpa, a) m(nested_tas, a) m(nested_futex, a) \
-      m(nested_ticket, a) m(nested_queuing, a) m(nested_drdpa, a)
-#define KMP_LAST_D_LOCK lockseq_futex
-#else
-#define KMP_FOREACH_D_LOCK(m, a) m(tas, a)
-#define KMP_FOREACH_I_LOCK(m, a) \
-  m(ticket, a) m(queuing, a) m(drdpa, a) m(nested_tas, a) m(nested_ticket, a) \
-      m(nested_queuing, a) m(nested_drdpa, a)
-#define KMP_LAST_D_LOCK lockseq_tas
-#endif // KMP_USE_FUTEX
-#endif // KMP_USE_TSX
-
-// Information used in dynamic dispatch
-#define KMP_LOCK_SHIFT \
-  8 // number of low bits to be used as tag for direct locks
-#define KMP_FIRST_D_LOCK lockseq_tas
-#define KMP_FIRST_I_LOCK lockseq_ticket
-#define KMP_LAST_I_LOCK lockseq_nested_drdpa
-#define KMP_NUM_I_LOCKS \
-  (locktag_nested_drdpa + 1) // number of indirect lock types
-
-// Base type for dynamic locks.
-typedef kmp_uint32 kmp_dyna_lock_t;
-
-// Lock sequence that enumerates all lock kinds. Always make this enumeration
-// consistent with kmp_lockseq_t in the include directory.
-typedef enum {
-  lockseq_indirect = 0,
-#define expand_seq(l, a) lockseq_##l,
-  KMP_FOREACH_D_LOCK(expand_seq, 0) KMP_FOREACH_I_LOCK(expand_seq, 0)
-#undef expand_seq
-} kmp_dyna_lockseq_t;
-
-// Enumerates indirect lock tags.
-typedef enum {
-#define expand_tag(l, a) locktag_##l,
-  KMP_FOREACH_I_LOCK(expand_tag, 0)
-#undef expand_tag
-} kmp_indirect_locktag_t;
-
-// Utility macros that extract information from lock sequences.
-#define KMP_IS_D_LOCK(seq) \
-  ((seq) >= KMP_FIRST_D_LOCK && (seq) <= KMP_LAST_D_LOCK)
-#define KMP_IS_I_LOCK(seq) \
-  ((seq) >= KMP_FIRST_I_LOCK && (seq) <= KMP_LAST_I_LOCK)
-#define KMP_GET_I_TAG(seq) (kmp_indirect_locktag_t)((seq)-KMP_FIRST_I_LOCK)
-#define KMP_GET_D_TAG(seq) ((seq) << 1 | 1)
-
-// Enumerates direct lock tags starting from indirect tag.
-typedef enum {
-#define expand_tag(l, a) locktag_##l = KMP_GET_D_TAG(lockseq_##l),
-  KMP_FOREACH_D_LOCK(expand_tag, 0)
-#undef expand_tag
-} kmp_direct_locktag_t;
-
-// Indirect lock type
-typedef struct {
-  kmp_user_lock_p lock;
-  kmp_indirect_locktag_t type;
-} kmp_indirect_lock_t;
-
-// Function tables for direct locks. Set/unset/test differentiate functions
-// with/without consistency checking.
-extern void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t);
-extern void (*(*__kmp_direct_destroy))(kmp_dyna_lock_t *);
-extern int (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32);
-extern int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32);
-extern int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32);
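[Editor's note] To make the tag-driven dispatch concrete, a sketch with simplified types (not the runtime's actual tables) of calling a lock operation through a function table indexed by the tag stored in the lock word, in the spirit of the KMP_D_LOCK_FUNC / KMP_I_LOCK_FUNC macros defined below:

#include <cstdint>
#include <cstdio>

typedef void (*lock_op_fn)(uint32_t *lock_word);

static void tas_set(uint32_t *) { std::puts("tas set"); }
static void hle_set(uint32_t *) { std::puts("hle set"); }

// Direct tags are odd ((seq << 1) | 1), so a real table is sized for the
// largest tag; this toy table registers only two slots. The real macro also
// verifies the LSB before treating the low byte as a direct tag.
static lock_op_fn direct_set[8] = {nullptr, tas_set, nullptr, hle_set};

int main() {
  uint32_t word = 3; // low byte holds tag 3 -> hle_set in this toy table
  direct_set[word & 0xff](&word);
  return 0;
}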
-
-// Function tables for indirect locks. Set/unset/test differentiate functions
-// with/without consistency checking.
-extern void (*__kmp_indirect_init[])(kmp_user_lock_p);
-extern void (*(*__kmp_indirect_destroy))(kmp_user_lock_p);
-extern int (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32);
-extern int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32);
-extern int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32);
-
-// Extracts direct lock tag from a user lock pointer
-#define KMP_EXTRACT_D_TAG(l) \
-  (*((kmp_dyna_lock_t *)(l)) & ((1 << KMP_LOCK_SHIFT) - 1) & \
-   -(*((kmp_dyna_lock_t *)(l)) & 1))
-
-// Extracts indirect lock index from a user lock pointer
-#define KMP_EXTRACT_I_INDEX(l) (*(kmp_lock_index_t *)(l) >> 1)
-
-// Returns function pointer to the direct lock function with l (kmp_dyna_lock_t
-// *) and op (operation type).
-#define KMP_D_LOCK_FUNC(l, op) __kmp_direct_##op[KMP_EXTRACT_D_TAG(l)]
-
-// Returns function pointer to the indirect lock function with l
-// (kmp_indirect_lock_t *) and op (operation type).
-#define KMP_I_LOCK_FUNC(l, op) \
-  __kmp_indirect_##op[((kmp_indirect_lock_t *)(l))->type]
-
-// Initializes a direct lock with the given lock pointer and lock sequence.
-#define KMP_INIT_D_LOCK(l, seq) \
-  __kmp_direct_init[KMP_GET_D_TAG(seq)]((kmp_dyna_lock_t *)l, seq)
-
-// Initializes an indirect lock with the given lock pointer and lock sequence.
-#define KMP_INIT_I_LOCK(l, seq) \
-  __kmp_direct_init[0]((kmp_dyna_lock_t *)(l), seq)
-
-// Returns "free" lock value for the given lock type.
-#define KMP_LOCK_FREE(type) (locktag_##type)
-
-// Returns "busy" lock value for the given lock type.
-#define KMP_LOCK_BUSY(v, type) ((v) << KMP_LOCK_SHIFT | locktag_##type)
-
-// Returns lock value after removing (shifting) lock tag.
-#define KMP_LOCK_STRIP(v) ((v) >> KMP_LOCK_SHIFT)
-
-// Initializes global states and data structures for managing dynamic user
-// locks.
-extern void __kmp_init_dynamic_user_locks();
-
-// Allocates and returns an indirect lock with the given indirect lock tag.
-extern kmp_indirect_lock_t *
-__kmp_allocate_indirect_lock(void **, kmp_int32, kmp_indirect_locktag_t);
-
-// Cleans up global states and data structures for managing dynamic user locks.
-extern void __kmp_cleanup_indirect_user_locks();
-
-// Default user lock sequence when not using hinted locks.
-extern kmp_dyna_lockseq_t __kmp_user_lock_seq;
-
-// Jump table for "set lock location", available only for indirect locks.
-extern void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
-                                                            const ident_t *);
-#define KMP_SET_I_LOCK_LOCATION(lck, loc) \
-  { \
-    if (__kmp_indirect_set_location[(lck)->type] != NULL) \
-      __kmp_indirect_set_location[(lck)->type]((lck)->lock, loc); \
-  }
-
-// Jump table for "set lock flags", available only for indirect locks.
-extern void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
-                                                         kmp_lock_flags_t);
-#define KMP_SET_I_LOCK_FLAGS(lck, flag) \
-  { \
-    if (__kmp_indirect_set_flags[(lck)->type] != NULL) \
-      __kmp_indirect_set_flags[(lck)->type]((lck)->lock, flag); \
-  }
-
-// Jump table for "get lock location", available only for indirect locks.
-extern const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(
-    kmp_user_lock_p);
-#define KMP_GET_I_LOCK_LOCATION(lck) \
-  (__kmp_indirect_get_location[(lck)->type] != NULL \
-       ? __kmp_indirect_get_location[(lck)->type]((lck)->lock) \
-       : NULL)
-
-// Jump table for "get lock flags", available only for indirect locks.
-extern kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])( - kmp_user_lock_p); -#define KMP_GET_I_LOCK_FLAGS(lck) \ - (__kmp_indirect_get_flags[(lck)->type] != NULL \ - ? __kmp_indirect_get_flags[(lck)->type]((lck)->lock) \ - : NULL) - -#define KMP_I_LOCK_CHUNK \ - 1024 // number of kmp_indirect_lock_t objects to be allocated together - -// Lock table for indirect locks. -typedef struct kmp_indirect_lock_table { - kmp_indirect_lock_t **table; // blocks of indirect locks allocated - kmp_lock_index_t size; // size of the indirect lock table - kmp_lock_index_t next; // index to the next lock to be allocated -} kmp_indirect_lock_table_t; - -extern kmp_indirect_lock_table_t __kmp_i_lock_table; - -// Returns the indirect lock associated with the given index. -#define KMP_GET_I_LOCK(index) \ - (*(__kmp_i_lock_table.table + (index) / KMP_I_LOCK_CHUNK) + \ - (index) % KMP_I_LOCK_CHUNK) - -// Number of locks in a lock block, which is fixed to "1" now. -// TODO: No lock block implementation now. If we do support, we need to manage -// lock block data structure for each indirect lock type. -extern int __kmp_num_locks_in_block; - -// Fast lock table lookup without consistency checking -#define KMP_LOOKUP_I_LOCK(l) \ - ((OMP_LOCK_T_SIZE < sizeof(void *)) ? KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(l)) \ - : *((kmp_indirect_lock_t **)(l))) - -// Used once in kmp_error.cpp -extern kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p, kmp_uint32); - -#else // KMP_USE_DYNAMIC_LOCK - -#define KMP_LOCK_BUSY(v, type) (v) -#define KMP_LOCK_FREE(type) 0 -#define KMP_LOCK_STRIP(v) (v) - -#endif // KMP_USE_DYNAMIC_LOCK - -// data structure for using backoff within spin locks. -typedef struct { - kmp_uint32 step; // current step - kmp_uint32 max_backoff; // upper bound of outer delay loop - kmp_uint32 min_tick; // size of inner delay loop in ticks (machine-dependent) -} kmp_backoff_t; - -// Runtime's default backoff parameters -extern kmp_backoff_t __kmp_spin_backoff_params; - -// Backoff function -extern void __kmp_spin_backoff(kmp_backoff_t *); - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif /* KMP_LOCK_H */ Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_lock.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_str.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_str.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_str.h (nonexistent) @@ -1,126 +0,0 @@ -/* - * kmp_str.h -- String manipulation routines. - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef KMP_STR_H
-#define KMP_STR_H
-
-#include <stdarg.h>
-#include <string.h>
-
-#include "kmp_os.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif // __cplusplus
-
-#if KMP_OS_WINDOWS
-#define strdup _strdup
-#endif
-
-/* some macros to replace ctype.h functions */
-#define TOLOWER(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) + 'a' - 'A') : (c))
-
-struct kmp_str_buf {
-  char *str; // Pointer to buffer content, read only.
-  unsigned int size; // Do not change this field!
-  int used; // Number of characters printed to buffer, read only.
-  char bulk[512]; // Do not use this field!
-}; // struct kmp_str_buf
-typedef struct kmp_str_buf kmp_str_buf_t;
-
-#define __kmp_str_buf_init(b) \
-  { \
-    (b)->str = (b)->bulk; \
-    (b)->size = sizeof((b)->bulk); \
-    (b)->used = 0; \
-    (b)->bulk[0] = 0; \
-  }
-
-void __kmp_str_buf_clear(kmp_str_buf_t *buffer);
-void __kmp_str_buf_reserve(kmp_str_buf_t *buffer, int size);
-void __kmp_str_buf_detach(kmp_str_buf_t *buffer);
-void __kmp_str_buf_free(kmp_str_buf_t *buffer);
-void __kmp_str_buf_cat(kmp_str_buf_t *buffer, char const *str, int len);
-void __kmp_str_buf_catbuf(kmp_str_buf_t *dest, const kmp_str_buf_t *src);
-int __kmp_str_buf_vprint(kmp_str_buf_t *buffer, char const *format,
-                         va_list args);
-int __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format, ...);
-void __kmp_str_buf_print_size(kmp_str_buf_t *buffer, size_t size);
-
-/* File name parser.
-   Usage:
-
-   kmp_str_fname_t fname;
-   __kmp_str_fname_init( & fname, path );
-   // Use fname.path (copy of original path), fname.dir, fname.base.
-   // Note fname.dir concatenated with fname.base gives exact copy of path.
-   __kmp_str_fname_free( & fname );
-*/
-struct kmp_str_fname {
-  char *path;
-  char *dir;
-  char *base;
-}; // struct kmp_str_fname
-typedef struct kmp_str_fname kmp_str_fname_t;
-void __kmp_str_fname_init(kmp_str_fname_t *fname, char const *path);
-void __kmp_str_fname_free(kmp_str_fname_t *fname);
-// Compares a file name with the specified pattern. If the pattern is NULL,
-// any fname matches.
-int __kmp_str_fname_match(kmp_str_fname_t const *fname, char const *pattern);
-
-/* The compiler provides source locations in string form
-   ";file;func;line;col;;". It is not convenient for manipulation. This
-   structure keeps source location in a more convenient form.
-   Usage:
-
-   kmp_str_loc_t loc = __kmp_str_loc_init( ident->psource, 0 );
-   // use loc.file, loc.func, loc.line, loc.col.
-   // loc.fname is available if second argument of __kmp_str_loc_init is true.
-   __kmp_str_loc_free( & loc );
-
-   If psource is NULL or does not follow format above, file and/or func may be
-   NULL pointers.
-*/
-struct kmp_str_loc {
-  char *_bulk; // Do not use this field.
-  kmp_str_fname_t fname; // Will be initialized if init_fname is true.
- char *file; - char *func; - int line; - int col; -}; // struct kmp_str_loc -typedef struct kmp_str_loc kmp_str_loc_t; -kmp_str_loc_t __kmp_str_loc_init(char const *psource, int init_fname); -void __kmp_str_loc_free(kmp_str_loc_t *loc); - -int __kmp_str_eqf(char const *lhs, char const *rhs); -char *__kmp_str_format(char const *format, ...); -void __kmp_str_free(char **str); -int __kmp_str_match(char const *target, int len, char const *data); -int __kmp_str_match_false(char const *data); -int __kmp_str_match_true(char const *data); -void __kmp_str_replace(char *str, char search_for, char replace_with); -void __kmp_str_split(char *str, char delim, char **head, char **tail); -char *__kmp_str_token(char *str, char const *delim, char **buf); -int __kmp_str_to_int(char const *str, char sentinel); - -void __kmp_str_to_size(char const *str, size_t *out, size_t dfactor, - char const **error); -void __kmp_str_to_uint(char const *str, kmp_uint64 *out, char const **error); - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif // KMP_STR_H - -// end of file // Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_str.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_stats.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_stats.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_stats.cpp (nonexistent) @@ -1,922 +0,0 @@ -/** @file kmp_stats.cpp - * Statistics gathering and processing. - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-//
-//===----------------------------------------------------------------------===//
-
-#include "kmp.h"
-#include "kmp_lock.h"
-#include "kmp_stats.h"
-#include "kmp_str.h"
-
-#include <algorithm>
-#include <ctime>
-#include <iomanip>
-#include <sstream>
-#include <stdlib.h> // for atexit
-#include <cmath>
-
-#define STRINGIZE2(x) #x
-#define STRINGIZE(x) STRINGIZE2(x)
-
-#define expandName(name, flags, ignore) {STRINGIZE(name), flags},
-statInfo timeStat::timerInfo[] = {
-    KMP_FOREACH_TIMER(expandName, 0){"TIMER_LAST", 0}};
-const statInfo counter::counterInfo[] = {
-    KMP_FOREACH_COUNTER(expandName, 0){"COUNTER_LAST", 0}};
-#undef expandName
-
-#define expandName(ignore1, ignore2, ignore3) {0.0, 0.0, 0.0},
-kmp_stats_output_module::rgb_color kmp_stats_output_module::timerColorInfo[] = {
-    KMP_FOREACH_TIMER(expandName, 0){0.0, 0.0, 0.0}};
-#undef expandName
-
-const kmp_stats_output_module::rgb_color
-    kmp_stats_output_module::globalColorArray[] = {
-        {1.0, 0.0, 0.0}, // red
-        {1.0, 0.6, 0.0}, // orange
-        {1.0, 1.0, 0.0}, // yellow
-        {0.0, 1.0, 0.0}, // green
-        {0.0, 0.0, 1.0}, // blue
-        {0.6, 0.2, 0.8}, // purple
-        {1.0, 0.0, 1.0}, // magenta
-        {0.0, 0.4, 0.2}, // dark green
-        {1.0, 1.0, 0.6}, // light yellow
-        {0.6, 0.4, 0.6}, // dirty purple
-        {0.0, 1.0, 1.0}, // cyan
-        {1.0, 0.4, 0.8}, // pink
-        {0.5, 0.5, 0.5}, // grey
-        {0.8, 0.7, 0.5}, // brown
-        {0.6, 0.6, 1.0}, // light blue
-        {1.0, 0.7, 0.5}, // peach
-        {0.8, 0.5, 1.0}, // lavender
-        {0.6, 0.0, 0.0}, // dark red
-        {0.7, 0.6, 0.0}, // gold
-        {0.0, 0.0, 0.0} // black
-};
-
-// Ensure that the atexit handler only runs once.
-static uint32_t statsPrinted = 0;
-
-// output interface
-static kmp_stats_output_module *__kmp_stats_global_output = NULL;
-
-double logHistogram::binMax[] = {
-    1.e1l, 1.e2l, 1.e3l, 1.e4l, 1.e5l, 1.e6l, 1.e7l, 1.e8l,
-    1.e9l, 1.e10l, 1.e11l, 1.e12l, 1.e13l, 1.e14l, 1.e15l, 1.e16l,
-    1.e17l, 1.e18l, 1.e19l, 1.e20l, 1.e21l, 1.e22l, 1.e23l, 1.e24l,
-    1.e25l, 1.e26l, 1.e27l, 1.e28l, 1.e29l, 1.e30l};
-
-/* ************* statistic member functions ************* */
-
-void statistic::addSample(double sample) {
-  sample -= offset;
-  KMP_DEBUG_ASSERT(std::isfinite(sample));
-
-  double delta = sample - meanVal;
-
-  sampleCount = sampleCount + 1;
-  meanVal = meanVal + delta / sampleCount;
-  m2 = m2 + delta * (sample - meanVal);
-
-  minVal = std::min(minVal, sample);
-  maxVal = std::max(maxVal, sample);
-  if (collectingHist)
-    hist.addSample(sample);
-}
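[Editor's note] addSample() above is Welford's online algorithm: after n samples, meanVal holds the running mean and m2 the sum of squared deviations, so the population variance is m2/n. A stand-alone sanity check of that recurrence (editor's sketch, independent of the kmp classes):

#include <cassert>
#include <cmath>
#include <cstdio>

int main() {
  const double xs[] = {2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0};
  double mean = 0.0, m2 = 0.0;
  int n = 0;
  for (double x : xs) { // Welford update, as in statistic::addSample()
    ++n;
    double delta = x - mean;
    mean += delta / n;
    m2 += delta * (x - mean);
  }
  // Population variance of this classic data set is exactly 4.
  assert(std::fabs(m2 / n - 4.0) < 1e-12);
  std::printf("mean=%g variance=%g\n", mean, m2 / n);
  return 0;
}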
-
-statistic &statistic::operator+=(const statistic &other) {
-  if (other.sampleCount == 0)
-    return *this;
-
-  if (sampleCount == 0) {
-    *this = other;
-    return *this;
-  }
-
-  uint64_t newSampleCount = sampleCount + other.sampleCount;
-  double dnsc = double(newSampleCount);
-  double dsc = double(sampleCount);
-  double dscBydnsc = dsc / dnsc;
-  double dosc = double(other.sampleCount);
-  double delta = other.meanVal - meanVal;
-
-  // Try to order these calculations to avoid overflows. If this were Fortran,
-  // then the compiler would not be able to re-order over brackets. In C++ it
-  // may be legal to do that (we certainly hope it doesn't, and The C++
-  // Programming Language, 2nd edition, suggests it shouldn't, since it says
-  // that exploitation of associativity can only be made if the operation
-  // really is associative (which floating addition isn't...)).
-  meanVal = meanVal * dscBydnsc + other.meanVal * (1 - dscBydnsc);
-  m2 = m2 + other.m2 + dscBydnsc * dosc * delta * delta;
-  minVal = std::min(minVal, other.minVal);
-  maxVal = std::max(maxVal, other.maxVal);
-  sampleCount = newSampleCount;
-  if (collectingHist)
-    hist += other.hist;
-
-  return *this;
-}
-
-void statistic::scale(double factor) {
-  minVal = minVal * factor;
-  maxVal = maxVal * factor;
-  meanVal = meanVal * factor;
-  m2 = m2 * factor * factor;
-  return;
-}
-
-std::string statistic::format(char unit, bool total) const {
-  std::string result = formatSI(sampleCount, 9, ' ');
-
-  if (sampleCount == 0) {
-    result = result + std::string(", ") + formatSI(0.0, 9, unit);
-    result = result + std::string(", ") + formatSI(0.0, 9, unit);
-    result = result + std::string(", ") + formatSI(0.0, 9, unit);
-    if (total)
-      result = result + std::string(", ") + formatSI(0.0, 9, unit);
-    result = result + std::string(", ") + formatSI(0.0, 9, unit);
-  } else {
-    result = result + std::string(", ") + formatSI(minVal, 9, unit);
-    result = result + std::string(", ") + formatSI(meanVal, 9, unit);
-    result = result + std::string(", ") + formatSI(maxVal, 9, unit);
-    if (total)
-      result =
-          result + std::string(", ") + formatSI(meanVal * sampleCount, 9, unit);
-    result = result + std::string(", ") + formatSI(getSD(), 9, unit);
-  }
-  return result;
-}
-
-/* ************* histogram member functions ************* */
-
-// Lowest bin that has anything in it
-int logHistogram::minBin() const {
-  for (int i = 0; i < numBins; i++) {
-    if (bins[i].count != 0)
-      return i - logOffset;
-  }
-  return -logOffset;
-}
-
-// Highest bin that has anything in it
-int logHistogram::maxBin() const {
-  for (int i = numBins - 1; i >= 0; i--) {
-    if (bins[i].count != 0)
-      return i - logOffset;
-  }
-  return -logOffset;
-}
-
-// Which bin does this sample belong in?
-uint32_t logHistogram::findBin(double sample) {
-  double v = std::fabs(sample);
-  // Simply loop upwards, looking for the first bin that can hold the sample.
- // According to a micro-architect this is likely to be faster than a binary
-  // search, since it will only have one branch mis-predict.
-  for (int b = 0; b < numBins; b++)
-    if (binMax[b] > v)
-      return b;
-  fprintf(stderr,
-          "Trying to add a sample that is too large into a histogram\n");
-  KMP_ASSERT(0);
-  return -1;
-}
-
-void logHistogram::addSample(double sample) {
-  if (sample == 0.0) {
-    zeroCount += 1;
-#ifdef KMP_DEBUG
-    _total++;
-    check();
-#endif
-    return;
-  }
-  KMP_DEBUG_ASSERT(std::isfinite(sample));
-  uint32_t bin = findBin(sample);
-  KMP_DEBUG_ASSERT(0 <= bin && bin < numBins);
-
-  bins[bin].count += 1;
-  bins[bin].total += sample;
-#ifdef KMP_DEBUG
-  _total++;
-  check();
-#endif
-}
-
-// This may not be the format we want, but it'll do for now
-std::string logHistogram::format(char unit) const {
-  std::stringstream result;
-
-  result << "Bin, Count, Total\n";
-  if (zeroCount) {
-    result << "0, " << formatSI(zeroCount, 9, ' ') << ", "
-           << formatSI(0.0, 9, unit);
-    if (count(minBin()) == 0)
-      return result.str();
-    result << "\n";
-  }
-  for (int i = minBin(); i <= maxBin(); i++) {
-    result << "10**" << i << "<=v<10**" << (i + 1) << ", "
-           << formatSI(count(i), 9, ' ') << ", " << formatSI(total(i), 9, unit);
-    if (i != maxBin())
-      result << "\n";
-  }
-
-  return result.str();
-}
-
-/* ************* explicitTimer member functions ************* */
-
-void explicitTimer::start(tsc_tick_count tick) {
-  startTime = tick;
-  totalPauseTime = 0;
-  if (timeStat::logEvent(timerEnumValue)) {
-    __kmp_stats_thread_ptr->incrementNestValue();
-  }
-  return;
-}
-
-void explicitTimer::stop(tsc_tick_count tick,
-                         kmp_stats_list *stats_ptr /* = nullptr */) {
-  if (startTime.getValue() == 0)
-    return;
-
-  stat->addSample(((tick - startTime) - totalPauseTime).ticks());
-
-  if (timeStat::logEvent(timerEnumValue)) {
-    if (!stats_ptr)
-      stats_ptr = __kmp_stats_thread_ptr;
-    stats_ptr->push_event(
-        startTime.getValue() - __kmp_stats_start_time.getValue(),
-        tick.getValue() - __kmp_stats_start_time.getValue(),
-        __kmp_stats_thread_ptr->getNestValue(), timerEnumValue);
-    stats_ptr->decrementNestValue();
-  }
-
-  /* We accept the risk that we drop a sample because it really did start at
-     t==0. */
-  startTime = 0;
-  return;
-}
-
-/* ************* partitionedTimers member functions ************* */
-partitionedTimers::partitionedTimers() { timer_stack.reserve(8); }
-
-// initialize the partitioned timers to an initial timer
-void partitionedTimers::init(explicitTimer timer) {
-  KMP_DEBUG_ASSERT(this->timer_stack.size() == 0);
-  timer_stack.push_back(timer);
-  timer_stack.back().start(tsc_tick_count::now());
-}
-
-// stop/save the current timer, and start the new timer (timer_pair)
-// There is a special condition where if the current timer is equal to
-// the one you are trying to push, then it only manipulates the stack,
-// and it won't stop/start the currently running timer.
-void partitionedTimers::push(explicitTimer timer) { - // get the current timer - // pause current timer - // push new timer - // start the new timer - explicitTimer *current_timer, *new_timer; - size_t stack_size; - KMP_DEBUG_ASSERT(this->timer_stack.size() > 0); - timer_stack.push_back(timer); - stack_size = timer_stack.size(); - current_timer = &(timer_stack[stack_size - 2]); - new_timer = &(timer_stack[stack_size - 1]); - tsc_tick_count tick = tsc_tick_count::now(); - current_timer->pause(tick); - new_timer->start(tick); -} - -// stop/discard the current timer, and start the previously saved timer -void partitionedTimers::pop() { - // get the current timer - // stop current timer (record event/sample) - // pop current timer - // get the new current timer and resume - explicitTimer *old_timer, *new_timer; - size_t stack_size = timer_stack.size(); - KMP_DEBUG_ASSERT(stack_size > 1); - old_timer = &(timer_stack[stack_size - 1]); - new_timer = &(timer_stack[stack_size - 2]); - tsc_tick_count tick = tsc_tick_count::now(); - old_timer->stop(tick); - new_timer->resume(tick); - timer_stack.pop_back(); -} - -void partitionedTimers::exchange(explicitTimer timer) { - // get the current timer - // stop current timer (record event/sample) - // push new timer - // start the new timer - explicitTimer *current_timer, *new_timer; - size_t stack_size; - KMP_DEBUG_ASSERT(this->timer_stack.size() > 0); - tsc_tick_count tick = tsc_tick_count::now(); - stack_size = timer_stack.size(); - current_timer = &(timer_stack[stack_size - 1]); - current_timer->stop(tick); - timer_stack.pop_back(); - timer_stack.push_back(timer); - new_timer = &(timer_stack[stack_size - 1]); - new_timer->start(tick); -} - -// Wind up all the currently running timers. -// This pops off all the timers from the stack and clears the stack -// After this is called, init() must be run again to initialize the -// stack of timers -void partitionedTimers::windup() { - while (timer_stack.size() > 1) { - this->pop(); - } - // Pop the timer from the init() call - if (timer_stack.size() > 0) { - timer_stack.back().stop(tsc_tick_count::now()); - timer_stack.pop_back(); - } -} - -/* ************* kmp_stats_event_vector member functions ************* */ - -void kmp_stats_event_vector::deallocate() { - __kmp_free(events); - internal_size = 0; - allocated_size = 0; - events = NULL; -} - -// This function is for qsort() which requires the compare function to return -// either a negative number if event1 < event2, a positive number if event1 > -// event2 or zero if event1 == event2. This sorts by start time (lowest to -// highest). 
-int compare_two_events(const void *event1, const void *event2) { - const kmp_stats_event *ev1 = RCAST(const kmp_stats_event *, event1); - const kmp_stats_event *ev2 = RCAST(const kmp_stats_event *, event2); - - if (ev1->getStart() < ev2->getStart()) - return -1; - else if (ev1->getStart() > ev2->getStart()) - return 1; - else - return 0; -} - -void kmp_stats_event_vector::sort() { - qsort(events, internal_size, sizeof(kmp_stats_event), compare_two_events); -} - -/* ************* kmp_stats_list member functions ************* */ - -// returns a pointer to newly created stats node -kmp_stats_list *kmp_stats_list::push_back(int gtid) { - kmp_stats_list *newnode = - (kmp_stats_list *)__kmp_allocate(sizeof(kmp_stats_list)); - // placement new, only requires space and pointer and initializes (so - // __kmp_allocate instead of C++ new[] is used) - new (newnode) kmp_stats_list(); - newnode->setGtid(gtid); - newnode->prev = this->prev; - newnode->next = this; - newnode->prev->next = newnode; - newnode->next->prev = newnode; - return newnode; -} -void kmp_stats_list::deallocate() { - kmp_stats_list *ptr = this->next; - kmp_stats_list *delptr = this->next; - while (ptr != this) { - delptr = ptr; - ptr = ptr->next; - // placement new means we have to explicitly call destructor. - delptr->_event_vector.deallocate(); - delptr->~kmp_stats_list(); - __kmp_free(delptr); - } -} -kmp_stats_list::iterator kmp_stats_list::begin() { - kmp_stats_list::iterator it; - it.ptr = this->next; - return it; -} -kmp_stats_list::iterator kmp_stats_list::end() { - kmp_stats_list::iterator it; - it.ptr = this; - return it; -} -int kmp_stats_list::size() { - int retval; - kmp_stats_list::iterator it; - for (retval = 0, it = begin(); it != end(); it++, retval++) { - } - return retval; -} - -/* ************* kmp_stats_list::iterator member functions ************* */ - -kmp_stats_list::iterator::iterator() : ptr(NULL) {} -kmp_stats_list::iterator::~iterator() {} -kmp_stats_list::iterator kmp_stats_list::iterator::operator++() { - this->ptr = this->ptr->next; - return *this; -} -kmp_stats_list::iterator kmp_stats_list::iterator::operator++(int dummy) { - this->ptr = this->ptr->next; - return *this; -} -kmp_stats_list::iterator kmp_stats_list::iterator::operator--() { - this->ptr = this->ptr->prev; - return *this; -} -kmp_stats_list::iterator kmp_stats_list::iterator::operator--(int dummy) { - this->ptr = this->ptr->prev; - return *this; -} -bool kmp_stats_list::iterator::operator!=(const kmp_stats_list::iterator &rhs) { - return this->ptr != rhs.ptr; -} -bool kmp_stats_list::iterator::operator==(const kmp_stats_list::iterator &rhs) { - return this->ptr == rhs.ptr; -} -kmp_stats_list *kmp_stats_list::iterator::operator*() const { - return this->ptr; -} - -/* ************* kmp_stats_output_module functions ************** */ - -const char *kmp_stats_output_module::eventsFileName = NULL; -const char *kmp_stats_output_module::plotFileName = NULL; -int kmp_stats_output_module::printPerThreadFlag = 0; -int kmp_stats_output_module::printPerThreadEventsFlag = 0; - -static char const *lastName(char *name) { - int l = strlen(name); - for (int i = l - 1; i >= 0; --i) { - if (name[i] == '.') - name[i] = '_'; - if (name[i] == '/') - return name + i + 1; - } - return name; -} - -/* Read the name of the executable from /proc/self/cmdline */ -static char const *getImageName(char *buffer, size_t buflen) { - FILE *f = fopen("/proc/self/cmdline", "r"); - buffer[0] = char(0); - if (!f) - return buffer; - - // The file contains char(0) delimited 
words from the commandline. - // This just returns the last filename component of the first word on the - // line. - size_t n = fread(buffer, 1, buflen, f); - if (n == 0) { - fclose(f); - KMP_CHECK_SYSFAIL("fread", 1) - } - fclose(f); - buffer[buflen - 1] = char(0); - return lastName(buffer); -} - -static void getTime(char *buffer, size_t buflen, bool underscores = false) { - time_t timer; - - time(&timer); - - struct tm *tm_info = localtime(&timer); - if (underscores) - strftime(buffer, buflen, "%Y-%m-%d_%H%M%S", tm_info); - else - strftime(buffer, buflen, "%Y-%m-%d %H%M%S", tm_info); -} - -/* Generate a stats file name, expanding prototypes */ -static std::string generateFilename(char const *prototype, - char const *imageName) { - std::string res; - - for (int i = 0; prototype[i] != char(0); i++) { - char ch = prototype[i]; - - if (ch == '%') { - i++; - if (prototype[i] == char(0)) - break; - - switch (prototype[i]) { - case 't': // Insert time and date - { - char date[26]; - getTime(date, sizeof(date), true); - res += date; - } break; - case 'e': // Insert executable name - res += imageName; - break; - case 'p': // Insert pid - { - std::stringstream ss; - ss << getpid(); - res += ss.str(); - } break; - default: - res += prototype[i]; - break; - } - } else - res += ch; - } - return res; -} - -// init() is called very near the beginning of execution time in the constructor -// of __kmp_stats_global_output -void kmp_stats_output_module::init() { - - fprintf(stderr, "*** Stats enabled OpenMP* runtime ***\n"); - char *statsFileName = getenv("KMP_STATS_FILE"); - eventsFileName = getenv("KMP_STATS_EVENTS_FILE"); - plotFileName = getenv("KMP_STATS_PLOT_FILE"); - char *threadStats = getenv("KMP_STATS_THREADS"); - char *threadEvents = getenv("KMP_STATS_EVENTS"); - - // set the stats output filenames based on environment variables and defaults - if (statsFileName) { - char imageName[1024]; - // Process any escapes (e.g., %p, %e, %t) in the name - outputFileName = generateFilename( - statsFileName, getImageName(&imageName[0], sizeof(imageName))); - } - eventsFileName = eventsFileName ? eventsFileName : "events.dat"; - plotFileName = plotFileName ? plotFileName : "events.plt"; - - // set the flags based on environment variables matching: true, on, 1, .true. - // , .t. , yes - printPerThreadFlag = __kmp_str_match_true(threadStats); - printPerThreadEventsFlag = __kmp_str_match_true(threadEvents); - - if (printPerThreadEventsFlag) { - // assigns a color to each timer for printing - setupEventColors(); - } else { - // will clear flag so that no event will be logged - timeStat::clearEventFlags(); - } -} - -void kmp_stats_output_module::setupEventColors() { - int i; - int globalColorIndex = 0; - int numGlobalColors = sizeof(globalColorArray) / sizeof(rgb_color); - for (i = 0; i < TIMER_LAST; i++) { - if (timeStat::logEvent((timer_e)i)) { - timerColorInfo[i] = globalColorArray[globalColorIndex]; - globalColorIndex = (globalColorIndex + 1) % numGlobalColors; - } - } -} - -void kmp_stats_output_module::printTimerStats(FILE *statsOut, - statistic const *theStats, - statistic const *totalStats) { - fprintf(statsOut, - "Timer, SampleCount, Min, " - "Mean, Max, Total, SD\n"); - for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) { - statistic const *stat = &theStats[s]; - char tag = timeStat::noUnits(s) ? ' ' : 'T'; - - fprintf(statsOut, "%-35s, %s\n", timeStat::name(s), - stat->format(tag, true).c_str()); - } - // Also print the Total_ versions of times. 
- for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) {
-    char tag = timeStat::noUnits(s) ? ' ' : 'T';
-    if (totalStats && !timeStat::noTotal(s))
-      fprintf(statsOut, "Total_%-29s, %s\n", timeStat::name(s),
-              totalStats[s].format(tag, true).c_str());
-  }
-
-  // Print histogram of statistics
-  if (theStats[0].haveHist()) {
-    fprintf(statsOut, "\nTimer distributions\n");
-    for (int s = 0; s < TIMER_LAST; s++) {
-      statistic const *stat = &theStats[s];
-
-      if (stat->getCount() != 0) {
-        char tag = timeStat::noUnits(timer_e(s)) ? ' ' : 'T';
-
-        fprintf(statsOut, "%s\n", timeStat::name(timer_e(s)));
-        fprintf(statsOut, "%s\n", stat->getHist()->format(tag).c_str());
-      }
-    }
-  }
-}
-
-void kmp_stats_output_module::printCounterStats(FILE *statsOut,
-                                                statistic const *theStats) {
-  fprintf(statsOut, "Counter, ThreadCount, Min, Mean, "
-                    " Max, Total, SD\n");
-  for (int s = 0; s < COUNTER_LAST; s++) {
-    statistic const *stat = &theStats[s];
-    fprintf(statsOut, "%-25s, %s\n", counter::name(counter_e(s)),
-            stat->format(' ', true).c_str());
-  }
-  // Print histogram of counters
-  if (theStats[0].haveHist()) {
-    fprintf(statsOut, "\nCounter distributions\n");
-    for (int s = 0; s < COUNTER_LAST; s++) {
-      statistic const *stat = &theStats[s];
-
-      if (stat->getCount() != 0) {
-        fprintf(statsOut, "%s\n", counter::name(counter_e(s)));
-        fprintf(statsOut, "%s\n", stat->getHist()->format(' ').c_str());
-      }
-    }
-  }
-}
-
-void kmp_stats_output_module::printCounters(FILE *statsOut,
-                                            counter const *theCounters) {
-  // We print all the counters even if they are zero.
-  // That makes it easier to slice them into a spreadsheet if you need to.
-  fprintf(statsOut, "\nCounter, Count\n");
-  for (int c = 0; c < COUNTER_LAST; c++) {
-    counter const *stat = &theCounters[c];
-    fprintf(statsOut, "%-25s, %s\n", counter::name(counter_e(c)),
-            formatSI(stat->getValue(), 9, ' ').c_str());
-  }
-}
-
-void kmp_stats_output_module::printEvents(FILE *eventsOut,
-                                          kmp_stats_event_vector *theEvents,
-                                          int gtid) {
-  // sort by start time before printing
-  theEvents->sort();
-  for (int i = 0; i < theEvents->size(); i++) {
-    kmp_stats_event ev = theEvents->at(i);
-    rgb_color color = getEventColor(ev.getTimerName());
-    fprintf(eventsOut, "%d %lu %lu %1.1f rgb(%1.1f,%1.1f,%1.1f) %s\n", gtid,
-            ev.getStart(), ev.getStop(), 1.2 - (ev.getNestLevel() * 0.2),
-            color.r, color.g, color.b, timeStat::name(ev.getTimerName()));
-  }
-  return;
-}
-
-void kmp_stats_output_module::windupExplicitTimers() {
-  // Wind up any explicit timers. We assume that it's fair at this point to
-  // just walk all the explicit timers in all threads and say "it's over".
-  // If the timer wasn't running, this won't record anything anyway.
- kmp_stats_list::iterator it;
-  for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
-    kmp_stats_list *ptr = *it;
-    ptr->getPartitionedTimers()->windup();
-    ptr->endLife();
-  }
-}
-
-void kmp_stats_output_module::printPloticusFile() {
-  int i;
-  int size = __kmp_stats_list->size();
-  FILE *plotOut = fopen(plotFileName, "w+");
-
-  fprintf(plotOut, "#proc page\n"
-                   " pagesize: 15 10\n"
-                   " scale: 1.0\n\n");
-
-  fprintf(plotOut, "#proc getdata\n"
-                   " file: %s\n\n",
-          eventsFileName);
-
-  fprintf(plotOut, "#proc areadef\n"
-                   " title: OpenMP Sampling Timeline\n"
-                   " titledetails: align=center size=16\n"
-                   " rectangle: 1 1 13 9\n"
-                   " xautorange: datafield=2,3\n"
-                   " yautorange: -1 %d\n\n",
-          size);
-
-  fprintf(plotOut, "#proc xaxis\n"
-                   " stubs: inc\n"
-                   " stubdetails: size=12\n"
-                   " label: Time (ticks)\n"
-                   " labeldetails: size=14\n\n");
-
-  fprintf(plotOut, "#proc yaxis\n"
-                   " stubs: inc 1\n"
-                   " stubrange: 0 %d\n"
-                   " stubdetails: size=12\n"
-                   " label: Thread #\n"
-                   " labeldetails: size=14\n\n",
-          size - 1);
-
-  fprintf(plotOut, "#proc bars\n"
-                   " exactcolorfield: 5\n"
-                   " axis: x\n"
-                   " locfield: 1\n"
-                   " segmentfields: 2 3\n"
-                   " barwidthfield: 4\n\n");
-
-  // create legend entries corresponding to the timer color
-  for (i = 0; i < TIMER_LAST; i++) {
-    if (timeStat::logEvent((timer_e)i)) {
-      rgb_color c = getEventColor((timer_e)i);
-      fprintf(plotOut, "#proc legendentry\n"
-                       " sampletype: color\n"
-                       " label: %s\n"
-                       " details: rgb(%1.1f,%1.1f,%1.1f)\n\n",
-              timeStat::name((timer_e)i), c.r, c.g, c.b);
-    }
-  }
-
-  fprintf(plotOut, "#proc legend\n"
-                   " format: down\n"
-                   " location: max max\n\n");
-  fclose(plotOut);
-  return;
-}
-
-static void outputEnvVariable(FILE *statsOut, char const *name) {
-  char const *value = getenv(name);
-  fprintf(statsOut, "# %s = %s\n", name, value ? value : "*unspecified*");
-}
-
-/* Print some useful information about
-   * the date and time this experiment ran.
-   * the machine on which it ran.
-   We output all of this as stylised comments, though we may decide to parse
-   some of it. */
-void kmp_stats_output_module::printHeaderInfo(FILE *statsOut) {
-  std::time_t now = std::time(0);
-  char buffer[40];
-  char hostName[80];
-
-  std::strftime(&buffer[0], sizeof(buffer), "%c", std::localtime(&now));
-  fprintf(statsOut, "# Time of run: %s\n", &buffer[0]);
-  if (gethostname(&hostName[0], sizeof(hostName)) == 0)
-    fprintf(statsOut, "# Hostname: %s\n", &hostName[0]);
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-  fprintf(statsOut, "# CPU: %s\n", &__kmp_cpuinfo.name[0]);
-  fprintf(statsOut, "# Family: %d, Model: %d, Stepping: %d\n",
-          __kmp_cpuinfo.family, __kmp_cpuinfo.model, __kmp_cpuinfo.stepping);
-  if (__kmp_cpuinfo.frequency == 0)
-    fprintf(statsOut, "# Nominal frequency: Unknown\n");
-  else
-    fprintf(statsOut, "# Nominal frequency: %sz\n",
-            formatSI(double(__kmp_cpuinfo.frequency), 9, 'H').c_str());
-  outputEnvVariable(statsOut, "KMP_HW_SUBSET");
-  outputEnvVariable(statsOut, "KMP_AFFINITY");
-  outputEnvVariable(statsOut, "KMP_BLOCKTIME");
-  outputEnvVariable(statsOut, "KMP_LIBRARY");
-  fprintf(statsOut, "# Production runtime built " __DATE__ " " __TIME__ "\n");
-#endif
-}
-
-void kmp_stats_output_module::outputStats(const char *heading) {
-  // Stop all the explicit timers in all threads.
-  // Do this before declaring the local statistics because they have
-  // constructors and so will take time to create.
- windupExplicitTimers(); - - statistic allStats[TIMER_LAST]; - statistic totalStats[TIMER_LAST]; /* Synthesized, cross threads versions of - normal timer stats */ - statistic allCounters[COUNTER_LAST]; - - FILE *statsOut = - !outputFileName.empty() ? fopen(outputFileName.c_str(), "a+") : stderr; - if (!statsOut) - statsOut = stderr; - - FILE *eventsOut; - if (eventPrintingEnabled()) { - eventsOut = fopen(eventsFileName, "w+"); - } - - printHeaderInfo(statsOut); - fprintf(statsOut, "%s\n", heading); - // Accumulate across threads. - kmp_stats_list::iterator it; - for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) { - int t = (*it)->getGtid(); - // Output per thread stats if requested. - if (printPerThreadFlag) { - fprintf(statsOut, "Thread %d\n", t); - printTimerStats(statsOut, (*it)->getTimers(), 0); - printCounters(statsOut, (*it)->getCounters()); - fprintf(statsOut, "\n"); - } - // Output per thread events if requested. - if (eventPrintingEnabled()) { - kmp_stats_event_vector events = (*it)->getEventVector(); - printEvents(eventsOut, &events, t); - } - - // Accumulate timers. - for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) { - // See if we should ignore this timer when aggregating - if ((timeStat::masterOnly(s) && (t != 0)) || // Timer only valid on master - // and this thread is worker - (timeStat::workerOnly(s) && (t == 0)) // Timer only valid on worker - // and this thread is the master - ) { - continue; - } - - statistic *threadStat = (*it)->getTimer(s); - allStats[s] += *threadStat; - - // Add Total stats for timers that are valid in more than one thread - if (!timeStat::noTotal(s)) - totalStats[s].addSample(threadStat->getTotal()); - } - - // Accumulate counters. - for (counter_e c = counter_e(0); c < COUNTER_LAST; c = counter_e(c + 1)) { - if (counter::masterOnly(c) && t != 0) - continue; - allCounters[c].addSample((*it)->getCounter(c)->getValue()); - } - } - - if (eventPrintingEnabled()) { - printPloticusFile(); - fclose(eventsOut); - } - - fprintf(statsOut, "Aggregate for all threads\n"); - printTimerStats(statsOut, &allStats[0], &totalStats[0]); - fprintf(statsOut, "\n"); - printCounterStats(statsOut, &allCounters[0]); - - if (statsOut != stderr) - fclose(statsOut); -} - -/* ************* exported C functions ************** */ - -// no name mangling for these functions, we want the c files to be able to get -// at these functions -extern "C" { - -void __kmp_reset_stats() { - kmp_stats_list::iterator it; - for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) { - timeStat *timers = (*it)->getTimers(); - counter *counters = (*it)->getCounters(); - - for (int t = 0; t < TIMER_LAST; t++) - timers[t].reset(); - - for (int c = 0; c < COUNTER_LAST; c++) - counters[c].reset(); - - // reset the event vector so all previous events are "erased" - (*it)->resetEventVector(); - } -} - -// This function will reset all stats and stop all threads' explicit timers if -// they haven't been stopped already. -void __kmp_output_stats(const char *heading) { - __kmp_stats_global_output->outputStats(heading); - __kmp_reset_stats(); -} - -void __kmp_accumulate_stats_at_exit(void) { - // Only do this once. 
- if (KMP_XCHG_FIXED32(&statsPrinted, 1) != 0)
-    return;
-
-  __kmp_output_stats("Statistics on exit");
-}
-
-void __kmp_stats_init(void) {
-  __kmp_init_tas_lock(&__kmp_stats_lock);
-  __kmp_stats_start_time = tsc_tick_count::now();
-  __kmp_stats_global_output = new kmp_stats_output_module();
-  __kmp_stats_list = new kmp_stats_list();
-}
-
-void __kmp_stats_fini(void) {
-  __kmp_accumulate_stats_at_exit();
-  __kmp_stats_list->deallocate();
-  delete __kmp_stats_global_output;
-  delete __kmp_stats_list;
-}
-
-} // extern "C"

Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_stats.cpp
___________________________________________________________________
Deleted: svn:eol-style
## -1 +0,0 ##
-native
\ No newline at end of property
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Deleted: svn:mime-type
## -1 +0,0 ##
-text/plain
\ No newline at end of property
Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_version.h
===================================================================
--- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_version.h (revision 348960)
+++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_version.h (nonexistent)
@@ -1,67 +0,0 @@
-/*
- * kmp_version.h -- version number for this release
- */
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef KMP_VERSION_H
-#define KMP_VERSION_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif // __cplusplus
-
-#ifndef KMP_VERSION_MAJOR
-#error KMP_VERSION_MAJOR macro is not defined.
-#endif
-#define KMP_VERSION_MINOR 0
-/* Using the "magic" prefix in all the version strings is rather convenient to
-   get static version info from binaries by using the standard utilities
-   "strings" and "grep", e.g.:
-     $ strings libomp.so | grep "@(#)"
-   gives a clean list of all version strings in the library. The leading zero
-   helps to keep the version string separate from printable characters which
-   may occur just before the version string. */
-#define KMP_VERSION_MAGIC_STR "\x00@(#) "
-#define KMP_VERSION_MAGIC_LEN 6 // Length of KMP_VERSION_MAGIC_STR.
-#define KMP_VERSION_PREF_STR "Intel(R) OMP "
-#define KMP_VERSION_PREFIX KMP_VERSION_MAGIC_STR KMP_VERSION_PREF_STR
-
-/* declare all the version string constants for KMP_VERSION env. variable */
-extern int const __kmp_version_major;
-extern int const __kmp_version_minor;
-extern int const __kmp_version_build;
-extern int const __kmp_openmp_version;
-extern char const
-    __kmp_copyright[]; // Old variable, kept for compatibility with ITC and ITP.
-extern char const __kmp_version_copyright[];
-extern char const __kmp_version_lib_ver[];
-extern char const __kmp_version_lib_type[];
-extern char const __kmp_version_link_type[];
-extern char const __kmp_version_build_time[];
-extern char const __kmp_version_target_env[];
-extern char const __kmp_version_build_compiler[];
-extern char const __kmp_version_alt_comp[];
-extern char const __kmp_version_omp_api[];
-// ??? 
-extern char const __kmp_version_lock[];
-extern char const __kmp_version_nested_stats_reporting[];
-extern char const __kmp_version_ftnstdcall[];
-extern char const __kmp_version_ftncdecl[];
-extern char const __kmp_version_ftnextra[];
-
-void __kmp_print_version_1(void);
-void __kmp_print_version_2(void);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif // __cplusplus
-
-#endif /* KMP_VERSION_H */

Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_version.h
___________________________________________________________________
Deleted: svn:eol-style
## -1 +0,0 ##
-native
\ No newline at end of property
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Deleted: svn:mime-type
## -1 +0,0 ##
-text/plain
\ No newline at end of property
Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_wrapper_malloc.h
===================================================================
--- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_wrapper_malloc.h	(revision 348960)
+++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_wrapper_malloc.h	(nonexistent)
@@ -1,197 +0,0 @@
-/*
- * kmp_wrapper_malloc.h -- Wrappers for memory allocation routines
- *                         (malloc(), free(), and others).
- */
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef KMP_WRAPPER_MALLOC_H
-#define KMP_WRAPPER_MALLOC_H
-
-/* This header serves 3 purposes:
-   1. Declaring standard memory allocation routines in an OS-independent way.
-   2. Passing source location info through memory allocation wrappers.
-   3. Enabling native memory debugging capabilities.
-
-   1. Declaring standard memory allocation routines in an OS-independent way.
-   ---------------------------------------------------------------------------
-   On Linux* OS, the alloca() function is declared in the <alloca.h> header,
-   while on Windows* OS there is no <alloca.h> header and the function
-   _alloca() (note the underscore!) is declared in <malloc.h>. This header
-   eliminates these differences, so client code including
-   "kmp_wrapper_malloc.h" can rely on the following routines:
-
-     malloc
-     calloc
-     realloc
-     free
-     alloca
-
-   in an OS-independent way. It also enables memory tracking capabilities in
-   the debug build. (Currently it is available only on Windows* OS.)
-
-   2. Passing source location info through memory allocation wrappers.
-   -------------------------------------------------------------------
-   Some tools may help debug memory errors, for example, by reporting memory
-   leaks. However, memory allocation wrappers may obscure the original source
-   location. For example:
-
-     void * aligned_malloc( int size ) {
-       void * ptr = malloc( size ); // All the memory leaks will be reported at
-                                    // this line.
-       // some adjustments...
-       return ptr;
-     };
-
-     ptr = aligned_malloc( size ); // Memory leak will *not* be detected here. :-(
-
-   To overcome the problem, information about the original source location
-   should be passed through all the memory allocation wrappers, for example:
-
-     void * aligned_malloc( int size, char const * file, int line ) {
-       void * ptr = _malloc_dbg( size, file, line );
-       // some adjustments...
-       return ptr;
-     };
-     void * ptr = aligned_malloc( size, __FILE__, __LINE__ );
-
-   This is a good idea for debugging, but passing additional arguments impacts
-   performance. Disabling the extra arguments in the release version of the
-   software introduces too much conditional compilation, which makes the code
-   unreadable. This header defines a few macros and functions facilitating it:
-
-     void * _aligned_malloc( int size KMP_SRC_LOC_DECL ) {
-       void * ptr = malloc_src_loc( size KMP_SRC_LOC_PARM );
-       // some adjustments...
-       return ptr;
-     };
-     #define aligned_malloc( size ) _aligned_malloc( (size) KMP_SRC_LOC_CURR )
-     // Use the macro instead of a direct call to the function.
-
-     void * ptr = aligned_malloc( size ); // Bingo! Memory leak will be
-                                          // reported at this line.
-
-   3. Enabling native memory debugging capabilities.
-   -------------------------------------------------
-   Some platforms may offer memory debugging capabilities. For example, the
-   debug version of the Microsoft RTL tracks all memory allocations and can
-   report memory leaks. This header enables this, and makes the report more
-   useful (see "Passing source location info through memory allocation
-   wrappers").
-*/
-
-#include <stdlib.h>
-
-#include "kmp_os.h"
-
-// Include alloca() declaration.
-#if KMP_OS_WINDOWS
-#include <malloc.h> // Windows* OS: _alloca() declared in "malloc.h".
-#if KMP_MSVC_COMPAT
-#define alloca _alloca // Allow using alloca() with no underscore.
-#endif
-#elif KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_OPENBSD
-// Declared in "stdlib.h".
-#elif KMP_OS_UNIX
-#include <alloca.h> // Linux* OS and OS X*: alloca() declared in "alloca.h".
-#else
-#error Unknown or unsupported OS.
-#endif
-
-/* KMP_SRC_LOC_DECL -- Declares source location parameters, to be used in a
-   function declaration.
-   KMP_SRC_LOC_PARM -- Source location parameters, to be used to pass
-   parameters to underlying levels.
-   KMP_SRC_LOC_CURR -- Source location arguments describing the current
-   location, to be used at top level.
-
-   Typical usage:
-     void * _aligned_malloc( int size KMP_SRC_LOC_DECL ) {
-       // Note: no comma before KMP_SRC_LOC_DECL -- the macro supplies it.
-       KE_TRACE( 25, ( "called from %s:%d\n", KMP_SRC_LOC_PARM ) );
-       ...
-     }
-     #define aligned_malloc( size ) _aligned_malloc( (size) KMP_SRC_LOC_CURR )
-     // Use the macro instead of a direct call to the function -- the macro
-     // passes info about the current source location to the function.
-*/
-#if KMP_DEBUG
-#define KMP_SRC_LOC_DECL , char const *_file_, int _line_
-#define KMP_SRC_LOC_PARM , _file_, _line_
-#define KMP_SRC_LOC_CURR , __FILE__, __LINE__
-#else
-#define KMP_SRC_LOC_DECL
-#define KMP_SRC_LOC_PARM
-#define KMP_SRC_LOC_CURR
-#endif // KMP_DEBUG
-
-/* malloc_src_loc() and free_src_loc() are pseudo-functions (really macros)
-   which accept extra arguments (source location info) in debug mode. They
-   should be used in place of malloc() and free(); this allows enabling native
-   memory debugging capabilities (if any).
-
-   Typical usage:
-     ptr = malloc_src_loc( size KMP_SRC_LOC_PARM );
-     // Inside a memory allocation wrapper, or
-     ptr = malloc_src_loc( size KMP_SRC_LOC_CURR );
-     // Outside of a memory allocation wrapper.
-*/
-#define malloc_src_loc(args) _malloc_src_loc(args)
-#define free_src_loc(args) _free_src_loc(args)
-/* Depending on the build mode (debug or release), malloc_src_loc is declared
-   with 1 or 3 parameters, but calls to malloc_src_loc() are always the same:
-
-     ... malloc_src_loc( size KMP_SRC_LOC_PARM ); // or KMP_SRC_LOC_CURR
-
-   With a single macro, the compiler would issue the warning/error "too few
-   arguments in macro invocation".
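A compilable sketch of the source-location pattern under the debug-mode definitions (illustrative names, not this header's actual wrappers):

    #include <cstdio>
    #include <cstdlib>

    // Debug-style expansions; in a release build all three would be empty.
    #define SRC_LOC_DECL , char const *file_, int line_
    #define SRC_LOC_PARM , file_, line_
    #define SRC_LOC_CURR , __FILE__, __LINE__

    static void *alloc_impl(std::size_t size SRC_LOC_DECL) {
      std::printf("allocating %zu bytes at %s:%d\n", size, file_, line_);
      return std::malloc(size);
    }
    #define my_alloc(size) alloc_impl((size)SRC_LOC_CURR)

    int main() {
      void *p = my_alloc(64); // a leak report would point at this line
      std::free(p);
      return 0;
    }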
- Declaring two macros, malloc_src_loc() and _malloc_src_loc(), overcomes the - problem. */ - -#if KMP_DEBUG - -#if KMP_OS_WINDOWS && _DEBUG -// KMP_DEBUG != _DEBUG. MS debug RTL is available only if _DEBUG is defined. - -// Windows* OS has native memory debugging capabilities. Enable them. - -#include - -#define KMP_MEM_BLOCK _CLIENT_BLOCK -#define malloc(size) _malloc_dbg((size), KMP_MEM_BLOCK, __FILE__, __LINE__) -#define calloc(num, size) \ - _calloc_dbg((num), (size), KMP_MEM_BLOCK, __FILE__, __LINE__) -#define realloc(ptr, size) \ - _realloc_dbg((ptr), (size), KMP_MEM_BLOCK, __FILE__, __LINE__) -#define free(ptr) _free_dbg((ptr), KMP_MEM_BLOCK) - -#define _malloc_src_loc(size, file, line) \ - _malloc_dbg((size), KMP_MEM_BLOCK, (file), (line)) -#define _free_src_loc(ptr, file, line) _free_dbg((ptr), KMP_MEM_BLOCK) - -#else - -// Linux* OS, OS X*, or non-debug Windows* OS. - -#define _malloc_src_loc(size, file, line) malloc((size)) -#define _free_src_loc(ptr, file, line) free((ptr)) - -#endif - -#else - -// In release build malloc_src_loc() and free_src_loc() do not have extra -// parameters. -#define _malloc_src_loc(size) malloc((size)) -#define _free_src_loc(ptr) free((ptr)) - -#endif // KMP_DEBUG - -#endif // KMP_WRAPPER_MALLOC_H - -// end of file // Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_wrapper_malloc.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_taskq.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_taskq.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_taskq.cpp (nonexistent) @@ -1,2029 +0,0 @@ -/* - * kmp_taskq.cpp -- TASKQ support for OpenMP. - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#include "kmp.h" -#include "kmp_error.h" -#include "kmp_i18n.h" -#include "kmp_io.h" - -#define MAX_MESSAGE 512 - -/* Taskq routines and global variables */ - -#define KMP_DEBUG_REF_CTS(x) KF_TRACE(1, x); - -#define THREAD_ALLOC_FOR_TASKQ - -static int in_parallel_context(kmp_team_t *team) { - return !team->t.t_serialized; -} - -static void __kmp_taskq_eo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) { - int gtid = *gtid_ref; - int tid = __kmp_tid_from_gtid(gtid); - kmp_uint32 my_token; - kmpc_task_queue_t *taskq; - kmp_taskq_t *tq = &__kmp_threads[gtid]->th.th_team->t.t_taskq; - - if (__kmp_env_consistency_check) -#if KMP_USE_DYNAMIC_LOCK - __kmp_push_sync(gtid, ct_ordered_in_taskq, loc_ref, NULL, 0); -#else - __kmp_push_sync(gtid, ct_ordered_in_taskq, loc_ref, NULL); -#endif - - if (!__kmp_threads[gtid]->th.th_team->t.t_serialized) { - KMP_MB(); /* Flush all pending memory write invalidates. 
*/ - - /* GEH - need check here under stats to make sure */ - /* inside task (curr_thunk[*tid_ref] != NULL) */ - - my_token = tq->tq_curr_thunk[tid]->th_tasknum; - - taskq = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue; - - KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_EQ, NULL); - KMP_MB(); - } -} - -static void __kmp_taskq_xo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) { - int gtid = *gtid_ref; - int tid = __kmp_tid_from_gtid(gtid); - kmp_uint32 my_token; - kmp_taskq_t *tq = &__kmp_threads[gtid]->th.th_team->t.t_taskq; - - if (__kmp_env_consistency_check) - __kmp_pop_sync(gtid, ct_ordered_in_taskq, loc_ref); - - if (!__kmp_threads[gtid]->th.th_team->t.t_serialized) { - KMP_MB(); /* Flush all pending memory write invalidates. */ - - /* GEH - need check here under stats to make sure */ - /* inside task (curr_thunk[tid] != NULL) */ - - my_token = tq->tq_curr_thunk[tid]->th_tasknum; - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue->tq_tasknum_serving = - my_token + 1; - - KMP_MB(); /* Flush all pending memory write invalidates. */ - } -} - -static void __kmp_taskq_check_ordered(kmp_int32 gtid, kmpc_thunk_t *thunk) { - kmp_uint32 my_token; - kmpc_task_queue_t *taskq; - - /* assume we are always called from an active parallel context */ - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - my_token = thunk->th_tasknum; - - taskq = thunk->th.th_shareds->sv_queue; - - if (taskq->tq_tasknum_serving <= my_token) { - KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_GE, NULL); - KMP_MB(); - taskq->tq_tasknum_serving = my_token + 1; - KMP_MB(); - } -} - -#ifdef KMP_DEBUG - -static void __kmp_dump_TQF(kmp_int32 flags) { - if (flags & TQF_IS_ORDERED) - __kmp_printf("ORDERED "); - if (flags & TQF_IS_LASTPRIVATE) - __kmp_printf("LAST_PRIV "); - if (flags & TQF_IS_NOWAIT) - __kmp_printf("NOWAIT "); - if (flags & TQF_HEURISTICS) - __kmp_printf("HEURIST "); - if (flags & TQF_INTERFACE_RESERVED1) - __kmp_printf("RESERV1 "); - if (flags & TQF_INTERFACE_RESERVED2) - __kmp_printf("RESERV2 "); - if (flags & TQF_INTERFACE_RESERVED3) - __kmp_printf("RESERV3 "); - if (flags & TQF_INTERFACE_RESERVED4) - __kmp_printf("RESERV4 "); - if (flags & TQF_IS_LAST_TASK) - __kmp_printf("LAST_TASK "); - if (flags & TQF_TASKQ_TASK) - __kmp_printf("TASKQ_TASK "); - if (flags & TQF_RELEASE_WORKERS) - __kmp_printf("RELEASE "); - if (flags & TQF_ALL_TASKS_QUEUED) - __kmp_printf("ALL_QUEUED "); - if (flags & TQF_PARALLEL_CONTEXT) - __kmp_printf("PARALLEL "); - if (flags & TQF_DEALLOCATED) - __kmp_printf("DEALLOC "); - if (!(flags & (TQF_INTERNAL_FLAGS | TQF_INTERFACE_FLAGS))) - __kmp_printf("(NONE)"); -} - -static void __kmp_dump_thunk(kmp_taskq_t *tq, kmpc_thunk_t *thunk, - kmp_int32 global_tid) { - int i; - int nproc = __kmp_threads[global_tid]->th.th_team->t.t_nproc; - - __kmp_printf("\tThunk at %p on (%d): ", thunk, global_tid); - - if (thunk != NULL) { - for (i = 0; i < nproc; i++) { - if (tq->tq_curr_thunk[i] == thunk) { - __kmp_printf("[%i] ", i); - } - } - __kmp_printf("th_shareds=%p, ", thunk->th.th_shareds); - __kmp_printf("th_task=%p, ", thunk->th_task); - __kmp_printf("th_encl_thunk=%p, ", thunk->th_encl_thunk); - __kmp_printf("th_status=%d, ", thunk->th_status); - __kmp_printf("th_tasknum=%u, ", thunk->th_tasknum); - __kmp_printf("th_flags="); - __kmp_dump_TQF(thunk->th_flags); - } - - __kmp_printf("\n"); -} - -static void __kmp_dump_thunk_stack(kmpc_thunk_t *thunk, kmp_int32 thread_num) { - kmpc_thunk_t *th; - - 
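The __kmp_taskq_eo/__kmp_taskq_xo pair above implements a ticket-style gate: each ordered task spins until the queue's tq_tasknum_serving reaches its own token, and bumps the counter on exit. A condensed sketch of that protocol (simplified types; the real runtime spins with KMP_WAIT_YIELD rather than a raw loop):

    #include <atomic>

    struct ordered_gate {
      std::atomic<unsigned> serving{1}; // the first-queued task holds token 1

      void enter(unsigned token) {      // analogue of __kmp_taskq_eo
        while (serving.load(std::memory_order_acquire) != token) {
          // spin; the real runtime yields the CPU while waiting
        }
      }
      void exit(unsigned token) {       // analogue of __kmp_taskq_xo
        serving.store(token + 1, std::memory_order_release);
      }
    };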
__kmp_printf(" Thunk stack for T#%d: ", thread_num); - - for (th = thunk; th != NULL; th = th->th_encl_thunk) - __kmp_printf("%p ", th); - - __kmp_printf("\n"); -} - -static void __kmp_dump_task_queue(kmp_taskq_t *tq, kmpc_task_queue_t *queue, - kmp_int32 global_tid) { - int qs, count, i; - kmpc_thunk_t *thunk; - kmpc_task_queue_t *taskq; - - __kmp_printf("Task Queue at %p on (%d):\n", queue, global_tid); - - if (queue != NULL) { - int in_parallel = queue->tq_flags & TQF_PARALLEL_CONTEXT; - - if (__kmp_env_consistency_check) { - __kmp_printf(" tq_loc : "); - } - if (in_parallel) { - - // if (queue->tq.tq_parent != 0) - //__kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); - - //__kmp_acquire_lock(& queue->tq_link_lck, global_tid); - - // Make sure data structures are in consistent state before querying them - // Seems to work without this for digital/alpha, needed for IBM/RS6000 - KMP_MB(); - - __kmp_printf(" tq_parent : %p\n", queue->tq.tq_parent); - __kmp_printf(" tq_first_child : %p\n", queue->tq_first_child); - __kmp_printf(" tq_next_child : %p\n", queue->tq_next_child); - __kmp_printf(" tq_prev_child : %p\n", queue->tq_prev_child); - __kmp_printf(" tq_ref_count : %d\n", queue->tq_ref_count); - - //__kmp_release_lock(& queue->tq_link_lck, global_tid); - - // if (queue->tq.tq_parent != 0) - //__kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); - - //__kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid); - //__kmp_acquire_lock(& queue->tq_queue_lck, global_tid); - - // Make sure data structures are in consistent state before querying them - // Seems to work without this for digital/alpha, needed for IBM/RS6000 - KMP_MB(); - } - - __kmp_printf(" tq_shareds : "); - for (i = 0; i < ((queue == tq->tq_root) ? queue->tq_nproc : 1); i++) - __kmp_printf("%p ", queue->tq_shareds[i].ai_data); - __kmp_printf("\n"); - - if (in_parallel) { - __kmp_printf(" tq_tasknum_queuing : %u\n", queue->tq_tasknum_queuing); - __kmp_printf(" tq_tasknum_serving : %u\n", queue->tq_tasknum_serving); - } - - __kmp_printf(" tq_queue : %p\n", queue->tq_queue); - __kmp_printf(" tq_thunk_space : %p\n", queue->tq_thunk_space); - __kmp_printf(" tq_taskq_slot : %p\n", queue->tq_taskq_slot); - - __kmp_printf(" tq_free_thunks : "); - for (thunk = queue->tq_free_thunks; thunk != NULL; - thunk = thunk->th.th_next_free) - __kmp_printf("%p ", thunk); - __kmp_printf("\n"); - - __kmp_printf(" tq_nslots : %d\n", queue->tq_nslots); - __kmp_printf(" tq_head : %d\n", queue->tq_head); - __kmp_printf(" tq_tail : %d\n", queue->tq_tail); - __kmp_printf(" tq_nfull : %d\n", queue->tq_nfull); - __kmp_printf(" tq_hiwat : %d\n", queue->tq_hiwat); - __kmp_printf(" tq_flags : "); - __kmp_dump_TQF(queue->tq_flags); - __kmp_printf("\n"); - - if (in_parallel) { - __kmp_printf(" tq_th_thunks : "); - for (i = 0; i < queue->tq_nproc; i++) { - __kmp_printf("%d ", queue->tq_th_thunks[i].ai_data); - } - __kmp_printf("\n"); - } - - __kmp_printf("\n"); - __kmp_printf(" Queue slots:\n"); - - qs = queue->tq_tail; - for (count = 0; count < queue->tq_nfull; ++count) { - __kmp_printf("(%d)", qs); - __kmp_dump_thunk(tq, queue->tq_queue[qs].qs_thunk, global_tid); - qs = (qs + 1) % queue->tq_nslots; - } - - __kmp_printf("\n"); - - if (in_parallel) { - if (queue->tq_taskq_slot != NULL) { - __kmp_printf(" TaskQ slot:\n"); - __kmp_dump_thunk(tq, CCAST(kmpc_thunk_t *, queue->tq_taskq_slot), - global_tid); - __kmp_printf("\n"); - } - //__kmp_release_lock(& queue->tq_queue_lck, global_tid); - //__kmp_release_lock(& 
queue->tq_free_thunks_lck, global_tid); - } - } - - __kmp_printf(" Taskq freelist: "); - - //__kmp_acquire_lock( & tq->tq_freelist_lck, global_tid ); - - // Make sure data structures are in consistent state before querying them - // Seems to work without this call for digital/alpha, needed for IBM/RS6000 - KMP_MB(); - - for (taskq = tq->tq_freelist; taskq != NULL; taskq = taskq->tq.tq_next_free) - __kmp_printf("%p ", taskq); - - //__kmp_release_lock( & tq->tq_freelist_lck, global_tid ); - - __kmp_printf("\n\n"); -} - -static void __kmp_aux_dump_task_queue_tree(kmp_taskq_t *tq, - kmpc_task_queue_t *curr_queue, - kmp_int32 level, - kmp_int32 global_tid) { - int i, count, qs; - int nproc = __kmp_threads[global_tid]->th.th_team->t.t_nproc; - kmpc_task_queue_t *queue = curr_queue; - - if (curr_queue == NULL) - return; - - __kmp_printf(" "); - - for (i = 0; i < level; i++) - __kmp_printf(" "); - - __kmp_printf("%p", curr_queue); - - for (i = 0; i < nproc; i++) { - if (tq->tq_curr_thunk[i] && - tq->tq_curr_thunk[i]->th.th_shareds->sv_queue == curr_queue) { - __kmp_printf(" [%i]", i); - } - } - - __kmp_printf(":"); - - //__kmp_acquire_lock(& curr_queue->tq_queue_lck, global_tid); - - // Make sure data structures are in consistent state before querying them - // Seems to work without this call for digital/alpha, needed for IBM/RS6000 - KMP_MB(); - - qs = curr_queue->tq_tail; - - for (count = 0; count < curr_queue->tq_nfull; ++count) { - __kmp_printf("%p ", curr_queue->tq_queue[qs].qs_thunk); - qs = (qs + 1) % curr_queue->tq_nslots; - } - - //__kmp_release_lock(& curr_queue->tq_queue_lck, global_tid); - - __kmp_printf("\n"); - - if (curr_queue->tq_first_child) { - //__kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid); - - // Make sure data structures are in consistent state before querying them - // Seems to work without this call for digital/alpha, needed for IBM/RS6000 - KMP_MB(); - - if (curr_queue->tq_first_child) { - for (queue = CCAST(kmpc_task_queue_t *, curr_queue->tq_first_child); - queue != NULL; queue = queue->tq_next_child) { - __kmp_aux_dump_task_queue_tree(tq, queue, level + 1, global_tid); - } - } - - //__kmp_release_lock(& curr_queue->tq_link_lck, global_tid); - } -} - -static void __kmp_dump_task_queue_tree(kmp_taskq_t *tq, - kmpc_task_queue_t *tqroot, - kmp_int32 global_tid) { - __kmp_printf("TaskQ Tree at root %p on (%d):\n", tqroot, global_tid); - - __kmp_aux_dump_task_queue_tree(tq, tqroot, 0, global_tid); - - __kmp_printf("\n"); -} -#endif - -/* New taskq storage routines that try to minimize overhead of mallocs but - still provide cache line alignment. 
*/ -static void *__kmp_taskq_allocate(size_t size, kmp_int32 global_tid) { - void *addr, *orig_addr; - size_t bytes; - - KB_TRACE(5, ("__kmp_taskq_allocate: called size=%d, gtid=%d\n", (int)size, - global_tid)); - - bytes = sizeof(void *) + CACHE_LINE + size; - -#ifdef THREAD_ALLOC_FOR_TASKQ - orig_addr = - (void *)__kmp_thread_malloc(__kmp_thread_from_gtid(global_tid), bytes); -#else - KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", bytes)); - orig_addr = (void *)KMP_INTERNAL_MALLOC(bytes); -#endif /* THREAD_ALLOC_FOR_TASKQ */ - - if (orig_addr == 0) - KMP_FATAL(OutOfHeapMemory); - - addr = orig_addr; - - if (((kmp_uintptr_t)addr & (CACHE_LINE - 1)) != 0) { - KB_TRACE(50, ("__kmp_taskq_allocate: adjust for cache alignment\n")); - addr = (void *)(((kmp_uintptr_t)addr + CACHE_LINE) & ~(CACHE_LINE - 1)); - } - - (*(void **)addr) = orig_addr; - - KB_TRACE(10, - ("__kmp_taskq_allocate: allocate: %p, use: %p - %p, size: %d, " - "gtid: %d\n", - orig_addr, ((void **)addr) + 1, - ((char *)(((void **)addr) + 1)) + size - 1, (int)size, global_tid)); - - return (((void **)addr) + 1); -} - -static void __kmpc_taskq_free(void *p, kmp_int32 global_tid) { - KB_TRACE(5, ("__kmpc_taskq_free: called addr=%p, gtid=%d\n", p, global_tid)); - - KB_TRACE(10, ("__kmpc_taskq_free: freeing: %p, gtid: %d\n", - (*(((void **)p) - 1)), global_tid)); - -#ifdef THREAD_ALLOC_FOR_TASKQ - __kmp_thread_free(__kmp_thread_from_gtid(global_tid), *(((void **)p) - 1)); -#else - KMP_INTERNAL_FREE(*(((void **)p) - 1)); -#endif /* THREAD_ALLOC_FOR_TASKQ */ -} - -/* Keep freed kmpc_task_queue_t on an internal freelist and recycle since - they're of constant size. */ - -static kmpc_task_queue_t * -__kmp_alloc_taskq(kmp_taskq_t *tq, int in_parallel, kmp_int32 nslots, - kmp_int32 nthunks, kmp_int32 nshareds, kmp_int32 nproc, - size_t sizeof_thunk, size_t sizeof_shareds, - kmpc_thunk_t **new_taskq_thunk, kmp_int32 global_tid) { - kmp_int32 i; - size_t bytes; - kmpc_task_queue_t *new_queue; - kmpc_aligned_shared_vars_t *shared_var_array; - char *shared_var_storage; - char *pt; /* for doing byte-adjusted address computations */ - - __kmp_acquire_lock(&tq->tq_freelist_lck, global_tid); - - // Make sure data structures are in consistent state before querying them - // Seems to work without this call for digital/alpha, needed for IBM/RS6000 - KMP_MB(); - - if (tq->tq_freelist) { - new_queue = tq->tq_freelist; - tq->tq_freelist = tq->tq_freelist->tq.tq_next_free; - - KMP_DEBUG_ASSERT(new_queue->tq_flags & TQF_DEALLOCATED); - - new_queue->tq_flags = 0; - - __kmp_release_lock(&tq->tq_freelist_lck, global_tid); - } else { - __kmp_release_lock(&tq->tq_freelist_lck, global_tid); - - new_queue = (kmpc_task_queue_t *)__kmp_taskq_allocate( - sizeof(kmpc_task_queue_t), global_tid); - new_queue->tq_flags = 0; - } - - /* space in the task queue for queue slots (allocate as one big chunk */ - /* of storage including new_taskq_task space) */ - - sizeof_thunk += - (CACHE_LINE - (sizeof_thunk % CACHE_LINE)); /* pad to cache line size */ - pt = (char *)__kmp_taskq_allocate(nthunks * sizeof_thunk, global_tid); - new_queue->tq_thunk_space = (kmpc_thunk_t *)pt; - *new_taskq_thunk = (kmpc_thunk_t *)(pt + (nthunks - 1) * sizeof_thunk); - - /* chain the allocated thunks into a freelist for this queue */ - - new_queue->tq_free_thunks = (kmpc_thunk_t *)pt; - - for (i = 0; i < (nthunks - 2); i++) { - ((kmpc_thunk_t *)(pt + i * sizeof_thunk))->th.th_next_free = - (kmpc_thunk_t *)(pt + (i + 1) * sizeof_thunk); -#ifdef KMP_DEBUG - ((kmpc_thunk_t *)(pt + i * 
sizeof_thunk))->th_flags = TQF_DEALLOCATED; -#endif - } - - ((kmpc_thunk_t *)(pt + (nthunks - 2) * sizeof_thunk))->th.th_next_free = NULL; -#ifdef KMP_DEBUG - ((kmpc_thunk_t *)(pt + (nthunks - 2) * sizeof_thunk))->th_flags = - TQF_DEALLOCATED; -#endif - - /* initialize the locks */ - - if (in_parallel) { - __kmp_init_lock(&new_queue->tq_link_lck); - __kmp_init_lock(&new_queue->tq_free_thunks_lck); - __kmp_init_lock(&new_queue->tq_queue_lck); - } - - /* now allocate the slots */ - - bytes = nslots * sizeof(kmpc_aligned_queue_slot_t); - new_queue->tq_queue = - (kmpc_aligned_queue_slot_t *)__kmp_taskq_allocate(bytes, global_tid); - - /* space for array of pointers to shared variable structures */ - sizeof_shareds += sizeof(kmpc_task_queue_t *); - sizeof_shareds += - (CACHE_LINE - (sizeof_shareds % CACHE_LINE)); /* pad to cache line size */ - - bytes = nshareds * sizeof(kmpc_aligned_shared_vars_t); - shared_var_array = - (kmpc_aligned_shared_vars_t *)__kmp_taskq_allocate(bytes, global_tid); - - bytes = nshareds * sizeof_shareds; - shared_var_storage = (char *)__kmp_taskq_allocate(bytes, global_tid); - - for (i = 0; i < nshareds; i++) { - shared_var_array[i].ai_data = - (kmpc_shared_vars_t *)(shared_var_storage + i * sizeof_shareds); - shared_var_array[i].ai_data->sv_queue = new_queue; - } - new_queue->tq_shareds = shared_var_array; - - /* array for number of outstanding thunks per thread */ - - if (in_parallel) { - bytes = nproc * sizeof(kmpc_aligned_int32_t); - new_queue->tq_th_thunks = - (kmpc_aligned_int32_t *)__kmp_taskq_allocate(bytes, global_tid); - new_queue->tq_nproc = nproc; - - for (i = 0; i < nproc; i++) - new_queue->tq_th_thunks[i].ai_data = 0; - } - - return new_queue; -} - -static void __kmp_free_taskq(kmp_taskq_t *tq, kmpc_task_queue_t *p, - int in_parallel, kmp_int32 global_tid) { - __kmpc_taskq_free(p->tq_thunk_space, global_tid); - __kmpc_taskq_free(p->tq_queue, global_tid); - - /* free shared var structure storage */ - __kmpc_taskq_free(CCAST(kmpc_shared_vars_t *, p->tq_shareds[0].ai_data), - global_tid); - /* free array of pointers to shared vars storage */ - __kmpc_taskq_free(p->tq_shareds, global_tid); - -#ifdef KMP_DEBUG - p->tq_first_child = NULL; - p->tq_next_child = NULL; - p->tq_prev_child = NULL; - p->tq_ref_count = -10; - p->tq_shareds = NULL; - p->tq_tasknum_queuing = 0; - p->tq_tasknum_serving = 0; - p->tq_queue = NULL; - p->tq_thunk_space = NULL; - p->tq_taskq_slot = NULL; - p->tq_free_thunks = NULL; - p->tq_nslots = 0; - p->tq_head = 0; - p->tq_tail = 0; - p->tq_nfull = 0; - p->tq_hiwat = 0; - - if (in_parallel) { - int i; - - for (i = 0; i < p->tq_nproc; i++) - p->tq_th_thunks[i].ai_data = 0; - } - if (__kmp_env_consistency_check) - p->tq_loc = NULL; - KMP_DEBUG_ASSERT(p->tq_flags & TQF_DEALLOCATED); - p->tq_flags = TQF_DEALLOCATED; -#endif /* KMP_DEBUG */ - - if (in_parallel) { - __kmpc_taskq_free(p->tq_th_thunks, global_tid); - __kmp_destroy_lock(&p->tq_link_lck); - __kmp_destroy_lock(&p->tq_queue_lck); - __kmp_destroy_lock(&p->tq_free_thunks_lck); - } -#ifdef KMP_DEBUG - p->tq_th_thunks = NULL; -#endif /* KMP_DEBUG */ - - // Make sure data structures are in consistent state before querying them - // Seems to work without this call for digital/alpha, needed for IBM/RS6000 - KMP_MB(); - - __kmp_acquire_lock(&tq->tq_freelist_lck, global_tid); - p->tq.tq_next_free = tq->tq_freelist; - - tq->tq_freelist = p; - __kmp_release_lock(&tq->tq_freelist_lck, global_tid); -} - -/* Once a group of thunks has been allocated for use in a particular queue, - these are 
managed via a per-queue freelist. - We force a check that there's always a thunk free if we need one. */ - -static kmpc_thunk_t *__kmp_alloc_thunk(kmpc_task_queue_t *queue, - int in_parallel, kmp_int32 global_tid) { - kmpc_thunk_t *fl; - - if (in_parallel) { - __kmp_acquire_lock(&queue->tq_free_thunks_lck, global_tid); - // Make sure data structures are in consistent state before querying them - // Seems to work without this call for digital/alpha, needed for IBM/RS6000 - KMP_MB(); - } - - fl = queue->tq_free_thunks; - - KMP_DEBUG_ASSERT(fl != NULL); - - queue->tq_free_thunks = fl->th.th_next_free; - fl->th_flags = 0; - - if (in_parallel) - __kmp_release_lock(&queue->tq_free_thunks_lck, global_tid); - - return fl; -} - -static void __kmp_free_thunk(kmpc_task_queue_t *queue, kmpc_thunk_t *p, - int in_parallel, kmp_int32 global_tid) { -#ifdef KMP_DEBUG - p->th_task = 0; - p->th_encl_thunk = 0; - p->th_status = 0; - p->th_tasknum = 0; -/* Also could zero pointers to private vars */ -#endif - - if (in_parallel) { - __kmp_acquire_lock(&queue->tq_free_thunks_lck, global_tid); - // Make sure data structures are in consistent state before querying them - // Seems to work without this call for digital/alpha, needed for IBM/RS6000 - KMP_MB(); - } - - p->th.th_next_free = queue->tq_free_thunks; - queue->tq_free_thunks = p; - -#ifdef KMP_DEBUG - p->th_flags = TQF_DEALLOCATED; -#endif - - if (in_parallel) - __kmp_release_lock(&queue->tq_free_thunks_lck, global_tid); -} - -/* returns nonzero if the queue just became full after the enqueue */ -static kmp_int32 __kmp_enqueue_task(kmp_taskq_t *tq, kmp_int32 global_tid, - kmpc_task_queue_t *queue, - kmpc_thunk_t *thunk, int in_parallel) { - kmp_int32 ret; - - /* dkp: can we get around the lock in the TQF_RELEASE_WORKERS case (only the - * master is executing then) */ - if (in_parallel) { - __kmp_acquire_lock(&queue->tq_queue_lck, global_tid); - // Make sure data structures are in consistent state before querying them - // Seems to work without this call for digital/alpha, needed for IBM/RS6000 - KMP_MB(); - } - - KMP_DEBUG_ASSERT(queue->tq_nfull < queue->tq_nslots); // check queue not full - - queue->tq_queue[(queue->tq_head)++].qs_thunk = thunk; - - if (queue->tq_head >= queue->tq_nslots) - queue->tq_head = 0; - - (queue->tq_nfull)++; - - KMP_MB(); /* to assure that nfull is seen to increase before - TQF_ALL_TASKS_QUEUED is set */ - - ret = (in_parallel) ? (queue->tq_nfull == queue->tq_nslots) : FALSE; - - if (in_parallel) { - /* don't need to wait until workers are released before unlocking */ - __kmp_release_lock(&queue->tq_queue_lck, global_tid); - - if (tq->tq_global_flags & TQF_RELEASE_WORKERS) { - // If just creating the root queue, the worker threads are waiting at a - // join barrier until now, when there's something in the queue for them to - // do; release them now to do work. This should only be done when this is - // the first task enqueued, so reset the flag here also. 
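The enqueue path above is a classic bounded circular buffer: store at head, wrap, bump the fill count, and report whether the queue just became full so the caller can react. Reduced to its core (hypothetical structure; the runtime additionally handles locking and the worker-release flag below):

    struct task_ring {
      void **slots; // tq_queue analogue, nslots entries, allocated elsewhere
      int nslots;
      int head = 0, tail = 0, nfull = 0;

      // Returns true when this enqueue filled the last free slot.
      // The caller is assumed to hold the queue lock, as in __kmp_enqueue_task.
      bool enqueue(void *thunk) {
        slots[head++] = thunk;
        if (head >= nslots)
          head = 0; // wrap around
        return ++nfull == nslots;
      }
    };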
- tq->tq_global_flags &= ~TQF_RELEASE_WORKERS; /* no lock needed, workers - are still in spin mode */ - // avoid releasing barrier twice if taskq_task switches threads - KMP_MB(); - - __kmpc_end_barrier_master(NULL, global_tid); - } - } - - return ret; -} - -static kmpc_thunk_t *__kmp_dequeue_task(kmp_int32 global_tid, - kmpc_task_queue_t *queue, - int in_parallel) { - kmpc_thunk_t *pt; - int tid = __kmp_tid_from_gtid(global_tid); - - KMP_DEBUG_ASSERT(queue->tq_nfull > 0); /* check queue not empty */ - - if (queue->tq.tq_parent != NULL && in_parallel) { - int ct; - __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); - ct = ++(queue->tq_ref_count); - __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); - KMP_DEBUG_REF_CTS( - ("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct)); - } - - pt = queue->tq_queue[(queue->tq_tail)++].qs_thunk; - - if (queue->tq_tail >= queue->tq_nslots) - queue->tq_tail = 0; - - if (in_parallel) { - queue->tq_th_thunks[tid].ai_data++; - - KMP_MB(); /* necessary so ai_data increment is propagated to other threads - immediately (digital) */ - - KF_TRACE(200, ("__kmp_dequeue_task: T#%d(:%d) now has %d outstanding " - "thunks from queue %p\n", - global_tid, tid, queue->tq_th_thunks[tid].ai_data, queue)); - } - - (queue->tq_nfull)--; - -#ifdef KMP_DEBUG - KMP_MB(); - - /* necessary so (queue->tq_nfull > 0) above succeeds after tq_nfull is - * decremented */ - - KMP_DEBUG_ASSERT(queue->tq_nfull >= 0); - - if (in_parallel) { - KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data <= - __KMP_TASKQ_THUNKS_PER_TH); - } -#endif - - return pt; -} - -/* Find the next (non-null) task to dequeue and return it. - * This is never called unless in_parallel=TRUE - * - * Here are the rules for deciding which queue to take the task from: - * 1. Walk up the task queue tree from the current queue's parent and look - * on the way up (for loop, below). - * 2. Do a depth-first search back down the tree from the root and - * look (find_task_in_descendant_queue()). - * - * Here are the rules for deciding which task to take from a queue - * (__kmp_find_task_in_queue ()): - * 1. Never take the last task from a queue if TQF_IS_LASTPRIVATE; this task - * must be staged to make sure we execute the last one with - * TQF_IS_LAST_TASK at the end of task queue execution. - * 2. If the queue length is below some high water mark and the taskq task - * is enqueued, prefer running the taskq task. - * 3. Otherwise, take a (normal) task from the queue. - * - * If we do all this and return pt == NULL at the bottom of this routine, - * this means there are no more tasks to execute (except possibly for - * TQF_IS_LASTPRIVATE). 
- */ - -static kmpc_thunk_t *__kmp_find_task_in_queue(kmp_int32 global_tid, - kmpc_task_queue_t *queue) { - kmpc_thunk_t *pt = NULL; - int tid = __kmp_tid_from_gtid(global_tid); - - /* To prevent deadlock from tq_queue_lck if queue already deallocated */ - if (!(queue->tq_flags & TQF_DEALLOCATED)) { - - __kmp_acquire_lock(&queue->tq_queue_lck, global_tid); - - /* Check again to avoid race in __kmpc_end_taskq() */ - if (!(queue->tq_flags & TQF_DEALLOCATED)) { - // Make sure data structures are in consistent state before querying them - // Seems to work without this for digital/alpha, needed for IBM/RS6000 - KMP_MB(); - - if ((queue->tq_taskq_slot != NULL) && - (queue->tq_nfull <= queue->tq_hiwat)) { - /* if there's enough room in the queue and the dispatcher */ - /* (taskq task) is available, schedule more tasks */ - pt = CCAST(kmpc_thunk_t *, queue->tq_taskq_slot); - queue->tq_taskq_slot = NULL; - } else if (queue->tq_nfull == 0 || - queue->tq_th_thunks[tid].ai_data >= - __KMP_TASKQ_THUNKS_PER_TH) { - /* do nothing if no thunks available or this thread can't */ - /* run any because it already is executing too many */ - pt = NULL; - } else if (queue->tq_nfull > 1) { - /* always safe to schedule a task even if TQF_IS_LASTPRIVATE */ - - pt = __kmp_dequeue_task(global_tid, queue, TRUE); - } else if (!(queue->tq_flags & TQF_IS_LASTPRIVATE)) { - // one thing in queue, always safe to schedule if !TQF_IS_LASTPRIVATE - pt = __kmp_dequeue_task(global_tid, queue, TRUE); - } else if (queue->tq_flags & TQF_IS_LAST_TASK) { - /* TQF_IS_LASTPRIVATE, one thing in queue, kmpc_end_taskq_task() */ - /* has been run so this is last task, run with TQF_IS_LAST_TASK so */ - /* instrumentation does copy-out. */ - pt = __kmp_dequeue_task(global_tid, queue, TRUE); - pt->th_flags |= - TQF_IS_LAST_TASK; /* don't need test_then_or since already locked */ - } - } - - /* GEH - What happens here if is lastprivate, but not last task? */ - __kmp_release_lock(&queue->tq_queue_lck, global_tid); - } - - return pt; -} - -/* Walk a tree of queues starting at queue's first child and return a non-NULL - thunk if one can be scheduled. 
Must only be called when in_parallel=TRUE */ - -static kmpc_thunk_t * -__kmp_find_task_in_descendant_queue(kmp_int32 global_tid, - kmpc_task_queue_t *curr_queue) { - kmpc_thunk_t *pt = NULL; - kmpc_task_queue_t *queue = curr_queue; - - if (curr_queue->tq_first_child != NULL) { - __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid); - // Make sure data structures are in consistent state before querying them - // Seems to work without this call for digital/alpha, needed for IBM/RS6000 - KMP_MB(); - - queue = CCAST(kmpc_task_queue_t *, curr_queue->tq_first_child); - if (queue == NULL) { - __kmp_release_lock(&curr_queue->tq_link_lck, global_tid); - return NULL; - } - - while (queue != NULL) { - int ct; - kmpc_task_queue_t *next; - - ct = ++(queue->tq_ref_count); - __kmp_release_lock(&curr_queue->tq_link_lck, global_tid); - KMP_DEBUG_REF_CTS( - ("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct)); - - pt = __kmp_find_task_in_queue(global_tid, queue); - - if (pt != NULL) { - int ct; - - __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid); - // Make sure data structures in consistent state before querying them - // Seems to work without this for digital/alpha, needed for IBM/RS6000 - KMP_MB(); - - ct = --(queue->tq_ref_count); - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__, - global_tid, queue, ct)); - KMP_DEBUG_ASSERT(queue->tq_ref_count >= 0); - - __kmp_release_lock(&curr_queue->tq_link_lck, global_tid); - - return pt; - } - - /* although reference count stays active during descendant walk, shouldn't - matter since if children still exist, reference counts aren't being - monitored anyway */ - - pt = __kmp_find_task_in_descendant_queue(global_tid, queue); - - if (pt != NULL) { - int ct; - - __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid); - // Make sure data structures in consistent state before querying them - // Seems to work without this for digital/alpha, needed for IBM/RS6000 - KMP_MB(); - - ct = --(queue->tq_ref_count); - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__, - global_tid, queue, ct)); - KMP_DEBUG_ASSERT(ct >= 0); - - __kmp_release_lock(&curr_queue->tq_link_lck, global_tid); - - return pt; - } - - __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid); - // Make sure data structures in consistent state before querying them - // Seems to work without this for digital/alpha, needed for IBM/RS6000 - KMP_MB(); - - next = queue->tq_next_child; - - ct = --(queue->tq_ref_count); - KMP_DEBUG_REF_CTS( - ("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct)); - KMP_DEBUG_ASSERT(ct >= 0); - - queue = next; - } - - __kmp_release_lock(&curr_queue->tq_link_lck, global_tid); - } - - return pt; -} - -/* Walk up the taskq tree looking for a task to execute. If we get to the root, - search the tree for a descendent queue task. 
Must only be called when - in_parallel=TRUE */ -static kmpc_thunk_t * -__kmp_find_task_in_ancestor_queue(kmp_taskq_t *tq, kmp_int32 global_tid, - kmpc_task_queue_t *curr_queue) { - kmpc_task_queue_t *queue; - kmpc_thunk_t *pt; - - pt = NULL; - - if (curr_queue->tq.tq_parent != NULL) { - queue = curr_queue->tq.tq_parent; - - while (queue != NULL) { - if (queue->tq.tq_parent != NULL) { - int ct; - __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); - // Make sure data structures in consistent state before querying them - // Seems to work without this for digital/alpha, needed for IBM/RS6000 - KMP_MB(); - - ct = ++(queue->tq_ref_count); - __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n", __LINE__, - global_tid, queue, ct)); - } - - pt = __kmp_find_task_in_queue(global_tid, queue); - if (pt != NULL) { - if (queue->tq.tq_parent != NULL) { - int ct; - __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); - // Make sure data structures in consistent state before querying them - // Seems to work without this for digital/alpha, needed for IBM/RS6000 - KMP_MB(); - - ct = --(queue->tq_ref_count); - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__, - global_tid, queue, ct)); - KMP_DEBUG_ASSERT(ct >= 0); - - __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); - } - - return pt; - } - - if (queue->tq.tq_parent != NULL) { - int ct; - __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); - // Make sure data structures in consistent state before querying them - // Seems to work without this for digital/alpha, needed for IBM/RS6000 - KMP_MB(); - - ct = --(queue->tq_ref_count); - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__, - global_tid, queue, ct)); - KMP_DEBUG_ASSERT(ct >= 0); - } - queue = queue->tq.tq_parent; - - if (queue != NULL) - __kmp_release_lock(&queue->tq_link_lck, global_tid); - } - } - - pt = __kmp_find_task_in_descendant_queue(global_tid, tq->tq_root); - - return pt; -} - -static int __kmp_taskq_tasks_finished(kmpc_task_queue_t *queue) { - int i; - - /* KMP_MB(); */ /* is this really necessary? 
*/ - - for (i = 0; i < queue->tq_nproc; i++) { - if (queue->tq_th_thunks[i].ai_data != 0) - return FALSE; - } - - return TRUE; -} - -static int __kmp_taskq_has_any_children(kmpc_task_queue_t *queue) { - return (queue->tq_first_child != NULL); -} - -static void __kmp_remove_queue_from_tree(kmp_taskq_t *tq, kmp_int32 global_tid, - kmpc_task_queue_t *queue, - int in_parallel) { -#ifdef KMP_DEBUG - kmp_int32 i; - kmpc_thunk_t *thunk; -#endif - - KF_TRACE(50, - ("Before Deletion of TaskQ at %p on (%d):\n", queue, global_tid)); - KF_DUMP(50, __kmp_dump_task_queue(tq, queue, global_tid)); - - /* sub-queue in a recursion, not the root task queue */ - KMP_DEBUG_ASSERT(queue->tq.tq_parent != NULL); - - if (in_parallel) { - __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); - // Make sure data structures are in consistent state before querying them - // Seems to work without this call for digital/alpha, needed for IBM/RS6000 - KMP_MB(); - } - - KMP_DEBUG_ASSERT(queue->tq_first_child == NULL); - - /* unlink queue from its siblings if any at this level */ - if (queue->tq_prev_child != NULL) - queue->tq_prev_child->tq_next_child = queue->tq_next_child; - if (queue->tq_next_child != NULL) - queue->tq_next_child->tq_prev_child = queue->tq_prev_child; - if (queue->tq.tq_parent->tq_first_child == queue) - queue->tq.tq_parent->tq_first_child = queue->tq_next_child; - - queue->tq_prev_child = NULL; - queue->tq_next_child = NULL; - - if (in_parallel) { - KMP_DEBUG_REF_CTS( - ("line %d gtid %d: Q %p waiting for ref_count of %d to reach 1\n", - __LINE__, global_tid, queue, queue->tq_ref_count)); - - /* wait until all other threads have stopped accessing this queue */ - while (queue->tq_ref_count > 1) { - __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); - - KMP_WAIT_YIELD((volatile kmp_uint32 *)&queue->tq_ref_count, 1, KMP_LE, - NULL); - - __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); - // Make sure data structures are in consistent state before querying them - // Seems to work without this for digital/alpha, needed for IBM/RS6000 - KMP_MB(); - } - - __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); - } - - KMP_DEBUG_REF_CTS( - ("line %d gtid %d: Q %p freeing queue\n", __LINE__, global_tid, queue)); - -#ifdef KMP_DEBUG - KMP_DEBUG_ASSERT(queue->tq_flags & TQF_ALL_TASKS_QUEUED); - KMP_DEBUG_ASSERT(queue->tq_nfull == 0); - - for (i = 0; i < queue->tq_nproc; i++) { - KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0); - } - - i = 0; - for (thunk = queue->tq_free_thunks; thunk != NULL; - thunk = thunk->th.th_next_free) - ++i; - - KMP_ASSERT(i == - queue->tq_nslots + (queue->tq_nproc * __KMP_TASKQ_THUNKS_PER_TH)); -#endif - - /* release storage for queue entry */ - __kmp_free_taskq(tq, queue, TRUE, global_tid); - - KF_TRACE(50, ("After Deletion of TaskQ at %p on (%d):\n", queue, global_tid)); - KF_DUMP(50, __kmp_dump_task_queue_tree(tq, tq->tq_root, global_tid)); -} - -/* Starting from indicated queue, proceed downward through tree and remove all - taskqs which are finished, but only go down to taskqs which have the "nowait" - clause present. Assume this is only called when in_parallel=TRUE. 
*/ - -static void __kmp_find_and_remove_finished_child_taskq( - kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue) { - kmpc_task_queue_t *queue = curr_queue; - - if (curr_queue->tq_first_child != NULL) { - __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid); - // Make sure data structures are in consistent state before querying them - // Seems to work without this call for digital/alpha, needed for IBM/RS6000 - KMP_MB(); - - queue = CCAST(kmpc_task_queue_t *, curr_queue->tq_first_child); - if (queue != NULL) { - __kmp_release_lock(&curr_queue->tq_link_lck, global_tid); - return; - } - - while (queue != NULL) { - kmpc_task_queue_t *next; - int ct = ++(queue->tq_ref_count); - KMP_DEBUG_REF_CTS( - ("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct)); - - /* although reference count stays active during descendant walk, */ - /* shouldn't matter since if children still exist, reference */ - /* counts aren't being monitored anyway */ - - if (queue->tq_flags & TQF_IS_NOWAIT) { - __kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue); - - if ((queue->tq_flags & TQF_ALL_TASKS_QUEUED) && - (queue->tq_nfull == 0) && __kmp_taskq_tasks_finished(queue) && - !__kmp_taskq_has_any_children(queue)) { - - /* Only remove this if we have not already marked it for deallocation. - This should prevent multiple threads from trying to free this. */ - - if (__kmp_test_lock(&queue->tq_queue_lck, global_tid)) { - if (!(queue->tq_flags & TQF_DEALLOCATED)) { - queue->tq_flags |= TQF_DEALLOCATED; - __kmp_release_lock(&queue->tq_queue_lck, global_tid); - - __kmp_remove_queue_from_tree(tq, global_tid, queue, TRUE); - - /* Can't do any more here since can't be sure where sibling queue - * is so just exit this level */ - return; - } else { - __kmp_release_lock(&queue->tq_queue_lck, global_tid); - } - } - /* otherwise, just fall through and decrement reference count */ - } - } - - __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid); - // Make sure data structures are in consistent state before querying them - // Seems to work without this for digital/alpha, needed for IBM/RS6000 - KMP_MB(); - - next = queue->tq_next_child; - - ct = --(queue->tq_ref_count); - KMP_DEBUG_REF_CTS( - ("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct)); - KMP_DEBUG_ASSERT(ct >= 0); - - queue = next; - } - - __kmp_release_lock(&curr_queue->tq_link_lck, global_tid); - } -} - -/* Starting from indicated queue, proceed downward through tree and remove all - taskq's assuming all are finished and assuming NO other threads are executing - at this point. 
*/ -static void __kmp_remove_all_child_taskq(kmp_taskq_t *tq, kmp_int32 global_tid, - kmpc_task_queue_t *queue) { - kmpc_task_queue_t *next_child; - - queue = CCAST(kmpc_task_queue_t *, queue->tq_first_child); - - while (queue != NULL) { - __kmp_remove_all_child_taskq(tq, global_tid, queue); - - next_child = queue->tq_next_child; - queue->tq_flags |= TQF_DEALLOCATED; - __kmp_remove_queue_from_tree(tq, global_tid, queue, FALSE); - queue = next_child; - } -} - -static void __kmp_execute_task_from_queue(kmp_taskq_t *tq, ident_t *loc, - kmp_int32 global_tid, - kmpc_thunk_t *thunk, - int in_parallel) { - kmpc_task_queue_t *queue = thunk->th.th_shareds->sv_queue; - kmp_int32 tid = __kmp_tid_from_gtid(global_tid); - - KF_TRACE(100, ("After dequeueing this Task on (%d):\n", global_tid)); - KF_DUMP(100, __kmp_dump_thunk(tq, thunk, global_tid)); - KF_TRACE(100, ("Task Queue: %p looks like this (%d):\n", queue, global_tid)); - KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid)); - - /* For the taskq task, the curr_thunk pushes and pop pairs are set up as - * follows: - * - * happens exactly once: - * 1) __kmpc_taskq : push (if returning thunk only) - * 4) __kmpc_end_taskq_task : pop - * - * optionally happens *each* time taskq task is dequeued/enqueued: - * 2) __kmpc_taskq_task : pop - * 3) __kmp_execute_task_from_queue : push - * - * execution ordering: 1,(2,3)*,4 - */ - - if (!(thunk->th_flags & TQF_TASKQ_TASK)) { - kmp_int32 index = (queue == tq->tq_root) ? tid : 0; - thunk->th.th_shareds = - CCAST(kmpc_shared_vars_t *, queue->tq_shareds[index].ai_data); - - if (__kmp_env_consistency_check) { - __kmp_push_workshare(global_tid, - (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered - : ct_task, - queue->tq_loc); - } - } else { - if (__kmp_env_consistency_check) - __kmp_push_workshare(global_tid, ct_taskq, queue->tq_loc); - } - - if (in_parallel) { - thunk->th_encl_thunk = tq->tq_curr_thunk[tid]; - tq->tq_curr_thunk[tid] = thunk; - - KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid)); - } - - KF_TRACE(50, ("Begin Executing Thunk %p from queue %p on (%d)\n", thunk, - queue, global_tid)); - thunk->th_task(global_tid, thunk); - KF_TRACE(50, ("End Executing Thunk %p from queue %p on (%d)\n", thunk, queue, - global_tid)); - - if (!(thunk->th_flags & TQF_TASKQ_TASK)) { - if (__kmp_env_consistency_check) - __kmp_pop_workshare(global_tid, - (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered - : ct_task, - queue->tq_loc); - - if (in_parallel) { - tq->tq_curr_thunk[tid] = thunk->th_encl_thunk; - thunk->th_encl_thunk = NULL; - KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid)); - } - - if ((thunk->th_flags & TQF_IS_ORDERED) && in_parallel) { - __kmp_taskq_check_ordered(global_tid, thunk); - } - - __kmp_free_thunk(queue, thunk, in_parallel, global_tid); - - KF_TRACE(100, ("T#%d After freeing thunk: %p, TaskQ looks like this:\n", - global_tid, thunk)); - KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid)); - - if (in_parallel) { - KMP_MB(); /* needed so thunk put on free list before outstanding thunk - count is decremented */ - - KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data >= 1); - - KF_TRACE( - 200, - ("__kmp_execute_task_from_queue: T#%d has %d thunks in queue %p\n", - global_tid, queue->tq_th_thunks[tid].ai_data - 1, queue)); - - queue->tq_th_thunks[tid].ai_data--; - - /* KMP_MB(); */ /* is MB really necessary ? 
*/ - } - - if (queue->tq.tq_parent != NULL && in_parallel) { - int ct; - __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); - ct = --(queue->tq_ref_count); - __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); - KMP_DEBUG_REF_CTS( - ("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct)); - KMP_DEBUG_ASSERT(ct >= 0); - } - } -} - -/* starts a taskq; creates and returns a thunk for the taskq_task */ -/* also, returns pointer to shared vars for this thread in "shareds" arg */ -kmpc_thunk_t *__kmpc_taskq(ident_t *loc, kmp_int32 global_tid, - kmpc_task_t taskq_task, size_t sizeof_thunk, - size_t sizeof_shareds, kmp_int32 flags, - kmpc_shared_vars_t **shareds) { - int in_parallel; - kmp_int32 nslots, nthunks, nshareds, nproc; - kmpc_task_queue_t *new_queue, *curr_queue; - kmpc_thunk_t *new_taskq_thunk; - kmp_info_t *th; - kmp_team_t *team; - kmp_taskq_t *tq; - kmp_int32 tid; - - KE_TRACE(10, ("__kmpc_taskq called (%d)\n", global_tid)); - - th = __kmp_threads[global_tid]; - team = th->th.th_team; - tq = &team->t.t_taskq; - nproc = team->t.t_nproc; - tid = __kmp_tid_from_gtid(global_tid); - - /* find out whether this is a parallel taskq or serialized one. */ - in_parallel = in_parallel_context(team); - - if (!tq->tq_root) { - if (in_parallel) { - /* Vector ORDERED SECTION to taskq version */ - th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo; - - /* Vector ORDERED SECTION to taskq version */ - th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo; - } - - if (in_parallel) { - // This shouldn't be a barrier region boundary, it will confuse the user. - /* Need the boundary to be at the end taskq instead. */ - if (__kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL)) { - /* Creating the active root queue, and we are not the master thread. */ - /* The master thread below created the queue and tasks have been */ - /* enqueued, and the master thread released this barrier. This */ - /* worker thread can now proceed and execute tasks. See also the */ - /* TQF_RELEASE_WORKERS which is used to handle this case. */ - *shareds = - CCAST(kmpc_shared_vars_t *, tq->tq_root->tq_shareds[tid].ai_data); - KE_TRACE(10, ("__kmpc_taskq return (%d)\n", global_tid)); - - return NULL; - } - } - - /* master thread only executes this code */ - if (tq->tq_curr_thunk_capacity < nproc) { - if (tq->tq_curr_thunk) - __kmp_free(tq->tq_curr_thunk); - else { - /* only need to do this once at outer level, i.e. when tq_curr_thunk is - * still NULL */ - __kmp_init_lock(&tq->tq_freelist_lck); - } - - tq->tq_curr_thunk = - (kmpc_thunk_t **)__kmp_allocate(nproc * sizeof(kmpc_thunk_t *)); - tq->tq_curr_thunk_capacity = nproc; - } - - if (in_parallel) - tq->tq_global_flags = TQF_RELEASE_WORKERS; - } - - /* dkp: in future, if flags & TQF_HEURISTICS, will choose nslots based */ - /* on some heuristics (e.g., depth of queue nesting?). */ - nslots = (in_parallel) ? (2 * nproc) : 1; - - /* There must be nproc * __KMP_TASKQ_THUNKS_PER_TH extra slots for pending */ - /* jobs being executed by other threads, and one extra for taskq slot */ - nthunks = (in_parallel) ? (nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH) + 1) - : nslots + 2; - - /* Only the root taskq gets a per-thread array of shareds. */ - /* The rest of the taskq's only get one copy of the shared vars. */ - nshareds = (!tq->tq_root && in_parallel) ? 
nproc : 1; - - /* create overall queue data structure and its components that require - * allocation */ - new_queue = __kmp_alloc_taskq(tq, in_parallel, nslots, nthunks, nshareds, - nproc, sizeof_thunk, sizeof_shareds, - &new_taskq_thunk, global_tid); - - /* rest of new_queue initializations */ - new_queue->tq_flags = flags & TQF_INTERFACE_FLAGS; - - if (in_parallel) { - new_queue->tq_tasknum_queuing = 0; - new_queue->tq_tasknum_serving = 0; - new_queue->tq_flags |= TQF_PARALLEL_CONTEXT; - } - - new_queue->tq_taskq_slot = NULL; - new_queue->tq_nslots = nslots; - new_queue->tq_hiwat = HIGH_WATER_MARK(nslots); - new_queue->tq_nfull = 0; - new_queue->tq_head = 0; - new_queue->tq_tail = 0; - new_queue->tq_loc = loc; - - if ((new_queue->tq_flags & TQF_IS_ORDERED) && in_parallel) { - /* prepare to serve the first-queued task's ORDERED directive */ - new_queue->tq_tasknum_serving = 1; - - /* Vector ORDERED SECTION to taskq version */ - th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo; - - /* Vector ORDERED SECTION to taskq version */ - th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo; - } - - /* create a new thunk for the taskq_task in the new_queue */ - *shareds = CCAST(kmpc_shared_vars_t *, new_queue->tq_shareds[0].ai_data); - - new_taskq_thunk->th.th_shareds = *shareds; - new_taskq_thunk->th_task = taskq_task; - new_taskq_thunk->th_flags = new_queue->tq_flags | TQF_TASKQ_TASK; - new_taskq_thunk->th_status = 0; - - KMP_DEBUG_ASSERT(new_taskq_thunk->th_flags & TQF_TASKQ_TASK); - - // Make sure these inits complete before threads start using this queue - /* KMP_MB(); */ // (necessary?) - - /* insert the new task queue into the tree, but only after all fields - * initialized */ - - if (in_parallel) { - if (!tq->tq_root) { - new_queue->tq.tq_parent = NULL; - new_queue->tq_first_child = NULL; - new_queue->tq_next_child = NULL; - new_queue->tq_prev_child = NULL; - new_queue->tq_ref_count = 1; - tq->tq_root = new_queue; - } else { - curr_queue = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue; - new_queue->tq.tq_parent = curr_queue; - new_queue->tq_first_child = NULL; - new_queue->tq_prev_child = NULL; - new_queue->tq_ref_count = - 1; /* for this the thread that built the queue */ - - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p alloc %d\n", __LINE__, - global_tid, new_queue, new_queue->tq_ref_count)); - - __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid); - - // Make sure data structures are in consistent state before querying them - // Seems to work without this for digital/alpha, needed for IBM/RS6000 - KMP_MB(); - - new_queue->tq_next_child = - CCAST(struct kmpc_task_queue_t *, curr_queue->tq_first_child); - - if (curr_queue->tq_first_child != NULL) - curr_queue->tq_first_child->tq_prev_child = new_queue; - - curr_queue->tq_first_child = new_queue; - - __kmp_release_lock(&curr_queue->tq_link_lck, global_tid); - } - - /* set up thunk stack only after code that determines curr_queue above */ - new_taskq_thunk->th_encl_thunk = tq->tq_curr_thunk[tid]; - tq->tq_curr_thunk[tid] = new_taskq_thunk; - - KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid)); - } else { - new_taskq_thunk->th_encl_thunk = 0; - new_queue->tq.tq_parent = NULL; - new_queue->tq_first_child = NULL; - new_queue->tq_next_child = NULL; - new_queue->tq_prev_child = NULL; - new_queue->tq_ref_count = 1; - } - -#ifdef KMP_DEBUG - KF_TRACE(150, ("Creating TaskQ Task on (%d):\n", global_tid)); - KF_DUMP(150, __kmp_dump_thunk(tq, new_taskq_thunk, global_tid)); - - if (in_parallel) { - KF_TRACE(25, - ("After TaskQ at %p 
Creation on (%d):\n", new_queue, global_tid)); - } else { - KF_TRACE(25, ("After Serial TaskQ at %p Creation on (%d):\n", new_queue, - global_tid)); - } - - KF_DUMP(25, __kmp_dump_task_queue(tq, new_queue, global_tid)); - - if (in_parallel) { - KF_DUMP(50, __kmp_dump_task_queue_tree(tq, tq->tq_root, global_tid)); - } -#endif /* KMP_DEBUG */ - - if (__kmp_env_consistency_check) - __kmp_push_workshare(global_tid, ct_taskq, new_queue->tq_loc); - - KE_TRACE(10, ("__kmpc_taskq return (%d)\n", global_tid)); - - return new_taskq_thunk; -} - -/* ends a taskq; last thread out destroys the queue */ - -void __kmpc_end_taskq(ident_t *loc, kmp_int32 global_tid, - kmpc_thunk_t *taskq_thunk) { -#ifdef KMP_DEBUG - kmp_int32 i; -#endif - kmp_taskq_t *tq; - int in_parallel; - kmp_info_t *th; - kmp_int32 is_outermost; - kmpc_task_queue_t *queue; - kmpc_thunk_t *thunk; - int nproc; - - KE_TRACE(10, ("__kmpc_end_taskq called (%d)\n", global_tid)); - - tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq; - nproc = __kmp_threads[global_tid]->th.th_team->t.t_nproc; - - /* For the outermost taskq only, all but one thread will have taskq_thunk == - * NULL */ - queue = (taskq_thunk == NULL) ? tq->tq_root - : taskq_thunk->th.th_shareds->sv_queue; - - KE_TRACE(50, ("__kmpc_end_taskq queue=%p (%d) \n", queue, global_tid)); - is_outermost = (queue == tq->tq_root); - in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT); - - if (in_parallel) { - kmp_uint32 spins; - - /* this is just a safeguard to release the waiting threads if */ - /* the outermost taskq never queues a task */ - - if (is_outermost && (KMP_MASTER_GTID(global_tid))) { - if (tq->tq_global_flags & TQF_RELEASE_WORKERS) { - /* no lock needed, workers are still in spin mode */ - tq->tq_global_flags &= ~TQF_RELEASE_WORKERS; - - __kmp_end_split_barrier(bs_plain_barrier, global_tid); - } - } - - /* keep dequeueing work until all tasks are queued and dequeued */ - - do { - /* wait until something is available to dequeue */ - KMP_INIT_YIELD(spins); - - while ((queue->tq_nfull == 0) && (queue->tq_taskq_slot == NULL) && - (!__kmp_taskq_has_any_children(queue)) && - (!(queue->tq_flags & TQF_ALL_TASKS_QUEUED))) { - KMP_YIELD_WHEN(TRUE, spins); - } - - /* check to see if we can execute tasks in the queue */ - while (((queue->tq_nfull != 0) || (queue->tq_taskq_slot != NULL)) && - (thunk = __kmp_find_task_in_queue(global_tid, queue)) != NULL) { - KF_TRACE(50, ("Found thunk: %p in primary queue %p (%d)\n", thunk, - queue, global_tid)); - __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel); - } - - /* see if work found can be found in a descendant queue */ - if ((__kmp_taskq_has_any_children(queue)) && - (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) != - NULL) { - - KF_TRACE(50, - ("Stole thunk: %p in descendant queue: %p while waiting in " - "queue: %p (%d)\n", - thunk, thunk->th.th_shareds->sv_queue, queue, global_tid)); - - __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel); - } - - } while ((!(queue->tq_flags & TQF_ALL_TASKS_QUEUED)) || - (queue->tq_nfull != 0)); - - KF_TRACE(50, ("All tasks queued and dequeued in queue: %p (%d)\n", queue, - global_tid)); - - /* wait while all tasks are not finished and more work found - in descendant queues */ - - while ((!__kmp_taskq_tasks_finished(queue)) && - (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) != - NULL) { - - KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in " - "queue: %p (%d)\n", - thunk, 
thunk->th.th_shareds->sv_queue, queue, global_tid)); - - __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel); - } - - KF_TRACE(50, ("No work found in descendent queues or all work finished in " - "queue: %p (%d)\n", - queue, global_tid)); - - if (!is_outermost) { - /* need to return if NOWAIT present and not outermost taskq */ - - if (queue->tq_flags & TQF_IS_NOWAIT) { - __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); - queue->tq_ref_count--; - KMP_DEBUG_ASSERT(queue->tq_ref_count >= 0); - __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); - - KE_TRACE( - 10, ("__kmpc_end_taskq return for nowait case (%d)\n", global_tid)); - - return; - } - - __kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue); - - /* WAIT until all tasks are finished and no child queues exist before - * proceeding */ - KMP_INIT_YIELD(spins); - - while (!__kmp_taskq_tasks_finished(queue) || - __kmp_taskq_has_any_children(queue)) { - thunk = __kmp_find_task_in_ancestor_queue(tq, global_tid, queue); - - if (thunk != NULL) { - KF_TRACE(50, - ("Stole thunk: %p in ancestor queue: %p while waiting in " - "queue: %p (%d)\n", - thunk, thunk->th.th_shareds->sv_queue, queue, global_tid)); - __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, - in_parallel); - } - - KMP_YIELD_WHEN(thunk == NULL, spins); - - __kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue); - } - - __kmp_acquire_lock(&queue->tq_queue_lck, global_tid); - if (!(queue->tq_flags & TQF_DEALLOCATED)) { - queue->tq_flags |= TQF_DEALLOCATED; - } - __kmp_release_lock(&queue->tq_queue_lck, global_tid); - - /* only the allocating thread can deallocate the queue */ - if (taskq_thunk != NULL) { - __kmp_remove_queue_from_tree(tq, global_tid, queue, TRUE); - } - - KE_TRACE( - 10, - ("__kmpc_end_taskq return for non_outermost queue, wait case (%d)\n", - global_tid)); - - return; - } - - // Outermost Queue: steal work from descendants until all tasks are finished - - KMP_INIT_YIELD(spins); - - while (!__kmp_taskq_tasks_finished(queue)) { - thunk = __kmp_find_task_in_descendant_queue(global_tid, queue); - - if (thunk != NULL) { - KF_TRACE(50, - ("Stole thunk: %p in descendant queue: %p while waiting in " - "queue: %p (%d)\n", - thunk, thunk->th.th_shareds->sv_queue, queue, global_tid)); - - __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel); - } - - KMP_YIELD_WHEN(thunk == NULL, spins); - } - - /* Need this barrier to prevent destruction of queue before threads have all - * executed above code */ - /* This may need to be done earlier when NOWAIT is implemented for the - * outermost level */ - - if (!__kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL)) { - /* the queue->tq_flags & TQF_IS_NOWAIT case is not yet handled here; */ - /* for right now, everybody waits, and the master thread destroys the */ - /* remaining queues. 
*/ - - __kmp_remove_all_child_taskq(tq, global_tid, queue); - - /* Now destroy the root queue */ - KF_TRACE(100, ("T#%d Before Deletion of top-level TaskQ at %p:\n", - global_tid, queue)); - KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid)); - -#ifdef KMP_DEBUG - /* the root queue entry */ - KMP_DEBUG_ASSERT((queue->tq.tq_parent == NULL) && - (queue->tq_next_child == NULL)); - - /* children must all be gone by now because of barrier above */ - KMP_DEBUG_ASSERT(queue->tq_first_child == NULL); - - for (i = 0; i < nproc; i++) { - KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0); - } - - for (i = 0, thunk = queue->tq_free_thunks; thunk != NULL; - i++, thunk = thunk->th.th_next_free) - ; - - KMP_DEBUG_ASSERT(i == - queue->tq_nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH)); - - for (i = 0; i < nproc; i++) { - KMP_DEBUG_ASSERT(!tq->tq_curr_thunk[i]); - } -#endif - /* unlink the root queue entry */ - tq->tq_root = NULL; - - /* release storage for root queue entry */ - KF_TRACE(50, ("After Deletion of top-level TaskQ at %p on (%d):\n", queue, - global_tid)); - - queue->tq_flags |= TQF_DEALLOCATED; - __kmp_free_taskq(tq, queue, in_parallel, global_tid); - - KF_DUMP(50, __kmp_dump_task_queue_tree(tq, tq->tq_root, global_tid)); - - /* release the workers now that the data structures are up to date */ - __kmp_end_split_barrier(bs_plain_barrier, global_tid); - } - - th = __kmp_threads[global_tid]; - - /* Reset ORDERED SECTION to parallel version */ - th->th.th_dispatch->th_deo_fcn = 0; - - /* Reset ORDERED SECTION to parallel version */ - th->th.th_dispatch->th_dxo_fcn = 0; - } else { - /* in serial execution context, dequeue the last task */ - /* and execute it, if there were any tasks encountered */ - - if (queue->tq_nfull > 0) { - KMP_DEBUG_ASSERT(queue->tq_nfull == 1); - - thunk = __kmp_dequeue_task(global_tid, queue, in_parallel); - - if (queue->tq_flags & TQF_IS_LAST_TASK) { - /* TQF_IS_LASTPRIVATE, one thing in queue, __kmpc_end_taskq_task() */ - /* has been run so this is last task, run with TQF_IS_LAST_TASK so */ - /* instrumentation does copy-out. */ - - /* no need for test_then_or call since already locked */ - thunk->th_flags |= TQF_IS_LAST_TASK; - } - - KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid, - thunk, queue)); - - __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel); - } - - // destroy the unattached serial queue now that there is no more work to do - KF_TRACE(100, ("Before Deletion of Serialized TaskQ at %p on (%d):\n", - queue, global_tid)); - KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid)); - -#ifdef KMP_DEBUG - i = 0; - for (thunk = queue->tq_free_thunks; thunk != NULL; - thunk = thunk->th.th_next_free) - ++i; - KMP_DEBUG_ASSERT(i == queue->tq_nslots + 1); -#endif - /* release storage for unattached serial queue */ - KF_TRACE(50, - ("Serialized TaskQ at %p deleted on (%d).\n", queue, global_tid)); - - queue->tq_flags |= TQF_DEALLOCATED; - __kmp_free_taskq(tq, queue, in_parallel, global_tid); - } - - KE_TRACE(10, ("__kmpc_end_taskq return (%d)\n", global_tid)); -} - -/* Enqueues a task for thunk previously created by __kmpc_task_buffer. 
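Pieced together from the entry-point comments in this file, the compiler-emitted sequence around these routines is, per taskq, roughly the following. This is a hedged sketch: loc, gtid, taskq_thunk (the value returned by __kmpc_taskq), and task_fn are assumed to be in scope and are not taken from the runtime itself.

  kmpc_thunk_t *t = __kmpc_task_buffer(loc, gtid, taskq_thunk, task_fn);
  /* ... initialize the task's private variables inside *t ... */
  __kmpc_task(loc, gtid, t); /* enqueue; __kmpc_task_buffer already
                                guaranteed the queue was not full */
  /* (the two calls above repeat once per generated task) */
  __kmpc_end_taskq_task(loc, gtid, taskq_thunk); /* done generating tasks */
  __kmpc_end_taskq(loc, gtid, taskq_thunk); /* last thread out destroys it */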
*/ -/* Returns nonzero if just filled up queue */ - -kmp_int32 __kmpc_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk) { - kmp_int32 ret; - kmpc_task_queue_t *queue; - int in_parallel; - kmp_taskq_t *tq; - - KE_TRACE(10, ("__kmpc_task called (%d)\n", global_tid)); - - KMP_DEBUG_ASSERT(!(thunk->th_flags & - TQF_TASKQ_TASK)); /* thunk->th_task is a regular task */ - - tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq; - queue = thunk->th.th_shareds->sv_queue; - in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT); - - if (in_parallel && (thunk->th_flags & TQF_IS_ORDERED)) - thunk->th_tasknum = ++queue->tq_tasknum_queuing; - - /* For serial execution dequeue the preceding task and execute it, if one - * exists */ - /* This cannot be the last task. That one is handled in __kmpc_end_taskq */ - - if (!in_parallel && queue->tq_nfull > 0) { - kmpc_thunk_t *prev_thunk; - - KMP_DEBUG_ASSERT(queue->tq_nfull == 1); - - prev_thunk = __kmp_dequeue_task(global_tid, queue, in_parallel); - - KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid, - prev_thunk, queue)); - - __kmp_execute_task_from_queue(tq, loc, global_tid, prev_thunk, in_parallel); - } - - /* The instrumentation sequence is: __kmpc_task_buffer(), initialize private - variables, __kmpc_task(). The __kmpc_task_buffer routine checks that the - task queue is not full and allocates a thunk (which is then passed to - __kmpc_task()). So, the enqueue below should never fail due to a full - queue. */ - - KF_TRACE(100, ("After enqueueing this Task on (%d):\n", global_tid)); - KF_DUMP(100, __kmp_dump_thunk(tq, thunk, global_tid)); - - ret = __kmp_enqueue_task(tq, global_tid, queue, thunk, in_parallel); - - KF_TRACE(100, ("Task Queue looks like this on (%d):\n", global_tid)); - KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid)); - - KE_TRACE(10, ("__kmpc_task return (%d)\n", global_tid)); - - return ret; -} - -/* enqueues a taskq_task for thunk previously created by __kmpc_taskq */ -/* this should never be called unless in a parallel context */ - -void __kmpc_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, - kmp_int32 status) { - kmpc_task_queue_t *queue; - kmp_taskq_t *tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq; - int tid = __kmp_tid_from_gtid(global_tid); - - KE_TRACE(10, ("__kmpc_taskq_task called (%d)\n", global_tid)); - KF_TRACE(100, ("TaskQ Task argument thunk on (%d):\n", global_tid)); - KF_DUMP(100, __kmp_dump_thunk(tq, thunk, global_tid)); - - queue = thunk->th.th_shareds->sv_queue; - - if (__kmp_env_consistency_check) - __kmp_pop_workshare(global_tid, ct_taskq, loc); - - /* thunk->th_task is the taskq_task */ - KMP_DEBUG_ASSERT(thunk->th_flags & TQF_TASKQ_TASK); - - /* not supposed to call __kmpc_taskq_task if it's already enqueued */ - KMP_DEBUG_ASSERT(queue->tq_taskq_slot == NULL); - - /* dequeue taskq thunk from curr_thunk stack */ - tq->tq_curr_thunk[tid] = thunk->th_encl_thunk; - thunk->th_encl_thunk = NULL; - - KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid)); - - thunk->th_status = status; - - // Flush thunk->th_status before taskq_task enqueued to avoid race condition - KMP_MB(); - - /* enqueue taskq_task in thunk into special slot in queue */ - /* GEH - probably don't need to lock taskq slot since only one */ - /* thread enqueues & already a lock set at dequeue point */ - - queue->tq_taskq_slot = thunk; - - KE_TRACE(10, ("__kmpc_taskq_task return (%d)\n", global_tid)); -} - -/* ends a taskq_task; done generating tasks */ - -void 
__kmpc_end_taskq_task(ident_t *loc, kmp_int32 global_tid, - kmpc_thunk_t *thunk) { - kmp_taskq_t *tq; - kmpc_task_queue_t *queue; - int in_parallel; - int tid; - - KE_TRACE(10, ("__kmpc_end_taskq_task called (%d)\n", global_tid)); - - tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq; - queue = thunk->th.th_shareds->sv_queue; - in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT); - tid = __kmp_tid_from_gtid(global_tid); - - if (__kmp_env_consistency_check) - __kmp_pop_workshare(global_tid, ct_taskq, loc); - - if (in_parallel) { -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - KMP_TEST_THEN_OR32(RCAST(volatile kmp_uint32 *, &queue->tq_flags), - TQF_ALL_TASKS_QUEUED); -#else - { - __kmp_acquire_lock(&queue->tq_queue_lck, global_tid); - - // Make sure data structures are in consistent state before querying them - // Seems to work without this for digital/alpha, needed for IBM/RS6000 - KMP_MB(); - - queue->tq_flags |= TQF_ALL_TASKS_QUEUED; - __kmp_release_lock(&queue->tq_queue_lck, global_tid); - } -#endif - } - - if (thunk->th_flags & TQF_IS_LASTPRIVATE) { - /* Normally, __kmp_find_task_in_queue() refuses to schedule the last task in - the queue if TQF_IS_LASTPRIVATE so we can positively identify that last - task and run it with its TQF_IS_LAST_TASK bit turned on in th_flags. - When __kmpc_end_taskq_task() is called we are done generating all the - tasks, so we know the last one in the queue is the lastprivate task. - Mark the queue as having gotten to this state via tq_flags & - TQF_IS_LAST_TASK; when that task actually executes mark it via th_flags & - TQF_IS_LAST_TASK (this th_flags bit signals the instrumented code to do - copy-outs after execution). */ - if (!in_parallel) { - /* No synchronization needed for serial context */ - queue->tq_flags |= TQF_IS_LAST_TASK; - } else { -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - KMP_TEST_THEN_OR32(RCAST(volatile kmp_uint32 *, &queue->tq_flags), - TQF_IS_LAST_TASK); -#else - { - __kmp_acquire_lock(&queue->tq_queue_lck, global_tid); - - // Make sure data structures in consistent state before querying them - // Seems to work without this for digital/alpha, needed for IBM/RS6000 - KMP_MB(); - - queue->tq_flags |= TQF_IS_LAST_TASK; - __kmp_release_lock(&queue->tq_queue_lck, global_tid); - } -#endif - /* to prevent race condition where last task is dequeued but */ - /* flag isn't visible yet (not sure about this) */ - KMP_MB(); - } - } - - /* dequeue taskq thunk from curr_thunk stack */ - if (in_parallel) { - tq->tq_curr_thunk[tid] = thunk->th_encl_thunk; - thunk->th_encl_thunk = NULL; - - KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid)); - } - - KE_TRACE(10, ("__kmpc_end_taskq_task return (%d)\n", global_tid)); -} - -/* returns thunk for a regular task based on taskq_thunk */ -/* (__kmpc_taskq_task does the analogous thing for a TQF_TASKQ_TASK) */ - -kmpc_thunk_t *__kmpc_task_buffer(ident_t *loc, kmp_int32 global_tid, - kmpc_thunk_t *taskq_thunk, kmpc_task_t task) { - kmp_taskq_t *tq; - kmpc_task_queue_t *queue; - kmpc_thunk_t *new_thunk; - int in_parallel; - - KE_TRACE(10, ("__kmpc_task_buffer called (%d)\n", global_tid)); - - KMP_DEBUG_ASSERT( - taskq_thunk->th_flags & - TQF_TASKQ_TASK); /* taskq_thunk->th_task is the taskq_task */ - - tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq; - queue = taskq_thunk->th.th_shareds->sv_queue; - in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT); - - /* The instrumentation sequence is: __kmpc_task_buffer(), initialize private - variables, __kmpc_task(). 
The __kmpc_task_buffer routine checks that the - task queue is not full and allocates a thunk (which is then passed to - __kmpc_task()). So, we can pre-allocate a thunk here assuming it will be - the next to be enqueued in __kmpc_task(). */ - - new_thunk = __kmp_alloc_thunk(queue, in_parallel, global_tid); - new_thunk->th.th_shareds = - CCAST(kmpc_shared_vars_t *, queue->tq_shareds[0].ai_data); - new_thunk->th_encl_thunk = NULL; - new_thunk->th_task = task; - - /* GEH - shouldn't need to lock the read of tq_flags here */ - new_thunk->th_flags = queue->tq_flags & TQF_INTERFACE_FLAGS; - - new_thunk->th_status = 0; - - KMP_DEBUG_ASSERT(!(new_thunk->th_flags & TQF_TASKQ_TASK)); - - KF_TRACE(100, ("Creating Regular Task on (%d):\n", global_tid)); - KF_DUMP(100, __kmp_dump_thunk(tq, new_thunk, global_tid)); - - KE_TRACE(10, ("__kmpc_task_buffer return (%d)\n", global_tid)); - - return new_thunk; -} Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_taskq.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_ftn_cdecl.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_ftn_cdecl.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_ftn_cdecl.cpp (nonexistent) @@ -1,35 +0,0 @@ -/* - * kmp_ftn_cdecl.cpp -- Fortran __cdecl linkage support for OpenMP. - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#include "kmp.h" -#include "kmp_affinity.h" - -#if KMP_OS_WINDOWS -#if defined KMP_WIN_CDECL || !KMP_DYNAMIC_LIB -#define KMP_FTN_ENTRIES KMP_FTN_UPPER -#endif -#elif KMP_OS_UNIX -#define KMP_FTN_ENTRIES KMP_FTN_PLAIN -#endif - -// Note: This string is not printed when KMP_VERSION=1. -char const __kmp_version_ftncdecl[] = - KMP_VERSION_PREFIX "Fortran __cdecl OMP support: " -#ifdef KMP_FTN_ENTRIES - "yes"; -#define FTN_STDCALL /* no stdcall */ -#include "kmp_ftn_os.h" -#include "kmp_ftn_entry.h" -#else - "no"; -#endif /* KMP_FTN_ENTRIES */ Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_ftn_cdecl.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_io.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_io.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_io.h (nonexistent) @@ -1,39 +0,0 @@ -/* - * kmp_io.h -- RTL IO header file. 
- */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#ifndef KMP_IO_H -#define KMP_IO_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* ------------------------------------------------------------------------ */ - -enum kmp_io { kmp_out = 0, kmp_err }; - -extern kmp_bootstrap_lock_t __kmp_stdio_lock; /* Control stdio functions */ -extern kmp_bootstrap_lock_t - __kmp_console_lock; /* Control console initialization */ - -extern void __kmp_vprintf(enum kmp_io stream, char const *format, va_list ap); -extern void __kmp_printf(char const *format, ...); -extern void __kmp_printf_no_lock(char const *format, ...); -extern void __kmp_fprintf(enum kmp_io stream, char const *format, ...); -extern void __kmp_close_console(void); - -#ifdef __cplusplus -} -#endif - -#endif /* KMP_IO_H */ Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_io.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_stub.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_stub.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_stub.h (nonexistent) @@ -1,59 +0,0 @@ -/* - * kmp_stub.h - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - -#ifndef KMP_STUB_H -#define KMP_STUB_H - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -void __kmps_set_blocktime(int arg); -int __kmps_get_blocktime(void); -void __kmps_set_dynamic(int arg); -int __kmps_get_dynamic(void); -void __kmps_set_library(int arg); -int __kmps_get_library(void); -void __kmps_set_nested(int arg); -int __kmps_get_nested(void); -void __kmps_set_stacksize(int arg); -int __kmps_get_stacksize(); - -#ifndef KMP_SCHED_TYPE_DEFINED -#define KMP_SCHED_TYPE_DEFINED -typedef enum kmp_sched { - kmp_sched_static = 1, // mapped to kmp_sch_static_chunked (33) - kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked (35) - kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked (36) - kmp_sched_auto = 4, // mapped to kmp_sch_auto (38) - kmp_sched_default = kmp_sched_static // default scheduling -} kmp_sched_t; -#endif -void __kmps_set_schedule(kmp_sched_t kind, int modifier); -void __kmps_get_schedule(kmp_sched_t *kind, int *modifier); - -#if OMP_40_ENABLED -void __kmps_set_proc_bind(kmp_proc_bind_t arg); -kmp_proc_bind_t __kmps_get_proc_bind(void); -#endif /* OMP_40_ENABLED */ - -double __kmps_get_wtime(); -double __kmps_get_wtick(); - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif // KMP_STUB_H - -// end of file // Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_stub.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_lock.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_lock.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_lock.cpp (nonexistent) @@ -1,3965 +0,0 @@ -/* - * kmp_lock.cpp -- lock-related functions - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#include -#include - -#include "kmp.h" -#include "kmp_i18n.h" -#include "kmp_io.h" -#include "kmp_itt.h" -#include "kmp_lock.h" -#include "kmp_wait_release.h" -#include "kmp_wrapper_getpid.h" - -#include "tsan_annotations.h" - -#if KMP_USE_FUTEX -#include -#include -// We should really include , but that causes compatibility problems on -// different Linux* OS distributions that either require that you include (or -// break when you try to include) . Since all we need is the two -// macros below (which are part of the kernel ABI, so can't change) we just -// define the constants here and don't include -#ifndef FUTEX_WAIT -#define FUTEX_WAIT 0 -#endif -#ifndef FUTEX_WAKE -#define FUTEX_WAKE 1 -#endif -#endif - -/* Implement spin locks for internal library use. */ -/* The algorithm implemented is Lamport's bakery lock [1974]. 
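The scheme named above reappears further down as kmp_ticket_lock_t: each acquirer takes the next ticket and spins until now_serving reaches it. A minimal std::atomic sketch of that next_ticket/now_serving core, for orientation only; the runtime version adds yielding, consistency checks, and nestable variants.

  #include <atomic>
  #include <cstdint>

  struct ticket_lock {
    std::atomic<uint32_t> next_ticket{0};
    std::atomic<uint32_t> now_serving{0};
    void lock() {
      // Take a ticket; relaxed suffices here because the acquire load
      // below pairs with the release increment in unlock().
      uint32_t my = next_ticket.fetch_add(1, std::memory_order_relaxed);
      while (now_serving.load(std::memory_order_acquire) != my)
        ; // spin (the runtime yields here instead of burning the CPU)
    }
    void unlock() {
      now_serving.fetch_add(1, std::memory_order_release); // admit the next
    }
  };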
*/ - -void __kmp_validate_locks(void) { - int i; - kmp_uint32 x, y; - - /* Check to make sure unsigned arithmetic does wraps properly */ - x = ~((kmp_uint32)0) - 2; - y = x - 2; - - for (i = 0; i < 8; ++i, ++x, ++y) { - kmp_uint32 z = (x - y); - KMP_ASSERT(z == 2); - } - - KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0); -} - -/* ------------------------------------------------------------------------ */ -/* test and set locks */ - -// For the non-nested locks, we can only assume that the first 4 bytes were -// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel -// compiler only allocates a 4 byte pointer on IA-32 architecture. On -// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated. -// -// gcc reserves >= 8 bytes for nested locks, so we can assume that the -// entire 8 bytes were allocated for nested locks on all 64-bit platforms. - -static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) { - return KMP_LOCK_STRIP(KMP_ATOMIC_LD_RLX(&lck->lk.poll)) - 1; -} - -static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck) { - return lck->lk.depth_locked != -1; -} - -__forceinline static int -__kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) { - KMP_MB(); - -#ifdef USE_LOCK_PROFILE - kmp_uint32 curr = KMP_LOCK_STRIP(lck->lk.poll); - if ((curr != 0) && (curr != gtid + 1)) - __kmp_printf("LOCK CONTENTION: %p\n", lck); -/* else __kmp_printf( "." );*/ -#endif /* USE_LOCK_PROFILE */ - - kmp_int32 tas_free = KMP_LOCK_FREE(tas); - kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); - - if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free && - __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) { - KMP_FSYNC_ACQUIRED(lck); - return KMP_LOCK_ACQUIRED_FIRST; - } - - kmp_uint32 spins; - KMP_FSYNC_PREPARE(lck); - KMP_INIT_YIELD(spins); - if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { - KMP_YIELD(TRUE); - } else { - KMP_YIELD_SPIN(spins); - } - - kmp_backoff_t backoff = __kmp_spin_backoff_params; - while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free || - !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) { - __kmp_spin_backoff(&backoff); - if (TCR_4(__kmp_nth) > - (__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc)) { - KMP_YIELD(TRUE); - } else { - KMP_YIELD_SPIN(spins); - } - } - KMP_FSYNC_ACQUIRED(lck); - return KMP_LOCK_ACQUIRED_FIRST; -} - -int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { - int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid); - ANNOTATE_TAS_ACQUIRED(lck); - return retval; -} - -static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck, - kmp_int32 gtid) { - char const *const func = "omp_set_lock"; - if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && - __kmp_is_tas_lock_nestable(lck)) { - KMP_FATAL(LockNestableUsedAsSimple, func); - } - if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) { - KMP_FATAL(LockIsAlreadyOwned, func); - } - return __kmp_acquire_tas_lock(lck, gtid); -} - -int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { - kmp_int32 tas_free = KMP_LOCK_FREE(tas); - kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); - if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free && - __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) { - KMP_FSYNC_ACQUIRED(lck); - return TRUE; - } - return FALSE; -} - -static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck, - kmp_int32 gtid) { - char const *const func = "omp_test_lock"; - if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && - __kmp_is_tas_lock_nestable(lck)) { - KMP_FATAL(LockNestableUsedAsSimple, func); - } - return __kmp_test_tas_lock(lck, gtid); -} - -int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { - KMP_MB(); /* Flush all pending memory write invalidates. */ - - KMP_FSYNC_RELEASING(lck); - ANNOTATE_TAS_RELEASED(lck); - KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(tas)); - KMP_MB(); /* Flush all pending memory write invalidates. */ - - KMP_YIELD(TCR_4(__kmp_nth) > - (__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc)); - return KMP_LOCK_RELEASED; -} - -static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck, - kmp_int32 gtid) { - char const *const func = "omp_unset_lock"; - KMP_MB(); /* in case another processor initialized lock */ - if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && - __kmp_is_tas_lock_nestable(lck)) { - KMP_FATAL(LockNestableUsedAsSimple, func); - } - if (__kmp_get_tas_lock_owner(lck) == -1) { - KMP_FATAL(LockUnsettingFree, func); - } - if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) >= 0) && - (__kmp_get_tas_lock_owner(lck) != gtid)) { - KMP_FATAL(LockUnsettingSetByAnother, func); - } - return __kmp_release_tas_lock(lck, gtid); -} - -void __kmp_init_tas_lock(kmp_tas_lock_t *lck) { - lck->lk.poll = KMP_LOCK_FREE(tas); -} - -void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck) { lck->lk.poll = 0; } - -static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck) { - char const *const func = "omp_destroy_lock"; - if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && - __kmp_is_tas_lock_nestable(lck)) { - KMP_FATAL(LockNestableUsedAsSimple, func); - } - if (__kmp_get_tas_lock_owner(lck) != -1) { - KMP_FATAL(LockStillOwned, func); - } - __kmp_destroy_tas_lock(lck); -} - -// nested test and set locks - -int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { - KMP_DEBUG_ASSERT(gtid >= 0); - - if (__kmp_get_tas_lock_owner(lck) == gtid) { - lck->lk.depth_locked += 1; - return KMP_LOCK_ACQUIRED_NEXT; - } else { - __kmp_acquire_tas_lock_timed_template(lck, gtid); - ANNOTATE_TAS_ACQUIRED(lck); - lck->lk.depth_locked = 1; - return KMP_LOCK_ACQUIRED_FIRST; - } -} - -static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t *lck, - kmp_int32 gtid) { - char const *const func = "omp_set_nest_lock"; - if (!__kmp_is_tas_lock_nestable(lck)) { - KMP_FATAL(LockSimpleUsedAsNestable, func); - } - return __kmp_acquire_nested_tas_lock(lck, gtid); -} - -int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { - int retval; - - KMP_DEBUG_ASSERT(gtid >= 0); - - if (__kmp_get_tas_lock_owner(lck) == gtid) { - retval = ++lck->lk.depth_locked; - } else if (!__kmp_test_tas_lock(lck, gtid)) { - retval = 0; - } else { - KMP_MB(); - retval = lck->lk.depth_locked = 1; - } - return retval; -} - -static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t *lck, - kmp_int32 gtid) { - char const *const func = "omp_test_nest_lock"; - if (!__kmp_is_tas_lock_nestable(lck)) { - KMP_FATAL(LockSimpleUsedAsNestable, func); - } - return __kmp_test_nested_tas_lock(lck, gtid); -} - -int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { - KMP_DEBUG_ASSERT(gtid >= 0); - - KMP_MB(); - if (--(lck->lk.depth_locked) == 0) { - __kmp_release_tas_lock(lck, gtid); - return KMP_LOCK_RELEASED; - } - return KMP_LOCK_STILL_HELD; -} - -static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t *lck, - kmp_int32 gtid) { - char const *const func = "omp_unset_nest_lock"; - KMP_MB(); /* in case another processor initialized lock */ - if (!__kmp_is_tas_lock_nestable(lck)) { - KMP_FATAL(LockSimpleUsedAsNestable, func); - } - if (__kmp_get_tas_lock_owner(lck) == -1) { - KMP_FATAL(LockUnsettingFree, func); - } - if (__kmp_get_tas_lock_owner(lck) != gtid) { - KMP_FATAL(LockUnsettingSetByAnother, func); - } - return __kmp_release_nested_tas_lock(lck, gtid); -} - -void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck) { - __kmp_init_tas_lock(lck); - lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks -} - -void 
__kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck) { - __kmp_destroy_tas_lock(lck); - lck->lk.depth_locked = 0; -} - -static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) { - char const *const func = "omp_destroy_nest_lock"; - if (!__kmp_is_tas_lock_nestable(lck)) { - KMP_FATAL(LockSimpleUsedAsNestable, func); - } - if (__kmp_get_tas_lock_owner(lck) != -1) { - KMP_FATAL(LockStillOwned, func); - } - __kmp_destroy_nested_tas_lock(lck); -} - -#if KMP_USE_FUTEX - -/* ------------------------------------------------------------------------ */ -/* futex locks */ - -// futex locks are really just test and set locks, with a different method -// of handling contention. They take the same amount of space as test and -// set locks, and are allocated the same way (i.e. use the area allocated by -// the compiler for non-nested locks / allocate nested locks on the heap). - -static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) { - return KMP_LOCK_STRIP((TCR_4(lck->lk.poll) >> 1)) - 1; -} - -static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) { - return lck->lk.depth_locked != -1; -} - -__forceinline static int -__kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck, kmp_int32 gtid) { - kmp_int32 gtid_code = (gtid + 1) << 1; - - KMP_MB(); - -#ifdef USE_LOCK_PROFILE - kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll)); - if ((curr != 0) && (curr != gtid_code)) - __kmp_printf("LOCK CONTENTION: %p\n", lck); -/* else __kmp_printf( "." );*/ -#endif /* USE_LOCK_PROFILE */ - - KMP_FSYNC_PREPARE(lck); - KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n", - lck, lck->lk.poll, gtid)); - - kmp_int32 poll_val; - - while ((poll_val = KMP_COMPARE_AND_STORE_RET32( - &(lck->lk.poll), KMP_LOCK_FREE(futex), - KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { - - kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; - KA_TRACE( - 1000, - ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n", - lck, gtid, poll_val, cond)); - - // NOTE: if you try to use the following condition for this branch - // - // if ( poll_val & 1 == 0 ) - // - // Then the 12.0 compiler has a bug where the following block will - // always be skipped, regardless of the value of the LSB of poll_val. - if (!cond) { - // Try to set the lsb in the poll to indicate to the owner - // thread that they need to wake this thread up. - if (!KMP_COMPARE_AND_STORE_REL32(&(lck->lk.poll), poll_val, - poll_val | KMP_LOCK_BUSY(1, futex))) { - KA_TRACE( - 1000, - ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n", - lck, lck->lk.poll, gtid)); - continue; - } - poll_val |= KMP_LOCK_BUSY(1, futex); - - KA_TRACE(1000, - ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck, - lck->lk.poll, gtid)); - } - - KA_TRACE( - 1000, - ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n", - lck, gtid, poll_val)); - - kmp_int32 rc; - if ((rc = syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL, - NULL, 0)) != 0) { - KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) " - "failed (rc=%d errno=%d)\n", - lck, gtid, poll_val, rc, errno)); - continue; - } - - KA_TRACE(1000, - ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n", - lck, gtid, poll_val)); - // This thread has now done a successful futex wait call and was entered on - // the OS futex queue. We must now perform a futex wake call when releasing - // the lock, as we have no idea how many other threads are in the queue. 
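- // Setting bit 0 of gtid_code below means that if this thread later wins
- // the CAS at the top of the loop, the value stored into lk.poll still
- // carries the waiter bit, so __kmp_release_futex_lock (which tests that
- // bit) performs the FUTEX_WAKE this comment calls for.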
- gtid_code |= 1; - } - - KMP_FSYNC_ACQUIRED(lck); - KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck, - lck->lk.poll, gtid)); - return KMP_LOCK_ACQUIRED_FIRST; -} - -int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { - int retval = __kmp_acquire_futex_lock_timed_template(lck, gtid); - ANNOTATE_FUTEX_ACQUIRED(lck); - return retval; -} - -static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck, - kmp_int32 gtid) { - char const *const func = "omp_set_lock"; - if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) && - __kmp_is_futex_lock_nestable(lck)) { - KMP_FATAL(LockNestableUsedAsSimple, func); - } - if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) == gtid)) { - KMP_FATAL(LockIsAlreadyOwned, func); - } - return __kmp_acquire_futex_lock(lck, gtid); -} - -int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { - if (KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(futex), - KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { - KMP_FSYNC_ACQUIRED(lck); - return TRUE; - } - return FALSE; -} - -static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck, - kmp_int32 gtid) { - char const *const func = "omp_test_lock"; - if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) && - __kmp_is_futex_lock_nestable(lck)) { - KMP_FATAL(LockNestableUsedAsSimple, func); - } - return __kmp_test_futex_lock(lck, gtid); -} - -int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { - KMP_MB(); /* Flush all pending memory write invalidates. */ - - KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n", - lck, lck->lk.poll, gtid)); - - KMP_FSYNC_RELEASING(lck); - ANNOTATE_FUTEX_RELEASED(lck); - - kmp_int32 poll_val = KMP_XCHG_FIXED32(&(lck->lk.poll), KMP_LOCK_FREE(futex)); - - KA_TRACE(1000, - ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n", - lck, gtid, poll_val)); - - if (KMP_LOCK_STRIP(poll_val) & 1) { - KA_TRACE(1000, - ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n", - lck, gtid)); - syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), - NULL, NULL, 0); - } - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck, - lck->lk.poll, gtid)); - - KMP_YIELD(TCR_4(__kmp_nth) > - (__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc)); - return KMP_LOCK_RELEASED; -} - -static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck, - kmp_int32 gtid) { - char const *const func = "omp_unset_lock"; - KMP_MB(); /* in case another processor initialized lock */ - if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) && - __kmp_is_futex_lock_nestable(lck)) { - KMP_FATAL(LockNestableUsedAsSimple, func); - } - if (__kmp_get_futex_lock_owner(lck) == -1) { - KMP_FATAL(LockUnsettingFree, func); - } - if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) >= 0) && - (__kmp_get_futex_lock_owner(lck) != gtid)) { - KMP_FATAL(LockUnsettingSetByAnother, func); - } - return __kmp_release_futex_lock(lck, gtid); -} - -void __kmp_init_futex_lock(kmp_futex_lock_t *lck) { - TCW_4(lck->lk.poll, KMP_LOCK_FREE(futex)); -} - -void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) { lck->lk.poll = 0; } - -static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) { - char const *const func = "omp_destroy_lock"; - if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) && - __kmp_is_futex_lock_nestable(lck)) { - KMP_FATAL(LockNestableUsedAsSimple, func); - } - if (__kmp_get_futex_lock_owner(lck) != -1) { - KMP_FATAL(LockStillOwned, func); - } - __kmp_destroy_futex_lock(lck); -} - -// nested futex locks - -int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { - KMP_DEBUG_ASSERT(gtid >= 0); - - if (__kmp_get_futex_lock_owner(lck) == gtid) { - lck->lk.depth_locked += 1; - return KMP_LOCK_ACQUIRED_NEXT; - } else { - __kmp_acquire_futex_lock_timed_template(lck, gtid); - ANNOTATE_FUTEX_ACQUIRED(lck); - lck->lk.depth_locked = 1; - return KMP_LOCK_ACQUIRED_FIRST; - } -} - -static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck, - kmp_int32 gtid) { - char const *const func = "omp_set_nest_lock"; - if (!__kmp_is_futex_lock_nestable(lck)) { - KMP_FATAL(LockSimpleUsedAsNestable, func); - } - return __kmp_acquire_nested_futex_lock(lck, gtid); -} - -int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { - int retval; - - KMP_DEBUG_ASSERT(gtid >= 0); - - if (__kmp_get_futex_lock_owner(lck) == gtid) { - retval = ++lck->lk.depth_locked; - } else if (!__kmp_test_futex_lock(lck, gtid)) { - retval = 0; - } else { - KMP_MB(); - retval = lck->lk.depth_locked = 1; - } - return retval; -} - -static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck, - kmp_int32 gtid) { - char const *const func = "omp_test_nest_lock"; - if (!__kmp_is_futex_lock_nestable(lck)) { - KMP_FATAL(LockSimpleUsedAsNestable, func); - } - return __kmp_test_nested_futex_lock(lck, gtid); -} - -int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { - KMP_DEBUG_ASSERT(gtid >= 0); - - KMP_MB(); - if (--(lck->lk.depth_locked) == 0) { - __kmp_release_futex_lock(lck, gtid); - return KMP_LOCK_RELEASED; - } - return KMP_LOCK_STILL_HELD; -} - -static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck, - kmp_int32 gtid) { - char const *const func = "omp_unset_nest_lock"; - KMP_MB(); /* in case another processor initialized lock */ - if (!__kmp_is_futex_lock_nestable(lck)) { - KMP_FATAL(LockSimpleUsedAsNestable, func); - } - if (__kmp_get_futex_lock_owner(lck) == -1) { - KMP_FATAL(LockUnsettingFree, func); - } - if (__kmp_get_futex_lock_owner(lck) != gtid) { - KMP_FATAL(LockUnsettingSetByAnother, func); - } - return __kmp_release_nested_futex_lock(lck, gtid); -} - -void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) { - __kmp_init_futex_lock(lck); - 
lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks -} - -void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) { - __kmp_destroy_futex_lock(lck); - lck->lk.depth_locked = 0; -} - -static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) { - char const *const func = "omp_destroy_nest_lock"; - if (!__kmp_is_futex_lock_nestable(lck)) { - KMP_FATAL(LockSimpleUsedAsNestable, func); - } - if (__kmp_get_futex_lock_owner(lck) != -1) { - KMP_FATAL(LockStillOwned, func); - } - __kmp_destroy_nested_futex_lock(lck); -} - -#endif // KMP_USE_FUTEX - -/* ------------------------------------------------------------------------ */ -/* ticket (bakery) locks */ - -static kmp_int32 __kmp_get_ticket_lock_owner(kmp_ticket_lock_t *lck) { - return std::atomic_load_explicit(&lck->lk.owner_id, - std::memory_order_relaxed) - - 1; -} - -static inline bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t *lck) { - return std::atomic_load_explicit(&lck->lk.depth_locked, - std::memory_order_relaxed) != -1; -} - -static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) { - return std::atomic_load_explicit((std::atomic *)now_serving, - std::memory_order_acquire) == my_ticket; -} - -__forceinline static int -__kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck, - kmp_int32 gtid) { - kmp_uint32 my_ticket = std::atomic_fetch_add_explicit( - &lck->lk.next_ticket, 1U, std::memory_order_relaxed); - -#ifdef USE_LOCK_PROFILE - if (std::atomic_load_explicit(&lck->lk.now_serving, - std::memory_order_relaxed) != my_ticket) - __kmp_printf("LOCK CONTENTION: %p\n", lck); -/* else __kmp_printf( "." );*/ -#endif /* USE_LOCK_PROFILE */ - - if (std::atomic_load_explicit(&lck->lk.now_serving, - std::memory_order_acquire) == my_ticket) { - return KMP_LOCK_ACQUIRED_FIRST; - } - KMP_WAIT_YIELD_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck); - return KMP_LOCK_ACQUIRED_FIRST; -} - -int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { - int retval = __kmp_acquire_ticket_lock_timed_template(lck, gtid); - ANNOTATE_TICKET_ACQUIRED(lck); - return retval; -} - -static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t *lck, - kmp_int32 gtid) { - char const *const func = "omp_set_lock"; - - if (!std::atomic_load_explicit(&lck->lk.initialized, - std::memory_order_relaxed)) { - KMP_FATAL(LockIsUninitialized, func); - } - if (lck->lk.self != lck) { - KMP_FATAL(LockIsUninitialized, func); - } - if (__kmp_is_ticket_lock_nestable(lck)) { - KMP_FATAL(LockNestableUsedAsSimple, func); - } - if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) == gtid)) { - KMP_FATAL(LockIsAlreadyOwned, func); - } - - __kmp_acquire_ticket_lock(lck, gtid); - - std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1, - std::memory_order_relaxed); - return KMP_LOCK_ACQUIRED_FIRST; -} - -int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { - kmp_uint32 my_ticket = std::atomic_load_explicit(&lck->lk.next_ticket, - std::memory_order_relaxed); - - if (std::atomic_load_explicit(&lck->lk.now_serving, - std::memory_order_relaxed) == my_ticket) { - kmp_uint32 next_ticket = my_ticket + 1; - if (std::atomic_compare_exchange_strong_explicit( - &lck->lk.next_ticket, &my_ticket, next_ticket, - std::memory_order_acquire, std::memory_order_acquire)) { - return TRUE; - } - } - return FALSE; -} - -static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck, - kmp_int32 gtid) { - char const *const func = "omp_test_lock"; - - if 
(!std::atomic_load_explicit(&lck->lk.initialized, - std::memory_order_relaxed)) { - KMP_FATAL(LockIsUninitialized, func); - } - if (lck->lk.self != lck) { - KMP_FATAL(LockIsUninitialized, func); - } - if (__kmp_is_ticket_lock_nestable(lck)) { - KMP_FATAL(LockNestableUsedAsSimple, func); - } - - int retval = __kmp_test_ticket_lock(lck, gtid); - - if (retval) { - std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1, - std::memory_order_relaxed); - } - return retval; -} - -int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { - kmp_uint32 distance = std::atomic_load_explicit(&lck->lk.next_ticket, - std::memory_order_relaxed) - - std::atomic_load_explicit(&lck->lk.now_serving, - std::memory_order_relaxed); - - ANNOTATE_TICKET_RELEASED(lck); - std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U, - std::memory_order_release); - - KMP_YIELD(distance > - (kmp_uint32)(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); - return KMP_LOCK_RELEASED; -} - -static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t *lck, - kmp_int32 gtid) { - char const *const func = "omp_unset_lock"; - - if (!std::atomic_load_explicit(&lck->lk.initialized, - std::memory_order_relaxed)) { - KMP_FATAL(LockIsUninitialized, func); - } - if (lck->lk.self != lck) { - KMP_FATAL(LockIsUninitialized, func); - } - if (__kmp_is_ticket_lock_nestable(lck)) { - KMP_FATAL(LockNestableUsedAsSimple, func); - } - if (__kmp_get_ticket_lock_owner(lck) == -1) { - KMP_FATAL(LockUnsettingFree, func); - } - if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) >= 0) && - (__kmp_get_ticket_lock_owner(lck) != gtid)) { - KMP_FATAL(LockUnsettingSetByAnother, func); - } - std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed); - return __kmp_release_ticket_lock(lck, gtid); -} - -void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck) { - lck->lk.location = NULL; - lck->lk.self = lck; - std::atomic_store_explicit(&lck->lk.next_ticket, 0U, - std::memory_order_relaxed); - std::atomic_store_explicit(&lck->lk.now_serving, 0U, - std::memory_order_relaxed); - std::atomic_store_explicit( - &lck->lk.owner_id, 0, - std::memory_order_relaxed); // no thread owns the lock. - std::atomic_store_explicit( - &lck->lk.depth_locked, -1, - std::memory_order_relaxed); // -1 => not a nested lock. 
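- // The store to lk.initialized below is intentionally last and uses
- // release ordering: the field initializations above are ordered before
- // the flag is published. The *_with_checks routines treat
- // initialized == false, or lk.self != lck, as an uninitialized lock.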
- std::atomic_store_explicit(&lck->lk.initialized, true, - std::memory_order_release); -} - -void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck) { - std::atomic_store_explicit(&lck->lk.initialized, false, - std::memory_order_release); - lck->lk.self = NULL; - lck->lk.location = NULL; - std::atomic_store_explicit(&lck->lk.next_ticket, 0U, - std::memory_order_relaxed); - std::atomic_store_explicit(&lck->lk.now_serving, 0U, - std::memory_order_relaxed); - std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed); - std::atomic_store_explicit(&lck->lk.depth_locked, -1, - std::memory_order_relaxed); -} - -static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { - char const *const func = "omp_destroy_lock"; - - if (!std::atomic_load_explicit(&lck->lk.initialized, - std::memory_order_relaxed)) { - KMP_FATAL(LockIsUninitialized, func); - } - if (lck->lk.self != lck) { - KMP_FATAL(LockIsUninitialized, func); - } - if (__kmp_is_ticket_lock_nestable(lck)) { - KMP_FATAL(LockNestableUsedAsSimple, func); - } - if (__kmp_get_ticket_lock_owner(lck) != -1) { - KMP_FATAL(LockStillOwned, func); - } - __kmp_destroy_ticket_lock(lck); -} - -// nested ticket locks - -int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { - KMP_DEBUG_ASSERT(gtid >= 0); - - if (__kmp_get_ticket_lock_owner(lck) == gtid) { - std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1, - std::memory_order_relaxed); - return KMP_LOCK_ACQUIRED_NEXT; - } else { - __kmp_acquire_ticket_lock_timed_template(lck, gtid); - ANNOTATE_TICKET_ACQUIRED(lck); - std::atomic_store_explicit(&lck->lk.depth_locked, 1, - std::memory_order_relaxed); - std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1, - std::memory_order_relaxed); - return KMP_LOCK_ACQUIRED_FIRST; - } -} - -static int __kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck, - kmp_int32 gtid) { - char const *const func = "omp_set_nest_lock"; - - if (!std::atomic_load_explicit(&lck->lk.initialized, - std::memory_order_relaxed)) { - KMP_FATAL(LockIsUninitialized, func); - } - if (lck->lk.self != lck) { - KMP_FATAL(LockIsUninitialized, func); - } - if (!__kmp_is_ticket_lock_nestable(lck)) { - KMP_FATAL(LockSimpleUsedAsNestable, func); - } - return __kmp_acquire_nested_ticket_lock(lck, gtid); -} - -int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { - int retval; - - KMP_DEBUG_ASSERT(gtid >= 0); - - if (__kmp_get_ticket_lock_owner(lck) == gtid) { - retval = std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1, - std::memory_order_relaxed) + - 1; - } else if (!__kmp_test_ticket_lock(lck, gtid)) { - retval = 0; - } else { - std::atomic_store_explicit(&lck->lk.depth_locked, 1, - std::memory_order_relaxed); - std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1, - std::memory_order_relaxed); - retval = 1; - } - return retval; -} - -static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck, - kmp_int32 gtid) { - char const *const func = "omp_test_nest_lock"; - - if (!std::atomic_load_explicit(&lck->lk.initialized, - std::memory_order_relaxed)) { - KMP_FATAL(LockIsUninitialized, func); - } - if (lck->lk.self != lck) { - KMP_FATAL(LockIsUninitialized, func); - } - if (!__kmp_is_ticket_lock_nestable(lck)) { - KMP_FATAL(LockSimpleUsedAsNestable, func); - } - return __kmp_test_nested_ticket_lock(lck, gtid); -} - -int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { - KMP_DEBUG_ASSERT(gtid >= 0); - - if 
((std::atomic_fetch_add_explicit(&lck->lk.depth_locked, -1, - std::memory_order_relaxed) - - 1) == 0) { - std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed); - __kmp_release_ticket_lock(lck, gtid); - return KMP_LOCK_RELEASED; - } - return KMP_LOCK_STILL_HELD; -} - -static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck, - kmp_int32 gtid) { - char const *const func = "omp_unset_nest_lock"; - - if (!std::atomic_load_explicit(&lck->lk.initialized, - std::memory_order_relaxed)) { - KMP_FATAL(LockIsUninitialized, func); - } - if (lck->lk.self != lck) { - KMP_FATAL(LockIsUninitialized, func); - } - if (!__kmp_is_ticket_lock_nestable(lck)) { - KMP_FATAL(LockSimpleUsedAsNestable, func); - } - if (__kmp_get_ticket_lock_owner(lck) == -1) { - KMP_FATAL(LockUnsettingFree, func); - } - if (__kmp_get_ticket_lock_owner(lck) != gtid) { - KMP_FATAL(LockUnsettingSetByAnother, func); - } - return __kmp_release_nested_ticket_lock(lck, gtid); -} - -void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck) { - __kmp_init_ticket_lock(lck); - std::atomic_store_explicit(&lck->lk.depth_locked, 0, - std::memory_order_relaxed); - // >= 0 for nestable locks, -1 for simple locks -} - -void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck) { - __kmp_destroy_ticket_lock(lck); - std::atomic_store_explicit(&lck->lk.depth_locked, 0, - std::memory_order_relaxed); -} - -static void -__kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { - char const *const func = "omp_destroy_nest_lock"; - - if (!std::atomic_load_explicit(&lck->lk.initialized, - std::memory_order_relaxed)) { - KMP_FATAL(LockIsUninitialized, func); - } - if (lck->lk.self != lck) { - KMP_FATAL(LockIsUninitialized, func); - } - if (!__kmp_is_ticket_lock_nestable(lck)) { - KMP_FATAL(LockSimpleUsedAsNestable, func); - } - if (__kmp_get_ticket_lock_owner(lck) != -1) { - KMP_FATAL(LockStillOwned, func); - } - __kmp_destroy_nested_ticket_lock(lck); -} - -// access functions to fields which don't exist for all lock kinds. 
- -static const ident_t *__kmp_get_ticket_lock_location(kmp_ticket_lock_t *lck) { - return lck->lk.location; -} - -static void __kmp_set_ticket_lock_location(kmp_ticket_lock_t *lck, - const ident_t *loc) { - lck->lk.location = loc; -} - -static kmp_lock_flags_t __kmp_get_ticket_lock_flags(kmp_ticket_lock_t *lck) { - return lck->lk.flags; -} - -static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t *lck, - kmp_lock_flags_t flags) { - lck->lk.flags = flags; -} - -/* ------------------------------------------------------------------------ */ -/* queuing locks */ - -/* First the states - (head,tail) = 0, 0 means lock is unheld, nobody on queue - UINT_MAX or -1, 0 means lock is held, nobody on queue - h, h means lock held or about to transition, - 1 element on queue - h, t h <> t, means lock is held or about to - transition, >1 elements on queue - - Now the transitions - Acquire(0,0) = -1 ,0 - Release(0,0) = Error - Acquire(-1,0) = h ,h h > 0 - Release(-1,0) = 0 ,0 - Acquire(h,h) = h ,t h > 0, t > 0, h <> t - Release(h,h) = -1 ,0 h > 0 - Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t' - Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t - - And pictorially - - +-----+ - | 0, 0|------- release -------> Error - +-----+ - | ^ - acquire| |release - | | - | | - v | - +-----+ - |-1, 0| - +-----+ - | ^ - acquire| |release - | | - | | - v | - +-----+ - | h, h| - +-----+ - | ^ - acquire| |release - | | - | | - v | - +-----+ - | h, t|----- acquire, release loopback ---+ - +-----+ | - ^ | - | | - +------------------------------------+ - */ - -#ifdef DEBUG_QUEUING_LOCKS - -/* Stuff for circular trace buffer */ -#define TRACE_BUF_ELE 1024 -static char traces[TRACE_BUF_ELE][128] = {0}; -static int tc = 0; -#define TRACE_LOCK(X, Y) \ - KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y); -#define TRACE_LOCK_T(X, Y, Z) \ - KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z); -#define TRACE_LOCK_HT(X, Y, Z, Q) \ - KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, \ - Z, Q); - -static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid, - kmp_queuing_lock_t *lck, kmp_int32 head_id, - kmp_int32 tail_id) { - kmp_int32 t, i; - - __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! 
\n"); - - i = tc % TRACE_BUF_ELE; - __kmp_printf_no_lock("%s\n", traces[i]); - i = (i + 1) % TRACE_BUF_ELE; - while (i != (tc % TRACE_BUF_ELE)) { - __kmp_printf_no_lock("%s", traces[i]); - i = (i + 1) % TRACE_BUF_ELE; - } - __kmp_printf_no_lock("\n"); - - __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, " - "next_wait:%d, head_id:%d, tail_id:%d\n", - gtid + 1, this_thr->th.th_spin_here, - this_thr->th.th_next_waiting, head_id, tail_id); - - __kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id); - - if (lck->lk.head_id >= 1) { - t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting; - while (t > 0) { - __kmp_printf_no_lock("-> %d ", t); - t = __kmp_threads[t - 1]->th.th_next_waiting; - } - } - __kmp_printf_no_lock("; tail: %d ", lck->lk.tail_id); - __kmp_printf_no_lock("\n\n"); -} - -#endif /* DEBUG_QUEUING_LOCKS */ - -static kmp_int32 __kmp_get_queuing_lock_owner(kmp_queuing_lock_t *lck) { - return TCR_4(lck->lk.owner_id) - 1; -} - -static inline bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t *lck) { - return lck->lk.depth_locked != -1; -} - -/* Acquire a lock using a the queuing lock implementation */ -template -/* [TLW] The unused template above is left behind because of what BEB believes - is a potential compiler problem with __forceinline. */ -__forceinline static int -__kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck, - kmp_int32 gtid) { - kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid); - volatile kmp_int32 *head_id_p = &lck->lk.head_id; - volatile kmp_int32 *tail_id_p = &lck->lk.tail_id; - volatile kmp_uint32 *spin_here_p; - kmp_int32 need_mf = 1; - -#if OMPT_SUPPORT - ompt_state_t prev_state = ompt_state_undefined; -#endif - - KA_TRACE(1000, - ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid)); - - KMP_FSYNC_PREPARE(lck); - KMP_DEBUG_ASSERT(this_thr != NULL); - spin_here_p = &this_thr->th.th_spin_here; - -#ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK(gtid + 1, "acq ent"); - if (*spin_here_p) - __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); - if (this_thr->th.th_next_waiting != 0) - __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); -#endif - KMP_DEBUG_ASSERT(!*spin_here_p); - KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0); - - /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to - head_id_p that may follow, not just in execution order, but also in - visibility order. This way, when a releasing thread observes the changes to - the queue by this thread, it can rightly assume that spin_here_p has - already been set to TRUE, so that when it sets spin_here_p to FALSE, it is - not premature. If the releasing thread sets spin_here_p to FALSE before - this thread sets it to TRUE, this thread will hang. 
*/ - *spin_here_p = TRUE; /* before enqueuing to prevent race */ - - while (1) { - kmp_int32 enqueued; - kmp_int32 head; - kmp_int32 tail; - - head = *head_id_p; - - switch (head) { - - case -1: { -#ifdef DEBUG_QUEUING_LOCKS - tail = *tail_id_p; - TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail); -#endif - tail = 0; /* to make sure next link asynchronously read is not set - accidentally; this assignment prevents us from entering the - if ( t > 0 ) condition in the enqueued case below, which is not - necessary for this state transition */ - - need_mf = 0; - /* try (-1,0)->(tid,tid) */ - enqueued = KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p, - KMP_PACK_64(-1, 0), - KMP_PACK_64(gtid + 1, gtid + 1)); -#ifdef DEBUG_QUEUING_LOCKS - if (enqueued) - TRACE_LOCK(gtid + 1, "acq enq: (-1,0)->(tid,tid)"); -#endif - } break; - - default: { - tail = *tail_id_p; - KMP_DEBUG_ASSERT(tail != gtid + 1); - -#ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail); -#endif - - if (tail == 0) { - enqueued = FALSE; - } else { - need_mf = 0; - /* try (h,t) or (h,h)->(h,tid) */ - enqueued = KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1); - -#ifdef DEBUG_QUEUING_LOCKS - if (enqueued) - TRACE_LOCK(gtid + 1, "acq enq: (h,t)->(h,tid)"); -#endif - } - } break; - - case 0: /* empty queue */ - { - kmp_int32 grabbed_lock; - -#ifdef DEBUG_QUEUING_LOCKS - tail = *tail_id_p; - TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail); -#endif - /* try (0,0)->(-1,0) */ - - /* only legal transition out of head = 0 is head = -1 with no change to - * tail */ - grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1); - - if (grabbed_lock) { - - *spin_here_p = FALSE; - - KA_TRACE( - 1000, - ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n", - lck, gtid)); -#ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK_HT(gtid + 1, "acq exit: ", head, 0); -#endif - -#if OMPT_SUPPORT - if (ompt_enabled.enabled && prev_state != ompt_state_undefined) { - /* change the state before clearing wait_id */ - this_thr->th.ompt_thread_info.state = prev_state; - this_thr->th.ompt_thread_info.wait_id = 0; - } -#endif - - KMP_FSYNC_ACQUIRED(lck); - return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */ - } - enqueued = FALSE; - } break; - } - -#if OMPT_SUPPORT - if (ompt_enabled.enabled && prev_state == ompt_state_undefined) { - /* this thread will spin; set wait_id before entering wait state */ - prev_state = this_thr->th.ompt_thread_info.state; - this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck; - this_thr->th.ompt_thread_info.state = ompt_state_wait_lock; - } -#endif - - if (enqueued) { - if (tail > 0) { - kmp_info_t *tail_thr = __kmp_thread_from_gtid(tail - 1); - KMP_ASSERT(tail_thr != NULL); - tail_thr->th.th_next_waiting = gtid + 1; - /* corresponding wait for this write in release code */ - } - KA_TRACE(1000, - ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", - lck, gtid)); - - /* ToDo: May want to consider using __kmp_wait_sleep or something that - sleeps for throughput only here. 
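The KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck) call that follows is a spin-then-yield wait on the flag; roughly, in portable C++ and under assumed simplifications (the real macro takes an arbitrary predicate, KMP_EQ here, plus the lock object for instrumentation):

  #include <atomic>
  #include <thread>

  // Poll the flag for a while, then start yielding so that on an
  // oversubscribed machine the releasing thread gets scheduled.
  void wait_until_cleared(std::atomic<bool> &flag) {
    int spins = 0;
    while (flag.load(std::memory_order_acquire))
      if (++spins > 4096) std::this_thread::yield();
  }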
*/
-    KMP_MB();
-    KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
-
-#ifdef DEBUG_QUEUING_LOCKS
-    TRACE_LOCK(gtid + 1, "acq spin");
-
-    if (this_thr->th.th_next_waiting != 0)
-      __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
-#endif
-    KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
-    KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after "
-                    "waiting on queue\n",
-                    lck, gtid));
-
-#ifdef DEBUG_QUEUING_LOCKS
-    TRACE_LOCK(gtid + 1, "acq exit 2");
-#endif
-
-#if OMPT_SUPPORT
-    /* change the state before clearing wait_id */
-    this_thr->th.ompt_thread_info.state = prev_state;
-    this_thr->th.ompt_thread_info.wait_id = 0;
-#endif
-
-    /* got lock, we were dequeued by the thread that released lock */
-    return KMP_LOCK_ACQUIRED_FIRST;
-  }
-
-  /* Yield if number of threads > number of logical processors */
-  /* ToDo: Not sure why this should only be in oversubscription case,
-     maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
-  KMP_YIELD(TCR_4(__kmp_nth) >
-            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
-#ifdef DEBUG_QUEUING_LOCKS
-  TRACE_LOCK(gtid + 1, "acq retry");
-#endif
-  }
-  KMP_ASSERT2(0, "should not get here");
-  return KMP_LOCK_ACQUIRED_FIRST;
-}
-
-int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
-  KMP_DEBUG_ASSERT(gtid >= 0);
-
-  int retval = __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
-  ANNOTATE_QUEUING_ACQUIRED(lck);
-  return retval;
-}
-
-static int __kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
-                                                  kmp_int32 gtid) {
-  char const *const func = "omp_set_lock";
-  if (lck->lk.initialized != lck) {
-    KMP_FATAL(LockIsUninitialized, func);
-  }
-  if (__kmp_is_queuing_lock_nestable(lck)) {
-    KMP_FATAL(LockNestableUsedAsSimple, func);
-  }
-  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
-    KMP_FATAL(LockIsAlreadyOwned, func);
-  }
-
-  __kmp_acquire_queuing_lock(lck, gtid);
-
-  lck->lk.owner_id = gtid + 1;
-  return KMP_LOCK_ACQUIRED_FIRST;
-}
-
-int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
-  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
-  kmp_int32 head;
-#ifdef KMP_DEBUG
-  kmp_info_t *this_thr;
-#endif
-
-  KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid));
-  KMP_DEBUG_ASSERT(gtid >= 0);
-#ifdef KMP_DEBUG
-  this_thr = __kmp_thread_from_gtid(gtid);
-  KMP_DEBUG_ASSERT(this_thr != NULL);
-  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
-#endif
-
-  head = *head_id_p;
-
-  if (head == 0) { /* nobody on queue, nobody holding */
-    /* try (0,0)->(-1,0) */
-    if (KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1)) {
-      KA_TRACE(1000,
-               ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid));
-      KMP_FSYNC_ACQUIRED(lck);
-      ANNOTATE_QUEUING_ACQUIRED(lck);
-      return TRUE;
-    }
-  }
-
-  KA_TRACE(1000,
-           ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid));
-  return FALSE;
-}
-
-static int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
-                                               kmp_int32 gtid) {
-  char const *const func = "omp_test_lock";
-  if (lck->lk.initialized != lck) {
-    KMP_FATAL(LockIsUninitialized, func);
-  }
-  if (__kmp_is_queuing_lock_nestable(lck)) {
-    KMP_FATAL(LockNestableUsedAsSimple, func);
-  }
-
-  int retval = __kmp_test_queuing_lock(lck, gtid);
-
-  if (retval) {
-    lck->lk.owner_id = gtid + 1;
-  }
-  return retval;
-}
-
-int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
-  kmp_info_t *this_thr;
-  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
-  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
-
-  KA_TRACE(1000,
("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid)); - KMP_DEBUG_ASSERT(gtid >= 0); - this_thr = __kmp_thread_from_gtid(gtid); - KMP_DEBUG_ASSERT(this_thr != NULL); -#ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK(gtid + 1, "rel ent"); - - if (this_thr->th.th_spin_here) - __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); - if (this_thr->th.th_next_waiting != 0) - __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); -#endif - KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here); - KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0); - - KMP_FSYNC_RELEASING(lck); - ANNOTATE_QUEUING_RELEASED(lck); - - while (1) { - kmp_int32 dequeued; - kmp_int32 head; - kmp_int32 tail; - - head = *head_id_p; - -#ifdef DEBUG_QUEUING_LOCKS - tail = *tail_id_p; - TRACE_LOCK_HT(gtid + 1, "rel read: ", head, tail); - if (head == 0) - __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); -#endif - KMP_DEBUG_ASSERT(head != - 0); /* holding the lock, head must be -1 or queue head */ - - if (head == -1) { /* nobody on queue */ - /* try (-1,0)->(0,0) */ - if (KMP_COMPARE_AND_STORE_REL32(head_id_p, -1, 0)) { - KA_TRACE( - 1000, - ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n", - lck, gtid)); -#ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK_HT(gtid + 1, "rel exit: ", 0, 0); -#endif - -#if OMPT_SUPPORT -/* nothing to do - no other thread is trying to shift blame */ -#endif - return KMP_LOCK_RELEASED; - } - dequeued = FALSE; - } else { - KMP_MB(); - tail = *tail_id_p; - if (head == tail) { /* only one thread on the queue */ -#ifdef DEBUG_QUEUING_LOCKS - if (head <= 0) - __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); -#endif - KMP_DEBUG_ASSERT(head > 0); - - /* try (h,h)->(-1,0) */ - dequeued = KMP_COMPARE_AND_STORE_REL64( - RCAST(volatile kmp_int64 *, tail_id_p), KMP_PACK_64(head, head), - KMP_PACK_64(-1, 0)); -#ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)"); -#endif - - } else { - volatile kmp_int32 *waiting_id_p; - kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1); - KMP_DEBUG_ASSERT(head_thr != NULL); - waiting_id_p = &head_thr->th.th_next_waiting; - -/* Does this require synchronous reads? */ -#ifdef DEBUG_QUEUING_LOCKS - if (head <= 0 || tail <= 0) - __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); -#endif - KMP_DEBUG_ASSERT(head > 0 && tail > 0); - - /* try (h,t)->(h',t) or (t,t) */ - KMP_MB(); - /* make sure enqueuing thread has time to update next waiting thread - * field */ - *head_id_p = KMP_WAIT_YIELD((volatile kmp_uint32 *)waiting_id_p, 0, - KMP_NEQ, NULL); -#ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)"); -#endif - dequeued = TRUE; - } - } - - if (dequeued) { - kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1); - KMP_DEBUG_ASSERT(head_thr != NULL); - -/* Does this require synchronous reads? */ -#ifdef DEBUG_QUEUING_LOCKS - if (head <= 0 || tail <= 0) - __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); -#endif - KMP_DEBUG_ASSERT(head > 0 && tail > 0); - - /* For clean code only. Thread not released until next statement prevents - race with acquire code. 
*/
-    head_thr->th.th_next_waiting = 0;
-#ifdef DEBUG_QUEUING_LOCKS
-    TRACE_LOCK_T(gtid + 1, "rel nw=0 for t=", head);
-#endif
-
-    KMP_MB();
-    /* reset spin value */
-    head_thr->th.th_spin_here = FALSE;
-
-    KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after "
-                    "dequeuing\n",
-                    lck, gtid));
-#ifdef DEBUG_QUEUING_LOCKS
-    TRACE_LOCK(gtid + 1, "rel exit 2");
-#endif
-    return KMP_LOCK_RELEASED;
-  }
-/* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring
-   threads */
-
-#ifdef DEBUG_QUEUING_LOCKS
-  TRACE_LOCK(gtid + 1, "rel retry");
-#endif
-
-  } /* while */
-  KMP_ASSERT2(0, "should not get here");
-  return KMP_LOCK_RELEASED;
-}
-
-static int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
-                                                  kmp_int32 gtid) {
-  char const *const func = "omp_unset_lock";
-  KMP_MB(); /* in case another processor initialized lock */
-  if (lck->lk.initialized != lck) {
-    KMP_FATAL(LockIsUninitialized, func);
-  }
-  if (__kmp_is_queuing_lock_nestable(lck)) {
-    KMP_FATAL(LockNestableUsedAsSimple, func);
-  }
-  if (__kmp_get_queuing_lock_owner(lck) == -1) {
-    KMP_FATAL(LockUnsettingFree, func);
-  }
-  if (__kmp_get_queuing_lock_owner(lck) != gtid) {
-    KMP_FATAL(LockUnsettingSetByAnother, func);
-  }
-  lck->lk.owner_id = 0;
-  return __kmp_release_queuing_lock(lck, gtid);
-}
-
-void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck) {
-  lck->lk.location = NULL;
-  lck->lk.head_id = 0;
-  lck->lk.tail_id = 0;
-  lck->lk.next_ticket = 0;
-  lck->lk.now_serving = 0;
-  lck->lk.owner_id = 0; // no thread owns the lock.
-  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
-  lck->lk.initialized = lck;
-
-  KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
-}
-
-void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck) {
-  lck->lk.initialized = NULL;
-  lck->lk.location = NULL;
-  lck->lk.head_id = 0;
-  lck->lk.tail_id = 0;
-  lck->lk.next_ticket = 0;
-  lck->lk.now_serving = 0;
-  lck->lk.owner_id = 0;
-  lck->lk.depth_locked = -1;
-}
-
-static void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
-  char const *const func = "omp_destroy_lock";
-  if (lck->lk.initialized != lck) {
-    KMP_FATAL(LockIsUninitialized, func);
-  }
-  if (__kmp_is_queuing_lock_nestable(lck)) {
-    KMP_FATAL(LockNestableUsedAsSimple, func);
-  }
-  if (__kmp_get_queuing_lock_owner(lck) != -1) {
-    KMP_FATAL(LockStillOwned, func);
-  }
-  __kmp_destroy_queuing_lock(lck);
-}
-
-// nested queuing locks
-
-int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
-  KMP_DEBUG_ASSERT(gtid >= 0);
-
-  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
-    lck->lk.depth_locked += 1;
-    return KMP_LOCK_ACQUIRED_NEXT;
-  } else {
-    __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
-    ANNOTATE_QUEUING_ACQUIRED(lck);
-    KMP_MB();
-    lck->lk.depth_locked = 1;
-    KMP_MB();
-    lck->lk.owner_id = gtid + 1;
-    return KMP_LOCK_ACQUIRED_FIRST;
-  }
-}
-
-static int
-__kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
-                                              kmp_int32 gtid) {
-  char const *const func = "omp_set_nest_lock";
-  if (lck->lk.initialized != lck) {
-    KMP_FATAL(LockIsUninitialized, func);
-  }
-  if (!__kmp_is_queuing_lock_nestable(lck)) {
-    KMP_FATAL(LockSimpleUsedAsNestable, func);
-  }
-  return __kmp_acquire_nested_queuing_lock(lck, gtid);
-}
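The nested ("nestable") variants here and in the DRDPA section later in this file all share one bookkeeping pattern: an owner id and a depth counter layered over the plain lock, so a thread that already owns the lock merely bumps the depth, and only the final release unlocks the underlying lock. A minimal sketch of that pattern, using std::mutex and hypothetical names rather than the runtime's types:

#include <atomic>
#include <mutex>
#include <thread>

// Sketch only: mirrors the owner_id/depth_locked bookkeeping of the nested
// queuing-lock functions, not the runtime's actual implementation.
class reentrant_lock {
  std::mutex base_;                      // stands in for the queuing lock
  std::atomic<std::thread::id> owner_{}; // stands in for lk.owner_id
  int depth_ = 0;                        // stands in for lk.depth_locked

public:
  void acquire() {
    if (owner_.load() == std::this_thread::get_id()) {
      ++depth_; // already owned by us: just nest one level deeper
      return;
    }
    base_.lock(); // first acquisition takes the real lock
    owner_.store(std::this_thread::get_id());
    depth_ = 1;
  }
  bool release() { // returns true once the lock is really freed
    if (--depth_ == 0) {
      owner_.store(std::thread::id()); // clear ownership before unlocking
      base_.unlock();
      return true;  // analogue of KMP_LOCK_RELEASED
    }
    return false;   // analogue of KMP_LOCK_STILL_HELD
  }
};

Only the owning thread may call release(); that is the same contract the _with_checks variants enforce with KMP_FATAL diagnostics.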
-
-int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
-  int retval;
-
-  KMP_DEBUG_ASSERT(gtid >= 0);
-
-  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
-    retval = ++lck->lk.depth_locked;
-  } else if (!__kmp_test_queuing_lock(lck, gtid)) {
-    retval = 0;
-  } else {
-    KMP_MB();
-    retval = lck->lk.depth_locked = 1;
-    KMP_MB();
-    lck->lk.owner_id = gtid + 1;
-  }
-  return retval;
-}
-
-static int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
-                                                      kmp_int32 gtid) {
-  char const *const func = "omp_test_nest_lock";
-  if (lck->lk.initialized != lck) {
-    KMP_FATAL(LockIsUninitialized, func);
-  }
-  if (!__kmp_is_queuing_lock_nestable(lck)) {
-    KMP_FATAL(LockSimpleUsedAsNestable, func);
-  }
-  return __kmp_test_nested_queuing_lock(lck, gtid);
-}
-
-int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
-  KMP_DEBUG_ASSERT(gtid >= 0);
-
-  KMP_MB();
-  if (--(lck->lk.depth_locked) == 0) {
-    KMP_MB();
-    lck->lk.owner_id = 0;
-    __kmp_release_queuing_lock(lck, gtid);
-    return KMP_LOCK_RELEASED;
-  }
-  return KMP_LOCK_STILL_HELD;
-}
-
-static int
-__kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
-                                              kmp_int32 gtid) {
-  char const *const func = "omp_unset_nest_lock";
-  KMP_MB(); /* in case another processor initialized lock */
-  if (lck->lk.initialized != lck) {
-    KMP_FATAL(LockIsUninitialized, func);
-  }
-  if (!__kmp_is_queuing_lock_nestable(lck)) {
-    KMP_FATAL(LockSimpleUsedAsNestable, func);
-  }
-  if (__kmp_get_queuing_lock_owner(lck) == -1) {
-    KMP_FATAL(LockUnsettingFree, func);
-  }
-  if (__kmp_get_queuing_lock_owner(lck) != gtid) {
-    KMP_FATAL(LockUnsettingSetByAnother, func);
-  }
-  return __kmp_release_nested_queuing_lock(lck, gtid);
-}
-
-void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck) {
-  __kmp_init_queuing_lock(lck);
-  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
-}
-
-void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck) {
-  __kmp_destroy_queuing_lock(lck);
-  lck->lk.depth_locked = 0;
-}
-
-static void
-__kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
-  char const *const func = "omp_destroy_nest_lock";
-  if (lck->lk.initialized != lck) {
-    KMP_FATAL(LockIsUninitialized, func);
-  }
-  if (!__kmp_is_queuing_lock_nestable(lck)) {
-    KMP_FATAL(LockSimpleUsedAsNestable, func);
-  }
-  if (__kmp_get_queuing_lock_owner(lck) != -1) {
-    KMP_FATAL(LockStillOwned, func);
-  }
-  __kmp_destroy_nested_queuing_lock(lck);
-}
-
-// access functions to fields which don't exist for all lock kinds.
-
-static const ident_t *__kmp_get_queuing_lock_location(kmp_queuing_lock_t *lck) {
-  return lck->lk.location;
-}
-
-static void __kmp_set_queuing_lock_location(kmp_queuing_lock_t *lck,
-                                            const ident_t *loc) {
-  lck->lk.location = loc;
-}
-
-static kmp_lock_flags_t __kmp_get_queuing_lock_flags(kmp_queuing_lock_t *lck) {
-  return lck->lk.flags;
-}
-
-static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck,
-                                         kmp_lock_flags_t flags) {
-  lck->lk.flags = flags;
-}
-
-#if KMP_USE_ADAPTIVE_LOCKS
-
-/* RTM Adaptive locks */
-
-#if (KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300) ||                          \
-    (KMP_COMPILER_MSVC && _MSC_VER >= 1700) ||                                 \
-    (KMP_COMPILER_CLANG && KMP_MSVC_COMPAT)
-
-#include <immintrin.h>
-#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
-
-#else
-
-// Values from the status register after failed speculation.
-#define _XBEGIN_STARTED (~0u) -#define _XABORT_EXPLICIT (1 << 0) -#define _XABORT_RETRY (1 << 1) -#define _XABORT_CONFLICT (1 << 2) -#define _XABORT_CAPACITY (1 << 3) -#define _XABORT_DEBUG (1 << 4) -#define _XABORT_NESTED (1 << 5) -#define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF)) - -// Aborts for which it's worth trying again immediately -#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) - -#define STRINGIZE_INTERNAL(arg) #arg -#define STRINGIZE(arg) STRINGIZE_INTERNAL(arg) - -// Access to RTM instructions -/*A version of XBegin which returns -1 on speculation, and the value of EAX on - an abort. This is the same definition as the compiler intrinsic that will be - supported at some point. */ -static __inline int _xbegin() { - int res = -1; - -#if KMP_OS_WINDOWS -#if KMP_ARCH_X86_64 - _asm { - _emit 0xC7 - _emit 0xF8 - _emit 2 - _emit 0 - _emit 0 - _emit 0 - jmp L2 - mov res, eax - L2: - } -#else /* IA32 */ - _asm { - _emit 0xC7 - _emit 0xF8 - _emit 2 - _emit 0 - _emit 0 - _emit 0 - jmp L2 - mov res, eax - L2: - } -#endif // KMP_ARCH_X86_64 -#else - /* Note that %eax must be noted as killed (clobbered), because the XSR is - returned in %eax(%rax) on abort. Other register values are restored, so - don't need to be killed. - - We must also mark 'res' as an input and an output, since otherwise - 'res=-1' may be dropped as being dead, whereas we do need the assignment on - the successful (i.e., non-abort) path. */ - __asm__ volatile("1: .byte 0xC7; .byte 0xF8;\n" - " .long 1f-1b-6\n" - " jmp 2f\n" - "1: movl %%eax,%0\n" - "2:" - : "+r"(res)::"memory", "%eax"); -#endif // KMP_OS_WINDOWS - return res; -} - -/* Transaction end */ -static __inline void _xend() { -#if KMP_OS_WINDOWS - __asm { - _emit 0x0f - _emit 0x01 - _emit 0xd5 - } -#else - __asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory"); -#endif -} - -/* This is a macro, the argument must be a single byte constant which can be - evaluated by the inline assembler, since it is emitted as a byte into the - assembly code. */ -// clang-format off -#if KMP_OS_WINDOWS -#define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG -#else -#define _xabort(ARG) \ - __asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory"); -#endif -// clang-format on -#endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300 - -// Statistics is collected for testing purpose -#if KMP_DEBUG_ADAPTIVE_LOCKS - -// We accumulate speculative lock statistics when the lock is destroyed. We -// keep locks that haven't been destroyed in the liveLocks list so that we can -// grab their statistics too. -static kmp_adaptive_lock_statistics_t destroyedStats; - -// To hold the list of live locks. -static kmp_adaptive_lock_info_t liveLocks; - -// A lock so we can safely update the list of locks. -static kmp_bootstrap_lock_t chain_lock = - KMP_BOOTSTRAP_LOCK_INITIALIZER(chain_lock); - -// Initialize the list of stats. 
-void __kmp_init_speculative_stats() { - kmp_adaptive_lock_info_t *lck = &liveLocks; - - memset(CCAST(kmp_adaptive_lock_statistics_t *, &(lck->stats)), 0, - sizeof(lck->stats)); - lck->stats.next = lck; - lck->stats.prev = lck; - - KMP_ASSERT(lck->stats.next->stats.prev == lck); - KMP_ASSERT(lck->stats.prev->stats.next == lck); - - __kmp_init_bootstrap_lock(&chain_lock); -} - -// Insert the lock into the circular list -static void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) { - __kmp_acquire_bootstrap_lock(&chain_lock); - - lck->stats.next = liveLocks.stats.next; - lck->stats.prev = &liveLocks; - - liveLocks.stats.next = lck; - lck->stats.next->stats.prev = lck; - - KMP_ASSERT(lck->stats.next->stats.prev == lck); - KMP_ASSERT(lck->stats.prev->stats.next == lck); - - __kmp_release_bootstrap_lock(&chain_lock); -} - -static void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) { - KMP_ASSERT(lck->stats.next->stats.prev == lck); - KMP_ASSERT(lck->stats.prev->stats.next == lck); - - kmp_adaptive_lock_info_t *n = lck->stats.next; - kmp_adaptive_lock_info_t *p = lck->stats.prev; - - n->stats.prev = p; - p->stats.next = n; -} - -static void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) { - memset(CCAST(kmp_adaptive_lock_statistics_t *, &lck->stats), 0, - sizeof(lck->stats)); - __kmp_remember_lock(lck); -} - -static void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t, - kmp_adaptive_lock_info_t *lck) { - kmp_adaptive_lock_statistics_t volatile *s = &lck->stats; - - t->nonSpeculativeAcquireAttempts += lck->acquire_attempts; - t->successfulSpeculations += s->successfulSpeculations; - t->hardFailedSpeculations += s->hardFailedSpeculations; - t->softFailedSpeculations += s->softFailedSpeculations; - t->nonSpeculativeAcquires += s->nonSpeculativeAcquires; - t->lemmingYields += s->lemmingYields; -} - -static void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) { - __kmp_acquire_bootstrap_lock(&chain_lock); - - __kmp_add_stats(&destroyedStats, lck); - __kmp_forget_lock(lck); - - __kmp_release_bootstrap_lock(&chain_lock); -} - -static float percent(kmp_uint32 count, kmp_uint32 total) { - return (total == 0) ? 0.0 : (100.0 * count) / total; -} - -static FILE *__kmp_open_stats_file() { - if (strcmp(__kmp_speculative_statsfile, "-") == 0) - return stdout; - - size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20; - char buffer[buffLen]; - KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile, - (kmp_int32)getpid()); - FILE *result = fopen(&buffer[0], "w"); - - // Maybe we should issue a warning here... - return result ? 
result : stdout; -} - -void __kmp_print_speculative_stats() { - kmp_adaptive_lock_statistics_t total = destroyedStats; - kmp_adaptive_lock_info_t *lck; - - for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) { - __kmp_add_stats(&total, lck); - } - kmp_adaptive_lock_statistics_t *t = &total; - kmp_uint32 totalSections = - t->nonSpeculativeAcquires + t->successfulSpeculations; - kmp_uint32 totalSpeculations = t->successfulSpeculations + - t->hardFailedSpeculations + - t->softFailedSpeculations; - if (totalSections <= 0) - return; - - FILE *statsFile = __kmp_open_stats_file(); - - fprintf(statsFile, "Speculative lock statistics (all approximate!)\n"); - fprintf(statsFile, " Lock parameters: \n" - " max_soft_retries : %10d\n" - " max_badness : %10d\n", - __kmp_adaptive_backoff_params.max_soft_retries, - __kmp_adaptive_backoff_params.max_badness); - fprintf(statsFile, " Non-speculative acquire attempts : %10d\n", - t->nonSpeculativeAcquireAttempts); - fprintf(statsFile, " Total critical sections : %10d\n", - totalSections); - fprintf(statsFile, " Successful speculations : %10d (%5.1f%%)\n", - t->successfulSpeculations, - percent(t->successfulSpeculations, totalSections)); - fprintf(statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n", - t->nonSpeculativeAcquires, - percent(t->nonSpeculativeAcquires, totalSections)); - fprintf(statsFile, " Lemming yields : %10d\n\n", - t->lemmingYields); - - fprintf(statsFile, " Speculative acquire attempts : %10d\n", - totalSpeculations); - fprintf(statsFile, " Successes : %10d (%5.1f%%)\n", - t->successfulSpeculations, - percent(t->successfulSpeculations, totalSpeculations)); - fprintf(statsFile, " Soft failures : %10d (%5.1f%%)\n", - t->softFailedSpeculations, - percent(t->softFailedSpeculations, totalSpeculations)); - fprintf(statsFile, " Hard failures : %10d (%5.1f%%)\n", - t->hardFailedSpeculations, - percent(t->hardFailedSpeculations, totalSpeculations)); - - if (statsFile != stdout) - fclose(statsFile); -} - -#define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++) -#else -#define KMP_INC_STAT(lck, stat) - -#endif // KMP_DEBUG_ADAPTIVE_LOCKS - -static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) { - // It is enough to check that the head_id is zero. - // We don't also need to check the tail. - bool res = lck->lk.head_id == 0; - -// We need a fence here, since we must ensure that no memory operations -// from later in this thread float above that read. -#if KMP_COMPILER_ICC - _mm_mfence(); -#else - __sync_synchronize(); -#endif - - return res; -} - -// Functions for manipulating the badness -static __inline void -__kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) { - // Reset the badness to zero so we eagerly try to speculate again - lck->lk.adaptive.badness = 0; - KMP_INC_STAT(lck, successfulSpeculations); -} - -// Create a bit mask with one more set bit. -static __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) { - kmp_uint32 newBadness = (lck->lk.adaptive.badness << 1) | 1; - if (newBadness > lck->lk.adaptive.max_badness) { - return; - } else { - lck->lk.adaptive.badness = newBadness; - } -} - -// Check whether speculation should be attempted. -static __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck, - kmp_int32 gtid) { - kmp_uint32 badness = lck->lk.adaptive.badness; - kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts; - int res = (attempts & badness) == 0; - return res; -} - -// Attempt to acquire only the speculative lock. 
-// Does not back off to the non-speculative lock. -static int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck, - kmp_int32 gtid) { - int retries = lck->lk.adaptive.max_soft_retries; - - // We don't explicitly count the start of speculation, rather we record the - // results (success, hard fail, soft fail). The sum of all of those is the - // total number of times we started speculation since all speculations must - // end one of those ways. - do { - kmp_uint32 status = _xbegin(); - // Switch this in to disable actual speculation but exercise at least some - // of the rest of the code. Useful for debugging... - // kmp_uint32 status = _XABORT_NESTED; - - if (status == _XBEGIN_STARTED) { - /* We have successfully started speculation. Check that no-one acquired - the lock for real between when we last looked and now. This also gets - the lock cache line into our read-set, which we need so that we'll - abort if anyone later claims it for real. */ - if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) { - // Lock is now visibly acquired, so someone beat us to it. Abort the - // transaction so we'll restart from _xbegin with the failure status. - _xabort(0x01); - KMP_ASSERT2(0, "should not get here"); - } - return 1; // Lock has been acquired (speculatively) - } else { - // We have aborted, update the statistics - if (status & SOFT_ABORT_MASK) { - KMP_INC_STAT(lck, softFailedSpeculations); - // and loop round to retry. - } else { - KMP_INC_STAT(lck, hardFailedSpeculations); - // Give up if we had a hard failure. - break; - } - } - } while (retries--); // Loop while we have retries, and didn't fail hard. - - // Either we had a hard failure or we didn't succeed softly after - // the full set of attempts, so back off the badness. - __kmp_step_badness(lck); - return 0; -} - -// Attempt to acquire the speculative lock, or back off to the non-speculative -// one if the speculative lock cannot be acquired. -// We can succeed speculatively, non-speculatively, or fail. -static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) { - // First try to acquire the lock speculatively - if (__kmp_should_speculate(lck, gtid) && - __kmp_test_adaptive_lock_only(lck, gtid)) - return 1; - - // Speculative acquisition failed, so try to acquire it non-speculatively. - // Count the non-speculative acquire attempt - lck->lk.adaptive.acquire_attempts++; - - // Use base, non-speculative lock. - if (__kmp_test_queuing_lock(GET_QLK_PTR(lck), gtid)) { - KMP_INC_STAT(lck, nonSpeculativeAcquires); - return 1; // Lock is acquired (non-speculatively) - } else { - return 0; // Failed to acquire the lock, it's already visibly locked. - } -} - -static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck, - kmp_int32 gtid) { - char const *const func = "omp_test_lock"; - if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { - KMP_FATAL(LockIsUninitialized, func); - } - - int retval = __kmp_test_adaptive_lock(lck, gtid); - - if (retval) { - lck->lk.qlk.owner_id = gtid + 1; - } - return retval; -} - -// Block until we can acquire a speculative, adaptive lock. We check whether we -// should be trying to speculate. If we should be, we check the real lock to see -// if it is free, and, if not, pause without attempting to acquire it until it -// is. Then we try the speculative acquire. 
This means that although we suffer
-// from lemmings a little (because we can't acquire the lock speculatively
-// until the queue of waiting threads has cleared), we don't get into a state
-// where we can never acquire the lock speculatively (because we force the queue
-// to clear by preventing new arrivals from entering the queue). This does mean
-// that when we're trying to break lemmings, the lock is no longer fair. However
-// OpenMP makes no guarantee that its locks are fair, so this isn't a real
-// problem.
-static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck,
-                                        kmp_int32 gtid) {
-  if (__kmp_should_speculate(lck, gtid)) {
-    if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
-      if (__kmp_test_adaptive_lock_only(lck, gtid))
-        return;
-      // We tried speculation and failed, so give up.
-    } else {
-      // We can't try speculation until the lock is free, so we pause here
-      // (without suspending on the queuing lock, to allow it to drain), then
-      // try again. All other threads will also see the same result for
-      // __kmp_should_speculate, so will be doing the same if they try to
-      // claim the lock from now on.
-      while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
-        KMP_INC_STAT(lck, lemmingYields);
-        __kmp_yield(TRUE);
-      }
-
-      if (__kmp_test_adaptive_lock_only(lck, gtid))
-        return;
-    }
-  }
-
-  // Speculative acquisition failed, so acquire it non-speculatively.
-  // Count the non-speculative acquire attempt
-  lck->lk.adaptive.acquire_attempts++;
-
-  __kmp_acquire_queuing_lock_timed_template<FALSE>(GET_QLK_PTR(lck), gtid);
-  // We have acquired the base lock, so count that.
-  KMP_INC_STAT(lck, nonSpeculativeAcquires);
-  ANNOTATE_QUEUING_ACQUIRED(lck);
-}
-
-static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
-                                                    kmp_int32 gtid) {
-  char const *const func = "omp_set_lock";
-  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
-    KMP_FATAL(LockIsUninitialized, func);
-  }
-  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == gtid) {
-    KMP_FATAL(LockIsAlreadyOwned, func);
-  }
-
-  __kmp_acquire_adaptive_lock(lck, gtid);
-
-  lck->lk.qlk.owner_id = gtid + 1;
-}
-
-static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck,
-                                       kmp_int32 gtid) {
-  if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(
-          lck))) { // If the lock doesn't look claimed we must be speculating.
-    // (Or the user's code is buggy and they're releasing without locking;
-    // if we had XTEST we'd be able to check that case...)
-    _xend(); // Exit speculation
-    __kmp_update_badness_after_success(lck);
-  } else { // Since the lock *is* visibly locked we're not speculating,
-    // so should use the underlying lock's release scheme.
-    __kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid);
-  }
-  return KMP_LOCK_RELEASED;
-}
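The adaptive scheme above combines three ingredients: a transactional attempt (_xbegin/_xend), a check that the underlying queuing lock still looks free so that its cache line enters the transaction's read-set, and the badness mask that throttles future attempts after failures (__kmp_step_badness sets one more low bit, __kmp_should_speculate tests attempts & badness, so after k consecutive failures only every 2^k-th attempt speculates). A hedged, self-contained sketch of that cycle; it assumes RTM-capable hardware, a build with -mrtm, and uses hypothetical names throughout:

#include <immintrin.h> // _xbegin/_xend/_xabort, _XBEGIN_STARTED
#include <atomic>

std::atomic<int> lock_word{0}; // 0 == free; stands in for the queuing lock
unsigned badness = 0, attempts = 0, max_badness = 0xFF;

// One speculative attempt in the style of __kmp_test_adaptive_lock_only.
bool try_speculative_acquire() {
  if ((attempts++ & badness) != 0) // throttle, cf. __kmp_should_speculate
    return false;
  unsigned status = _xbegin();
  if (status == _XBEGIN_STARTED) {
    if (lock_word.load(std::memory_order_relaxed) != 0)
      _xabort(0x01);               // really held: abort the transaction
    return true;                   // now running transactionally
  }
  // Aborted: widen the badness mask, cf. __kmp_step_badness.
  unsigned nb = (badness << 1) | 1;
  if (nb <= max_badness)
    badness = nb;
  return false;
}

void speculative_release() {
  _xend();     // commit the transaction
  badness = 0; // cf. __kmp_update_badness_after_success
}

The real code additionally retries soft aborts (those matching SOFT_ABORT_MASK) up to max_soft_retries times before stepping the badness and falling back to the queuing lock.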
-
-static int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
-                                                   kmp_int32 gtid) {
-  char const *const func = "omp_unset_lock";
-  KMP_MB(); /* in case another processor initialized lock */
-  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
-    KMP_FATAL(LockIsUninitialized, func);
-  }
-  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == -1) {
-    KMP_FATAL(LockUnsettingFree, func);
-  }
-  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != gtid) {
-    KMP_FATAL(LockUnsettingSetByAnother, func);
-  }
-  lck->lk.qlk.owner_id = 0;
-  __kmp_release_adaptive_lock(lck, gtid);
-  return KMP_LOCK_RELEASED;
-}
-
-static void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) {
-  __kmp_init_queuing_lock(GET_QLK_PTR(lck));
-  lck->lk.adaptive.badness = 0;
-  lck->lk.adaptive.acquire_attempts = 0; // nonSpeculativeAcquireAttempts = 0;
-  lck->lk.adaptive.max_soft_retries =
-      __kmp_adaptive_backoff_params.max_soft_retries;
-  lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
-#if KMP_DEBUG_ADAPTIVE_LOCKS
-  __kmp_zero_speculative_stats(&lck->lk.adaptive);
-#endif
-  KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
-}
-
-static void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) {
-#if KMP_DEBUG_ADAPTIVE_LOCKS
-  __kmp_accumulate_speculative_stats(&lck->lk.adaptive);
-#endif
-  __kmp_destroy_queuing_lock(GET_QLK_PTR(lck));
-  // Nothing needed for the speculative part.
-}
-
-static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
-  char const *const func = "omp_destroy_lock";
-  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
-    KMP_FATAL(LockIsUninitialized, func);
-  }
-  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != -1) {
-    KMP_FATAL(LockStillOwned, func);
-  }
-  __kmp_destroy_adaptive_lock(lck);
-}
-
-#endif // KMP_USE_ADAPTIVE_LOCKS
-
-/* ------------------------------------------------------------------------ */
-/* DRDPA ticket locks */
-/* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
-
-static kmp_int32 __kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t *lck) {
-  return lck->lk.owner_id - 1;
-}
-
-static inline bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t *lck) {
-  return lck->lk.depth_locked != -1;
-}
-
-__forceinline static int
-__kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
-  kmp_uint64 ticket = KMP_ATOMIC_INC(&lck->lk.next_ticket);
-  kmp_uint64 mask = lck->lk.mask; // atomic load
-  std::atomic<kmp_uint64> *polls = lck->lk.polls;
-
-#ifdef USE_LOCK_PROFILE
-  if (polls[ticket & mask] != ticket)
-    __kmp_printf("LOCK CONTENTION: %p\n", lck);
-/* else __kmp_printf( "." );*/
-#endif /* USE_LOCK_PROFILE */
-
-  // Now spin-wait, but reload the polls pointer and mask, in case the
-  // polling area has been reconfigured. Unless it is reconfigured, the
-  // reloads stay in L1 cache and are cheap.
-  //
-  // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.cpp !!!
-  //
-  // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
-  // and poll to be re-read every spin iteration.
-  kmp_uint32 spins;
-
-  KMP_FSYNC_PREPARE(lck);
-  KMP_INIT_YIELD(spins);
-  while (polls[ticket & mask] < ticket) { // atomic load
-    // If we are oversubscribed,
-    // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
-    // CPU Pause is in the macros for yield.
-    //
-    KMP_YIELD(TCR_4(__kmp_nth) >
-              (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
-    KMP_YIELD_SPIN(spins);
-
-    // Re-read the mask and the poll pointer from the lock structure.
-    //
-    // Make certain that "mask" is read before "polls" !!!
-    //
-    // If another thread reconfigures the polling area and updates its
-    // values, and we get the new value of mask and the old polls pointer, we
-    // could access memory beyond the end of the old polling area.
-    mask = lck->lk.mask; // atomic load
-    polls = lck->lk.polls; // atomic load
-  }
-
-  // Critical section starts here
-  KMP_FSYNC_ACQUIRED(lck);
-  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
-                  ticket, lck));
-  lck->lk.now_serving = ticket; // non-volatile store
-
-  // Deallocate a garbage polling area if we know that we are the last
-  // thread that could possibly access it.
-  //
-  // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
-  // ticket.
-  if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
-    __kmp_free(lck->lk.old_polls);
-    lck->lk.old_polls = NULL;
-    lck->lk.cleanup_ticket = 0;
-  }
-
-  // Check to see if we should reconfigure the polling area.
-  // If there is still a garbage polling area to be deallocated from a
-  // previous reconfiguration, let a later thread reconfigure it.
-  if (lck->lk.old_polls == NULL) {
-    bool reconfigure = false;
-    std::atomic<kmp_uint64> *old_polls = polls;
-    kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
-
-    if (TCR_4(__kmp_nth) >
-        (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
-      // We are in oversubscription mode. Contract the polling area
-      // down to a single location, if that hasn't been done already.
-      if (num_polls > 1) {
-        reconfigure = true;
-        num_polls = TCR_4(lck->lk.num_polls);
-        mask = 0;
-        num_polls = 1;
-        polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
-                                                          sizeof(*polls));
-        polls[0] = ticket;
-      }
-    } else {
-      // We are in under/fully subscribed mode. Check the number of
-      // threads waiting on the lock. The size of the polling area
-      // should be at least the number of threads waiting.
-      kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
-      if (num_waiting > num_polls) {
-        kmp_uint32 old_num_polls = num_polls;
-        reconfigure = true;
-        do {
-          mask = (mask << 1) | 1;
-          num_polls *= 2;
-        } while (num_polls <= num_waiting);
-
-        // Allocate the new polling area, and copy the relevant portion
-        // of the old polling area to the new area. __kmp_allocate()
-        // zeroes the memory it allocates, and most of the old area is
-        // just zero padding, so we only copy the release counters.
-        polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
-                                                          sizeof(*polls));
-        kmp_uint32 i;
-        for (i = 0; i < old_num_polls; i++) {
-          polls[i].store(old_polls[i]);
-        }
-      }
-    }
-
-    if (reconfigure) {
-      // Now write the updated fields back to the lock structure.
-      //
-      // Make certain that "polls" is written before "mask" !!!
-      //
-      // If another thread picks up the new value of mask and the old polls
-      // pointer, it could access memory beyond the end of the old polling
-      // area.
-      //
-      // On x86, we need memory fences.
-      KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring "
-                      "lock %p to %d polls\n",
-                      ticket, lck, num_polls));
-
-      lck->lk.old_polls = old_polls;
-      lck->lk.polls = polls; // atomic store
-
-      KMP_MB();
-
-      lck->lk.num_polls = num_polls;
-      lck->lk.mask = mask; // atomic store
-
-      KMP_MB();
-
-      // Only after the new polling area and mask have been flushed
-      // to main memory can we update the cleanup ticket field.
-      //
-      // volatile load / non-volatile store
-      lck->lk.cleanup_ticket = lck->lk.next_ticket;
-    }
-  }
-  return KMP_LOCK_ACQUIRED_FIRST;
-}
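Stripped of the reconfiguration machinery, the acquire/release protocol implemented above is a ticket lock whose waiters spin on separate slots of a polling array, so releasing dirties only the next waiter's cache line instead of a counter shared by every spinner. A fixed-size sketch of that core (hypothetical names; it assumes at most kPolls threads ever wait at once, which is the invariant the real code maintains by growing the array):

#include <atomic>
#include <cstdint>

// Fixed-size sketch of the idea only, not the runtime's implementation.
struct mini_drdpa_lock {
  static constexpr uint64_t kPolls = 8;     // power of two
  std::atomic<uint64_t> next_ticket{0};
  std::atomic<uint64_t> polls[kPolls] = {}; // zeroed, so ticket 0 may enter
  uint64_t now_serving = 0;                 // written only by the lock holder

  void acquire() {
    uint64_t ticket = next_ticket.fetch_add(1);
    while (polls[ticket & (kPolls - 1)].load(std::memory_order_acquire) <
           ticket) {
      // spin on a slot no other waiter reads
    }
    now_serving = ticket;
  }
  void release() {
    uint64_t next = now_serving + 1;
    // Publish 'next' into the next waiter's slot, waking exactly that thread.
    polls[next & (kPolls - 1)].store(next, std::memory_order_release);
  }
};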
-
-int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
-  int retval = __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
-  ANNOTATE_DRDPA_ACQUIRED(lck);
-  return retval;
-}
-
-static int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
-                                                kmp_int32 gtid) {
-  char const *const func = "omp_set_lock";
-  if (lck->lk.initialized != lck) {
-    KMP_FATAL(LockIsUninitialized, func);
-  }
-  if (__kmp_is_drdpa_lock_nestable(lck)) {
-    KMP_FATAL(LockNestableUsedAsSimple, func);
-  }
-  if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) == gtid)) {
-    KMP_FATAL(LockIsAlreadyOwned, func);
-  }
-
-  __kmp_acquire_drdpa_lock(lck, gtid);
-
-  lck->lk.owner_id = gtid + 1;
-  return KMP_LOCK_ACQUIRED_FIRST;
-}
-
-int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
-  // First get a ticket, then read the polls pointer and the mask.
-  // The polls pointer must be read before the mask!!! (See above)
-  kmp_uint64 ticket = lck->lk.next_ticket; // atomic load
-  std::atomic<kmp_uint64> *polls = lck->lk.polls;
-  kmp_uint64 mask = lck->lk.mask; // atomic load
-  if (polls[ticket & mask] == ticket) {
-    kmp_uint64 next_ticket = ticket + 1;
-    if (__kmp_atomic_compare_store_acq(&lck->lk.next_ticket, ticket,
-                                       next_ticket)) {
-      KMP_FSYNC_ACQUIRED(lck);
-      KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
-                      ticket, lck));
-      lck->lk.now_serving = ticket; // non-volatile store
-
-      // Since no threads are waiting, there is no possibility that we would
-      // want to reconfigure the polling area. We might have the cleanup ticket
-      // value (which says that it is now safe to deallocate old_polls), but
-      // we'll let a later thread which calls __kmp_acquire_lock do that - this
-      // routine isn't supposed to block, and we would risk blocks if we called
-      // __kmp_free() to do the deallocation.
-      return TRUE;
-    }
-  }
-  return FALSE;
-}
-
-static int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
-                                             kmp_int32 gtid) {
-  char const *const func = "omp_test_lock";
-  if (lck->lk.initialized != lck) {
-    KMP_FATAL(LockIsUninitialized, func);
-  }
-  if (__kmp_is_drdpa_lock_nestable(lck)) {
-    KMP_FATAL(LockNestableUsedAsSimple, func);
-  }
-
-  int retval = __kmp_test_drdpa_lock(lck, gtid);
-
-  if (retval) {
-    lck->lk.owner_id = gtid + 1;
-  }
-  return retval;
-}
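The reconfiguration branch in the acquire path above sizes the polling area by repeated doubling until it covers every waiter; only the release counters need copying afterwards because the rest of the new area starts out zeroed. The arithmetic in isolation, as a hypothetical standalone program:

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t mask = 0;         // current mask: num_polls - 1
  uint32_t num_polls = 1;
  uint64_t num_waiting = 11; // e.g. eleven threads queued behind us
  // Mirror of the doubling loop above: grow until the area covers all waiters.
  do {
    mask = (mask << 1) | 1;
    num_polls *= 2;
  } while (num_polls <= num_waiting);
  std::printf("num_polls=%u mask=0x%llx\n", num_polls,
              (unsigned long long)mask); // prints num_polls=16 mask=0xf
  return 0;
}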
-
-int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
-  // Read the ticket value from the lock data struct, then the polls pointer
-  // and the mask. The polls pointer must be read before the mask!!! (See above)
-  kmp_uint64 ticket = lck->lk.now_serving + 1; // non-atomic load
-  std::atomic<kmp_uint64> *polls = lck->lk.polls; // atomic load
-  kmp_uint64 mask = lck->lk.mask; // atomic load
-  KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
-                  ticket - 1, lck));
-  KMP_FSYNC_RELEASING(lck);
-  ANNOTATE_DRDPA_RELEASED(lck);
-  polls[ticket & mask] = ticket; // atomic store
-  return KMP_LOCK_RELEASED;
-}
-
-static int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
-                                                kmp_int32 gtid) {
-  char const *const func = "omp_unset_lock";
-  KMP_MB(); /* in case another processor initialized lock */
-  if (lck->lk.initialized != lck) {
-    KMP_FATAL(LockIsUninitialized, func);
-  }
-  if (__kmp_is_drdpa_lock_nestable(lck)) {
-    KMP_FATAL(LockNestableUsedAsSimple, func);
-  }
-  if (__kmp_get_drdpa_lock_owner(lck) == -1) {
-    KMP_FATAL(LockUnsettingFree, func);
-  }
-  if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) >= 0) &&
-      (__kmp_get_drdpa_lock_owner(lck) != gtid)) {
-    KMP_FATAL(LockUnsettingSetByAnother, func);
-  }
-  lck->lk.owner_id = 0;
-  return __kmp_release_drdpa_lock(lck, gtid);
-}
-
-void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck) {
-  lck->lk.location = NULL;
-  lck->lk.mask = 0;
-  lck->lk.num_polls = 1;
-  lck->lk.polls = (std::atomic<kmp_uint64> *)__kmp_allocate(
-      lck->lk.num_polls * sizeof(*(lck->lk.polls)));
-  lck->lk.cleanup_ticket = 0;
-  lck->lk.old_polls = NULL;
-  lck->lk.next_ticket = 0;
-  lck->lk.now_serving = 0;
-  lck->lk.owner_id = 0; // no thread owns the lock.
-  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
-  lck->lk.initialized = lck;
-
-  KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
-}
-
-void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck) {
-  lck->lk.initialized = NULL;
-  lck->lk.location = NULL;
-  if (lck->lk.polls.load() != NULL) {
-    __kmp_free(lck->lk.polls.load());
-    lck->lk.polls = NULL;
-  }
-  if (lck->lk.old_polls != NULL) {
-    __kmp_free(lck->lk.old_polls);
-    lck->lk.old_polls = NULL;
-  }
-  lck->lk.mask = 0;
-  lck->lk.num_polls = 0;
-  lck->lk.cleanup_ticket = 0;
-  lck->lk.next_ticket = 0;
-  lck->lk.now_serving = 0;
-  lck->lk.owner_id = 0;
-  lck->lk.depth_locked = -1;
-}
-
-static void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
-  char const *const func = "omp_destroy_lock";
-  if (lck->lk.initialized != lck) {
-    KMP_FATAL(LockIsUninitialized, func);
-  }
-  if (__kmp_is_drdpa_lock_nestable(lck)) {
-    KMP_FATAL(LockNestableUsedAsSimple, func);
-  }
-  if (__kmp_get_drdpa_lock_owner(lck) != -1) {
-    KMP_FATAL(LockStillOwned, func);
-  }
-  __kmp_destroy_drdpa_lock(lck);
-}
-
-// nested drdpa ticket locks
-
-int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
-  KMP_DEBUG_ASSERT(gtid >= 0);
-
-  if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
-    lck->lk.depth_locked += 1;
-    return KMP_LOCK_ACQUIRED_NEXT;
-  } else {
-    __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
-    ANNOTATE_DRDPA_ACQUIRED(lck);
-    KMP_MB();
-    lck->lk.depth_locked = 1;
-    KMP_MB();
-    lck->lk.owner_id = gtid + 1;
-    return KMP_LOCK_ACQUIRED_FIRST;
-  }
-}
-
-static void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
-                                                        kmp_int32 gtid) {
-  char const *const func = "omp_set_nest_lock";
-  if (lck->lk.initialized != lck) {
-    KMP_FATAL(LockIsUninitialized, func);
-  }
-  if (!__kmp_is_drdpa_lock_nestable(lck)) {
-    KMP_FATAL(LockSimpleUsedAsNestable, func);
-  }
-  __kmp_acquire_nested_drdpa_lock(lck, gtid);
-}
-
-int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
-  int retval;
-
KMP_DEBUG_ASSERT(gtid >= 0); - - if (__kmp_get_drdpa_lock_owner(lck) == gtid) { - retval = ++lck->lk.depth_locked; - } else if (!__kmp_test_drdpa_lock(lck, gtid)) { - retval = 0; - } else { - KMP_MB(); - retval = lck->lk.depth_locked = 1; - KMP_MB(); - lck->lk.owner_id = gtid + 1; - } - return retval; -} - -static int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, - kmp_int32 gtid) { - char const *const func = "omp_test_nest_lock"; - if (lck->lk.initialized != lck) { - KMP_FATAL(LockIsUninitialized, func); - } - if (!__kmp_is_drdpa_lock_nestable(lck)) { - KMP_FATAL(LockSimpleUsedAsNestable, func); - } - return __kmp_test_nested_drdpa_lock(lck, gtid); -} - -int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { - KMP_DEBUG_ASSERT(gtid >= 0); - - KMP_MB(); - if (--(lck->lk.depth_locked) == 0) { - KMP_MB(); - lck->lk.owner_id = 0; - __kmp_release_drdpa_lock(lck, gtid); - return KMP_LOCK_RELEASED; - } - return KMP_LOCK_STILL_HELD; -} - -static int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, - kmp_int32 gtid) { - char const *const func = "omp_unset_nest_lock"; - KMP_MB(); /* in case another processor initialized lock */ - if (lck->lk.initialized != lck) { - KMP_FATAL(LockIsUninitialized, func); - } - if (!__kmp_is_drdpa_lock_nestable(lck)) { - KMP_FATAL(LockSimpleUsedAsNestable, func); - } - if (__kmp_get_drdpa_lock_owner(lck) == -1) { - KMP_FATAL(LockUnsettingFree, func); - } - if (__kmp_get_drdpa_lock_owner(lck) != gtid) { - KMP_FATAL(LockUnsettingSetByAnother, func); - } - return __kmp_release_nested_drdpa_lock(lck, gtid); -} - -void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck) { - __kmp_init_drdpa_lock(lck); - lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks -} - -void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck) { - __kmp_destroy_drdpa_lock(lck); - lck->lk.depth_locked = 0; -} - -static void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { - char const *const func = "omp_destroy_nest_lock"; - if (lck->lk.initialized != lck) { - KMP_FATAL(LockIsUninitialized, func); - } - if (!__kmp_is_drdpa_lock_nestable(lck)) { - KMP_FATAL(LockSimpleUsedAsNestable, func); - } - if (__kmp_get_drdpa_lock_owner(lck) != -1) { - KMP_FATAL(LockStillOwned, func); - } - __kmp_destroy_nested_drdpa_lock(lck); -} - -// access functions to fields which don't exist for all lock kinds. - -static const ident_t *__kmp_get_drdpa_lock_location(kmp_drdpa_lock_t *lck) { - return lck->lk.location; -} - -static void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t *lck, - const ident_t *loc) { - lck->lk.location = loc; -} - -static kmp_lock_flags_t __kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t *lck) { - return lck->lk.flags; -} - -static void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t *lck, - kmp_lock_flags_t flags) { - lck->lk.flags = flags; -} - -// Time stamp counter -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 -#define __kmp_tsc() __kmp_hardware_timestamp() -// Runtime's default backoff parameters -kmp_backoff_t __kmp_spin_backoff_params = {1, 4096, 100}; -#else -// Use nanoseconds for other platforms -extern kmp_uint64 __kmp_now_nsec(); -kmp_backoff_t __kmp_spin_backoff_params = {1, 256, 100}; -#define __kmp_tsc() __kmp_now_nsec() -#endif - -// A useful predicate for dealing with timestamps that may wrap. -// Is a before b? Since the timestamps may wrap, this is asking whether it's -// shorter to go clockwise from a to b around the clock-face, or anti-clockwise. 
-// Times where going clockwise is less distance than going anti-clockwise -// are in the future, others are in the past. e.g. a = MAX-1, b = MAX+1 (=0), -// then a > b (true) does not mean a reached b; whereas signed(a) = -2, -// signed(b) = 0 captures the actual difference -static inline bool before(kmp_uint64 a, kmp_uint64 b) { - return ((kmp_int64)b - (kmp_int64)a) > 0; -} - -// Truncated binary exponential backoff function -void __kmp_spin_backoff(kmp_backoff_t *boff) { - // We could flatten this loop, but making it a nested loop gives better result - kmp_uint32 i; - for (i = boff->step; i > 0; i--) { - kmp_uint64 goal = __kmp_tsc() + boff->min_tick; - do { - KMP_CPU_PAUSE(); - } while (before(__kmp_tsc(), goal)); - } - boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1); -} - -#if KMP_USE_DYNAMIC_LOCK - -// Direct lock initializers. It simply writes a tag to the low 8 bits of the -// lock word. -static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, - kmp_dyna_lockseq_t seq) { - TCW_4(*lck, KMP_GET_D_TAG(seq)); - KA_TRACE( - 20, - ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq)); -} - -#if KMP_USE_TSX - -// HLE lock functions - imported from the testbed runtime. -#define HLE_ACQUIRE ".byte 0xf2;" -#define HLE_RELEASE ".byte 0xf3;" - -static inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) { - __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" : "+r"(v), "+m"(*p) : : "memory"); - return v; -} - -static void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); } - -static void __kmp_destroy_hle_lock_with_checks(kmp_dyna_lock_t *lck) { - TCW_4(*lck, 0); -} - -static void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { - // Use gtid for KMP_LOCK_BUSY if necessary - if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) { - int delay = 1; - do { - while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) { - for (int i = delay; i != 0; --i) - KMP_CPU_PAUSE(); - delay = ((delay << 1) | 1) & 7; - } - } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)); - } -} - -static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, - kmp_int32 gtid) { - __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks -} - -static int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { - __asm__ volatile(HLE_RELEASE "movl %1,%0" - : "=m"(*lck) - : "r"(KMP_LOCK_FREE(hle)) - : "memory"); - return KMP_LOCK_RELEASED; -} - -static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, - kmp_int32 gtid) { - return __kmp_release_hle_lock(lck, gtid); // TODO: add checks -} - -static int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { - return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle); -} - -static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, - kmp_int32 gtid) { - return __kmp_test_hle_lock(lck, gtid); // TODO: add checks -} - -static void __kmp_init_rtm_lock(kmp_queuing_lock_t *lck) { - __kmp_init_queuing_lock(lck); -} - -static void __kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck) { - __kmp_destroy_queuing_lock(lck); -} - -static void __kmp_destroy_rtm_lock_with_checks(kmp_queuing_lock_t *lck) { - __kmp_destroy_queuing_lock_with_checks(lck); -} - -static void __kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { - unsigned retries = 3, status; - do { - status = _xbegin(); - if (status == _XBEGIN_STARTED) { - if (__kmp_is_unlocked_queuing_lock(lck)) - return; - _xabort(0xff); - } - if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) { - // Wait until 
lock becomes free - while (!__kmp_is_unlocked_queuing_lock(lck)) - __kmp_yield(TRUE); - } else if (!(status & _XABORT_RETRY)) - break; - } while (retries--); - - // Fall-back non-speculative lock (xchg) - __kmp_acquire_queuing_lock(lck, gtid); -} - -static void __kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck, - kmp_int32 gtid) { - __kmp_acquire_rtm_lock(lck, gtid); -} - -static int __kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { - if (__kmp_is_unlocked_queuing_lock(lck)) { - // Releasing from speculation - _xend(); - } else { - // Releasing from a real lock - __kmp_release_queuing_lock(lck, gtid); - } - return KMP_LOCK_RELEASED; -} - -static int __kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck, - kmp_int32 gtid) { - return __kmp_release_rtm_lock(lck, gtid); -} - -static int __kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { - unsigned retries = 3, status; - do { - status = _xbegin(); - if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) { - return 1; - } - if (!(status & _XABORT_RETRY)) - break; - } while (retries--); - - return (__kmp_is_unlocked_queuing_lock(lck)) ? 1 : 0; -} - -static int __kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck, - kmp_int32 gtid) { - return __kmp_test_rtm_lock(lck, gtid); -} - -#endif // KMP_USE_TSX - -// Entry functions for indirect locks (first element of direct lock jump tables) -static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l, - kmp_dyna_lockseq_t tag); -static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock); -static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); -static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); -static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); -static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock, - kmp_int32); -static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock, - kmp_int32); -static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock, - kmp_int32); - -// Lock function definitions for the union parameter type -#define KMP_FOREACH_LOCK_KIND(m, a) m(ticket, a) m(queuing, a) m(drdpa, a) - -#define expand1(lk, op) \ - static void __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock) { \ - __kmp_##op##_##lk##_##lock(&lock->lk); \ - } -#define expand2(lk, op) \ - static int __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock, \ - kmp_int32 gtid) { \ - return __kmp_##op##_##lk##_##lock(&lock->lk, gtid); \ - } -#define expand3(lk, op) \ - static void __kmp_set_##lk##_##lock_flags(kmp_user_lock_p lock, \ - kmp_lock_flags_t flags) { \ - __kmp_set_##lk##_lock_flags(&lock->lk, flags); \ - } -#define expand4(lk, op) \ - static void __kmp_set_##lk##_##lock_location(kmp_user_lock_p lock, \ - const ident_t *loc) { \ - __kmp_set_##lk##_lock_location(&lock->lk, loc); \ - } - -KMP_FOREACH_LOCK_KIND(expand1, init) -KMP_FOREACH_LOCK_KIND(expand1, init_nested) -KMP_FOREACH_LOCK_KIND(expand1, destroy) -KMP_FOREACH_LOCK_KIND(expand1, destroy_nested) -KMP_FOREACH_LOCK_KIND(expand2, acquire) -KMP_FOREACH_LOCK_KIND(expand2, acquire_nested) -KMP_FOREACH_LOCK_KIND(expand2, release) -KMP_FOREACH_LOCK_KIND(expand2, release_nested) -KMP_FOREACH_LOCK_KIND(expand2, test) -KMP_FOREACH_LOCK_KIND(expand2, test_nested) -KMP_FOREACH_LOCK_KIND(expand3, ) -KMP_FOREACH_LOCK_KIND(expand4, ) - -#undef expand1 -#undef expand2 -#undef expand3 -#undef expand4 - -// Jump tables for the indirect lock functions -// Only fill in the odd entries, that avoids the need to shift out the low bit - -// init 
functions -#define expand(l, op) 0, __kmp_init_direct_lock, -void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) = { - __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init)}; -#undef expand - -// destroy functions -#define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock, -static void (*direct_destroy[])(kmp_dyna_lock_t *) = { - __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)}; -#undef expand -#define expand(l, op) \ - 0, (void (*)(kmp_dyna_lock_t *))__kmp_destroy_##l##_lock_with_checks, -static void (*direct_destroy_check[])(kmp_dyna_lock_t *) = { - __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)}; -#undef expand - -// set/acquire functions -#define expand(l, op) \ - 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, -static int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = { - __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)}; -#undef expand -#define expand(l, op) \ - 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks, -static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = { - __kmp_set_indirect_lock_with_checks, 0, - KMP_FOREACH_D_LOCK(expand, acquire)}; -#undef expand - -// unset/release and test functions -#define expand(l, op) \ - 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, -static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) = { - __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release)}; -static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) = { - __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test)}; -#undef expand -#define expand(l, op) \ - 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks, -static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) = { - __kmp_unset_indirect_lock_with_checks, 0, - KMP_FOREACH_D_LOCK(expand, release)}; -static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = { - __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test)}; -#undef expand - -// Exposes only one set of jump tables (*lock or *lock_with_checks). 
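As the comment above says, only one of each pair of tables is exposed: during initialization the runtime points an exported table pointer at either the plain tables or the *_check variants, so consistency checking costs one extra indirection rather than a branch inside every lock operation. A minimal sketch of that selection pattern (hypothetical names, not the runtime's):

#include <cstdio>

// Two variants of the same operation: fast, and with consistency checks.
static int set_fast(int *l) { return *l = 1; }
static int set_checked(int *l) {
  if (*l == 1) {
    std::puts("error: lock already set");
    return -1;
  }
  return *l = 1;
}

static int (*table_fast[])(int *) = {set_fast};
static int (*table_checked[])(int *) = {set_checked};

// Single exported pointer, bound once at startup (cf. __kmp_direct_set).
static int (**exported_set)(int *) = nullptr;

void init_runtime(bool consistency_checks) {
  exported_set = consistency_checks ? table_checked : table_fast;
}

int main() {
  init_runtime(true);
  int lock = 0;
  exported_set[0](&lock); // dispatches to the checked version
  exported_set[0](&lock); // second set is reported as an error
}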
-void (*(*__kmp_direct_destroy))(kmp_dyna_lock_t *) = 0;
-int (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0;
-int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0;
-int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0;
-
-// Jump tables for the indirect lock functions
-#define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
-void (*__kmp_indirect_init[])(kmp_user_lock_p) = {
-    KMP_FOREACH_I_LOCK(expand, init)};
-#undef expand
-
-#define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
-static void (*indirect_destroy[])(kmp_user_lock_p) = {
-    KMP_FOREACH_I_LOCK(expand, destroy)};
-#undef expand
-#define expand(l, op)                                                          \
-  (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock_with_checks,
-static void (*indirect_destroy_check[])(kmp_user_lock_p) = {
-    KMP_FOREACH_I_LOCK(expand, destroy)};
-#undef expand
-
-// set/acquire functions
-#define expand(l, op)                                                          \
-  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
-static int (*indirect_set[])(kmp_user_lock_p,
-                             kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)};
-#undef expand
-#define expand(l, op)                                                          \
-  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
-static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = {
-    KMP_FOREACH_I_LOCK(expand, acquire)};
-#undef expand
-
-// unset/release and test functions
-#define expand(l, op)                                                          \
-  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
-static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = {
-    KMP_FOREACH_I_LOCK(expand, release)};
-static int (*indirect_test[])(kmp_user_lock_p,
-                              kmp_int32) = {KMP_FOREACH_I_LOCK(expand, test)};
-#undef expand
-#define expand(l, op)                                                          \
-  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
-static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = {
-    KMP_FOREACH_I_LOCK(expand, release)};
-static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = {
-    KMP_FOREACH_I_LOCK(expand, test)};
-#undef expand
-
-// Exposes only one set of jump tables (*lock or *lock_with_checks).
-void (*(*__kmp_indirect_destroy))(kmp_user_lock_p) = 0;
-int (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0;
-int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0;
-int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0;
-
-// Lock index table.
-kmp_indirect_lock_table_t __kmp_i_lock_table;
-
-// Size of indirect locks.
-static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {0};
-
-// Jump tables for lock accessor/modifier.
-void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
-                                                     const ident_t *) = {0};
-void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
-                                                  kmp_lock_flags_t) = {0};
-const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(
-    kmp_user_lock_p) = {0};
-kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
-    kmp_user_lock_p) = {0};
-
-// Use different lock pools for different lock types.
-static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0};
-
-// User lock allocator for dynamically dispatched indirect locks. Every entry
-// of the indirect lock table holds the address and type of the allocated
-// indirect lock (kmp_indirect_lock_t), and the size of the table doubles when
-// it is full. A destroyed indirect lock object is returned to the reusable
-// pool of locks, unique to each lock type.
-kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock, - kmp_int32 gtid, - kmp_indirect_locktag_t tag) { - kmp_indirect_lock_t *lck; - kmp_lock_index_t idx; - - __kmp_acquire_lock(&__kmp_global_lock, gtid); - - if (__kmp_indirect_lock_pool[tag] != NULL) { - // Reuse the allocated and destroyed lock object - lck = __kmp_indirect_lock_pool[tag]; - if (OMP_LOCK_T_SIZE < sizeof(void *)) - idx = lck->lock->pool.index; - __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next; - KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n", - lck)); - } else { - idx = __kmp_i_lock_table.next; - // Check capacity and double the size if it is full - if (idx == __kmp_i_lock_table.size) { - // Double up the space for block pointers - int row = __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; - kmp_indirect_lock_t **new_table = (kmp_indirect_lock_t **)__kmp_allocate( - 2 * row * sizeof(kmp_indirect_lock_t *)); - KMP_MEMCPY(new_table, __kmp_i_lock_table.table, - row * sizeof(kmp_indirect_lock_t *)); - kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table; - __kmp_i_lock_table.table = new_table; - __kmp_free(old_table); - // Allocate new objects in the new blocks - for (int i = row; i < 2 * row; ++i) - *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)__kmp_allocate( - KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t)); - __kmp_i_lock_table.size = 2 * idx; - } - __kmp_i_lock_table.next++; - lck = KMP_GET_I_LOCK(idx); - // Allocate a new base lock object - lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]); - KA_TRACE(20, - ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck)); - } - - __kmp_release_lock(&__kmp_global_lock, gtid); - - lck->type = tag; - - if (OMP_LOCK_T_SIZE < sizeof(void *)) { - *((kmp_lock_index_t *)user_lock) = idx - << 1; // indirect lock word must be even - } else { - *((kmp_indirect_lock_t **)user_lock) = lck; - } - - return lck; -} - -// User lock lookup for dynamically dispatched locks. 
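Before the lookup routine below, note the encoding it must undo: when omp_lock_t cannot hold a pointer, the allocator above published the table index as idx << 1, so an indirect lock word is always even. A minimal model of that encode/decode pair (toy_* names invented; the runtime's decode is the KMP_EXTRACT_I_INDEX macro):

#include <cassert>
#include <cstdint>

// Encode: the index is shifted left by one, so the stored word is always
// even; odd bit patterns remain free for other lock encodings.
static inline uint32_t toy_encode_index(uint32_t idx) { return idx << 1; }

// Decode: shift back to recover the table index.
static inline uint32_t toy_extract_index(uint32_t word) {
  assert((word & 1u) == 0 && "indirect lock words are even by construction");
  return word >> 1;
}

int main() {
  uint32_t word = toy_encode_index(42);
  assert(toy_extract_index(word) == 42);
}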
-static __forceinline kmp_indirect_lock_t * -__kmp_lookup_indirect_lock(void **user_lock, const char *func) { - if (__kmp_env_consistency_check) { - kmp_indirect_lock_t *lck = NULL; - if (user_lock == NULL) { - KMP_FATAL(LockIsUninitialized, func); - } - if (OMP_LOCK_T_SIZE < sizeof(void *)) { - kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock); - if (idx >= __kmp_i_lock_table.size) { - KMP_FATAL(LockIsUninitialized, func); - } - lck = KMP_GET_I_LOCK(idx); - } else { - lck = *((kmp_indirect_lock_t **)user_lock); - } - if (lck == NULL) { - KMP_FATAL(LockIsUninitialized, func); - } - return lck; - } else { - if (OMP_LOCK_T_SIZE < sizeof(void *)) { - return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock)); - } else { - return *((kmp_indirect_lock_t **)user_lock); - } - } -} - -static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock, - kmp_dyna_lockseq_t seq) { -#if KMP_USE_ADAPTIVE_LOCKS - if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) { - KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive"); - seq = lockseq_queuing; - } -#endif -#if KMP_USE_TSX - if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) { - seq = lockseq_queuing; - } -#endif - kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq); - kmp_indirect_lock_t *l = - __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag); - KMP_I_LOCK_FUNC(l, init)(l->lock); - KA_TRACE( - 20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n", - seq)); -} - -static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) { - kmp_uint32 gtid = __kmp_entry_gtid(); - kmp_indirect_lock_t *l = - __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock"); - KMP_I_LOCK_FUNC(l, destroy)(l->lock); - kmp_indirect_locktag_t tag = l->type; - - __kmp_acquire_lock(&__kmp_global_lock, gtid); - - // Use the base lock's space to keep the pool chain. - l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag]; - if (OMP_LOCK_T_SIZE < sizeof(void *)) { - l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock); - } - __kmp_indirect_lock_pool[tag] = l; - - __kmp_release_lock(&__kmp_global_lock, gtid); -} - -static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { - kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); - return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); -} - -static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { - kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); - return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); -} - -static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { - kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); - return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); -} - -static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock, - kmp_int32 gtid) { - kmp_indirect_lock_t *l = - __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock"); - return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); -} - -static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock, - kmp_int32 gtid) { - kmp_indirect_lock_t *l = - __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock"); - return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); -} - -static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock, - kmp_int32 gtid) { - kmp_indirect_lock_t *l = - __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock"); - return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); -} - -kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing; - -// This is used only in kmp_error.cpp when consistency checking is on. 
-kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) { - switch (seq) { - case lockseq_tas: - case lockseq_nested_tas: - return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck); -#if KMP_USE_FUTEX - case lockseq_futex: - case lockseq_nested_futex: - return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck); -#endif - case lockseq_ticket: - case lockseq_nested_ticket: - return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck); - case lockseq_queuing: - case lockseq_nested_queuing: -#if KMP_USE_ADAPTIVE_LOCKS - case lockseq_adaptive: -#endif - return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck); - case lockseq_drdpa: - case lockseq_nested_drdpa: - return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck); - default: - return 0; - } -} - -// Initializes data for dynamic user locks. -void __kmp_init_dynamic_user_locks() { - // Initialize jump table for the lock functions - if (__kmp_env_consistency_check) { - __kmp_direct_set = direct_set_check; - __kmp_direct_unset = direct_unset_check; - __kmp_direct_test = direct_test_check; - __kmp_direct_destroy = direct_destroy_check; - __kmp_indirect_set = indirect_set_check; - __kmp_indirect_unset = indirect_unset_check; - __kmp_indirect_test = indirect_test_check; - __kmp_indirect_destroy = indirect_destroy_check; - } else { - __kmp_direct_set = direct_set; - __kmp_direct_unset = direct_unset; - __kmp_direct_test = direct_test; - __kmp_direct_destroy = direct_destroy; - __kmp_indirect_set = indirect_set; - __kmp_indirect_unset = indirect_unset; - __kmp_indirect_test = indirect_test; - __kmp_indirect_destroy = indirect_destroy; - } - // If the user locks have already been initialized, then return. Allow the - // switch between different KMP_CONSISTENCY_CHECK values, but do not allocate - // new lock tables if they have already been allocated. 
- if (__kmp_init_user_locks) - return; - - // Initialize lock index table - __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK; - __kmp_i_lock_table.table = - (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)); - *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate( - KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t)); - __kmp_i_lock_table.next = 0; - - // Indirect lock size - __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t); - __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t); -#if KMP_USE_ADAPTIVE_LOCKS - __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t); -#endif - __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t); -#if KMP_USE_TSX - __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t); -#endif - __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t); -#if KMP_USE_FUTEX - __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t); -#endif - __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t); - __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t); - __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t); - -// Initialize lock accessor/modifier -#define fill_jumps(table, expand, sep) \ - { \ - table[locktag##sep##ticket] = expand(ticket); \ - table[locktag##sep##queuing] = expand(queuing); \ - table[locktag##sep##drdpa] = expand(drdpa); \ - } - -#if KMP_USE_ADAPTIVE_LOCKS -#define fill_table(table, expand) \ - { \ - fill_jumps(table, expand, _); \ - table[locktag_adaptive] = expand(queuing); \ - fill_jumps(table, expand, _nested_); \ - } -#else -#define fill_table(table, expand) \ - { \ - fill_jumps(table, expand, _); \ - fill_jumps(table, expand, _nested_); \ - } -#endif // KMP_USE_ADAPTIVE_LOCKS - -#define expand(l) \ - (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location - fill_table(__kmp_indirect_set_location, expand); -#undef expand -#define expand(l) \ - (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags - fill_table(__kmp_indirect_set_flags, expand); -#undef expand -#define expand(l) \ - (const ident_t *(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_location - fill_table(__kmp_indirect_get_location, expand); -#undef expand -#define expand(l) \ - (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags - fill_table(__kmp_indirect_get_flags, expand); -#undef expand - - __kmp_init_user_locks = TRUE; -} - -// Clean up the lock table. -void __kmp_cleanup_indirect_user_locks() { - kmp_lock_index_t i; - int k; - - // Clean up locks in the pools first (they were already destroyed before going - // into the pools). - for (k = 0; k < KMP_NUM_I_LOCKS; ++k) { - kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k]; - while (l != NULL) { - kmp_indirect_lock_t *ll = l; - l = (kmp_indirect_lock_t *)l->lock->pool.next; - KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n", - ll)); - __kmp_free(ll->lock); - ll->lock = NULL; - } - __kmp_indirect_lock_pool[k] = NULL; - } - // Clean up the remaining undestroyed locks. - for (i = 0; i < __kmp_i_lock_table.next; i++) { - kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i); - if (l->lock != NULL) { - // Locks not destroyed explicitly need to be destroyed here. 
- KMP_I_LOCK_FUNC(l, destroy)(l->lock); - KA_TRACE( - 20, - ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n", - l)); - __kmp_free(l->lock); - } - } - // Free the table - for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++) - __kmp_free(__kmp_i_lock_table.table[i]); - __kmp_free(__kmp_i_lock_table.table); - - __kmp_init_user_locks = FALSE; -} - -enum kmp_lock_kind __kmp_user_lock_kind = lk_default; -int __kmp_num_locks_in_block = 1; // FIXME - tune this value - -#else // KMP_USE_DYNAMIC_LOCK - -static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck) { - __kmp_init_tas_lock(lck); -} - -static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) { - __kmp_init_nested_tas_lock(lck); -} - -#if KMP_USE_FUTEX -static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) { - __kmp_init_futex_lock(lck); -} - -static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) { - __kmp_init_nested_futex_lock(lck); -} -#endif - -static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) { - return lck == lck->lk.self; -} - -static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { - __kmp_init_ticket_lock(lck); -} - -static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { - __kmp_init_nested_ticket_lock(lck); -} - -static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck) { - return lck == lck->lk.initialized; -} - -static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { - __kmp_init_queuing_lock(lck); -} - -static void -__kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { - __kmp_init_nested_queuing_lock(lck); -} - -#if KMP_USE_ADAPTIVE_LOCKS -static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) { - __kmp_init_adaptive_lock(lck); -} -#endif - -static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck) { - return lck == lck->lk.initialized; -} - -static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { - __kmp_init_drdpa_lock(lck); -} - -static void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { - __kmp_init_nested_drdpa_lock(lck); -} - -/* user locks - * They are implemented as a table of function pointers which are set to the - * lock functions of the appropriate kind, once that has been determined. 
*/ - -enum kmp_lock_kind __kmp_user_lock_kind = lk_default; - -size_t __kmp_base_user_lock_size = 0; -size_t __kmp_user_lock_size = 0; - -kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck) = NULL; -int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck, - kmp_int32 gtid) = NULL; - -int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck, - kmp_int32 gtid) = NULL; -int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck, - kmp_int32 gtid) = NULL; -void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL; -void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck) = NULL; -void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL; -int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck, - kmp_int32 gtid) = NULL; - -int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck, - kmp_int32 gtid) = NULL; -int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck, - kmp_int32 gtid) = NULL; -void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL; -void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL; - -int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck) = NULL; -const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL; -void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck, - const ident_t *loc) = NULL; -kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck) = NULL; -void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck, - kmp_lock_flags_t flags) = NULL; - -void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind) { - switch (user_lock_kind) { - case lk_default: - default: - KMP_ASSERT(0); - - case lk_tas: { - __kmp_base_user_lock_size = sizeof(kmp_base_tas_lock_t); - __kmp_user_lock_size = sizeof(kmp_tas_lock_t); - - __kmp_get_user_lock_owner_ = - (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_tas_lock_owner); - - if (__kmp_env_consistency_check) { - KMP_BIND_USER_LOCK_WITH_CHECKS(tas); - KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas); - } else { - KMP_BIND_USER_LOCK(tas); - KMP_BIND_NESTED_USER_LOCK(tas); - } - - __kmp_destroy_user_lock_ = - (void (*)(kmp_user_lock_p))(&__kmp_destroy_tas_lock); - - __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL; - - __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL; - - __kmp_set_user_lock_location_ = - (void (*)(kmp_user_lock_p, const ident_t *))NULL; - - __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL; - - __kmp_set_user_lock_flags_ = - (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL; - } break; - -#if KMP_USE_FUTEX - - case lk_futex: { - __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t); - __kmp_user_lock_size = sizeof(kmp_futex_lock_t); - - __kmp_get_user_lock_owner_ = - (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner); - - if (__kmp_env_consistency_check) { - KMP_BIND_USER_LOCK_WITH_CHECKS(futex); - KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex); - } else { - KMP_BIND_USER_LOCK(futex); - KMP_BIND_NESTED_USER_LOCK(futex); - } - - __kmp_destroy_user_lock_ = - (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock); - - __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL; - - __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL; - - __kmp_set_user_lock_location_ = - (void (*)(kmp_user_lock_p, const ident_t *))NULL; - - __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL; - - __kmp_set_user_lock_flags_ = - (void (*)(kmp_user_lock_p, 
kmp_lock_flags_t))NULL; - } break; - -#endif // KMP_USE_FUTEX - - case lk_ticket: { - __kmp_base_user_lock_size = sizeof(kmp_base_ticket_lock_t); - __kmp_user_lock_size = sizeof(kmp_ticket_lock_t); - - __kmp_get_user_lock_owner_ = - (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_owner); - - if (__kmp_env_consistency_check) { - KMP_BIND_USER_LOCK_WITH_CHECKS(ticket); - KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket); - } else { - KMP_BIND_USER_LOCK(ticket); - KMP_BIND_NESTED_USER_LOCK(ticket); - } - - __kmp_destroy_user_lock_ = - (void (*)(kmp_user_lock_p))(&__kmp_destroy_ticket_lock); - - __kmp_is_user_lock_initialized_ = - (int (*)(kmp_user_lock_p))(&__kmp_is_ticket_lock_initialized); - - __kmp_get_user_lock_location_ = - (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_location); - - __kmp_set_user_lock_location_ = (void (*)( - kmp_user_lock_p, const ident_t *))(&__kmp_set_ticket_lock_location); - - __kmp_get_user_lock_flags_ = - (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_flags); - - __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))( - &__kmp_set_ticket_lock_flags); - } break; - - case lk_queuing: { - __kmp_base_user_lock_size = sizeof(kmp_base_queuing_lock_t); - __kmp_user_lock_size = sizeof(kmp_queuing_lock_t); - - __kmp_get_user_lock_owner_ = - (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner); - - if (__kmp_env_consistency_check) { - KMP_BIND_USER_LOCK_WITH_CHECKS(queuing); - KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing); - } else { - KMP_BIND_USER_LOCK(queuing); - KMP_BIND_NESTED_USER_LOCK(queuing); - } - - __kmp_destroy_user_lock_ = - (void (*)(kmp_user_lock_p))(&__kmp_destroy_queuing_lock); - - __kmp_is_user_lock_initialized_ = - (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized); - - __kmp_get_user_lock_location_ = - (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location); - - __kmp_set_user_lock_location_ = (void (*)( - kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location); - - __kmp_get_user_lock_flags_ = - (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags); - - __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))( - &__kmp_set_queuing_lock_flags); - } break; - -#if KMP_USE_ADAPTIVE_LOCKS - case lk_adaptive: { - __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t); - __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t); - - __kmp_get_user_lock_owner_ = - (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner); - - if (__kmp_env_consistency_check) { - KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive); - } else { - KMP_BIND_USER_LOCK(adaptive); - } - - __kmp_destroy_user_lock_ = - (void (*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock); - - __kmp_is_user_lock_initialized_ = - (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized); - - __kmp_get_user_lock_location_ = - (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location); - - __kmp_set_user_lock_location_ = (void (*)( - kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location); - - __kmp_get_user_lock_flags_ = - (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags); - - __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))( - &__kmp_set_queuing_lock_flags); - - } break; -#endif // KMP_USE_ADAPTIVE_LOCKS - - case lk_drdpa: { - __kmp_base_user_lock_size = sizeof(kmp_base_drdpa_lock_t); - __kmp_user_lock_size = sizeof(kmp_drdpa_lock_t); - - __kmp_get_user_lock_owner_ = - 
(kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_owner);
-
- if (__kmp_env_consistency_check) {
- KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
- KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
- } else {
- KMP_BIND_USER_LOCK(drdpa);
- KMP_BIND_NESTED_USER_LOCK(drdpa);
- }
-
- __kmp_destroy_user_lock_ =
- (void (*)(kmp_user_lock_p))(&__kmp_destroy_drdpa_lock);
-
- __kmp_is_user_lock_initialized_ =
- (int (*)(kmp_user_lock_p))(&__kmp_is_drdpa_lock_initialized);
-
- __kmp_get_user_lock_location_ =
- (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_location);
-
- __kmp_set_user_lock_location_ = (void (*)(
- kmp_user_lock_p, const ident_t *))(&__kmp_set_drdpa_lock_location);
-
- __kmp_get_user_lock_flags_ =
- (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_flags);
-
- __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
- &__kmp_set_drdpa_lock_flags);
- } break;
- }
-}
-
-// ----------------------------------------------------------------------------
-// User lock table & lock allocation
-
-kmp_lock_table_t __kmp_user_lock_table = {1, 0, NULL};
-kmp_user_lock_p __kmp_lock_pool = NULL;
-
-// Lock block-allocation support.
-kmp_block_of_locks *__kmp_lock_blocks = NULL;
-int __kmp_num_locks_in_block = 1; // FIXME - tune this value
-
-static kmp_lock_index_t __kmp_lock_table_insert(kmp_user_lock_p lck) {
- // Assume that kmp_global_lock is held upon entry/exit.
- kmp_lock_index_t index;
- if (__kmp_user_lock_table.used >= __kmp_user_lock_table.allocated) {
- kmp_lock_index_t size;
- kmp_user_lock_p *table;
- // Reallocate lock table.
- if (__kmp_user_lock_table.allocated == 0) {
- size = 1024;
- } else {
- size = __kmp_user_lock_table.allocated * 2;
- }
- table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size);
- KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1,
- sizeof(kmp_user_lock_p) * (__kmp_user_lock_table.used - 1));
- table[0] = (kmp_user_lock_p)__kmp_user_lock_table.table;
- // We cannot free the previous table now, since it may be in use by other
- // threads. So save the pointer to the previous table in the first
- // element of the new table. All the tables will be organized into a list,
- // and can be freed when the library is shutting down.
- __kmp_user_lock_table.table = table;
- __kmp_user_lock_table.allocated = size;
- }
- KMP_DEBUG_ASSERT(__kmp_user_lock_table.used <
- __kmp_user_lock_table.allocated);
- index = __kmp_user_lock_table.used;
- __kmp_user_lock_table.table[index] = lck;
- ++__kmp_user_lock_table.used;
- return index;
-}
-
-static kmp_user_lock_p __kmp_lock_block_allocate() {
- // Assume that kmp_global_lock is held upon entry/exit.
- static int last_index = 0;
- if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) {
- // Restart the index.
- last_index = 0;
- // Need to allocate a new block.
- KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
- size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
- char *buffer =
- (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks));
- // Set up the new block.
- kmp_block_of_locks *new_block =
- (kmp_block_of_locks *)(&buffer[space_for_locks]);
- new_block->next_block = __kmp_lock_blocks;
- new_block->locks = (void *)buffer;
- // Publish the new block.
- KMP_MB();
- __kmp_lock_blocks = new_block;
- }
- kmp_user_lock_p ret = (kmp_user_lock_p)(&(
- ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size]));
- last_index++;
- return ret;
-}
-
-// Get memory for a lock.
It may be freshly allocated memory or reused memory
-// from the lock pool.
-kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, kmp_int32 gtid,
- kmp_lock_flags_t flags) {
- kmp_user_lock_p lck;
- kmp_lock_index_t index;
- KMP_DEBUG_ASSERT(user_lock);
-
- __kmp_acquire_lock(&__kmp_global_lock, gtid);
-
- if (__kmp_lock_pool == NULL) {
- // Lock pool is empty. Allocate new memory.
-
- // ANNOTATION: Found no good way to express the synchronization
- // between allocation and usage, so ignore the allocation
- ANNOTATE_IGNORE_WRITES_BEGIN();
- if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point.
- lck = (kmp_user_lock_p)__kmp_allocate(__kmp_user_lock_size);
- } else {
- lck = __kmp_lock_block_allocate();
- }
- ANNOTATE_IGNORE_WRITES_END();
-
- // Insert lock in the table so that it can be freed in __kmp_cleanup,
- // and the debugger has info on all allocated locks.
- index = __kmp_lock_table_insert(lck);
- } else {
- // Pick up lock from pool.
- lck = __kmp_lock_pool;
- index = __kmp_lock_pool->pool.index;
- __kmp_lock_pool = __kmp_lock_pool->pool.next;
- }
-
- // We could potentially differentiate between nested and regular locks
- // here, and do the lock table lookup for regular locks only.
- if (OMP_LOCK_T_SIZE < sizeof(void *)) {
- *((kmp_lock_index_t *)user_lock) = index;
- } else {
- *((kmp_user_lock_p *)user_lock) = lck;
- }
-
- // Mark the lock if it is a critical section lock.
- __kmp_set_user_lock_flags(lck, flags);
-
- __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO move this line up
-
- return lck;
-}
-
-// Put the lock's memory into the pool for reuse.
-void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
- kmp_user_lock_p lck) {
- KMP_DEBUG_ASSERT(user_lock != NULL);
- KMP_DEBUG_ASSERT(lck != NULL);
-
- __kmp_acquire_lock(&__kmp_global_lock, gtid);
-
- lck->pool.next = __kmp_lock_pool;
- __kmp_lock_pool = lck;
- if (OMP_LOCK_T_SIZE < sizeof(void *)) {
- kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
- KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used);
- lck->pool.index = index;
- }
-
- __kmp_release_lock(&__kmp_global_lock, gtid);
-}
-
-kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) {
- kmp_user_lock_p lck = NULL;
-
- if (__kmp_env_consistency_check) {
- if (user_lock == NULL) {
- KMP_FATAL(LockIsUninitialized, func);
- }
- }
-
- if (OMP_LOCK_T_SIZE < sizeof(void *)) {
- kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
- if (__kmp_env_consistency_check) {
- if (!(0 < index && index < __kmp_user_lock_table.used)) {
- KMP_FATAL(LockIsUninitialized, func);
- }
- }
- KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used);
- KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
- lck = __kmp_user_lock_table.table[index];
- } else {
- lck = *((kmp_user_lock_p *)user_lock);
- }
-
- if (__kmp_env_consistency_check) {
- if (lck == NULL) {
- KMP_FATAL(LockIsUninitialized, func);
- }
- }
-
- return lck;
-}
-
-void __kmp_cleanup_user_locks(void) {
- // Reset lock pool. Don't worry about locks in the pool--we will free them
- // when iterating through the lock table (it includes all the locks, dead or
- // alive).
- __kmp_lock_pool = NULL;
-
-#define IS_CRITICAL(lck) \
- ((__kmp_get_user_lock_flags_ != NULL) && \
- ((*__kmp_get_user_lock_flags_)(lck)&kmp_lf_critical_section))
-
- // Loop through lock table, free all locks.
- // Do not free item [0], it is reserved for lock tables list.
- // - // FIXME - we are iterating through a list of (pointers to) objects of type - // union kmp_user_lock, but we have no way of knowing whether the base type is - // currently "pool" or whatever the global user lock type is. - // - // We are relying on the fact that for all of the user lock types - // (except "tas"), the first field in the lock struct is the "initialized" - // field, which is set to the address of the lock object itself when - // the lock is initialized. When the union is of type "pool", the - // first field is a pointer to the next object in the free list, which - // will not be the same address as the object itself. - // - // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will fail - // for "pool" objects on the free list. This must happen as the "location" - // field of real user locks overlaps the "index" field of "pool" objects. - // - // It would be better to run through the free list, and remove all "pool" - // objects from the lock table before executing this loop. However, - // "pool" objects do not always have their index field set (only on - // lin_32e), and I don't want to search the lock table for the address - // of every "pool" object on the free list. - while (__kmp_user_lock_table.used > 1) { - const ident *loc; - - // reduce __kmp_user_lock_table.used before freeing the lock, - // so that state of locks is consistent - kmp_user_lock_p lck = - __kmp_user_lock_table.table[--__kmp_user_lock_table.used]; - - if ((__kmp_is_user_lock_initialized_ != NULL) && - (*__kmp_is_user_lock_initialized_)(lck)) { - // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is initialized AND - // it is NOT a critical section (user is not responsible for destroying - // criticals) AND we know source location to report. - if (__kmp_env_consistency_check && (!IS_CRITICAL(lck)) && - ((loc = __kmp_get_user_lock_location(lck)) != NULL) && - (loc->psource != NULL)) { - kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 0); - KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line); - __kmp_str_loc_free(&str_loc); - } - -#ifdef KMP_DEBUG - if (IS_CRITICAL(lck)) { - KA_TRACE( - 20, - ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", - lck, *(void **)lck)); - } else { - KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, - *(void **)lck)); - } -#endif // KMP_DEBUG - - // Cleanup internal lock dynamic resources (for drdpa locks particularly). - __kmp_destroy_user_lock(lck); - } - - // Free the lock if block allocation of locks is not used. - if (__kmp_lock_blocks == NULL) { - __kmp_free(lck); - } - } - -#undef IS_CRITICAL - - // delete lock table(s). - kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table; - __kmp_user_lock_table.table = NULL; - __kmp_user_lock_table.allocated = 0; - - while (table_ptr != NULL) { - // In the first element we saved the pointer to the previous - // (smaller) lock table. - kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]); - __kmp_free(table_ptr); - table_ptr = next; - } - - // Free buffers allocated for blocks of locks. - kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks; - __kmp_lock_blocks = NULL; - - while (block_ptr != NULL) { - kmp_block_of_locks_t *next = block_ptr->next_block; - __kmp_free(block_ptr->locks); - // *block_ptr itself was allocated at the end of the locks vector. 
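- // (Reading aid: __kmp_lock_block_allocate placed the kmp_block_of_locks
- // header at the tail of the same allocation,
- // buffer: [ lock 0 | lock 1 | ... ][ kmp_block_of_locks ]
- // ^ block_ptr->locks ^ block_ptr
- // so the __kmp_free(block_ptr->locks) above has already released the
- // header too; only next_block had to be saved first.)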
- block_ptr = next;
- }
-
- TCW_4(__kmp_init_user_locks, FALSE);
-}
-
-#endif // KMP_USE_DYNAMIC_LOCK
Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_lock.cpp
___________________________________________________________________
Deleted: svn:eol-style
## -1 +0,0 ##
-native
\ No newline at end of property
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Deleted: svn:mime-type
## -1 +0,0 ##
-text/plain
\ No newline at end of property
Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_dispatch.h
===================================================================
--- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_dispatch.h (revision 348960)
+++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_dispatch.h (nonexistent)
@@ -1,514 +0,0 @@
-/*
- * kmp_dispatch.h: dynamic scheduling - iteration initialization and dispatch.
- */
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef KMP_DISPATCH_H
-#define KMP_DISPATCH_H
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-#include "kmp.h"
-#include "kmp_error.h"
-#include "kmp_i18n.h"
-#include "kmp_itt.h"
-#include "kmp_stats.h"
-#include "kmp_str.h"
-#if KMP_OS_WINDOWS && KMP_ARCH_X86
-#include <float.h>
-#endif
-
-#if OMPT_SUPPORT
-#include "ompt-internal.h"
-#include "ompt-specific.h"
-#endif
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-#if KMP_USE_HIER_SCHED
-// Forward declarations of some hierarchical scheduling data structures
-template <typename T> struct kmp_hier_t;
-template <typename T> struct kmp_hier_top_unit_t;
-#endif // KMP_USE_HIER_SCHED
-
-template <typename T> struct dispatch_shared_info_template;
-template <typename T> struct dispatch_private_info_template;
-
-template <typename T>
-extern void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
- dispatch_private_info_template<T> *pr,
- enum sched_type schedule, T lb, T ub,
- typename traits_t<T>::signed_t st,
-#if USE_ITT_BUILD
- kmp_uint64 *cur_chunk,
-#endif
- typename traits_t<T>::signed_t chunk,
- T nproc, T unit_id);
-template <typename T>
-extern int __kmp_dispatch_next_algorithm(
- int gtid, dispatch_private_info_template<T> *pr,
- dispatch_shared_info_template<T> volatile *sh, kmp_int32 *p_last, T *p_lb,
- T *p_ub, typename traits_t<T>::signed_t *p_st, T nproc, T unit_id);
-
-void __kmp_dispatch_dxo_error(int *gtid_ref, int *cid_ref, ident_t *loc_ref);
-void __kmp_dispatch_deo_error(int *gtid_ref, int *cid_ref, ident_t *loc_ref);
-
-#if KMP_STATIC_STEAL_ENABLED
-
-// replaces dispatch_private_info{32,64} structures and
-// dispatch_private_info{32,64}_t types
-template <typename T> struct dispatch_private_infoXX_template {
- typedef typename traits_t<T>::unsigned_t UT;
- typedef typename traits_t<T>::signed_t ST;
- UT count; // unsigned
- T ub;
- /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
- T lb;
- ST st; // signed
- UT tc; // unsigned
- T static_steal_counter; // for static_steal only; maybe better to put after ub
-
- /* parm[1-4] are used in different ways by different scheduling algorithms */
-
- // KMP_ALIGN( 32 ) ensures ( if the
KMP_ALIGN macro is turned on )
- // a) parm3 is properly aligned and
- // b) all parm1-4 are in the same cache line.
- // Because parm1-4 are used together, performance seems to be better
- // if they are in the same line (not measured though).
-
- struct KMP_ALIGN(32) { // compiler does not accept sizeof(T)*4
- T parm1;
- T parm2;
- T parm3;
- T parm4;
- };
-
- UT ordered_lower; // unsigned
- UT ordered_upper; // unsigned
-#if KMP_OS_WINDOWS
- T last_upper;
-#endif /* KMP_OS_WINDOWS */
-};
-
-#else /* KMP_STATIC_STEAL_ENABLED */
-
-// replaces dispatch_private_info{32,64} structures and
-// dispatch_private_info{32,64}_t types
-template <typename T> struct dispatch_private_infoXX_template {
- typedef typename traits_t<T>::unsigned_t UT;
- typedef typename traits_t<T>::signed_t ST;
- T lb;
- T ub;
- ST st; // signed
- UT tc; // unsigned
-
- T parm1;
- T parm2;
- T parm3;
- T parm4;
-
- UT count; // unsigned
-
- UT ordered_lower; // unsigned
- UT ordered_upper; // unsigned
-#if KMP_OS_WINDOWS
- T last_upper;
-#endif /* KMP_OS_WINDOWS */
-};
-#endif /* KMP_STATIC_STEAL_ENABLED */
-
-template <typename T> struct KMP_ALIGN_CACHE dispatch_private_info_template {
- // duplicate alignment here, otherwise size of structure is not correct in our
- // compiler
- union KMP_ALIGN_CACHE private_info_tmpl {
- dispatch_private_infoXX_template<T> p;
- dispatch_private_info64_t p64;
- } u;
- enum sched_type schedule; /* scheduling algorithm */
- kmp_sched_flags_t flags; /* flags (e.g., ordered, nomerge, etc.) */
- kmp_uint32 ordered_bumped;
- // to retain the structure size after making order
- kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 3];
- dispatch_private_info *next; /* stack of buffers for nest of serial regions */
- kmp_uint32 type_size;
-#if KMP_USE_HIER_SCHED
- kmp_int32 hier_id;
- kmp_hier_top_unit_t<T> *hier_parent;
- // member functions
- kmp_int32 get_hier_id() const { return hier_id; }
- kmp_hier_top_unit_t<T> *get_parent() { return hier_parent; }
-#endif
- enum cons_type pushed_ws;
-};
-
-// replaces dispatch_shared_info{32,64} structures and
-// dispatch_shared_info{32,64}_t types
-template <typename T> struct dispatch_shared_infoXX_template {
- typedef typename traits_t<T>::unsigned_t UT;
- /* chunk index under dynamic, number of idle threads under static-steal;
- iteration index otherwise */
- volatile UT iteration;
- volatile UT num_done;
- volatile UT ordered_iteration;
- // to retain the structure size making ordered_iteration scalar
- UT ordered_dummy[KMP_MAX_ORDERED - 3];
-};
-
-// replaces dispatch_shared_info structure and dispatch_shared_info_t type
-template <typename T> struct dispatch_shared_info_template {
- typedef typename traits_t<T>::unsigned_t UT;
- // we need union here to keep the structure size
- union shared_info_tmpl {
- dispatch_shared_infoXX_template<UT> s;
- dispatch_shared_info64_t s64;
- } u;
- volatile kmp_uint32 buffer_index;
-#if OMP_45_ENABLED
- volatile kmp_int32 doacross_buf_idx; // teamwise index
- kmp_uint32 *doacross_flags; // array of iteration flags (0/1)
- kmp_int32 doacross_num_done; // count finished threads
-#endif
-#if KMP_USE_HIER_SCHED
- kmp_hier_t<T> *hier;
-#endif
-#if KMP_USE_HWLOC
- // When linking with libhwloc, the ORDERED EPCC test slows down on big
- // machines (> 48 cores). Performance analysis showed that a cache thrash
- // was occurring and this padding helps alleviate the problem.
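- // (The 64 bytes below match one typical x86 cache line, keeping the hot
- // shared fields above and unrelated neighboring data in separate lines.)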
- char padding[64];
-#endif
-};
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-#undef USE_TEST_LOCKS
-
-// test_then_add template (general template should NOT be used)
-template <typename T> static __forceinline T test_then_add(volatile T *p, T d);
-
-template <>
-__forceinline kmp_int32 test_then_add<kmp_int32>(volatile kmp_int32 *p,
- kmp_int32 d) {
- kmp_int32 r;
- r = KMP_TEST_THEN_ADD32(p, d);
- return r;
-}
-
-template <>
-__forceinline kmp_int64 test_then_add<kmp_int64>(volatile kmp_int64 *p,
- kmp_int64 d) {
- kmp_int64 r;
- r = KMP_TEST_THEN_ADD64(p, d);
- return r;
-}
-
-// test_then_inc_acq template (general template should NOT be used)
-template <typename T> static __forceinline T test_then_inc_acq(volatile T *p);
-
-template <>
-__forceinline kmp_int32 test_then_inc_acq<kmp_int32>(volatile kmp_int32 *p) {
- kmp_int32 r;
- r = KMP_TEST_THEN_INC_ACQ32(p);
- return r;
-}
-
-template <>
-__forceinline kmp_int64 test_then_inc_acq<kmp_int64>(volatile kmp_int64 *p) {
- kmp_int64 r;
- r = KMP_TEST_THEN_INC_ACQ64(p);
- return r;
-}
-
-// test_then_inc template (general template should NOT be used)
-template <typename T> static __forceinline T test_then_inc(volatile T *p);
-
-template <>
-__forceinline kmp_int32 test_then_inc<kmp_int32>(volatile kmp_int32 *p) {
- kmp_int32 r;
- r = KMP_TEST_THEN_INC32(p);
- return r;
-}
-
-template <>
-__forceinline kmp_int64 test_then_inc<kmp_int64>(volatile kmp_int64 *p) {
- kmp_int64 r;
- r = KMP_TEST_THEN_INC64(p);
- return r;
-}
-
-// compare_and_swap template (general template should NOT be used)
-template <typename T>
-static __forceinline kmp_int32 compare_and_swap(volatile T *p, T c, T s);
-
-template <>
-__forceinline kmp_int32 compare_and_swap<kmp_int32>(volatile kmp_int32 *p,
- kmp_int32 c, kmp_int32 s) {
- return KMP_COMPARE_AND_STORE_REL32(p, c, s);
-}
-
-template <>
-__forceinline kmp_int32 compare_and_swap<kmp_int64>(volatile kmp_int64 *p,
- kmp_int64 c, kmp_int64 s) {
- return KMP_COMPARE_AND_STORE_REL64(p, c, s);
-}
-
-template <typename T> kmp_uint32 __kmp_ge(T value, T checker) {
- return value >= checker;
-}
-template <typename T> kmp_uint32 __kmp_eq(T value, T checker) {
- return value == checker;
-}
-
-/*
- Spin wait loop that first does pause, then yield.
- Waits until function returns non-zero when called with *spinner and check.
- Does NOT put threads to sleep.
- Arguments:
- UT is unsigned 4- or 8-byte type
- spinner - memory location to check value
- checker - value which spinner is >, <, ==, etc.
- pred - predicate function to perform binary comparison of some sort
-#if USE_ITT_BUILD
- obj -- is higher-level synchronization object to report to ittnotify. It
- is used to report locks consistently. For example, if lock is acquired
- immediately, its address is reported to ittnotify via
- KMP_FSYNC_ACQUIRED(). However, if the lock cannot be acquired immediately
- and the lock routine calls KMP_WAIT_YIELD(), the latter should report the
- same address, not an address of low-level spinner.
-#endif // USE_ITT_BUILD
- TODO: make inline function (move to header file for icl)
-*/
-template <typename UT>
-static UT __kmp_wait_yield(volatile UT *spinner, UT checker,
- kmp_uint32 (*pred)(UT, UT)
- USE_ITT_BUILD_ARG(void *obj)) {
- // note: we may not belong to a team at this point
- volatile UT *spin = spinner;
- UT check = checker;
- kmp_uint32 spins;
- kmp_uint32 (*f)(UT, UT) = pred;
- UT r;
-
- KMP_FSYNC_SPIN_INIT(obj, CCAST(UT *, spin));
- KMP_INIT_YIELD(spins);
- // main wait spin loop
- while (!f(r = *spin, check)) {
- KMP_FSYNC_SPIN_PREPARE(obj);
- /* GEH - remove this since it was accidentally introduced when kmp_wait was
- split.
- It causes problems with infinite recursion because of exit lock */
- /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort)
- __kmp_abort_thread(); */
-
- // if we are oversubscribed,
- // or have waited a bit (and KMP_LIBRARY=throughput), then yield
- // pause is in the following code
- KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
- KMP_YIELD_SPIN(spins);
- }
- KMP_FSYNC_SPIN_ACQUIRED(obj);
- return r;
-}
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-template <typename UT>
-void __kmp_dispatch_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
- dispatch_private_info_template<UT> *pr;
-
- int gtid = *gtid_ref;
- // int cid = *cid_ref;
- kmp_info_t *th = __kmp_threads[gtid];
- KMP_DEBUG_ASSERT(th->th.th_dispatch);
-
- KD_TRACE(100, ("__kmp_dispatch_deo: T#%d called\n", gtid));
- if (__kmp_env_consistency_check) {
- pr = reinterpret_cast<dispatch_private_info_template<UT> *>(
- th->th.th_dispatch->th_dispatch_pr_current);
- if (pr->pushed_ws != ct_none) {
-#if KMP_USE_DYNAMIC_LOCK
- __kmp_push_sync(gtid, ct_ordered_in_pdo, loc_ref, NULL, 0);
-#else
- __kmp_push_sync(gtid, ct_ordered_in_pdo, loc_ref, NULL);
-#endif
- }
- }
-
- if (!th->th.th_team->t.t_serialized) {
- dispatch_shared_info_template<UT> *sh =
- reinterpret_cast<dispatch_shared_info_template<UT> *>(
- th->th.th_dispatch->th_dispatch_sh_current);
- UT lower;
-
- if (!__kmp_env_consistency_check) {
- pr = reinterpret_cast<dispatch_private_info_template<UT> *>(
- th->th.th_dispatch->th_dispatch_pr_current);
- }
- lower = pr->u.p.ordered_lower;
-
-#if !defined(KMP_GOMP_COMPAT)
- if (__kmp_env_consistency_check) {
- if (pr->ordered_bumped) {
- struct cons_header *p = __kmp_threads[gtid]->th.th_cons;
- __kmp_error_construct2(kmp_i18n_msg_CnsMultipleNesting,
- ct_ordered_in_pdo, loc_ref,
- &p->stack_data[p->w_top]);
- }
- }
-#endif /* !defined(KMP_GOMP_COMPAT) */
-
- KMP_MB();
-#ifdef KMP_DEBUG
- {
- char *buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format("__kmp_dispatch_deo: T#%%d before wait: "
- "ordered_iter:%%%s lower:%%%s\n",
- traits_t<UT>::spec, traits_t<UT>::spec);
- KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
- __kmp_str_free(&buff);
- }
-#endif
- __kmp_wait_yield<UT>(&sh->u.s.ordered_iteration, lower,
- __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
- KMP_MB(); /* is this necessary?
*/
-#ifdef KMP_DEBUG
- {
- char *buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format("__kmp_dispatch_deo: T#%%d after wait: "
- "ordered_iter:%%%s lower:%%%s\n",
- traits_t<UT>::spec, traits_t<UT>::spec);
- KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
- __kmp_str_free(&buff);
- }
-#endif
- }
- KD_TRACE(100, ("__kmp_dispatch_deo: T#%d returned\n", gtid));
-}
-
-template <typename UT>
-void __kmp_dispatch_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
- typedef typename traits_t<UT>::signed_t ST;
- dispatch_private_info_template<UT> *pr;
-
- int gtid = *gtid_ref;
- // int cid = *cid_ref;
- kmp_info_t *th = __kmp_threads[gtid];
- KMP_DEBUG_ASSERT(th->th.th_dispatch);
-
- KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d called\n", gtid));
- if (__kmp_env_consistency_check) {
- pr = reinterpret_cast<dispatch_private_info_template<UT> *>(
- th->th.th_dispatch->th_dispatch_pr_current);
- if (pr->pushed_ws != ct_none) {
- __kmp_pop_sync(gtid, ct_ordered_in_pdo, loc_ref);
- }
- }
-
- if (!th->th.th_team->t.t_serialized) {
- dispatch_shared_info_template<UT> *sh =
- reinterpret_cast<dispatch_shared_info_template<UT> *>(
- th->th.th_dispatch->th_dispatch_sh_current);
-
- if (!__kmp_env_consistency_check) {
- pr = reinterpret_cast<dispatch_private_info_template<UT> *>(
- th->th.th_dispatch->th_dispatch_pr_current);
- }
-
- KMP_FSYNC_RELEASING(CCAST(UT *, &sh->u.s.ordered_iteration));
-#if !defined(KMP_GOMP_COMPAT)
- if (__kmp_env_consistency_check) {
- if (pr->ordered_bumped != 0) {
- struct cons_header *p = __kmp_threads[gtid]->th.th_cons;
- /* How to test it? - OM */
- __kmp_error_construct2(kmp_i18n_msg_CnsMultipleNesting,
- ct_ordered_in_pdo, loc_ref,
- &p->stack_data[p->w_top]);
- }
- }
-#endif /* !defined(KMP_GOMP_COMPAT) */
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- pr->ordered_bumped += 1;
-
- KD_TRACE(1000,
- ("__kmp_dispatch_dxo: T#%d bumping ordered ordered_bumped=%d\n",
- gtid, pr->ordered_bumped));
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- /* TODO use general release procedure? */
- test_then_inc<ST>((volatile ST *)&sh->u.s.ordered_iteration);
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
- }
- KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d returned\n", gtid));
-}
-
-/* Computes and returns x to the power of y, where y must be a non-negative
- integer */
-template <typename UT>
-static __forceinline long double __kmp_pow(long double x, UT y) {
- long double s = 1.0L;
-
- KMP_DEBUG_ASSERT(x > 0.0 && x < 1.0);
- // KMP_DEBUG_ASSERT(y >= 0); // y is unsigned
- while (y) {
- if (y & 1)
- s *= x;
- x *= x;
- y >>= 1;
- }
- return s;
-}
-
-/* Computes and returns the number of unassigned iterations after idx chunks
- have been assigned
- (the total number of unassigned iterations in chunks with index greater than
- or equal to idx).
- __forceinline seems to be broken so that if we __forceinline this function,
- the behavior is wrong
- (one of the unit tests, sch_guided_analytical_basic.cpp, fails)
-*/
-template <typename T>
-static __inline typename traits_t<T>::unsigned_t
-__kmp_dispatch_guided_remaining(T tc, typename traits_t<T>::floating_t base,
- typename traits_t<T>::unsigned_t idx) {
- /* Note: On Windows* OS on IA-32 architecture and Intel(R) 64, at
- least for ICL 8.1, long double arithmetic may not really have
- long double precision, even with /Qlong_double. Currently, we
- work around that in the caller code, by manipulating the FPCW for
- Windows* OS on IA-32 architecture. The lack of precision is not
- expected to be a correctness issue, though.
- */
- typedef typename traits_t<T>::unsigned_t UT;
-
- long double x = tc * __kmp_pow(base, idx);
- UT r = (UT)x;
- if (x == r)
- return r;
- return r + 1;
-}
-
-// Parameters of the guided-iterative algorithm:
-// p2 = n * nproc * ( chunk + 1 ) // point of switching to dynamic
-// p3 = 1 / ( n * nproc ) // remaining iterations multiplier
-// by default n = 2. For example with n = 3 the chunks distribution will be more
-// flat.
-// With n = 1 first chunk is the same as for static schedule, e.g. trip / nproc.
-static const int guided_int_param = 2;
-static const double guided_flt_param = 0.5; // = 1.0 / guided_int_param;
-#endif // KMP_DISPATCH_H
Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_dispatch.h
___________________________________________________________________
Deleted: svn:eol-style
## -1 +0,0 ##
-native
\ No newline at end of property
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Deleted: svn:mime-type
## -1 +0,0 ##
-text/plain
\ No newline at end of property
Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_os.h
===================================================================
--- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_os.h (revision 348960)
+++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_os.h (nonexistent)
@@ -1,965 +0,0 @@
-/*
- * kmp_os.h -- KPTS runtime header file.
- */
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef KMP_OS_H
-#define KMP_OS_H
-
-#include "kmp_config.h"
-#include <stdlib.h>
-#include <atomic>
-
-#define KMP_FTN_PLAIN 1
-#define KMP_FTN_APPEND 2
-#define KMP_FTN_UPPER 3
-/*
-#define KMP_FTN_PREPEND 4
-#define KMP_FTN_UAPPEND 5
-*/
-
-#define KMP_PTR_SKIP (sizeof(void *))
-
-/* -------------------------- Compiler variations ------------------------ */
-
-#define KMP_OFF 0
-#define KMP_ON 1
-
-#define KMP_MEM_CONS_VOLATILE 0
-#define KMP_MEM_CONS_FENCE 1
-
-#ifndef KMP_MEM_CONS_MODEL
-#define KMP_MEM_CONS_MODEL KMP_MEM_CONS_VOLATILE
-#endif
-
-/* ------------------------- Compiler recognition ---------------------- */
-#define KMP_COMPILER_ICC 0
-#define KMP_COMPILER_GCC 0
-#define KMP_COMPILER_CLANG 0
-#define KMP_COMPILER_MSVC 0
-
-#if defined(__INTEL_COMPILER)
-#undef KMP_COMPILER_ICC
-#define KMP_COMPILER_ICC 1
-#elif defined(__clang__)
-#undef KMP_COMPILER_CLANG
-#define KMP_COMPILER_CLANG 1
-#elif defined(__GNUC__)
-#undef KMP_COMPILER_GCC
-#define KMP_COMPILER_GCC 1
-#elif defined(_MSC_VER)
-#undef KMP_COMPILER_MSVC
-#define KMP_COMPILER_MSVC 1
-#else
-#error Unknown compiler
-#endif
-
-#if (KMP_OS_LINUX || KMP_OS_WINDOWS) && !KMP_OS_CNK
-#define KMP_AFFINITY_SUPPORTED 1
-#if KMP_OS_WINDOWS && KMP_ARCH_X86_64
-#define KMP_GROUP_AFFINITY 1
-#else
-#define KMP_GROUP_AFFINITY 0
-#endif
-#else
-#define KMP_AFFINITY_SUPPORTED 0
-#define KMP_GROUP_AFFINITY 0
-#endif
-
-/* Check for quad-precision extension.
*/ -#define KMP_HAVE_QUAD 0 -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 -#if KMP_COMPILER_ICC -/* _Quad is already defined for icc */ -#undef KMP_HAVE_QUAD -#define KMP_HAVE_QUAD 1 -#elif KMP_COMPILER_CLANG -/* Clang doesn't support a software-implemented - 128-bit extended precision type yet */ -typedef long double _Quad; -#elif KMP_COMPILER_GCC -/* GCC on NetBSD lacks __multc3/__divtc3 builtins needed for quad */ -#if !KMP_OS_NETBSD -typedef __float128 _Quad; -#undef KMP_HAVE_QUAD -#define KMP_HAVE_QUAD 1 -#endif -#elif KMP_COMPILER_MSVC -typedef long double _Quad; -#endif -#else -#if __LDBL_MAX_EXP__ >= 16384 && KMP_COMPILER_GCC -typedef long double _Quad; -#undef KMP_HAVE_QUAD -#define KMP_HAVE_QUAD 1 -#endif -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -#define KMP_USE_X87CONTROL 0 -#if KMP_OS_WINDOWS -#define KMP_END_OF_LINE "\r\n" -typedef char kmp_int8; -typedef unsigned char kmp_uint8; -typedef short kmp_int16; -typedef unsigned short kmp_uint16; -typedef int kmp_int32; -typedef unsigned int kmp_uint32; -#define KMP_INT32_SPEC "d" -#define KMP_UINT32_SPEC "u" -#ifndef KMP_STRUCT64 -typedef __int64 kmp_int64; -typedef unsigned __int64 kmp_uint64; -#define KMP_INT64_SPEC "I64d" -#define KMP_UINT64_SPEC "I64u" -#else -struct kmp_struct64 { - kmp_int32 a, b; -}; -typedef struct kmp_struct64 kmp_int64; -typedef struct kmp_struct64 kmp_uint64; -/* Not sure what to use for KMP_[U]INT64_SPEC here */ -#endif -#if KMP_ARCH_X86 && KMP_MSVC_COMPAT -#undef KMP_USE_X87CONTROL -#define KMP_USE_X87CONTROL 1 -#endif -#if KMP_ARCH_X86_64 -#define KMP_INTPTR 1 -typedef __int64 kmp_intptr_t; -typedef unsigned __int64 kmp_uintptr_t; -#define KMP_INTPTR_SPEC "I64d" -#define KMP_UINTPTR_SPEC "I64u" -#endif -#endif /* KMP_OS_WINDOWS */ - -#if KMP_OS_UNIX -#define KMP_END_OF_LINE "\n" -typedef char kmp_int8; -typedef unsigned char kmp_uint8; -typedef short kmp_int16; -typedef unsigned short kmp_uint16; -typedef int kmp_int32; -typedef unsigned int kmp_uint32; -typedef long long kmp_int64; -typedef unsigned long long kmp_uint64; -#define KMP_INT32_SPEC "d" -#define KMP_UINT32_SPEC "u" -#define KMP_INT64_SPEC "lld" -#define KMP_UINT64_SPEC "llu" -#endif /* KMP_OS_UNIX */ - -#if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS -#define KMP_SIZE_T_SPEC KMP_UINT32_SPEC -#elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 -#define KMP_SIZE_T_SPEC KMP_UINT64_SPEC -#else -#error "Can't determine size_t printf format specifier." 
-#endif
-
-#if KMP_ARCH_X86
-#define KMP_SIZE_T_MAX (0xFFFFFFFF)
-#else
-#define KMP_SIZE_T_MAX (0xFFFFFFFFFFFFFFFF)
-#endif
-
-typedef size_t kmp_size_t;
-typedef float kmp_real32;
-typedef double kmp_real64;
-
-#ifndef KMP_INTPTR
-#define KMP_INTPTR 1
-typedef long kmp_intptr_t;
-typedef unsigned long kmp_uintptr_t;
-#define KMP_INTPTR_SPEC "ld"
-#define KMP_UINTPTR_SPEC "lu"
-#endif
-
-#ifdef BUILD_I8
-typedef kmp_int64 kmp_int;
-typedef kmp_uint64 kmp_uint;
-#else
-typedef kmp_int32 kmp_int;
-typedef kmp_uint32 kmp_uint;
-#endif /* BUILD_I8 */
-#define KMP_INT_MAX ((kmp_int32)0x7FFFFFFF)
-#define KMP_INT_MIN ((kmp_int32)0x80000000)
-
-#ifdef __cplusplus
-// macros to cast out qualifiers and to re-interpret types
-#define CCAST(type, var) const_cast<type>(var)
-#define RCAST(type, var) reinterpret_cast<type>(var)
-//-------------------------------------------------------------------------
-// template for debug prints specification ( d, u, lld, llu ), and to obtain
-// signed/unsigned flavors of a type
-template <typename T> struct traits_t {};
-// int
-template <> struct traits_t<signed int> {
- typedef signed int signed_t;
- typedef unsigned int unsigned_t;
- typedef double floating_t;
- static char const *spec;
- static const signed_t max_value = 0x7fffffff;
- static const signed_t min_value = 0x80000000;
- static const int type_size = sizeof(signed_t);
-};
-// unsigned int
-template <> struct traits_t<unsigned int> {
- typedef signed int signed_t;
- typedef unsigned int unsigned_t;
- typedef double floating_t;
- static char const *spec;
- static const unsigned_t max_value = 0xffffffff;
- static const unsigned_t min_value = 0x00000000;
- static const int type_size = sizeof(unsigned_t);
-};
-// long
-template <> struct traits_t<signed long> {
- typedef signed long signed_t;
- typedef unsigned long unsigned_t;
- typedef long double floating_t;
- static char const *spec;
- static const int type_size = sizeof(signed_t);
-};
-// long long
-template <> struct traits_t<signed long long> {
- typedef signed long long signed_t;
- typedef unsigned long long unsigned_t;
- typedef long double floating_t;
- static char const *spec;
- static const signed_t max_value = 0x7fffffffffffffffLL;
- static const signed_t min_value = 0x8000000000000000LL;
- static const int type_size = sizeof(signed_t);
-};
-// unsigned long long
-template <> struct traits_t<unsigned long long> {
- typedef signed long long signed_t;
- typedef unsigned long long unsigned_t;
- typedef long double floating_t;
- static char const *spec;
- static const unsigned_t max_value = 0xffffffffffffffffLL;
- static const unsigned_t min_value = 0x0000000000000000LL;
- static const int type_size = sizeof(unsigned_t);
-};
-//-------------------------------------------------------------------------
-#else
-#define CCAST(type, var) (type)(var)
-#define RCAST(type, var) (type)(var)
-#endif // __cplusplus
-
-#define KMP_EXPORT extern /* export declaration in guide libraries */
-
-#if __GNUC__ >= 4 && !defined(__MINGW32__)
-#define __forceinline __inline
-#endif
-
-#if KMP_OS_WINDOWS
-#include <windows.h>
-
-static inline int KMP_GET_PAGE_SIZE(void) {
- SYSTEM_INFO si;
- GetSystemInfo(&si);
- return si.dwPageSize;
-}
-#else
-#define KMP_GET_PAGE_SIZE() getpagesize()
-#endif
-
-#define PAGE_ALIGNED(_addr) \
- (!((size_t)_addr & (size_t)(KMP_GET_PAGE_SIZE() - 1)))
-#define ALIGN_TO_PAGE(x) \
- (void *)(((size_t)(x)) & ~((size_t)(KMP_GET_PAGE_SIZE() - 1)))
-
-/* ---------- Support for cache alignment, padding, etc.
----------------*/ - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -#define INTERNODE_CACHE_LINE 4096 /* for multi-node systems */ - -/* Define the default size of the cache line */ -#ifndef CACHE_LINE -#define CACHE_LINE 128 /* cache line size in bytes */ -#else -#if (CACHE_LINE < 64) && !defined(KMP_OS_DARWIN) -// 2006-02-13: This produces too many warnings on OS X*. Disable for now -#warning CACHE_LINE is too small. -#endif -#endif /* CACHE_LINE */ - -#define KMP_CACHE_PREFETCH(ADDR) /* nothing */ - -// Define attribute that indicates a function does not return -#if __cplusplus >= 201103L -#define KMP_NORETURN [[noreturn]] -#elif KMP_OS_WINDOWS -#define KMP_NORETURN __declspec(noreturn) -#else -#define KMP_NORETURN __attribute__((noreturn)) -#endif - -#if KMP_OS_WINDOWS && KMP_MSVC_COMPAT -#define KMP_ALIGN(bytes) __declspec(align(bytes)) -#define KMP_THREAD_LOCAL __declspec(thread) -#define KMP_ALIAS /* Nothing */ -#else -#define KMP_ALIGN(bytes) __attribute__((aligned(bytes))) -#define KMP_THREAD_LOCAL __thread -#define KMP_ALIAS(alias_of) __attribute__((alias(alias_of))) -#endif - -#if KMP_HAVE_WEAK_ATTRIBUTE -#define KMP_WEAK_ATTRIBUTE __attribute__((weak)) -#else -#define KMP_WEAK_ATTRIBUTE /* Nothing */ -#endif - -// Define KMP_VERSION_SYMBOL and KMP_EXPAND_NAME -#ifndef KMP_STR -#define KMP_STR(x) _KMP_STR(x) -#define _KMP_STR(x) #x -#endif - -#ifdef KMP_USE_VERSION_SYMBOLS -// If using versioned symbols, KMP_EXPAND_NAME prepends -// __kmp_api_ to the real API name -#define KMP_EXPAND_NAME(api_name) _KMP_EXPAND_NAME(api_name) -#define _KMP_EXPAND_NAME(api_name) __kmp_api_##api_name -#define KMP_VERSION_SYMBOL(api_name, ver_num, ver_str) \ - _KMP_VERSION_SYMBOL(api_name, ver_num, ver_str, "VERSION") -#define _KMP_VERSION_SYMBOL(api_name, ver_num, ver_str, default_ver) \ - __typeof__(__kmp_api_##api_name) __kmp_api_##api_name##_##ver_num##_alias \ - __attribute__((alias(KMP_STR(__kmp_api_##api_name)))); \ - __asm__( \ - ".symver " KMP_STR(__kmp_api_##api_name##_##ver_num##_alias) "," KMP_STR( \ - api_name) "@" ver_str "\n\t"); \ - __asm__(".symver " KMP_STR(__kmp_api_##api_name) "," KMP_STR( \ - api_name) "@@" default_ver "\n\t") -#else // KMP_USE_VERSION_SYMBOLS -#define KMP_EXPAND_NAME(api_name) api_name -#define KMP_VERSION_SYMBOL(api_name, ver_num, ver_str) /* Nothing */ -#endif // KMP_USE_VERSION_SYMBOLS - -/* Temporary note: if performance testing of this passes, we can remove - all references to KMP_DO_ALIGN and replace with KMP_ALIGN. */ -#define KMP_DO_ALIGN(bytes) KMP_ALIGN(bytes) -#define KMP_ALIGN_CACHE KMP_ALIGN(CACHE_LINE) -#define KMP_ALIGN_CACHE_INTERNODE KMP_ALIGN(INTERNODE_CACHE_LINE) - -/* General purpose fence types for memory operations */ -enum kmp_mem_fence_type { - kmp_no_fence, /* No memory fence */ - kmp_acquire_fence, /* Acquire (read) memory fence */ - kmp_release_fence, /* Release (write) memory fence */ - kmp_full_fence /* Full (read+write) memory fence */ -}; - -// Synchronization primitives - -#if KMP_ASM_INTRINS && KMP_OS_WINDOWS - -#if KMP_MSVC_COMPAT && !KMP_COMPILER_CLANG -#pragma intrinsic(InterlockedExchangeAdd) -#pragma intrinsic(InterlockedCompareExchange) -#pragma intrinsic(InterlockedExchange) -#pragma intrinsic(InterlockedExchange64) -#endif - -// Using InterlockedIncrement / InterlockedDecrement causes a library loading -// ordering problem, so we use InterlockedExchangeAdd instead. 
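The substitution works because InterlockedExchangeAdd is a fetch-and-add: it returns the value the location held *before* the addition, which is exactly the "test then" contract the macros below advertise (InterlockedIncrement would return the new value instead). The same contract in portable C++11 form, for comparison only:

#include <atomic>
#include <cassert>

int main() {
  std::atomic<long> counter{5};
  long old = counter.fetch_add(1); // like InterlockedExchangeAdd(&c, 1)
  assert(old == 5);                // the value *before* the addition...
  assert(counter.load() == 6);     // ...and the increment still applied
}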
-#define KMP_TEST_THEN_INC32(p) InterlockedExchangeAdd((volatile long *)(p), 1) -#define KMP_TEST_THEN_INC_ACQ32(p) \ - InterlockedExchangeAdd((volatile long *)(p), 1) -#define KMP_TEST_THEN_ADD4_32(p) InterlockedExchangeAdd((volatile long *)(p), 4) -#define KMP_TEST_THEN_ADD4_ACQ32(p) \ - InterlockedExchangeAdd((volatile long *)(p), 4) -#define KMP_TEST_THEN_DEC32(p) InterlockedExchangeAdd((volatile long *)(p), -1) -#define KMP_TEST_THEN_DEC_ACQ32(p) \ - InterlockedExchangeAdd((volatile long *)(p), -1) -#define KMP_TEST_THEN_ADD32(p, v) \ - InterlockedExchangeAdd((volatile long *)(p), (v)) - -#define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) \ - InterlockedCompareExchange((volatile long *)(p), (long)(sv), (long)(cv)) - -#define KMP_XCHG_FIXED32(p, v) \ - InterlockedExchange((volatile long *)(p), (long)(v)) -#define KMP_XCHG_FIXED64(p, v) \ - InterlockedExchange64((volatile kmp_int64 *)(p), (kmp_int64)(v)) - -inline kmp_real32 KMP_XCHG_REAL32(volatile kmp_real32 *p, kmp_real32 v) { - kmp_int32 tmp = InterlockedExchange((volatile long *)p, *(long *)&v); - return *(kmp_real32 *)&tmp; -} - -// Routines that we still need to implement in assembly. -extern kmp_int8 __kmp_test_then_add8(volatile kmp_int8 *p, kmp_int8 v); -extern kmp_int8 __kmp_test_then_or8(volatile kmp_int8 *p, kmp_int8 v); -extern kmp_int8 __kmp_test_then_and8(volatile kmp_int8 *p, kmp_int8 v); -extern kmp_int32 __kmp_test_then_add32(volatile kmp_int32 *p, kmp_int32 v); -extern kmp_uint32 __kmp_test_then_or32(volatile kmp_uint32 *p, kmp_uint32 v); -extern kmp_uint32 __kmp_test_then_and32(volatile kmp_uint32 *p, kmp_uint32 v); -extern kmp_int64 __kmp_test_then_add64(volatile kmp_int64 *p, kmp_int64 v); -extern kmp_uint64 __kmp_test_then_or64(volatile kmp_uint64 *p, kmp_uint64 v); -extern kmp_uint64 __kmp_test_then_and64(volatile kmp_uint64 *p, kmp_uint64 v); - -extern kmp_int8 __kmp_compare_and_store8(volatile kmp_int8 *p, kmp_int8 cv, - kmp_int8 sv); -extern kmp_int16 __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv, - kmp_int16 sv); -extern kmp_int32 __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv, - kmp_int32 sv); -extern kmp_int32 __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv, - kmp_int64 sv); -extern kmp_int8 __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv, - kmp_int8 sv); -extern kmp_int16 __kmp_compare_and_store_ret16(volatile kmp_int16 *p, - kmp_int16 cv, kmp_int16 sv); -extern kmp_int32 __kmp_compare_and_store_ret32(volatile kmp_int32 *p, - kmp_int32 cv, kmp_int32 sv); -extern kmp_int64 __kmp_compare_and_store_ret64(volatile kmp_int64 *p, - kmp_int64 cv, kmp_int64 sv); - -extern kmp_int8 __kmp_xchg_fixed8(volatile kmp_int8 *p, kmp_int8 v); -extern kmp_int16 __kmp_xchg_fixed16(volatile kmp_int16 *p, kmp_int16 v); -extern kmp_int32 __kmp_xchg_fixed32(volatile kmp_int32 *p, kmp_int32 v); -extern kmp_int64 __kmp_xchg_fixed64(volatile kmp_int64 *p, kmp_int64 v); -extern kmp_real32 __kmp_xchg_real32(volatile kmp_real32 *p, kmp_real32 v); -extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v); - -//#define KMP_TEST_THEN_INC32(p) __kmp_test_then_add32((p), 1) -//#define KMP_TEST_THEN_INC_ACQ32(p) __kmp_test_then_add32((p), 1) -#define KMP_TEST_THEN_INC64(p) __kmp_test_then_add64((p), 1LL) -#define KMP_TEST_THEN_INC_ACQ64(p) __kmp_test_then_add64((p), 1LL) -//#define KMP_TEST_THEN_ADD4_32(p) __kmp_test_then_add32((p), 4) -//#define KMP_TEST_THEN_ADD4_ACQ32(p) __kmp_test_then_add32((p), 4) -#define KMP_TEST_THEN_ADD4_64(p) __kmp_test_then_add64((p), 4LL) 
-#define KMP_TEST_THEN_ADD4_ACQ64(p) __kmp_test_then_add64((p), 4LL) -//#define KMP_TEST_THEN_DEC32(p) __kmp_test_then_add32((p), -1) -//#define KMP_TEST_THEN_DEC_ACQ32(p) __kmp_test_then_add32((p), -1) -#define KMP_TEST_THEN_DEC64(p) __kmp_test_then_add64((p), -1LL) -#define KMP_TEST_THEN_DEC_ACQ64(p) __kmp_test_then_add64((p), -1LL) -//#define KMP_TEST_THEN_ADD32(p, v) __kmp_test_then_add32((p), (v)) -#define KMP_TEST_THEN_ADD8(p, v) __kmp_test_then_add8((p), (v)) -#define KMP_TEST_THEN_ADD64(p, v) __kmp_test_then_add64((p), (v)) - -#define KMP_TEST_THEN_OR8(p, v) __kmp_test_then_or8((p), (v)) -#define KMP_TEST_THEN_AND8(p, v) __kmp_test_then_and8((p), (v)) -#define KMP_TEST_THEN_OR32(p, v) __kmp_test_then_or32((p), (v)) -#define KMP_TEST_THEN_AND32(p, v) __kmp_test_then_and32((p), (v)) -#define KMP_TEST_THEN_OR64(p, v) __kmp_test_then_or64((p), (v)) -#define KMP_TEST_THEN_AND64(p, v) __kmp_test_then_and64((p), (v)) - -#define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) \ - __kmp_compare_and_store8((p), (cv), (sv)) -#define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) \ - __kmp_compare_and_store8((p), (cv), (sv)) -#define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) \ - __kmp_compare_and_store16((p), (cv), (sv)) -#define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) \ - __kmp_compare_and_store16((p), (cv), (sv)) -#define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) \ - __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ - (kmp_int32)(sv)) -#define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) \ - __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ - (kmp_int32)(sv)) -#define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) \ - __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ - (kmp_int64)(sv)) -#define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) \ - __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ - (kmp_int64)(sv)) - -#if KMP_ARCH_X86 -#define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \ - __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ - (kmp_int32)(sv)) -#else /* 64 bit pointers */ -#define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \ - __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ - (kmp_int64)(sv)) -#endif /* KMP_ARCH_X86 */ - -#define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) \ - __kmp_compare_and_store_ret8((p), (cv), (sv)) -#define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) \ - __kmp_compare_and_store_ret16((p), (cv), (sv)) -#define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) \ - __kmp_compare_and_store_ret64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ - (kmp_int64)(sv)) - -#define KMP_XCHG_FIXED8(p, v) \ - __kmp_xchg_fixed8((volatile kmp_int8 *)(p), (kmp_int8)(v)); -#define KMP_XCHG_FIXED16(p, v) __kmp_xchg_fixed16((p), (v)); -//#define KMP_XCHG_FIXED32(p, v) __kmp_xchg_fixed32((p), (v)); -//#define KMP_XCHG_FIXED64(p, v) __kmp_xchg_fixed64((p), (v)); -//#define KMP_XCHG_REAL32(p, v) __kmp_xchg_real32((p), (v)); -#define KMP_XCHG_REAL64(p, v) __kmp_xchg_real64((p), (v)); - -#elif (KMP_ASM_INTRINS && KMP_OS_UNIX) || !(KMP_ARCH_X86 || KMP_ARCH_X86_64) - -/* cast p to correct type so that proper intrinsic will be used */ -#define KMP_TEST_THEN_INC32(p) \ - __sync_fetch_and_add((volatile kmp_int32 *)(p), 1) -#define KMP_TEST_THEN_INC_ACQ32(p) \ - __sync_fetch_and_add((volatile kmp_int32 *)(p), 1) -#define KMP_TEST_THEN_INC64(p) \ - __sync_fetch_and_add((volatile kmp_int64 *)(p), 1LL) -#define KMP_TEST_THEN_INC_ACQ64(p) \ - __sync_fetch_and_add((volatile kmp_int64 *)(p), 1LL) -#define KMP_TEST_THEN_ADD4_32(p) \ - 
__sync_fetch_and_add((volatile kmp_int32 *)(p), 4) -#define KMP_TEST_THEN_ADD4_ACQ32(p) \ - __sync_fetch_and_add((volatile kmp_int32 *)(p), 4) -#define KMP_TEST_THEN_ADD4_64(p) \ - __sync_fetch_and_add((volatile kmp_int64 *)(p), 4LL) -#define KMP_TEST_THEN_ADD4_ACQ64(p) \ - __sync_fetch_and_add((volatile kmp_int64 *)(p), 4LL) -#define KMP_TEST_THEN_DEC32(p) \ - __sync_fetch_and_sub((volatile kmp_int32 *)(p), 1) -#define KMP_TEST_THEN_DEC_ACQ32(p) \ - __sync_fetch_and_sub((volatile kmp_int32 *)(p), 1) -#define KMP_TEST_THEN_DEC64(p) \ - __sync_fetch_and_sub((volatile kmp_int64 *)(p), 1LL) -#define KMP_TEST_THEN_DEC_ACQ64(p) \ - __sync_fetch_and_sub((volatile kmp_int64 *)(p), 1LL) -#define KMP_TEST_THEN_ADD8(p, v) \ - __sync_fetch_and_add((volatile kmp_int8 *)(p), (kmp_int8)(v)) -#define KMP_TEST_THEN_ADD32(p, v) \ - __sync_fetch_and_add((volatile kmp_int32 *)(p), (kmp_int32)(v)) -#define KMP_TEST_THEN_ADD64(p, v) \ - __sync_fetch_and_add((volatile kmp_int64 *)(p), (kmp_int64)(v)) - -#define KMP_TEST_THEN_OR8(p, v) \ - __sync_fetch_and_or((volatile kmp_int8 *)(p), (kmp_int8)(v)) -#define KMP_TEST_THEN_AND8(p, v) \ - __sync_fetch_and_and((volatile kmp_int8 *)(p), (kmp_int8)(v)) -#define KMP_TEST_THEN_OR32(p, v) \ - __sync_fetch_and_or((volatile kmp_uint32 *)(p), (kmp_uint32)(v)) -#define KMP_TEST_THEN_AND32(p, v) \ - __sync_fetch_and_and((volatile kmp_uint32 *)(p), (kmp_uint32)(v)) -#define KMP_TEST_THEN_OR64(p, v) \ - __sync_fetch_and_or((volatile kmp_uint64 *)(p), (kmp_uint64)(v)) -#define KMP_TEST_THEN_AND64(p, v) \ - __sync_fetch_and_and((volatile kmp_uint64 *)(p), (kmp_uint64)(v)) - -#define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) \ - __sync_bool_compare_and_swap((volatile kmp_uint8 *)(p), (kmp_uint8)(cv), \ - (kmp_uint8)(sv)) -#define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) \ - __sync_bool_compare_and_swap((volatile kmp_uint8 *)(p), (kmp_uint8)(cv), \ - (kmp_uint8)(sv)) -#define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) \ - __sync_bool_compare_and_swap((volatile kmp_uint16 *)(p), (kmp_uint16)(cv), \ - (kmp_uint16)(sv)) -#define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) \ - __sync_bool_compare_and_swap((volatile kmp_uint16 *)(p), (kmp_uint16)(cv), \ - (kmp_uint16)(sv)) -#define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) \ - __sync_bool_compare_and_swap((volatile kmp_uint32 *)(p), (kmp_uint32)(cv), \ - (kmp_uint32)(sv)) -#define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) \ - __sync_bool_compare_and_swap((volatile kmp_uint32 *)(p), (kmp_uint32)(cv), \ - (kmp_uint32)(sv)) -#define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) \ - __sync_bool_compare_and_swap((volatile kmp_uint64 *)(p), (kmp_uint64)(cv), \ - (kmp_uint64)(sv)) -#define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) \ - __sync_bool_compare_and_swap((volatile kmp_uint64 *)(p), (kmp_uint64)(cv), \ - (kmp_uint64)(sv)) -#define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \ - __sync_bool_compare_and_swap((void *volatile *)(p), (void *)(cv), \ - (void *)(sv)) - -#define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) \ - __sync_val_compare_and_swap((volatile kmp_uint8 *)(p), (kmp_uint8)(cv), \ - (kmp_uint8)(sv)) -#define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) \ - __sync_val_compare_and_swap((volatile kmp_uint16 *)(p), (kmp_uint16)(cv), \ - (kmp_uint16)(sv)) -#define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) \ - __sync_val_compare_and_swap((volatile kmp_uint32 *)(p), (kmp_uint32)(cv), \ - (kmp_uint32)(sv)) -#define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) \ - __sync_val_compare_and_swap((volatile kmp_uint64 *)(p), (kmp_uint64)(cv), \ - (kmp_uint64)(sv)) - -#define KMP_XCHG_FIXED8(p, v) \ 
- __sync_lock_test_and_set((volatile kmp_uint8 *)(p), (kmp_uint8)(v)) -#define KMP_XCHG_FIXED16(p, v) \ - __sync_lock_test_and_set((volatile kmp_uint16 *)(p), (kmp_uint16)(v)) -#define KMP_XCHG_FIXED32(p, v) \ - __sync_lock_test_and_set((volatile kmp_uint32 *)(p), (kmp_uint32)(v)) -#define KMP_XCHG_FIXED64(p, v) \ - __sync_lock_test_and_set((volatile kmp_uint64 *)(p), (kmp_uint64)(v)) - -inline kmp_real32 KMP_XCHG_REAL32(volatile kmp_real32 *p, kmp_real32 v) { - kmp_int32 tmp = - __sync_lock_test_and_set((volatile kmp_uint32 *)(p), *(kmp_uint32 *)&v); - return *(kmp_real32 *)&tmp; -} - -inline kmp_real64 KMP_XCHG_REAL64(volatile kmp_real64 *p, kmp_real64 v) { - kmp_int64 tmp = - __sync_lock_test_and_set((volatile kmp_uint64 *)(p), *(kmp_uint64 *)&v); - return *(kmp_real64 *)&tmp; -} - -#else - -extern kmp_int8 __kmp_test_then_add8(volatile kmp_int8 *p, kmp_int8 v); -extern kmp_int8 __kmp_test_then_or8(volatile kmp_int8 *p, kmp_int8 v); -extern kmp_int8 __kmp_test_then_and8(volatile kmp_int8 *p, kmp_int8 v); -extern kmp_int32 __kmp_test_then_add32(volatile kmp_int32 *p, kmp_int32 v); -extern kmp_uint32 __kmp_test_then_or32(volatile kmp_uint32 *p, kmp_uint32 v); -extern kmp_uint32 __kmp_test_then_and32(volatile kmp_uint32 *p, kmp_uint32 v); -extern kmp_int64 __kmp_test_then_add64(volatile kmp_int64 *p, kmp_int64 v); -extern kmp_uint64 __kmp_test_then_or64(volatile kmp_uint64 *p, kmp_uint64 v); -extern kmp_uint64 __kmp_test_then_and64(volatile kmp_uint64 *p, kmp_uint64 v); - -extern kmp_int8 __kmp_compare_and_store8(volatile kmp_int8 *p, kmp_int8 cv, - kmp_int8 sv); -extern kmp_int16 __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv, - kmp_int16 sv); -extern kmp_int32 __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv, - kmp_int32 sv); -extern kmp_int32 __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv, - kmp_int64 sv); -extern kmp_int8 __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv, - kmp_int8 sv); -extern kmp_int16 __kmp_compare_and_store_ret16(volatile kmp_int16 *p, - kmp_int16 cv, kmp_int16 sv); -extern kmp_int32 __kmp_compare_and_store_ret32(volatile kmp_int32 *p, - kmp_int32 cv, kmp_int32 sv); -extern kmp_int64 __kmp_compare_and_store_ret64(volatile kmp_int64 *p, - kmp_int64 cv, kmp_int64 sv); - -extern kmp_int8 __kmp_xchg_fixed8(volatile kmp_int8 *p, kmp_int8 v); -extern kmp_int16 __kmp_xchg_fixed16(volatile kmp_int16 *p, kmp_int16 v); -extern kmp_int32 __kmp_xchg_fixed32(volatile kmp_int32 *p, kmp_int32 v); -extern kmp_int64 __kmp_xchg_fixed64(volatile kmp_int64 *p, kmp_int64 v); -extern kmp_real32 __kmp_xchg_real32(volatile kmp_real32 *p, kmp_real32 v); -extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v); - -#define KMP_TEST_THEN_INC32(p) \ - __kmp_test_then_add32((volatile kmp_int32 *)(p), 1) -#define KMP_TEST_THEN_INC_ACQ32(p) \ - __kmp_test_then_add32((volatile kmp_int32 *)(p), 1) -#define KMP_TEST_THEN_INC64(p) \ - __kmp_test_then_add64((volatile kmp_int64 *)(p), 1LL) -#define KMP_TEST_THEN_INC_ACQ64(p) \ - __kmp_test_then_add64((volatile kmp_int64 *)(p), 1LL) -#define KMP_TEST_THEN_ADD4_32(p) \ - __kmp_test_then_add32((volatile kmp_int32 *)(p), 4) -#define KMP_TEST_THEN_ADD4_ACQ32(p) \ - __kmp_test_then_add32((volatile kmp_int32 *)(p), 4) -#define KMP_TEST_THEN_ADD4_64(p) \ - __kmp_test_then_add64((volatile kmp_int64 *)(p), 4LL) -#define KMP_TEST_THEN_ADD4_ACQ64(p) \ - __kmp_test_then_add64((volatile kmp_int64 *)(p), 4LL) -#define KMP_TEST_THEN_DEC32(p) \ - __kmp_test_then_add32((volatile kmp_int32 *)(p), 
-1) -#define KMP_TEST_THEN_DEC_ACQ32(p) \ - __kmp_test_then_add32((volatile kmp_int32 *)(p), -1) -#define KMP_TEST_THEN_DEC64(p) \ - __kmp_test_then_add64((volatile kmp_int64 *)(p), -1LL) -#define KMP_TEST_THEN_DEC_ACQ64(p) \ - __kmp_test_then_add64((volatile kmp_int64 *)(p), -1LL) -#define KMP_TEST_THEN_ADD8(p, v) \ - __kmp_test_then_add8((volatile kmp_int8 *)(p), (kmp_int8)(v)) -#define KMP_TEST_THEN_ADD32(p, v) \ - __kmp_test_then_add32((volatile kmp_int32 *)(p), (kmp_int32)(v)) -#define KMP_TEST_THEN_ADD64(p, v) \ - __kmp_test_then_add64((volatile kmp_int64 *)(p), (kmp_int64)(v)) - -#define KMP_TEST_THEN_OR8(p, v) \ - __kmp_test_then_or8((volatile kmp_int8 *)(p), (kmp_int8)(v)) -#define KMP_TEST_THEN_AND8(p, v) \ - __kmp_test_then_and8((volatile kmp_int8 *)(p), (kmp_int8)(v)) -#define KMP_TEST_THEN_OR32(p, v) \ - __kmp_test_then_or32((volatile kmp_uint32 *)(p), (kmp_uint32)(v)) -#define KMP_TEST_THEN_AND32(p, v) \ - __kmp_test_then_and32((volatile kmp_uint32 *)(p), (kmp_uint32)(v)) -#define KMP_TEST_THEN_OR64(p, v) \ - __kmp_test_then_or64((volatile kmp_uint64 *)(p), (kmp_uint64)(v)) -#define KMP_TEST_THEN_AND64(p, v) \ - __kmp_test_then_and64((volatile kmp_uint64 *)(p), (kmp_uint64)(v)) - -#define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) \ - __kmp_compare_and_store8((volatile kmp_int8 *)(p), (kmp_int8)(cv), \ - (kmp_int8)(sv)) -#define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) \ - __kmp_compare_and_store8((volatile kmp_int8 *)(p), (kmp_int8)(cv), \ - (kmp_int8)(sv)) -#define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) \ - __kmp_compare_and_store16((volatile kmp_int16 *)(p), (kmp_int16)(cv), \ - (kmp_int16)(sv)) -#define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) \ - __kmp_compare_and_store16((volatile kmp_int16 *)(p), (kmp_int16)(cv), \ - (kmp_int16)(sv)) -#define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) \ - __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ - (kmp_int32)(sv)) -#define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) \ - __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ - (kmp_int32)(sv)) -#define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) \ - __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ - (kmp_int64)(sv)) -#define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) \ - __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ - (kmp_int64)(sv)) - -#if KMP_ARCH_X86 -#define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \ - __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ - (kmp_int32)(sv)) -#else /* 64 bit pointers */ -#define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \ - __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ - (kmp_int64)(sv)) -#endif /* KMP_ARCH_X86 */ - -#define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) \ - __kmp_compare_and_store_ret8((p), (cv), (sv)) -#define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) \ - __kmp_compare_and_store_ret16((p), (cv), (sv)) -#define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) \ - __kmp_compare_and_store_ret32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ - (kmp_int32)(sv)) -#define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) \ - __kmp_compare_and_store_ret64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ - (kmp_int64)(sv)) - -#define KMP_XCHG_FIXED8(p, v) \ - __kmp_xchg_fixed8((volatile kmp_int8 *)(p), (kmp_int8)(v)); -#define KMP_XCHG_FIXED16(p, v) __kmp_xchg_fixed16((p), (v)); -#define KMP_XCHG_FIXED32(p, v) __kmp_xchg_fixed32((p), (v)); -#define KMP_XCHG_FIXED64(p, v) __kmp_xchg_fixed64((p), (v)); -#define KMP_XCHG_REAL32(p, v) __kmp_xchg_real32((p), (v)); -#define 
KMP_XCHG_REAL64(p, v) __kmp_xchg_real64((p), (v));
-
-#endif /* KMP_ASM_INTRINS */
-
-/* ------------- relaxed consistency memory model stuff ------------------ */
-
-#if KMP_OS_WINDOWS
-#ifdef __ABSOFT_WIN
-#define KMP_MB() asm("nop")
-#define KMP_IMB() asm("nop")
-#else
-#define KMP_MB() /* _asm{ nop } */
-#define KMP_IMB() /* _asm{ nop } */
-#endif
-#endif /* KMP_OS_WINDOWS */
-
-#if KMP_ARCH_PPC64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS ||    \
-    KMP_ARCH_MIPS64
-#define KMP_MB() __sync_synchronize()
-#endif
-
-#ifndef KMP_MB
-#define KMP_MB() /* nothing to do */
-#endif
-
-#ifndef KMP_IMB
-#define KMP_IMB() /* nothing to do */
-#endif
-
-#ifndef KMP_ST_REL32
-#define KMP_ST_REL32(A, D) (*(A) = (D))
-#endif
-
-#ifndef KMP_ST_REL64
-#define KMP_ST_REL64(A, D) (*(A) = (D))
-#endif
-
-#ifndef KMP_LD_ACQ32
-#define KMP_LD_ACQ32(A) (*(A))
-#endif
-
-#ifndef KMP_LD_ACQ64
-#define KMP_LD_ACQ64(A) (*(A))
-#endif
-
-/* ------------------------------------------------------------------------ */
-// FIXME - maybe this should be
-//
-// #define TCR_4(a) (*(volatile kmp_int32 *)(&a))
-// #define TCW_4(a,b) (a) = (*(volatile kmp_int32 *)&(b))
-//
-// #define TCR_8(a) (*(volatile kmp_int64 *)(a))
-// #define TCW_8(a,b) (a) = (*(volatile kmp_int64 *)(&b))
-//
-// I'm fairly certain this is the correct thing to do, but I'm afraid
-// of performance regressions.
-
-#define TCR_1(a) (a)
-#define TCW_1(a, b) (a) = (b)
-#define TCR_4(a) (a)
-#define TCW_4(a, b) (a) = (b)
-#define TCI_4(a) (++(a))
-#define TCD_4(a) (--(a))
-#define TCR_8(a) (a)
-#define TCW_8(a, b) (a) = (b)
-#define TCI_8(a) (++(a))
-#define TCD_8(a) (--(a))
-#define TCR_SYNC_4(a) (a)
-#define TCW_SYNC_4(a, b) (a) = (b)
-#define TCX_SYNC_4(a, b, c)                                                   \
-  KMP_COMPARE_AND_STORE_REL32((volatile kmp_int32 *)(volatile void *)&(a),    \
-                              (kmp_int32)(b), (kmp_int32)(c))
-#define TCR_SYNC_8(a) (a)
-#define TCW_SYNC_8(a, b) (a) = (b)
-#define TCX_SYNC_8(a, b, c)                                                   \
-  KMP_COMPARE_AND_STORE_REL64((volatile kmp_int64 *)(volatile void *)&(a),    \
-                              (kmp_int64)(b), (kmp_int64)(c))
-
-#if KMP_ARCH_X86 || KMP_ARCH_MIPS
-// What about ARM?
-#define TCR_PTR(a) ((void *)TCR_4(a))
-#define TCW_PTR(a, b) TCW_4((a), (b))
-#define TCR_SYNC_PTR(a) ((void *)TCR_SYNC_4(a))
-#define TCW_SYNC_PTR(a, b) TCW_SYNC_4((a), (b))
-#define TCX_SYNC_PTR(a, b, c) ((void *)TCX_SYNC_4((a), (b), (c)))
-
-#else /* 64 bit pointers */
-
-#define TCR_PTR(a) ((void *)TCR_8(a))
-#define TCW_PTR(a, b) TCW_8((a), (b))
-#define TCR_SYNC_PTR(a) ((void *)TCR_SYNC_8(a))
-#define TCW_SYNC_PTR(a, b) TCW_SYNC_8((a), (b))
-#define TCX_SYNC_PTR(a, b, c) ((void *)TCX_SYNC_8((a), (b), (c)))
-
-#endif /* KMP_ARCH_X86 */
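A hedged sketch of how the TCR_/TCW_ ("thread-consistent read/write") macros above are meant to be paired; with the current definitions they expand to plain accesses, and the flag name below is illustrative only, not from the runtime:

#include <cstdint>

#ifndef TCR_4 // plain-access expansions, matching the definitions above
#define TCR_4(a) (a)
#define TCW_4(a, b) (a) = (b)
#endif

static std::int32_t go_flag = 0; // hypothetical shared location

// Reader side: TCR_4 marks a 4-byte read that another thread may be writing.
static bool should_go() { return TCR_4(go_flag) != 0; }

// Writer side: TCW_4 marks the paired 4-byte write.
static void release_threads() { TCW_4(go_flag, 1); }

int main() {
  release_threads();
  return should_go() ? 0 : 1;
}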
-/* If these FTN_{TRUE,FALSE} values change, may need to change several places
-   where they are used to check that language is Fortran, not C. */
-
-#ifndef FTN_TRUE
-#define FTN_TRUE TRUE
-#endif
-
-#ifndef FTN_FALSE
-#define FTN_FALSE FALSE
-#endif
-
-typedef void (*microtask_t)(int *gtid, int *npr, ...);
-
-#ifdef USE_VOLATILE_CAST
-#define VOLATILE_CAST(x) (volatile x)
-#else
-#define VOLATILE_CAST(x) (x)
-#endif
-
-#define KMP_WAIT_YIELD __kmp_wait_yield_4
-#define KMP_WAIT_YIELD_PTR __kmp_wait_yield_4_ptr
-#define KMP_EQ __kmp_eq_4
-#define KMP_NEQ __kmp_neq_4
-#define KMP_LT __kmp_lt_4
-#define KMP_GE __kmp_ge_4
-#define KMP_LE __kmp_le_4
-
-/* Workaround for Intel(R) 64 code gen bug when taking address of static array
- * (Intel(R) 64 Tracker #138) */
-#if (KMP_ARCH_X86_64 || KMP_ARCH_PPC64) && KMP_OS_LINUX
-#define STATIC_EFI2_WORKAROUND
-#else
-#define STATIC_EFI2_WORKAROUND static
-#endif
-
-// Support of BGET usage
-#ifndef KMP_USE_BGET
-#define KMP_USE_BGET 1
-#endif
-
-// Switches for OSS builds
-#ifndef USE_CMPXCHG_FIX
-#define USE_CMPXCHG_FIX 1
-#endif
-
-// Enable dynamic user lock
-#if OMP_45_ENABLED
-#define KMP_USE_DYNAMIC_LOCK 1
-#endif
-
-// Enable Intel(R) Transactional Synchronization Extensions (Intel(R) TSX) if
-// dynamic user lock is turned on
-#if KMP_USE_DYNAMIC_LOCK
-// Visual studio can't handle the asm sections in this code
-#define KMP_USE_TSX (KMP_ARCH_X86 || KMP_ARCH_X86_64) && !KMP_COMPILER_MSVC
-#ifdef KMP_USE_ADAPTIVE_LOCKS
-#undef KMP_USE_ADAPTIVE_LOCKS
-#endif
-#define KMP_USE_ADAPTIVE_LOCKS KMP_USE_TSX
-#endif
-
-// Enable tick time conversion of ticks to seconds
-#if KMP_STATS_ENABLED
-#define KMP_HAVE_TICK_TIME                                                    \
-  (KMP_OS_LINUX && (KMP_MIC || KMP_ARCH_X86 || KMP_ARCH_X86_64))
-#endif
-
-// Warning levels
-enum kmp_warnings_level {
-  kmp_warnings_off = 0, /* No warnings */
-  kmp_warnings_low, /* Minimal warnings (default) */
-  kmp_warnings_explicit = 6, /* Explicitly set to ON - more warnings */
-  kmp_warnings_verbose /* reserved */
-};
-
-#ifdef __cplusplus
-} // extern "C"
-#endif // __cplusplus
-
-// Macros for C++11 atomic functions
-#define KMP_ATOMIC_LD(p, order) (p)->load(std::memory_order_##order)
-#define KMP_ATOMIC_OP(op, p, v, order) (p)->op(v, std::memory_order_##order)
-
-// For non-default load/store
-#define KMP_ATOMIC_LD_ACQ(p) KMP_ATOMIC_LD(p, acquire)
-#define KMP_ATOMIC_LD_RLX(p) KMP_ATOMIC_LD(p, relaxed)
-#define KMP_ATOMIC_ST_REL(p, v) KMP_ATOMIC_OP(store, p, v, release)
-#define KMP_ATOMIC_ST_RLX(p, v) KMP_ATOMIC_OP(store, p, v, relaxed)
-
-// For non-default fetch_<op>
-#define KMP_ATOMIC_ADD(p, v) KMP_ATOMIC_OP(fetch_add, p, v, acq_rel)
-#define KMP_ATOMIC_SUB(p, v) KMP_ATOMIC_OP(fetch_sub, p, v, acq_rel)
-#define KMP_ATOMIC_AND(p, v) KMP_ATOMIC_OP(fetch_and, p, v, acq_rel)
-#define KMP_ATOMIC_OR(p, v) KMP_ATOMIC_OP(fetch_or, p, v, acq_rel)
-#define KMP_ATOMIC_INC(p) KMP_ATOMIC_OP(fetch_add, p, 1, acq_rel)
-#define KMP_ATOMIC_DEC(p) KMP_ATOMIC_OP(fetch_sub, p, 1, acq_rel)
-#define KMP_ATOMIC_ADD_RLX(p, v) KMP_ATOMIC_OP(fetch_add, p, v, relaxed)
-#define KMP_ATOMIC_INC_RLX(p) KMP_ATOMIC_OP(fetch_add, p, 1, relaxed)
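A minimal sketch of the KMP_ATOMIC_* wrappers above in action; the fallback definitions and the counter name are illustrative assumptions, not runtime code:

#include <atomic>
#include <cassert>

#ifndef KMP_ATOMIC_INC // fall back to the expansions given above if built alone
#define KMP_ATOMIC_INC(p) (p)->fetch_add(1, std::memory_order_acq_rel)
#define KMP_ATOMIC_ST_REL(p, v) (p)->store(v, std::memory_order_release)
#define KMP_ATOMIC_LD_ACQ(p) (p)->load(std::memory_order_acquire)
#endif

int main() {
  std::atomic<int> nthreads{0}; // hypothetical shared counter
  KMP_ATOMIC_INC(&nthreads);                 // fetch_add(1, acq_rel)
  KMP_ATOMIC_ST_REL(&nthreads, 4);           // store(4, release)
  assert(KMP_ATOMIC_LD_ACQ(&nthreads) == 4); // load(acquire)
  return 0;
}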
-// Callers of the following functions cannot see the side effect on "expected".
-template <typename T>
-bool __kmp_atomic_compare_store(std::atomic<T> *p, T expected, T desired) {
-  return p->compare_exchange_strong(
-      expected, desired, std::memory_order_acq_rel, std::memory_order_relaxed);
-}
-
-template <typename T>
-bool __kmp_atomic_compare_store_acq(std::atomic<T> *p, T expected, T desired) {
-  return p->compare_exchange_strong(
-      expected, desired, std::memory_order_acquire, std::memory_order_relaxed);
-}
-
-template <typename T>
-bool __kmp_atomic_compare_store_rel(std::atomic<T> *p, T expected, T desired) {
-  return p->compare_exchange_strong(
-      expected, desired, std::memory_order_release, std::memory_order_relaxed);
-}
-
-#endif /* KMP_OS_H */
-// Safe C API
-#include "kmp_safe_c_api.h"

Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_os.h
___________________________________________________________________
Deleted: svn:eol-style
## -1 +0,0 ##
-native
\ No newline at end of property
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Deleted: svn:mime-type
## -1 +0,0 ##
-text/plain
\ No newline at end of property
Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/thirdparty/ittnotify/disable_warnings.h
===================================================================
--- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/thirdparty/ittnotify/disable_warnings.h (revision 348960)
+++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/thirdparty/ittnotify/disable_warnings.h (nonexistent)
@@ -1,30 +0,0 @@
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "ittnotify_config.h"
-
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-
-#pragma warning (disable: 593) /* parameter "XXXX" was set but never used */
-#pragma warning (disable: 344) /* typedef name has already been declared (with same type) */
-#pragma warning (disable: 174) /* expression has no effect */
-#pragma warning (disable: 4127) /* conditional expression is constant */
-#pragma warning (disable: 4306) /* conversion from '?' to '?'
of greater size */ - -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -#if defined __INTEL_COMPILER - -#pragma warning (disable: 869) /* parameter "XXXXX" was never referenced */ -#pragma warning (disable: 1418) /* external function definition with no prior declaration */ -#pragma warning (disable: 1419) /* external declaration in primary source file */ - -#endif /* __INTEL_COMPILER */ Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/thirdparty/ittnotify/disable_warnings.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/thirdparty/ittnotify/ittnotify_static.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/thirdparty/ittnotify/ittnotify_static.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/thirdparty/ittnotify/ittnotify_static.h (nonexistent) @@ -1,342 +0,0 @@ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#include "ittnotify_config.h" - -#ifndef ITT_FORMAT_DEFINED -# ifndef ITT_FORMAT -# define ITT_FORMAT -# endif /* ITT_FORMAT */ -# ifndef ITT_NO_PARAMS -# define ITT_NO_PARAMS -# endif /* ITT_NO_PARAMS */ -#endif /* ITT_FORMAT_DEFINED */ - -/* - * parameters for macro expected: - * ITT_STUB(api, type, func_name, arguments, params, func_name_in_dll, group, printf_fmt) - */ -#ifdef __ITT_INTERNAL_INIT - -#ifndef __ITT_INTERNAL_BODY -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_domain*, domain_createA, (const char *name), (ITT_FORMAT name), domain_createA, __itt_group_structure, "\"%s\"") -ITT_STUB(ITTAPI, __itt_domain*, domain_createW, (const wchar_t *name), (ITT_FORMAT name), domain_createW, __itt_group_structure, "\"%S\"") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_domain*, domain_create, (const char *name), (ITT_FORMAT name), domain_create, __itt_group_structure, "\"%s\"") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createA, (const char *name), (ITT_FORMAT name), string_handle_createA, __itt_group_structure, "\"%s\"") -ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createW, (const wchar_t *name), (ITT_FORMAT name), string_handle_createW, __itt_group_structure, "\"%S\"") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_create, (const char *name), (ITT_FORMAT name), string_handle_create, __itt_group_structure, "\"%s\"") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_counter, counter_createA, (const char *name, const char *domain), (ITT_FORMAT name, domain), counter_createA, __itt_group_counter, "\"%s\", \"%s\"") -ITT_STUB(ITTAPI, __itt_counter, counter_createW, (const wchar_t *name, const wchar_t *domain), (ITT_FORMAT name, domain), counter_createW, __itt_group_counter, "\"%s\", \"%s\"") -#else /* 
ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_counter, counter_create, (const char *name, const char *domain), (ITT_FORMAT name, domain), counter_create, __itt_group_counter, "\"%s\", \"%s\"") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_counter, counter_create_typedA, (const char *name, const char *domain, __itt_metadata_type type), (ITT_FORMAT name, domain, type), counter_create_typedA, __itt_group_counter, "\"%s\", \"%s\", %d") -ITT_STUB(ITTAPI, __itt_counter, counter_create_typedW, (const wchar_t *name, const wchar_t *domain, __itt_metadata_type type), (ITT_FORMAT name, domain, type), counter_create_typedW, __itt_group_counter, "\"%s\", \"%s\", %d") -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_counter, counter_create_typed, (const char *name, const char *domain, __itt_metadata_type type), (ITT_FORMAT name, domain, type), counter_create_typed, __itt_group_counter, "\"%s\", \"%s\", %d") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - - -ITT_STUBV(ITTAPI, void, pause, (void), (ITT_NO_PARAMS), pause, __itt_group_control | __itt_group_legacy, "no args") -ITT_STUBV(ITTAPI, void, resume, (void), (ITT_NO_PARAMS), resume, __itt_group_control | __itt_group_legacy, "no args") - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUBV(ITTAPI, void, thread_set_nameA, (const char *name), (ITT_FORMAT name), thread_set_nameA, __itt_group_thread, "\"%s\"") -ITT_STUBV(ITTAPI, void, thread_set_nameW, (const wchar_t *name), (ITT_FORMAT name), thread_set_nameW, __itt_group_thread, "\"%S\"") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, thread_set_name, (const char *name), (ITT_FORMAT name), thread_set_name, __itt_group_thread, "\"%s\"") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, thread_ignore, (void), (ITT_NO_PARAMS), thread_ignore, __itt_group_thread, "no args") - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(LIBITTAPI, int, thr_name_setA, (const char *name, int namelen), (ITT_FORMAT name, namelen), thr_name_setA, __itt_group_thread | __itt_group_legacy, "\"%s\", %d") -ITT_STUB(LIBITTAPI, int, thr_name_setW, (const wchar_t *name, int namelen), (ITT_FORMAT name, namelen), thr_name_setW, __itt_group_thread | __itt_group_legacy, "\"%S\", %d") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUB(LIBITTAPI, int, thr_name_set, (const char *name, int namelen), (ITT_FORMAT name, namelen), thr_name_set, __itt_group_thread | __itt_group_legacy, "\"%s\", %d") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUBV(LIBITTAPI, void, thr_ignore, (void), (ITT_NO_PARAMS), thr_ignore, __itt_group_thread | __itt_group_legacy, "no args") -#endif /* __ITT_INTERNAL_BODY */ - -ITT_STUBV(ITTAPI, void, enable_attach, (void), (ITT_NO_PARAMS), enable_attach, __itt_group_all, "no args") - -#else /* __ITT_INTERNAL_INIT */ - -ITT_STUBV(ITTAPI, void, detach, (void), (ITT_NO_PARAMS), detach, __itt_group_control | __itt_group_legacy, "no args") - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUBV(ITTAPI, void, sync_createA, (void *addr, const char *objtype, const char *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_createA, __itt_group_sync | __itt_group_fsync, "%p, \"%s\", \"%s\", %x") -ITT_STUBV(ITTAPI, void, sync_createW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_createW, __itt_group_sync | __itt_group_fsync, "%p, \"%S\", \"%S\", %x") -ITT_STUBV(ITTAPI, void, sync_renameA, (void *addr, 
const char *name), (ITT_FORMAT addr, name), sync_renameA, __itt_group_sync | __itt_group_fsync, "%p, \"%s\"") -ITT_STUBV(ITTAPI, void, sync_renameW, (void *addr, const wchar_t *name), (ITT_FORMAT addr, name), sync_renameW, __itt_group_sync | __itt_group_fsync, "%p, \"%S\"") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, sync_create, (void *addr, const char *objtype, const char *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_create, __itt_group_sync | __itt_group_fsync, "%p, \"%s\", \"%s\", %x") -ITT_STUBV(ITTAPI, void, sync_rename, (void *addr, const char *name), (ITT_FORMAT addr, name), sync_rename, __itt_group_sync | __itt_group_fsync, "%p, \"%s\"") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, sync_destroy, (void *addr), (ITT_FORMAT addr), sync_destroy, __itt_group_sync | __itt_group_fsync, "%p") - -ITT_STUBV(ITTAPI, void, sync_prepare, (void* addr), (ITT_FORMAT addr), sync_prepare, __itt_group_sync, "%p") -ITT_STUBV(ITTAPI, void, sync_cancel, (void *addr), (ITT_FORMAT addr), sync_cancel, __itt_group_sync, "%p") -ITT_STUBV(ITTAPI, void, sync_acquired, (void *addr), (ITT_FORMAT addr), sync_acquired, __itt_group_sync, "%p") -ITT_STUBV(ITTAPI, void, sync_releasing, (void* addr), (ITT_FORMAT addr), sync_releasing, __itt_group_sync, "%p") - -ITT_STUBV(ITTAPI, void, suppress_push, (unsigned int mask), (ITT_FORMAT mask), suppress_push, __itt_group_suppress, "%p") -ITT_STUBV(ITTAPI, void, suppress_pop, (void), (ITT_NO_PARAMS), suppress_pop, __itt_group_suppress, "no args") -ITT_STUBV(ITTAPI, void, suppress_mark_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size),(ITT_FORMAT mode, mask, address, size), suppress_mark_range, __itt_group_suppress, "%d, %p, %p, %d") -ITT_STUBV(ITTAPI, void, suppress_clear_range,(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size),(ITT_FORMAT mode, mask, address, size), suppress_clear_range,__itt_group_suppress, "%d, %p, %p, %d") - -ITT_STUBV(ITTAPI, void, fsync_prepare, (void* addr), (ITT_FORMAT addr), sync_prepare, __itt_group_fsync, "%p") -ITT_STUBV(ITTAPI, void, fsync_cancel, (void *addr), (ITT_FORMAT addr), sync_cancel, __itt_group_fsync, "%p") -ITT_STUBV(ITTAPI, void, fsync_acquired, (void *addr), (ITT_FORMAT addr), sync_acquired, __itt_group_fsync, "%p") -ITT_STUBV(ITTAPI, void, fsync_releasing, (void* addr), (ITT_FORMAT addr), sync_releasing, __itt_group_fsync, "%p") - -ITT_STUBV(ITTAPI, void, model_site_begin, (__itt_model_site *site, __itt_model_site_instance *instance, const char *name), (ITT_FORMAT site, instance, name), model_site_begin, __itt_group_model, "%p, %p, \"%s\"") -ITT_STUBV(ITTAPI, void, model_site_end, (__itt_model_site *site, __itt_model_site_instance *instance), (ITT_FORMAT site, instance), model_site_end, __itt_group_model, "%p, %p") -ITT_STUBV(ITTAPI, void, model_task_begin, (__itt_model_task *task, __itt_model_task_instance *instance, const char *name), (ITT_FORMAT task, instance, name), model_task_begin, __itt_group_model, "%p, %p, \"%s\"") -ITT_STUBV(ITTAPI, void, model_task_end, (__itt_model_task *task, __itt_model_task_instance *instance), (ITT_FORMAT task, instance), model_task_end, __itt_group_model, "%p, %p") -ITT_STUBV(ITTAPI, void, model_lock_acquire, (void *lock), (ITT_FORMAT lock), model_lock_acquire, __itt_group_model, "%p") -ITT_STUBV(ITTAPI, void, model_lock_release, (void *lock), (ITT_FORMAT lock), model_lock_release, __itt_group_model, "%p") -ITT_STUBV(ITTAPI, void, model_record_allocation, 
(void *addr, size_t size), (ITT_FORMAT addr, size), model_record_allocation, __itt_group_model, "%p, %d") -ITT_STUBV(ITTAPI, void, model_record_deallocation, (void *addr), (ITT_FORMAT addr), model_record_deallocation, __itt_group_model, "%p") -ITT_STUBV(ITTAPI, void, model_induction_uses, (void* addr, size_t size), (ITT_FORMAT addr, size), model_induction_uses, __itt_group_model, "%p, %d") -ITT_STUBV(ITTAPI, void, model_reduction_uses, (void* addr, size_t size), (ITT_FORMAT addr, size), model_reduction_uses, __itt_group_model, "%p, %d") -ITT_STUBV(ITTAPI, void, model_observe_uses, (void* addr, size_t size), (ITT_FORMAT addr, size), model_observe_uses, __itt_group_model, "%p, %d") -ITT_STUBV(ITTAPI, void, model_clear_uses, (void* addr), (ITT_FORMAT addr), model_clear_uses, __itt_group_model, "%p") - -#ifndef __ITT_INTERNAL_BODY -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUBV(ITTAPI, void, model_site_beginW, (const wchar_t *name), (ITT_FORMAT name), model_site_beginW, __itt_group_model, "\"%s\"") -ITT_STUBV(ITTAPI, void, model_task_beginW, (const wchar_t *name), (ITT_FORMAT name), model_task_beginW, __itt_group_model, "\"%s\"") -ITT_STUBV(ITTAPI, void, model_iteration_taskW, (const wchar_t *name), (ITT_FORMAT name), model_iteration_taskW, __itt_group_model, "\"%s\"") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, model_site_beginA, (const char *name), (ITT_FORMAT name), model_site_beginA, __itt_group_model, "\"%s\"") -ITT_STUBV(ITTAPI, void, model_site_beginAL, (const char *name, size_t len), (ITT_FORMAT name, len), model_site_beginAL, __itt_group_model, "\"%s\", %d") -ITT_STUBV(ITTAPI, void, model_task_beginA, (const char *name), (ITT_FORMAT name), model_task_beginA, __itt_group_model, "\"%s\"") -ITT_STUBV(ITTAPI, void, model_task_beginAL, (const char *name, size_t len), (ITT_FORMAT name, len), model_task_beginAL, __itt_group_model, "\"%s\", %d") -ITT_STUBV(ITTAPI, void, model_iteration_taskA, (const char *name), (ITT_FORMAT name), model_iteration_taskA, __itt_group_model, "\"%s\"") -ITT_STUBV(ITTAPI, void, model_iteration_taskAL, (const char *name, size_t len), (ITT_FORMAT name, len), model_iteration_taskAL, __itt_group_model, "\"%s\", %d") -ITT_STUBV(ITTAPI, void, model_site_end_2, (void), (ITT_NO_PARAMS), model_site_end_2, __itt_group_model, "no args") -ITT_STUBV(ITTAPI, void, model_task_end_2, (void), (ITT_NO_PARAMS), model_task_end_2, __itt_group_model, "no args") -ITT_STUBV(ITTAPI, void, model_lock_acquire_2, (void *lock), (ITT_FORMAT lock), model_lock_acquire_2, __itt_group_model, "%p") -ITT_STUBV(ITTAPI, void, model_lock_release_2, (void *lock), (ITT_FORMAT lock), model_lock_release_2, __itt_group_model, "%p") -ITT_STUBV(ITTAPI, void, model_aggregate_task, (size_t count), (ITT_FORMAT count), model_aggregate_task, __itt_group_model, "%d") -ITT_STUBV(ITTAPI, void, model_disable_push, (__itt_model_disable x), (ITT_FORMAT x), model_disable_push, __itt_group_model, "%p") -ITT_STUBV(ITTAPI, void, model_disable_pop, (void), (ITT_NO_PARAMS), model_disable_pop, __itt_group_model, "no args") -#endif /* __ITT_INTERNAL_BODY */ - -#ifndef __ITT_INTERNAL_BODY -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createA, (const char *name, const char *domain), (ITT_FORMAT name, domain), heap_function_createA, __itt_group_heap, "\"%s\", \"%s\"") -ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createW, (const wchar_t *name, const wchar_t *domain), (ITT_FORMAT name, domain), heap_function_createW, __itt_group_heap, "\"%s\", \"%s\"") 
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_heap_function, heap_function_create, (const char *name, const char *domain), (ITT_FORMAT name, domain), heap_function_create, __itt_group_heap, "\"%s\", \"%s\"") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* __ITT_INTERNAL_BODY */ -ITT_STUBV(ITTAPI, void, heap_allocate_begin, (__itt_heap_function h, size_t size, int initialized), (ITT_FORMAT h, size, initialized), heap_allocate_begin, __itt_group_heap, "%p, %lu, %d") -ITT_STUBV(ITTAPI, void, heap_allocate_end, (__itt_heap_function h, void** addr, size_t size, int initialized), (ITT_FORMAT h, addr, size, initialized), heap_allocate_end, __itt_group_heap, "%p, %p, %lu, %d") -ITT_STUBV(ITTAPI, void, heap_free_begin, (__itt_heap_function h, void* addr), (ITT_FORMAT h, addr), heap_free_begin, __itt_group_heap, "%p, %p") -ITT_STUBV(ITTAPI, void, heap_free_end, (__itt_heap_function h, void* addr), (ITT_FORMAT h, addr), heap_free_end, __itt_group_heap, "%p, %p") -ITT_STUBV(ITTAPI, void, heap_reallocate_begin, (__itt_heap_function h, void* addr, size_t new_size, int initialized), (ITT_FORMAT h, addr, new_size, initialized), heap_reallocate_begin, __itt_group_heap, "%p, %p, %lu, %d") -ITT_STUBV(ITTAPI, void, heap_reallocate_end, (__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized), (ITT_FORMAT h, addr, new_addr, new_size, initialized), heap_reallocate_end, __itt_group_heap, "%p, %p, %p, %lu, %d") -ITT_STUBV(ITTAPI, void, heap_internal_access_begin, (void), (ITT_NO_PARAMS), heap_internal_access_begin, __itt_group_heap, "no args") -ITT_STUBV(ITTAPI, void, heap_internal_access_end, (void), (ITT_NO_PARAMS), heap_internal_access_end, __itt_group_heap, "no args") -ITT_STUBV(ITTAPI, void, heap_record_memory_growth_begin, (void), (ITT_NO_PARAMS), heap_record_memory_growth_begin, __itt_group_heap, "no args") -ITT_STUBV(ITTAPI, void, heap_record_memory_growth_end, (void), (ITT_NO_PARAMS), heap_record_memory_growth_end, __itt_group_heap, "no args") -ITT_STUBV(ITTAPI, void, heap_reset_detection, (unsigned int reset_mask), (ITT_FORMAT reset_mask), heap_reset_detection, __itt_group_heap, "%u") -ITT_STUBV(ITTAPI, void, heap_record, (unsigned int record_mask), (ITT_FORMAT record_mask), heap_record, __itt_group_heap, "%u") - -ITT_STUBV(ITTAPI, void, id_create, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), id_create, __itt_group_structure, "%p, %lu") -ITT_STUBV(ITTAPI, void, id_destroy, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), id_destroy, __itt_group_structure, "%p, %lu") - -ITT_STUB(ITTAPI, __itt_timestamp, get_timestamp, (void), (ITT_NO_PARAMS), get_timestamp, __itt_group_structure, "no args") - -ITT_STUBV(ITTAPI, void, region_begin, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), region_begin, __itt_group_structure, "%p, %lu, %lu, %p") -ITT_STUBV(ITTAPI, void, region_end, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), region_end, __itt_group_structure, "%p, %lu") - -#ifndef __ITT_INTERNAL_BODY -ITT_STUBV(ITTAPI, void, frame_begin_v3, (const __itt_domain *domain, __itt_id *id), (ITT_FORMAT domain, id), frame_begin_v3, __itt_group_structure, "%p, %p") -ITT_STUBV(ITTAPI, void, frame_end_v3, (const __itt_domain *domain, __itt_id *id), (ITT_FORMAT domain, id), frame_end_v3, __itt_group_structure, "%p, %p") -ITT_STUBV(ITTAPI, void, frame_submit_v3, (const __itt_domain *domain, __itt_id *id, __itt_timestamp begin, 
__itt_timestamp end), (ITT_FORMAT domain, id, begin, end), frame_submit_v3, __itt_group_structure, "%p, %p, %lu, %lu") -#endif /* __ITT_INTERNAL_BODY */ - -ITT_STUBV(ITTAPI, void, task_group, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), task_group, __itt_group_structure, "%p, %lu, %lu, %p") - -ITT_STUBV(ITTAPI, void, task_begin, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), task_begin, __itt_group_structure, "%p, %lu, %lu, %p") -ITT_STUBV(ITTAPI, void, task_begin_fn, (const __itt_domain *domain, __itt_id id, __itt_id parent, void* fn), (ITT_FORMAT domain, id, parent, fn), task_begin_fn, __itt_group_structure, "%p, %lu, %lu, %p") -ITT_STUBV(ITTAPI, void, task_end, (const __itt_domain *domain), (ITT_FORMAT domain), task_end, __itt_group_structure, "%p") - -ITT_STUBV(ITTAPI, void, counter_inc_v3, (const __itt_domain *domain, __itt_string_handle *name), (ITT_FORMAT domain, name), counter_inc_v3, __itt_group_structure, "%p, %p") -ITT_STUBV(ITTAPI, void, counter_inc_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long value), (ITT_FORMAT domain, name, value), counter_inc_delta_v3, __itt_group_structure, "%p, %p, %lu") -ITT_STUBV(ITTAPI, void, counter_dec_v3, (const __itt_domain *domain, __itt_string_handle *name), (ITT_FORMAT domain, name), counter_dec_v3, __itt_group_structure, "%p, %p") -ITT_STUBV(ITTAPI, void, counter_dec_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long value), (ITT_FORMAT domain, name, value), counter_dec_delta_v3, __itt_group_structure, "%p, %p, %lu") - -ITT_STUBV(ITTAPI, void, marker, (const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope), (ITT_FORMAT domain, id, name, scope), marker, __itt_group_structure, "%p, %lu, %p, %d") - -ITT_STUBV(ITTAPI, void, metadata_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data), (ITT_FORMAT domain, id, key, type, count, data), metadata_add, __itt_group_structure, "%p, %lu, %p, %d, %lu, %p") -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUBV(ITTAPI, void, metadata_str_addA, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char* data, size_t length), (ITT_FORMAT domain, id, key, data, length), metadata_str_addA, __itt_group_structure, "%p, %lu, %p, %p, %lu") -ITT_STUBV(ITTAPI, void, metadata_str_addW, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t* data, size_t length), (ITT_FORMAT domain, id, key, data, length), metadata_str_addW, __itt_group_structure, "%p, %lu, %p, %p, %lu") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, metadata_str_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char* data, size_t length), (ITT_FORMAT domain, id, key, data, length), metadata_str_add, __itt_group_structure, "%p, %lu, %p, %p, %lu") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -ITT_STUBV(ITTAPI, void, relation_add_to_current, (const __itt_domain *domain, __itt_relation relation, __itt_id tail), (ITT_FORMAT domain, relation, tail), relation_add_to_current, __itt_group_structure, "%p, %lu, %p") -ITT_STUBV(ITTAPI, void, relation_add, (const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail), (ITT_FORMAT domain, head, relation, tail), relation_add, __itt_group_structure, "%p, %p, %lu, %p") - 
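The repeated ITT_STUB/ITT_STUBV entries above follow the X-macro pattern: ittnotify_static.h is included several times, and the includer redefines the two macros before each pass to stamp out declarations, function-pointer tables, or init code from this single list. A minimal sketch of that technique with hypothetical names (DEMO_FOREACH, demo_names), not the actual ittnotify definitions:

#include <cstdio>

// One master list of entries; X is supplied by each expansion site.
#define DEMO_FOREACH(X)                                                       \
  X(domain_create, 1)                                                         \
  X(string_handle_create, 2)

// Pass 1: generate an enum constant per entry.
#define AS_ENUM(name, id) demo_##name = id,
enum demo_id { DEMO_FOREACH(AS_ENUM) };
#undef AS_ENUM

// Pass 2: generate a printable name table from the same list.
#define AS_NAME(name, id) #name,
static const char *demo_names[] = { DEMO_FOREACH(AS_NAME) };
#undef AS_NAME

int main() {
  std::printf("%s = %d\n", demo_names[0], demo_domain_create); // domain_create = 1
  return 0;
}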
-#ifndef __ITT_INTERNAL_BODY -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(LIBITTAPI, __itt_event, event_createA, (const char *name, int namelen), (ITT_FORMAT name, namelen), event_createA, __itt_group_mark | __itt_group_legacy, "\"%s\", %d") -ITT_STUB(LIBITTAPI, __itt_event, event_createW, (const wchar_t *name, int namelen), (ITT_FORMAT name, namelen), event_createW, __itt_group_mark | __itt_group_legacy, "\"%S\", %d") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUB(LIBITTAPI, __itt_event, event_create, (const char *name, int namelen), (ITT_FORMAT name, namelen), event_create, __itt_group_mark | __itt_group_legacy, "\"%s\", %d") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(LIBITTAPI, int, event_start, (__itt_event event), (ITT_FORMAT event), event_start, __itt_group_mark | __itt_group_legacy, "%d") -ITT_STUB(LIBITTAPI, int, event_end, (__itt_event event), (ITT_FORMAT event), event_end, __itt_group_mark | __itt_group_legacy, "%d") -#endif /* __ITT_INTERNAL_BODY */ - -#ifndef __ITT_INTERNAL_BODY -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUBV(ITTAPI, void, sync_set_nameA, (void *addr, const char *objtype, const char *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_set_nameA, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%s\", \"%s\", %x") -ITT_STUBV(ITTAPI, void, sync_set_nameW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_set_nameW, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%S\", \"%S\", %x") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, sync_set_name, (void *addr, const char *objtype, const char *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_set_name, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "p, \"%s\", \"%s\", %x") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(LIBITTAPI, int, notify_sync_nameA, (void *p, const char *objtype, int typelen, const char *objname, int namelen, int attribute), (ITT_FORMAT p, objtype, typelen, objname, namelen, attribute), notify_sync_nameA, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%s\", %d, \"%s\", %d, %x") -ITT_STUB(LIBITTAPI, int, notify_sync_nameW, (void *p, const wchar_t *objtype, int typelen, const wchar_t *objname, int namelen, int attribute), (ITT_FORMAT p, objtype, typelen, objname, namelen, attribute), notify_sync_nameW, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%S\", %d, \"%S\", %d, %x") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUB(LIBITTAPI, int, notify_sync_name, (void *p, const char *objtype, int typelen, const char *objname, int namelen, int attribute), (ITT_FORMAT p, objtype, typelen, objname, namelen, attribute), notify_sync_name, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%s\", %d, \"%s\", %d, %x") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -ITT_STUBV(LIBITTAPI, void, notify_sync_prepare, (void *p), (ITT_FORMAT p), notify_sync_prepare, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p") -ITT_STUBV(LIBITTAPI, void, notify_sync_cancel, (void *p), (ITT_FORMAT p), notify_sync_cancel, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p") -ITT_STUBV(LIBITTAPI, void, notify_sync_acquired, (void *p), (ITT_FORMAT p), notify_sync_acquired, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p") -ITT_STUBV(LIBITTAPI, void, notify_sync_releasing, (void 
*p), (ITT_FORMAT p), notify_sync_releasing, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p") -#endif /* __ITT_INTERNAL_BODY */ - -ITT_STUBV(LIBITTAPI, void, memory_read, (void *addr, size_t size), (ITT_FORMAT addr, size), memory_read, __itt_group_legacy, "%p, %lu") -ITT_STUBV(LIBITTAPI, void, memory_write, (void *addr, size_t size), (ITT_FORMAT addr, size), memory_write, __itt_group_legacy, "%p, %lu") -ITT_STUBV(LIBITTAPI, void, memory_update, (void *addr, size_t size), (ITT_FORMAT addr, size), memory_update, __itt_group_legacy, "%p, %lu") - -ITT_STUB(LIBITTAPI, __itt_state_t, state_get, (void), (ITT_NO_PARAMS), state_get, __itt_group_legacy, "no args") -ITT_STUB(LIBITTAPI, __itt_state_t, state_set, (__itt_state_t s), (ITT_FORMAT s), state_set, __itt_group_legacy, "%d") -ITT_STUB(LIBITTAPI, __itt_obj_state_t, obj_mode_set, (__itt_obj_prop_t p, __itt_obj_state_t s), (ITT_FORMAT p, s), obj_mode_set, __itt_group_legacy, "%d, %d") -ITT_STUB(LIBITTAPI, __itt_thr_state_t, thr_mode_set, (__itt_thr_prop_t p, __itt_thr_state_t s), (ITT_FORMAT p, s), thr_mode_set, __itt_group_legacy, "%d, %d") - -#ifndef __ITT_INTERNAL_BODY -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_frame, frame_createA, (const char *domain), (ITT_FORMAT domain), frame_createA, __itt_group_frame, "\"%s\"") -ITT_STUB(ITTAPI, __itt_frame, frame_createW, (const wchar_t *domain), (ITT_FORMAT domain), frame_createW, __itt_group_frame, "\"%s\"") -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_frame, frame_create, (const char *domain), (ITT_FORMAT domain), frame_create, __itt_group_frame, "\"%s\"") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* __ITT_INTERNAL_BODY */ -ITT_STUBV(ITTAPI, void, frame_begin, (__itt_frame frame), (ITT_FORMAT frame), frame_begin, __itt_group_frame, "%p") -ITT_STUBV(ITTAPI, void, frame_end, (__itt_frame frame), (ITT_FORMAT frame), frame_end, __itt_group_frame, "%p") - -ITT_STUBV(ITTAPI, void, counter_destroy, (__itt_counter id), (ITT_FORMAT id), counter_destroy, __itt_group_counter, "%p") -ITT_STUBV(ITTAPI, void, counter_inc, (__itt_counter id), (ITT_FORMAT id), counter_inc, __itt_group_counter, "%p") -ITT_STUBV(ITTAPI, void, counter_inc_delta, (__itt_counter id, unsigned long long value), (ITT_FORMAT id, value), counter_inc_delta, __itt_group_counter, "%p, %lu") -ITT_STUBV(ITTAPI, void, counter_dec, (__itt_counter id), (ITT_FORMAT id), counter_dec, __itt_group_counter, "%p") -ITT_STUBV(ITTAPI, void, counter_dec_delta, (__itt_counter id, unsigned long long value), (ITT_FORMAT id, value), counter_dec_delta, __itt_group_counter, "%p, %lu") -ITT_STUBV(ITTAPI, void, counter_set_value, (__itt_counter id, void *value_ptr), (ITT_FORMAT id, value_ptr), counter_set_value, __itt_group_counter, "%p, %p") -ITT_STUBV(ITTAPI, void, counter_set_value_ex, (__itt_counter id, __itt_clock_domain *clock_domain, unsigned long long timestamp, void *value_ptr), (ITT_FORMAT id, clock_domain, timestamp, value_ptr), counter_set_value_ex, __itt_group_counter, "%p, %p, %llu, %p") - -#ifndef __ITT_INTERNAL_BODY -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_mark_type, mark_createA, (const char *name), (ITT_FORMAT name), mark_createA, __itt_group_mark, "\"%s\"") -ITT_STUB(ITTAPI, __itt_mark_type, mark_createW, (const wchar_t *name), (ITT_FORMAT name), mark_createW, __itt_group_mark, "\"%S\"") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_mark_type, mark_create, (const char *name), (ITT_FORMAT name), mark_create, __itt_group_mark, "\"%s\"") -#endif 
/* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* __ITT_INTERNAL_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, int, markA, (__itt_mark_type mt, const char *parameter), (ITT_FORMAT mt, parameter), markA, __itt_group_mark, "%d, \"%s\"") -ITT_STUB(ITTAPI, int, markW, (__itt_mark_type mt, const wchar_t *parameter), (ITT_FORMAT mt, parameter), markW, __itt_group_mark, "%d, \"%S\"") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, int, mark, (__itt_mark_type mt, const char *parameter), (ITT_FORMAT mt, parameter), mark, __itt_group_mark, "%d, \"%s\"") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, int, mark_off, (__itt_mark_type mt), (ITT_FORMAT mt), mark_off, __itt_group_mark, "%d") -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, int, mark_globalA, (__itt_mark_type mt, const char *parameter), (ITT_FORMAT mt, parameter), mark_globalA, __itt_group_mark, "%d, \"%s\"") -ITT_STUB(ITTAPI, int, mark_globalW, (__itt_mark_type mt, const wchar_t *parameter), (ITT_FORMAT mt, parameter), mark_globalW, __itt_group_mark, "%d, \"%S\"") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, int, mark_global, (__itt_mark_type mt, const char *parameter), (ITT_FORMAT mt, parameter), mark_global, __itt_group_mark, "%d, \"%S\"") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, int, mark_global_off, (__itt_mark_type mt), (ITT_FORMAT mt), mark_global_off, __itt_group_mark, "%d") - -#ifndef __ITT_INTERNAL_BODY -ITT_STUB(ITTAPI, __itt_caller, stack_caller_create, (void), (ITT_NO_PARAMS), stack_caller_create, __itt_group_stitch, "no args") -#endif /* __ITT_INTERNAL_BODY */ -ITT_STUBV(ITTAPI, void, stack_caller_destroy, (__itt_caller id), (ITT_FORMAT id), stack_caller_destroy, __itt_group_stitch, "%p") -ITT_STUBV(ITTAPI, void, stack_callee_enter, (__itt_caller id), (ITT_FORMAT id), stack_callee_enter, __itt_group_stitch, "%p") -ITT_STUBV(ITTAPI, void, stack_callee_leave, (__itt_caller id), (ITT_FORMAT id), stack_callee_leave, __itt_group_stitch, "%p") - -ITT_STUB(ITTAPI, __itt_clock_domain*, clock_domain_create, (__itt_get_clock_info_fn fn, void* fn_data), (ITT_FORMAT fn, fn_data), clock_domain_create, __itt_group_structure, "%p, %p") -ITT_STUBV(ITTAPI, void, clock_domain_reset, (void), (ITT_NO_PARAMS), clock_domain_reset, __itt_group_structure, "no args") -ITT_STUBV(ITTAPI, void, id_create_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id), (ITT_FORMAT domain, clock_domain, timestamp, id), id_create_ex, __itt_group_structure, "%p, %p, %lu, %lu") -ITT_STUBV(ITTAPI, void, id_destroy_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id), (ITT_FORMAT domain, clock_domain, timestamp, id), id_destroy_ex, __itt_group_structure, "%p, %p, %lu, %lu") -ITT_STUBV(ITTAPI, void, task_begin_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, __itt_string_handle *name), (ITT_FORMAT domain, clock_domain, timestamp, id, parentid, name), task_begin_ex, __itt_group_structure, "%p, %p, %lu, %lu, %lu, %p") -ITT_STUBV(ITTAPI, void, task_begin_fn_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, void* fn), (ITT_FORMAT domain, clock_domain, timestamp, id, parentid, fn), task_begin_fn_ex, __itt_group_structure, "%p, %p, %lu, %lu, %lu, %p") -ITT_STUBV(ITTAPI, void, task_end_ex, (const __itt_domain *domain, 
__itt_clock_domain* clock_domain, unsigned long long timestamp), (ITT_FORMAT domain, clock_domain, timestamp), task_end_ex, __itt_group_structure, "%p, %p, %lu") -ITT_STUBV(ITTAPI, void, task_begin_overlapped, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), task_begin_overlapped, __itt_group_structure, "%p, %lu, %lu, %p") -ITT_STUBV(ITTAPI, void, task_begin_overlapped_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, __itt_string_handle *name), (ITT_FORMAT domain, clock_domain, timestamp, id, parentid, name), task_begin_overlapped_ex, __itt_group_structure, "%p, %p, %lu, %lu, %lu, %p") -ITT_STUBV(ITTAPI, void, task_end_overlapped, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), task_end_overlapped, __itt_group_structure, "%p, %lu") -ITT_STUBV(ITTAPI, void, task_end_overlapped_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id), (ITT_FORMAT domain, clock_domain, timestamp, id), task_end_overlapped_ex, __itt_group_structure, "%p, %p, %lu, %lu") -ITT_STUBV(ITTAPI, void, marker_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope), (ITT_FORMAT domain, clock_domain, timestamp, id, name, scope), marker_ex, __itt_group_structure, "%p, %p, %lu, %lu, %p, %d") -ITT_STUBV(ITTAPI, void, metadata_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data), (ITT_FORMAT domain, scope, key, type, count, data), metadata_add_with_scope, __itt_group_structure, "%p, %d, %p, %d, %lu, %p") -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeA, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length), (ITT_FORMAT domain, scope, key, data, length), metadata_str_add_with_scopeA, __itt_group_structure, "%p, %d, %p, %p, %lu") -ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeW, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length), (ITT_FORMAT domain, scope, key, data, length), metadata_str_add_with_scopeW, __itt_group_structure, "%p, %d, %p, %p, %lu") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, metadata_str_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length), (ITT_FORMAT domain, scope, key, data, length), metadata_str_add_with_scope, __itt_group_structure, "%p, %d, %p, %p, %lu") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, relation_add_to_current_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail), (ITT_FORMAT domain, clock_domain, timestamp, relation, tail), relation_add_to_current_ex, __itt_group_structure, "%p, %p, %lu, %d, %lu") -ITT_STUBV(ITTAPI, void, relation_add_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail), (ITT_FORMAT domain, clock_domain, timestamp, head, relation, tail), relation_add_ex, __itt_group_structure, "%p, %p, %lu, %lu, %d, %lu") -ITT_STUB(ITTAPI, __itt_track_group*, track_group_create, (__itt_string_handle* name, __itt_track_group_type 
track_group_type), (ITT_FORMAT name, track_group_type), track_group_create, __itt_group_structure, "%p, %d") -ITT_STUB(ITTAPI, __itt_track*, track_create, (__itt_track_group* track_group,__itt_string_handle* name, __itt_track_type track_type), (ITT_FORMAT track_group, name, track_type), track_create, __itt_group_structure, "%p, %p, %d") -ITT_STUBV(ITTAPI, void, set_track, (__itt_track *track), (ITT_FORMAT track), set_track, __itt_group_structure, "%p") - -#ifndef __ITT_INTERNAL_BODY -ITT_STUB(ITTAPI, const char*, api_version, (void), (ITT_NO_PARAMS), api_version, __itt_group_all & ~__itt_group_legacy, "no args") -#endif /* __ITT_INTERNAL_BODY */ - -#ifndef __ITT_INTERNAL_BODY -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, int, av_saveA, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder), (ITT_FORMAT data, rank, dimensions, type, filePath, columnOrder), av_saveA, __itt_group_arrays, "%p, %d, %p, %d, \"%s\", %d") -ITT_STUB(ITTAPI, int, av_saveW, (void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder), (ITT_FORMAT data, rank, dimensions, type, filePath, columnOrder), av_saveW, __itt_group_arrays, "%p, %d, %p, %d, \"%S\", %d") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, int, av_save, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder), (ITT_FORMAT data, rank, dimensions, type, filePath, columnOrder), av_save, __itt_group_arrays, "%p, %d, %p, %d, \"%s\", %d") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* __ITT_INTERNAL_BODY */ - -#ifndef __ITT_INTERNAL_BODY -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUBV(ITTAPI, void, module_loadA, (void *start_addr, void* end_addr, const char *path), (ITT_FORMAT start_addr, end_addr, path), module_loadA, __itt_group_none, "%p, %p, %p") -ITT_STUBV(ITTAPI, void, module_loadW, (void *start_addr, void* end_addr, const wchar_t *path), (ITT_FORMAT start_addr, end_addr, path), module_loadW, __itt_group_none, "%p, %p, %p") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, module_load, (void *start_addr, void *end_addr, const char *path), (ITT_FORMAT start_addr, end_addr, path), module_load, __itt_group_none, "%p, %p, %p") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* __ITT_INTERNAL_BODY */ - - -#endif /* __ITT_INTERNAL_INIT */ Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/thirdparty/ittnotify/ittnotify_static.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/thirdparty/ittnotify/ittnotify.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/thirdparty/ittnotify/ittnotify.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/thirdparty/ittnotify/ittnotify.h (nonexistent) @@ -1,4076 +0,0 @@ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef _ITTNOTIFY_H_
-#define _ITTNOTIFY_H_
-
-/**
-@file
-@brief Public User API functions and types
-@mainpage
-
-The ITT API is used to annotate a user's program with additional information
-that can be used by correctness and performance tools. The user inserts
-calls in their program. Those calls generate information that is collected
-at runtime, and used by Intel(R) Threading Tools.
-
-@section API Concepts
-The following general concepts are used throughout the API.
-
-@subsection Unicode Support
-Many API functions take character string arguments. On Windows, there
-are two versions of each such function. The function name is suffixed
-by W if Unicode support is enabled, and by A otherwise. Any API function
-that takes a character string argument adheres to this convention.
-
-@subsection Conditional Compilation
-Many users prefer having an option to modify ITT API code when linking it
-inside their runtimes. The ITT API header file provides a mechanism to replace
-ITT API function names inside your code with empty strings. To do this,
-define the macro INTEL_NO_ITTNOTIFY_API during compilation and remove the
-static library from the linker script.
-
-@subsection Domains
-[see domains]
-Domains provide a way to separate notification for different modules or
-libraries in a program. Domains are specified by dotted character strings,
-e.g. TBB.Internal.Control.
-
-A mechanism (to be specified) is provided to enable and disable
-domains. By default, all domains are enabled.
-@subsection Named Entities and Instances
-Named entities (frames, regions, tasks, and markers) communicate
-information about the program to the analysis tools. A named entity often
-refers to a section of program code, or to some set of logical concepts
-that the programmer wants to group together.
-
-Named entities relate to the programmer's static view of the program. When
-the program actually executes, many instances of a given named entity
-may be created.
-
-The API annotations denote instances of named entities. The actual
-named entities are displayed using the analysis tools. In other words,
-the named entities come into existence when instances are created.
-
-Instances of named entities may have instance identifiers (IDs). Some
-API calls use instance identifiers to create relationships between
-different instances of named entities. Other API calls associate data
-with instances of named entities.
-
-Some named entities must always have instance IDs. In particular, regions
-and frames always have IDs. Tasks and markers need IDs only if the ID is
-needed in another API call (such as adding a relation or metadata).
-
-The lifetime of instance IDs is distinct from the lifetime of
-instances. This allows various relationships to be specified separate
-from the actual execution of instances. This flexibility comes at the
-expense of extra API calls.
-
-The same ID may not be reused for different instances, unless a previous
-[ref] __itt_id_destroy call for that ID has been issued.
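-
-As an illustrative sketch only (not part of the original header text), the
-concepts above combine as follows; the domain string and do_work() are
-hypothetical, while the __itt_* entry points are declared later in this file.
-@code
-    __itt_domain* domain = __itt_domain_create("Example.Domain.One");
-    __itt_string_handle* name = __itt_string_handle_create("MyTask");
-
-    __itt_task_begin(domain, __itt_null, __itt_null, name); // new instance of the named entity
-    do_work();                                              // hypothetical workload
-    __itt_task_end(domain);
-@endcode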
-*/
-
-/** @cond exclude_from_documentation */
-#ifndef ITT_OS_WIN
-# define ITT_OS_WIN 1
-#endif /* ITT_OS_WIN */
-
-#ifndef ITT_OS_LINUX
-# define ITT_OS_LINUX 2
-#endif /* ITT_OS_LINUX */
-
-#ifndef ITT_OS_MAC
-# define ITT_OS_MAC 3
-#endif /* ITT_OS_MAC */
-
-#ifndef ITT_OS_FREEBSD
-# define ITT_OS_FREEBSD 4
-#endif /* ITT_OS_FREEBSD */
-
-#ifndef ITT_OS
-# if defined WIN32 || defined _WIN32
-# define ITT_OS ITT_OS_WIN
-# elif defined( __APPLE__ ) && defined( __MACH__ )
-# define ITT_OS ITT_OS_MAC
-# elif defined( __FreeBSD__ )
-# define ITT_OS ITT_OS_FREEBSD
-# else
-# define ITT_OS ITT_OS_LINUX
-# endif
-#endif /* ITT_OS */
-
-#ifndef ITT_PLATFORM_WIN
-# define ITT_PLATFORM_WIN 1
-#endif /* ITT_PLATFORM_WIN */
-
-#ifndef ITT_PLATFORM_POSIX
-# define ITT_PLATFORM_POSIX 2
-#endif /* ITT_PLATFORM_POSIX */
-
-#ifndef ITT_PLATFORM_MAC
-# define ITT_PLATFORM_MAC 3
-#endif /* ITT_PLATFORM_MAC */
-
-#ifndef ITT_PLATFORM_FREEBSD
-# define ITT_PLATFORM_FREEBSD 4
-#endif /* ITT_PLATFORM_FREEBSD */
-
-#ifndef ITT_PLATFORM
-# if ITT_OS==ITT_OS_WIN
-# define ITT_PLATFORM ITT_PLATFORM_WIN
-# elif ITT_OS==ITT_OS_MAC
-# define ITT_PLATFORM ITT_PLATFORM_MAC
-# elif ITT_OS==ITT_OS_FREEBSD
-# define ITT_PLATFORM ITT_PLATFORM_FREEBSD
-# else
-# define ITT_PLATFORM ITT_PLATFORM_POSIX
-# endif
-#endif /* ITT_PLATFORM */
-
-#if defined(_UNICODE) && !defined(UNICODE)
-#define UNICODE
-#endif
-
-#include <stddef.h>
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#include <tchar.h>
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#include <stdint.h>
-#if defined(UNICODE) || defined(_UNICODE)
-#include <wchar.h>
-#endif /* UNICODE || _UNICODE */
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-#ifndef ITTAPI_CDECL
-# if ITT_PLATFORM==ITT_PLATFORM_WIN
-# define ITTAPI_CDECL __cdecl
-# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-# if defined _M_IX86 || defined __i386__
-# define ITTAPI_CDECL __attribute__ ((cdecl))
-# else /* _M_IX86 || __i386__ */
-# define ITTAPI_CDECL /* actual only on x86 platform */
-# endif /* _M_IX86 || __i386__ */
-# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* ITTAPI_CDECL */
-
-#ifndef STDCALL
-# if ITT_PLATFORM==ITT_PLATFORM_WIN
-# define STDCALL __stdcall
-# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-# if defined _M_IX86 || defined __i386__
-# define STDCALL __attribute__ ((stdcall))
-# else /* _M_IX86 || __i386__ */
-# define STDCALL /* supported only on x86 platform */
-# endif /* _M_IX86 || __i386__ */
-# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* STDCALL */
-
-#define ITTAPI ITTAPI_CDECL
-#define LIBITTAPI ITTAPI_CDECL
-
-/* TODO: Temporary for compatibility! */
-#define ITTAPI_CALL ITTAPI_CDECL
-#define LIBITTAPI_CALL ITTAPI_CDECL
-
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-/* use __forceinline (VC++ specific) */
-#define ITT_INLINE __forceinline
-#define ITT_INLINE_ATTRIBUTE /* nothing */
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-/*
- * Generally, functions are not inlined unless optimization is specified.
- * For functions declared inline, this attribute inlines the function even
- * if no optimization level was specified.
- */
-#ifdef __STRICT_ANSI__
-#define ITT_INLINE static
-#define ITT_INLINE_ATTRIBUTE __attribute__((unused))
-#else /* __STRICT_ANSI__ */
-#define ITT_INLINE static inline
-#define ITT_INLINE_ATTRIBUTE __attribute__((always_inline, unused))
-#endif /* __STRICT_ANSI__ */
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-/** @endcond */
-
-#ifdef INTEL_ITTNOTIFY_ENABLE_LEGACY
-# if ITT_PLATFORM==ITT_PLATFORM_WIN
-# pragma message("WARNING!!! Deprecated API is used.
Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro") -# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -# warning "Deprecated API is used. Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro" -# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -# include "legacy/ittnotify.h" -#endif /* INTEL_ITTNOTIFY_ENABLE_LEGACY */ - -/** @cond exclude_from_documentation */ -/* Helper macro for joining tokens */ -#define ITT_JOIN_AUX(p,n) p##n -#define ITT_JOIN(p,n) ITT_JOIN_AUX(p,n) - -#ifdef ITT_MAJOR -#undef ITT_MAJOR -#endif -#ifdef ITT_MINOR -#undef ITT_MINOR -#endif -#define ITT_MAJOR 3 -#define ITT_MINOR 0 - -/* Standard versioning of a token with major and minor version numbers */ -#define ITT_VERSIONIZE(x) \ - ITT_JOIN(x, \ - ITT_JOIN(_, \ - ITT_JOIN(ITT_MAJOR, \ - ITT_JOIN(_, ITT_MINOR)))) - -#ifndef INTEL_ITTNOTIFY_PREFIX -# define INTEL_ITTNOTIFY_PREFIX __itt_ -#endif /* INTEL_ITTNOTIFY_PREFIX */ -#ifndef INTEL_ITTNOTIFY_POSTFIX -# define INTEL_ITTNOTIFY_POSTFIX _ptr_ -#endif /* INTEL_ITTNOTIFY_POSTFIX */ - -#define ITTNOTIFY_NAME_AUX(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n) -#define ITTNOTIFY_NAME(n) ITT_VERSIONIZE(ITTNOTIFY_NAME_AUX(ITT_JOIN(n,INTEL_ITTNOTIFY_POSTFIX))) - -#define ITTNOTIFY_VOID(n) (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n) -#define ITTNOTIFY_DATA(n) (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n) - -#define ITTNOTIFY_VOID_D0(n,d) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d) -#define ITTNOTIFY_VOID_D1(n,d,x) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x) -#define ITTNOTIFY_VOID_D2(n,d,x,y) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y) -#define ITTNOTIFY_VOID_D3(n,d,x,y,z) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z) -#define ITTNOTIFY_VOID_D4(n,d,x,y,z,a) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a) -#define ITTNOTIFY_VOID_D5(n,d,x,y,z,a,b) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b) -#define ITTNOTIFY_VOID_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c) -#define ITTNOTIFY_DATA_D0(n,d) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d) -#define ITTNOTIFY_DATA_D1(n,d,x) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x) -#define ITTNOTIFY_DATA_D2(n,d,x,y) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y) -#define ITTNOTIFY_DATA_D3(n,d,x,y,z) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z) -#define ITTNOTIFY_DATA_D4(n,d,x,y,z,a) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a) -#define ITTNOTIFY_DATA_D5(n,d,x,y,z,a,b) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b) -#define ITTNOTIFY_DATA_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 
0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c)
-
-#ifdef ITT_STUB
-#undef ITT_STUB
-#endif
-#ifdef ITT_STUBV
-#undef ITT_STUBV
-#endif
-#define ITT_STUBV(api,type,name,args) \
- typedef type (api* ITT_JOIN(ITTNOTIFY_NAME(name),_t)) args; \
- extern ITT_JOIN(ITTNOTIFY_NAME(name),_t) ITTNOTIFY_NAME(name);
-#define ITT_STUB ITT_STUBV
-/** @endcond */
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/** @cond exclude_from_gpa_documentation */
-/**
- * @defgroup public Public API
- * @{
- * @}
- */
-
-/**
- * @defgroup control Collection Control
- * @ingroup public
- * General behavior: application continues to run, but no profiling information is being collected
- *
- * Pausing occurs not only for the current thread but for the whole process as well as spawned processes
- * - Intel(R) Parallel Inspector and Intel(R) Inspector XE:
- *   - Does not analyze or report errors that involve memory access.
- *   - Other errors are reported as usual. Pausing data collection in
- *     Intel(R) Parallel Inspector and Intel(R) Inspector XE
- *     only pauses tracing and analyzing memory access.
- *     It does not pause tracing or analyzing threading APIs.
- *   .
- * - Intel(R) Parallel Amplifier and Intel(R) VTune(TM) Amplifier XE:
- *   - Does continue to record when new threads are started.
- *   .
- * - Other effects:
- *   - Possible reduction of runtime overhead.
- *   .
- * @{
- */
-/** @brief Pause collection */
-void ITTAPI __itt_pause(void);
-/** @brief Resume collection */
-void ITTAPI __itt_resume(void);
-/** @brief Detach collection */
-void ITTAPI __itt_detach(void);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, pause, (void))
-ITT_STUBV(ITTAPI, void, resume, (void))
-ITT_STUBV(ITTAPI, void, detach, (void))
-#define __itt_pause ITTNOTIFY_VOID(pause)
-#define __itt_pause_ptr ITTNOTIFY_NAME(pause)
-#define __itt_resume ITTNOTIFY_VOID(resume)
-#define __itt_resume_ptr ITTNOTIFY_NAME(resume)
-#define __itt_detach ITTNOTIFY_VOID(detach)
-#define __itt_detach_ptr ITTNOTIFY_NAME(detach)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_pause()
-#define __itt_pause_ptr 0
-#define __itt_resume()
-#define __itt_resume_ptr 0
-#define __itt_detach()
-#define __itt_detach_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_pause_ptr 0
-#define __itt_resume_ptr 0
-#define __itt_detach_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-/** @} control group */
-/** @endcond */
-
-/**
- * @defgroup threads Threads
- * @ingroup public
- * Give names to threads
- * @{
- */
-/**
- * @brief Sets thread name of calling thread
- * @param[in] name - name of thread
- */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-void ITTAPI __itt_thread_set_nameA(const char *name);
-void ITTAPI __itt_thread_set_nameW(const wchar_t *name);
-#if defined(UNICODE) || defined(_UNICODE)
-# define __itt_thread_set_name __itt_thread_set_nameW
-# define __itt_thread_set_name_ptr __itt_thread_set_nameW_ptr
-#else /* UNICODE */
-# define __itt_thread_set_name __itt_thread_set_nameA
-# define __itt_thread_set_name_ptr __itt_thread_set_nameA_ptr
-#endif /* UNICODE */
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-void ITTAPI __itt_thread_set_name(const char *name);
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUBV(ITTAPI, void, thread_set_nameA, (const char *name))
-ITT_STUBV(ITTAPI, void,
thread_set_nameW, (const wchar_t *name)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, thread_set_name, (const char *name)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_thread_set_nameA ITTNOTIFY_VOID(thread_set_nameA) -#define __itt_thread_set_nameA_ptr ITTNOTIFY_NAME(thread_set_nameA) -#define __itt_thread_set_nameW ITTNOTIFY_VOID(thread_set_nameW) -#define __itt_thread_set_nameW_ptr ITTNOTIFY_NAME(thread_set_nameW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_thread_set_name ITTNOTIFY_VOID(thread_set_name) -#define __itt_thread_set_name_ptr ITTNOTIFY_NAME(thread_set_name) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_thread_set_nameA(name) -#define __itt_thread_set_nameA_ptr 0 -#define __itt_thread_set_nameW(name) -#define __itt_thread_set_nameW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_thread_set_name(name) -#define __itt_thread_set_name_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_thread_set_nameA_ptr 0 -#define __itt_thread_set_nameW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_thread_set_name_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** @cond exclude_from_gpa_documentation */ - -/** - * @brief Mark current thread as ignored from this point on, for the duration of its existence. - */ -void ITTAPI __itt_thread_ignore(void); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, thread_ignore, (void)) -#define __itt_thread_ignore ITTNOTIFY_VOID(thread_ignore) -#define __itt_thread_ignore_ptr ITTNOTIFY_NAME(thread_ignore) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_thread_ignore() -#define __itt_thread_ignore_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_thread_ignore_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} threads group */ - -/** - * @defgroup suppress Error suppression - * @ingroup public - * General behavior: application continues to run, but errors are suppressed - * - * @{ - */ - -/*****************************************************************//** - * @name group of functions used for error suppression in correctness tools - *********************************************************************/ -/** @{ */ -/** - * @hideinitializer - * @brief possible value for suppression mask - */ -#define __itt_suppress_all_errors 0x7fffffff - -/** - * @hideinitializer - * @brief possible value for suppression mask (suppresses errors from threading analysis) - */ -#define __itt_suppress_threading_errors 0x000000ff - -/** - * @hideinitializer - * @brief possible value for suppression mask (suppresses errors from memory analysis) - */ -#define __itt_suppress_memory_errors 0x0000ff00 - -/** - * @brief Start suppressing errors identified in mask on this thread - */ -void ITTAPI __itt_suppress_push(unsigned int mask); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, suppress_push, (unsigned int mask)) -#define __itt_suppress_push ITTNOTIFY_VOID(suppress_push) -#define __itt_suppress_push_ptr ITTNOTIFY_NAME(suppress_push) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define 
__itt_suppress_push(mask)
-#define __itt_suppress_push_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_suppress_push_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Undo the effects of the matching call to __itt_suppress_push
- */
-void ITTAPI __itt_suppress_pop(void);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, suppress_pop, (void))
-#define __itt_suppress_pop ITTNOTIFY_VOID(suppress_pop)
-#define __itt_suppress_pop_ptr ITTNOTIFY_NAME(suppress_pop)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_suppress_pop()
-#define __itt_suppress_pop_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_suppress_pop_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @enum __itt_suppress_mode
- * @brief Enumerator for the suppression modes
- */
-typedef enum __itt_suppress_mode {
- __itt_unsuppress_range,
- __itt_suppress_range
-} __itt_suppress_mode_t;
-
-/**
- * @brief Mark a range of memory for error suppression or unsuppression for error types included in mask
- */
-void ITTAPI __itt_suppress_mark_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, suppress_mark_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size))
-#define __itt_suppress_mark_range ITTNOTIFY_VOID(suppress_mark_range)
-#define __itt_suppress_mark_range_ptr ITTNOTIFY_NAME(suppress_mark_range)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_suppress_mark_range(mode, mask, address, size)
-#define __itt_suppress_mark_range_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_suppress_mark_range_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Undo the effect of a matching call to __itt_suppress_mark_range. If no matching
- * call is found, nothing is changed.
- */
-void ITTAPI __itt_suppress_clear_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, suppress_clear_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size))
-#define __itt_suppress_clear_range ITTNOTIFY_VOID(suppress_clear_range)
-#define __itt_suppress_clear_range_ptr ITTNOTIFY_NAME(suppress_clear_range)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_suppress_clear_range(mode, mask, address, size)
-#define __itt_suppress_clear_range_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_suppress_clear_range_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-/** @} */
-/** @} suppress group */
-
-/**
- * @defgroup sync Synchronization
- * @ingroup public
- * Indicate user-written synchronization code
- * @{
- */
-/**
- * @hideinitializer
- * @brief possible value of attribute argument for sync object type
- */
-#define __itt_attr_barrier 1
-
-/**
- * @hideinitializer
- * @brief possible value of attribute argument for sync object type
- */
-#define __itt_attr_mutex 2
-
-/**
-@brief Name a synchronization object
-@param[in] addr Handle for the synchronization object. You should
-use a real address to uniquely identify the synchronization object.
-@param[in] objtype null-terminated object type string.
If NULL is -passed, the name will be "User Synchronization". -@param[in] objname null-terminated object name string. If NULL, -no name will be assigned to the object. -@param[in] attribute one of [#__itt_attr_barrier, #__itt_attr_mutex] - */ - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -void ITTAPI __itt_sync_createA(void *addr, const char *objtype, const char *objname, int attribute); -void ITTAPI __itt_sync_createW(void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_sync_create __itt_sync_createW -# define __itt_sync_create_ptr __itt_sync_createW_ptr -#else /* UNICODE */ -# define __itt_sync_create __itt_sync_createA -# define __itt_sync_create_ptr __itt_sync_createA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -void ITTAPI __itt_sync_create (void *addr, const char *objtype, const char *objname, int attribute); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUBV(ITTAPI, void, sync_createA, (void *addr, const char *objtype, const char *objname, int attribute)) -ITT_STUBV(ITTAPI, void, sync_createW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, sync_create, (void *addr, const char* objtype, const char* objname, int attribute)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_sync_createA ITTNOTIFY_VOID(sync_createA) -#define __itt_sync_createA_ptr ITTNOTIFY_NAME(sync_createA) -#define __itt_sync_createW ITTNOTIFY_VOID(sync_createW) -#define __itt_sync_createW_ptr ITTNOTIFY_NAME(sync_createW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_sync_create ITTNOTIFY_VOID(sync_create) -#define __itt_sync_create_ptr ITTNOTIFY_NAME(sync_create) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_sync_createA(addr, objtype, objname, attribute) -#define __itt_sync_createA_ptr 0 -#define __itt_sync_createW(addr, objtype, objname, attribute) -#define __itt_sync_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_sync_create(addr, objtype, objname, attribute) -#define __itt_sync_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_sync_createA_ptr 0 -#define __itt_sync_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_sync_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** -@brief Rename a synchronization object - -You can use the rename call to assign or reassign a name to a given -synchronization object. -@param[in] addr handle for the synchronization object. -@param[in] name null-terminated object name string. 
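-
-For illustration only (not part of the original documentation): a minimal
-sketch of naming and later renaming one object; my_lock is a hypothetical
-application variable (e.g. a pthread mutex) whose address identifies the object.
-@code
-    pthread_mutex_t my_lock;   // hypothetical user lock
-    __itt_sync_create(&my_lock, "User Synchronization", "QueueLock", __itt_attr_mutex);
-    // ... later, give the same object a more descriptive name ...
-    __itt_sync_rename(&my_lock, "WorkQueueLock");
-@endcode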
-*/ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -void ITTAPI __itt_sync_renameA(void *addr, const char *name); -void ITTAPI __itt_sync_renameW(void *addr, const wchar_t *name); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_sync_rename __itt_sync_renameW -# define __itt_sync_rename_ptr __itt_sync_renameW_ptr -#else /* UNICODE */ -# define __itt_sync_rename __itt_sync_renameA -# define __itt_sync_rename_ptr __itt_sync_renameA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -void ITTAPI __itt_sync_rename(void *addr, const char *name); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUBV(ITTAPI, void, sync_renameA, (void *addr, const char *name)) -ITT_STUBV(ITTAPI, void, sync_renameW, (void *addr, const wchar_t *name)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, sync_rename, (void *addr, const char *name)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_sync_renameA ITTNOTIFY_VOID(sync_renameA) -#define __itt_sync_renameA_ptr ITTNOTIFY_NAME(sync_renameA) -#define __itt_sync_renameW ITTNOTIFY_VOID(sync_renameW) -#define __itt_sync_renameW_ptr ITTNOTIFY_NAME(sync_renameW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_sync_rename ITTNOTIFY_VOID(sync_rename) -#define __itt_sync_rename_ptr ITTNOTIFY_NAME(sync_rename) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_sync_renameA(addr, name) -#define __itt_sync_renameA_ptr 0 -#define __itt_sync_renameW(addr, name) -#define __itt_sync_renameW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_sync_rename(addr, name) -#define __itt_sync_rename_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_sync_renameA_ptr 0 -#define __itt_sync_renameW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_sync_rename_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - @brief Destroy a synchronization object. - @param addr Handle for the synchronization object. 
- */
-void ITTAPI __itt_sync_destroy(void *addr);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, sync_destroy, (void *addr))
-#define __itt_sync_destroy ITTNOTIFY_VOID(sync_destroy)
-#define __itt_sync_destroy_ptr ITTNOTIFY_NAME(sync_destroy)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_sync_destroy(addr)
-#define __itt_sync_destroy_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_sync_destroy_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/*****************************************************************//**
- * @name group of functions used by performance measurement tools
- *********************************************************************/
-/** @{ */
-/**
- * @brief Enter spin loop on user-defined sync object
- */
-void ITTAPI __itt_sync_prepare(void* addr);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, sync_prepare, (void *addr))
-#define __itt_sync_prepare ITTNOTIFY_VOID(sync_prepare)
-#define __itt_sync_prepare_ptr ITTNOTIFY_NAME(sync_prepare)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_sync_prepare(addr)
-#define __itt_sync_prepare_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_sync_prepare_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Quit spin loop without acquiring spin object
- */
-void ITTAPI __itt_sync_cancel(void *addr);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, sync_cancel, (void *addr))
-#define __itt_sync_cancel ITTNOTIFY_VOID(sync_cancel)
-#define __itt_sync_cancel_ptr ITTNOTIFY_NAME(sync_cancel)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_sync_cancel(addr)
-#define __itt_sync_cancel_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_sync_cancel_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Successful spin loop completion (sync object acquired)
- */
-void ITTAPI __itt_sync_acquired(void *addr);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, sync_acquired, (void *addr))
-#define __itt_sync_acquired ITTNOTIFY_VOID(sync_acquired)
-#define __itt_sync_acquired_ptr ITTNOTIFY_NAME(sync_acquired)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_sync_acquired(addr)
-#define __itt_sync_acquired_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_sync_acquired_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Start sync object releasing code. It is called before the lock release call.
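- *
- * A hedged usage sketch (not from the original header) of the whole
- * prepare/cancel/acquired/releasing protocol around a hypothetical spin
- * lock; try_acquire(), give_up(), and release() are assumed application code.
- * @code
- *     __itt_sync_prepare(&lock);        // about to spin on &lock
- *     while (!try_acquire(&lock)) {
- *         if (give_up()) {
- *             __itt_sync_cancel(&lock); // left the loop without the lock
- *             return;
- *         }
- *     }
- *     __itt_sync_acquired(&lock);       // spin completed successfully
- *     // ... critical section ...
- *     __itt_sync_releasing(&lock);      // announce the upcoming release
- *     release(&lock);
- * @endcode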
- */
-void ITTAPI __itt_sync_releasing(void* addr);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, sync_releasing, (void *addr))
-#define __itt_sync_releasing ITTNOTIFY_VOID(sync_releasing)
-#define __itt_sync_releasing_ptr ITTNOTIFY_NAME(sync_releasing)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_sync_releasing(addr)
-#define __itt_sync_releasing_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_sync_releasing_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-/** @} */
-
-/** @} sync group */
-
-/**************************************************************//**
- * @name group of functions used by correctness checking tools
- ******************************************************************/
-/** @{ */
-/**
- * @ingroup legacy
- * @deprecated Legacy API
- * @brief Fast synchronization which does not require spinning.
- * - This special function is to be used by TBB and OpenMP libraries only when they know
- *   there is no spin but they need to suppress TC warnings about shared variable modifications.
- * - It only has corresponding pointers in static library and does not have corresponding function
- *   in dynamic library.
- * @see void __itt_sync_prepare(void* addr);
- */
-void ITTAPI __itt_fsync_prepare(void* addr);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, fsync_prepare, (void *addr))
-#define __itt_fsync_prepare ITTNOTIFY_VOID(fsync_prepare)
-#define __itt_fsync_prepare_ptr ITTNOTIFY_NAME(fsync_prepare)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_fsync_prepare(addr)
-#define __itt_fsync_prepare_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_fsync_prepare_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @ingroup legacy
- * @deprecated Legacy API
- * @brief Fast synchronization which does not require spinning.
- * - This special function is to be used by TBB and OpenMP libraries only when they know
- *   there is no spin but they need to suppress TC warnings about shared variable modifications.
- * - It only has corresponding pointers in static library and does not have corresponding function
- *   in dynamic library.
- * @see void __itt_sync_cancel(void *addr);
- */
-void ITTAPI __itt_fsync_cancel(void *addr);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, fsync_cancel, (void *addr))
-#define __itt_fsync_cancel ITTNOTIFY_VOID(fsync_cancel)
-#define __itt_fsync_cancel_ptr ITTNOTIFY_NAME(fsync_cancel)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_fsync_cancel(addr)
-#define __itt_fsync_cancel_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_fsync_cancel_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @ingroup legacy
- * @deprecated Legacy API
- * @brief Fast synchronization which does not require spinning.
- * - This special function is to be used by TBB and OpenMP libraries only when they know
- *   there is no spin but they need to suppress TC warnings about shared variable modifications.
- * - It only has corresponding pointers in static library and does not have corresponding function
- *   in dynamic library.
- * @see void __itt_sync_acquired(void *addr);
- */
-void ITTAPI __itt_fsync_acquired(void *addr);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, fsync_acquired, (void *addr))
-#define __itt_fsync_acquired ITTNOTIFY_VOID(fsync_acquired)
-#define __itt_fsync_acquired_ptr ITTNOTIFY_NAME(fsync_acquired)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_fsync_acquired(addr)
-#define __itt_fsync_acquired_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_fsync_acquired_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @ingroup legacy
- * @deprecated Legacy API
- * @brief Fast synchronization which does not require spinning.
- * - This special function is to be used by TBB and OpenMP libraries only when they know
- *   there is no spin but they need to suppress TC warnings about shared variable modifications.
- * - It only has corresponding pointers in static library and does not have corresponding function
- *   in dynamic library.
- * @see void __itt_sync_releasing(void* addr);
- */
-void ITTAPI __itt_fsync_releasing(void* addr);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, fsync_releasing, (void *addr))
-#define __itt_fsync_releasing ITTNOTIFY_VOID(fsync_releasing)
-#define __itt_fsync_releasing_ptr ITTNOTIFY_NAME(fsync_releasing)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_fsync_releasing(addr)
-#define __itt_fsync_releasing_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_fsync_releasing_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-/** @} */
-
-/**
- * @defgroup model Modeling by Intel(R) Parallel Advisor
- * @ingroup public
- * This is the subset of itt used for modeling by Intel(R) Parallel Advisor.
- * This API is called ONLY using annotate.h, by "Annotation" macros
- * the user places in their sources during the parallelism modeling steps.
- *
- * site_begin/end and task_begin/end take the address of handle variables,
- * which are writeable by the API. Handles must be 0 initialized prior
- * to the first call to begin, or a run-time failure may result.
- * The handles are initialized in a multi-thread safe way by the API if
- * the handle is 0. The commonly expected idiom is one static handle to
- * identify a site or task. If a site or task of the same name has already
- * been started during this collection, the same handle MAY be returned,
- * but is not required to be - it is unspecified if data merging is done
- * based on name. These routines also take an instance variable. Like
- * the lexical instance, these must be 0 initialized. Unlike the lexical
- * instance, this is used to track a single dynamic instance.
- *
- * API used by the Intel(R) Parallel Advisor to describe potential concurrency
- * and related activities. User-added source annotations expand to calls
- * to these procedures to enable modeling of a hypothetical concurrent
- * execution serially.
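- *
- * A hedged sketch (not part of the original text) of the handle idiom
- * described above; in practice these calls are generated by the Annotation
- * macros in annotate.h rather than written by hand, and the site name
- * "ParallelSite1" is hypothetical.
- * @code
- *     static __itt_model_site          site;     // static handle, 0 initialized
- *     static __itt_model_site_instance instance; // tracks one dynamic instance
- *
- *     __itt_model_site_begin(&site, &instance, "ParallelSite1");
- *     // ... code containing the potential tasks ...
- *     __itt_model_site_end(&site, &instance);
- * @endcode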
- * @{
- */
-#if !defined(_ADVISOR_ANNOTATE_H_) || defined(ANNOTATE_EXPAND_NULL)
-
-typedef void* __itt_model_site; /*!< @brief handle for lexical site */
-typedef void* __itt_model_site_instance; /*!< @brief handle for dynamic instance */
-typedef void* __itt_model_task; /*!< @brief handle for lexical task */
-typedef void* __itt_model_task_instance; /*!< @brief handle for dynamic instance */
-
-/**
- * @enum __itt_model_disable
- * @brief Enumerator for the disable methods
- */
-typedef enum {
- __itt_model_disable_observation,
- __itt_model_disable_collection
-} __itt_model_disable;
-
-#endif /* !_ADVISOR_ANNOTATE_H_ || ANNOTATE_EXPAND_NULL */
-
-/**
- * @brief ANNOTATE_SITE_BEGIN/ANNOTATE_SITE_END support.
- *
- * site_begin/end model a potential concurrency site.
- * site instances may be recursively nested with themselves.
- * site_end exits the most recently started but unended site for the current
- * thread. The handle passed to end may be used to validate structure.
- * Instances of a site encountered on different threads concurrently
- * are considered completely distinct. If the site name for two different
- * lexical sites match, it is unspecified whether they are treated as the
- * same or different for data presentation.
- */
-void ITTAPI __itt_model_site_begin(__itt_model_site *site, __itt_model_site_instance *instance, const char *name);
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-void ITTAPI __itt_model_site_beginW(const wchar_t *name);
-#endif
-void ITTAPI __itt_model_site_beginA(const char *name);
-void ITTAPI __itt_model_site_beginAL(const char *name, size_t siteNameLen);
-void ITTAPI __itt_model_site_end (__itt_model_site *site, __itt_model_site_instance *instance);
-void ITTAPI __itt_model_site_end_2(void);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, model_site_begin, (__itt_model_site *site, __itt_model_site_instance *instance, const char *name))
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUBV(ITTAPI, void, model_site_beginW, (const wchar_t *name))
-#endif
-ITT_STUBV(ITTAPI, void, model_site_beginA, (const char *name))
-ITT_STUBV(ITTAPI, void, model_site_beginAL, (const char *name, size_t siteNameLen))
-ITT_STUBV(ITTAPI, void, model_site_end, (__itt_model_site *site, __itt_model_site_instance *instance))
-ITT_STUBV(ITTAPI, void, model_site_end_2, (void))
-#define __itt_model_site_begin ITTNOTIFY_VOID(model_site_begin)
-#define __itt_model_site_begin_ptr ITTNOTIFY_NAME(model_site_begin)
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_model_site_beginW ITTNOTIFY_VOID(model_site_beginW)
-#define __itt_model_site_beginW_ptr ITTNOTIFY_NAME(model_site_beginW)
-#endif
-#define __itt_model_site_beginA ITTNOTIFY_VOID(model_site_beginA)
-#define __itt_model_site_beginA_ptr ITTNOTIFY_NAME(model_site_beginA)
-#define __itt_model_site_beginAL ITTNOTIFY_VOID(model_site_beginAL)
-#define __itt_model_site_beginAL_ptr ITTNOTIFY_NAME(model_site_beginAL)
-#define __itt_model_site_end ITTNOTIFY_VOID(model_site_end)
-#define __itt_model_site_end_ptr ITTNOTIFY_NAME(model_site_end)
-#define __itt_model_site_end_2 ITTNOTIFY_VOID(model_site_end_2)
-#define __itt_model_site_end_2_ptr ITTNOTIFY_NAME(model_site_end_2)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_model_site_begin(site, instance, name)
-#define __itt_model_site_begin_ptr 0
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_model_site_beginW(name)
-#define __itt_model_site_beginW_ptr 0
-#endif
-#define __itt_model_site_beginA(name)
-#define
__itt_model_site_beginA_ptr 0 -#define __itt_model_site_beginAL(name, siteNameLen) -#define __itt_model_site_beginAL_ptr 0 -#define __itt_model_site_end(site, instance) -#define __itt_model_site_end_ptr 0 -#define __itt_model_site_end_2() -#define __itt_model_site_end_2_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_model_site_begin_ptr 0 -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_model_site_beginW_ptr 0 -#endif -#define __itt_model_site_beginA_ptr 0 -#define __itt_model_site_beginAL_ptr 0 -#define __itt_model_site_end_ptr 0 -#define __itt_model_site_end_2_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief ANNOTATE_TASK_BEGIN/ANNOTATE_TASK_END support - * - * task_begin/end model a potential task, which is contained within the most - * closely enclosing dynamic site. task_end exits the most recently started - * but unended task. The handle passed to end may be used to validate - * structure. It is unspecified if bad dynamic nesting is detected. If it - * is, it should be encoded in the resulting data collection. The collector - * should not fail due to construct nesting issues, nor attempt to directly - * indicate the problem. - */ -void ITTAPI __itt_model_task_begin(__itt_model_task *task, __itt_model_task_instance *instance, const char *name); -#if ITT_PLATFORM==ITT_PLATFORM_WIN -void ITTAPI __itt_model_task_beginW(const wchar_t *name); -void ITTAPI __itt_model_iteration_taskW(const wchar_t *name); -#endif -void ITTAPI __itt_model_task_beginA(const char *name); -void ITTAPI __itt_model_task_beginAL(const char *name, size_t taskNameLen); -void ITTAPI __itt_model_iteration_taskA(const char *name); -void ITTAPI __itt_model_iteration_taskAL(const char *name, size_t taskNameLen); -void ITTAPI __itt_model_task_end (__itt_model_task *task, __itt_model_task_instance *instance); -void ITTAPI __itt_model_task_end_2(void); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, model_task_begin, (__itt_model_task *task, __itt_model_task_instance *instance, const char *name)) -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUBV(ITTAPI, void, model_task_beginW, (const wchar_t *name)) -ITT_STUBV(ITTAPI, void, model_iteration_taskW, (const wchar_t *name)) -#endif -ITT_STUBV(ITTAPI, void, model_task_beginA, (const char *name)) -ITT_STUBV(ITTAPI, void, model_task_beginAL, (const char *name, size_t taskNameLen)) -ITT_STUBV(ITTAPI, void, model_iteration_taskA, (const char *name)) -ITT_STUBV(ITTAPI, void, model_iteration_taskAL, (const char *name, size_t taskNameLen)) -ITT_STUBV(ITTAPI, void, model_task_end, (__itt_model_task *task, __itt_model_task_instance *instance)) -ITT_STUBV(ITTAPI, void, model_task_end_2, (void)) -#define __itt_model_task_begin ITTNOTIFY_VOID(model_task_begin) -#define __itt_model_task_begin_ptr ITTNOTIFY_NAME(model_task_begin) -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_model_task_beginW ITTNOTIFY_VOID(model_task_beginW) -#define __itt_model_task_beginW_ptr ITTNOTIFY_NAME(model_task_beginW) -#define __itt_model_iteration_taskW ITTNOTIFY_VOID(model_iteration_taskW) -#define __itt_model_iteration_taskW_ptr ITTNOTIFY_NAME(model_iteration_taskW) -#endif -#define __itt_model_task_beginA ITTNOTIFY_VOID(model_task_beginA) -#define __itt_model_task_beginA_ptr ITTNOTIFY_NAME(model_task_beginA) -#define __itt_model_task_beginAL ITTNOTIFY_VOID(model_task_beginAL) -#define __itt_model_task_beginAL_ptr ITTNOTIFY_NAME(model_task_beginAL) -#define 
__itt_model_iteration_taskA ITTNOTIFY_VOID(model_iteration_taskA) -#define __itt_model_iteration_taskA_ptr ITTNOTIFY_NAME(model_iteration_taskA) -#define __itt_model_iteration_taskAL ITTNOTIFY_VOID(model_iteration_taskAL) -#define __itt_model_iteration_taskAL_ptr ITTNOTIFY_NAME(model_iteration_taskAL) -#define __itt_model_task_end ITTNOTIFY_VOID(model_task_end) -#define __itt_model_task_end_ptr ITTNOTIFY_NAME(model_task_end) -#define __itt_model_task_end_2 ITTNOTIFY_VOID(model_task_end_2) -#define __itt_model_task_end_2_ptr ITTNOTIFY_NAME(model_task_end_2) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_model_task_begin(task, instance, name) -#define __itt_model_task_begin_ptr 0 -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_model_task_beginW(name) -#define __itt_model_task_beginW_ptr 0 -#endif -#define __itt_model_task_beginA(name) -#define __itt_model_task_beginA_ptr 0 -#define __itt_model_task_beginAL(name, siteNameLen) -#define __itt_model_task_beginAL_ptr 0 -#define __itt_model_iteration_taskA(name) -#define __itt_model_iteration_taskA_ptr 0 -#define __itt_model_iteration_taskAL(name, siteNameLen) -#define __itt_model_iteration_taskAL_ptr 0 -#define __itt_model_task_end(task, instance) -#define __itt_model_task_end_ptr 0 -#define __itt_model_task_end_2() -#define __itt_model_task_end_2_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_model_task_begin_ptr 0 -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_model_task_beginW_ptr 0 -#endif -#define __itt_model_task_beginA_ptr 0 -#define __itt_model_task_beginAL_ptr 0 -#define __itt_model_iteration_taskA_ptr 0 -#define __itt_model_iteration_taskAL_ptr 0 -#define __itt_model_task_end_ptr 0 -#define __itt_model_task_end_2_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief ANNOTATE_LOCK_ACQUIRE/ANNOTATE_LOCK_RELEASE support - * - * lock_acquire/release model a potential lock for both lockset and - * performance modeling. Each unique address is modeled as a separate - * lock, with invalid addresses being valid lock IDs. Specifically: - * no storage is accessed by the API at the specified address - it is only - * used for lock identification. Lock acquires may be self-nested and are - * unlocked by a corresponding number of releases. - * (These closely correspond to __itt_sync_acquired/__itt_sync_releasing, - * but may not have identical semantics.) 
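- *
- * An illustrative sketch (not in the original header); lock_word is a
- * hypothetical variable used purely as a lock identifier:
- * @code
- *     static int lock_word;                 // only the address matters
- *
- *     __itt_model_lock_acquire(&lock_word); // may be self-nested
- *     // ... protected region ...
- *     __itt_model_lock_release(&lock_word);
- * @endcode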
- */ -void ITTAPI __itt_model_lock_acquire(void *lock); -void ITTAPI __itt_model_lock_acquire_2(void *lock); -void ITTAPI __itt_model_lock_release(void *lock); -void ITTAPI __itt_model_lock_release_2(void *lock); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, model_lock_acquire, (void *lock)) -ITT_STUBV(ITTAPI, void, model_lock_acquire_2, (void *lock)) -ITT_STUBV(ITTAPI, void, model_lock_release, (void *lock)) -ITT_STUBV(ITTAPI, void, model_lock_release_2, (void *lock)) -#define __itt_model_lock_acquire ITTNOTIFY_VOID(model_lock_acquire) -#define __itt_model_lock_acquire_ptr ITTNOTIFY_NAME(model_lock_acquire) -#define __itt_model_lock_acquire_2 ITTNOTIFY_VOID(model_lock_acquire_2) -#define __itt_model_lock_acquire_2_ptr ITTNOTIFY_NAME(model_lock_acquire_2) -#define __itt_model_lock_release ITTNOTIFY_VOID(model_lock_release) -#define __itt_model_lock_release_ptr ITTNOTIFY_NAME(model_lock_release) -#define __itt_model_lock_release_2 ITTNOTIFY_VOID(model_lock_release_2) -#define __itt_model_lock_release_2_ptr ITTNOTIFY_NAME(model_lock_release_2) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_model_lock_acquire(lock) -#define __itt_model_lock_acquire_ptr 0 -#define __itt_model_lock_acquire_2(lock) -#define __itt_model_lock_acquire_2_ptr 0 -#define __itt_model_lock_release(lock) -#define __itt_model_lock_release_ptr 0 -#define __itt_model_lock_release_2(lock) -#define __itt_model_lock_release_2_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_model_lock_acquire_ptr 0 -#define __itt_model_lock_acquire_2_ptr 0 -#define __itt_model_lock_release_ptr 0 -#define __itt_model_lock_release_2_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief ANNOTATE_RECORD_ALLOCATION/ANNOTATE_RECORD_DEALLOCATION support - * - * record_allocation/deallocation describe user-defined memory allocator - * behavior, which may be required for correctness modeling to understand - * when storage is not expected to be actually reused across threads. 
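- *
- * A hedged sketch (not part of the original documentation) for a
- * hypothetical pool allocator; my_pool_alloc(), my_pool_free(), pool,
- * and size are all assumed application code.
- * @code
- *     void *p = my_pool_alloc(pool, size);    // hypothetical allocator
- *     __itt_model_record_allocation(p, size); // p now denotes fresh storage
- *     // ... use p ...
- *     __itt_model_record_deallocation(p);
- *     my_pool_free(pool, p);
- * @endcode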
- */ -void ITTAPI __itt_model_record_allocation (void *addr, size_t size); -void ITTAPI __itt_model_record_deallocation(void *addr); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, model_record_allocation, (void *addr, size_t size)) -ITT_STUBV(ITTAPI, void, model_record_deallocation, (void *addr)) -#define __itt_model_record_allocation ITTNOTIFY_VOID(model_record_allocation) -#define __itt_model_record_allocation_ptr ITTNOTIFY_NAME(model_record_allocation) -#define __itt_model_record_deallocation ITTNOTIFY_VOID(model_record_deallocation) -#define __itt_model_record_deallocation_ptr ITTNOTIFY_NAME(model_record_deallocation) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_model_record_allocation(addr, size) -#define __itt_model_record_allocation_ptr 0 -#define __itt_model_record_deallocation(addr) -#define __itt_model_record_deallocation_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_model_record_allocation_ptr 0 -#define __itt_model_record_deallocation_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief ANNOTATE_INDUCTION_USES support - * - * Note particular storage is inductive through the end of the current site - */ -void ITTAPI __itt_model_induction_uses(void* addr, size_t size); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, model_induction_uses, (void *addr, size_t size)) -#define __itt_model_induction_uses ITTNOTIFY_VOID(model_induction_uses) -#define __itt_model_induction_uses_ptr ITTNOTIFY_NAME(model_induction_uses) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_model_induction_uses(addr, size) -#define __itt_model_induction_uses_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_model_induction_uses_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief ANNOTATE_REDUCTION_USES support - * - * Note particular storage is used for reduction through the end - * of the current site - */ -void ITTAPI __itt_model_reduction_uses(void* addr, size_t size); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, model_reduction_uses, (void *addr, size_t size)) -#define __itt_model_reduction_uses ITTNOTIFY_VOID(model_reduction_uses) -#define __itt_model_reduction_uses_ptr ITTNOTIFY_NAME(model_reduction_uses) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_model_reduction_uses(addr, size) -#define __itt_model_reduction_uses_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_model_reduction_uses_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief ANNOTATE_OBSERVE_USES support - * - * Have correctness modeling record observations about uses of storage - * through the end of the current site - */ -void ITTAPI __itt_model_observe_uses(void* addr, size_t size); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, model_observe_uses, (void *addr, size_t size)) -#define __itt_model_observe_uses ITTNOTIFY_VOID(model_observe_uses) -#define __itt_model_observe_uses_ptr ITTNOTIFY_NAME(model_observe_uses) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_model_observe_uses(addr, size) -#define __itt_model_observe_uses_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define 
__itt_model_observe_uses_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief ANNOTATE_CLEAR_USES support - * - * Clear the special handling of a piece of storage related to induction, - * reduction or observe_uses - */ -void ITTAPI __itt_model_clear_uses(void* addr); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, model_clear_uses, (void *addr)) -#define __itt_model_clear_uses ITTNOTIFY_VOID(model_clear_uses) -#define __itt_model_clear_uses_ptr ITTNOTIFY_NAME(model_clear_uses) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_model_clear_uses(addr) -#define __itt_model_clear_uses_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_model_clear_uses_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief ANNOTATE_DISABLE_*_PUSH/ANNOTATE_DISABLE_*_POP support - * - * disable_push/disable_pop push and pop disabling based on a parameter. - * Disabling observations stops processing of memory references during - * correctness modeling, and all annotations that occur in the disabled - * region. This allows description of code that is expected to be handled - * specially during conversion to parallelism or that is not recognized - * by tools (e.g. some kinds of synchronization operations.) - * This mechanism causes all annotations in the disabled region, other - * than disable_push and disable_pop, to be ignored. (For example, this - * might validly be used to disable an entire parallel site and the contained - * tasks and locking in it for data collection purposes.) - * The disable for collection is a more expensive operation, but reduces - * collector overhead significantly. This applies to BOTH correctness data - * collection and performance data collection. For example, a site - * containing a task might only enable data collection for the first 10 - * iterations. Both performance and correctness data should reflect this, - * and the program should run as close to full speed as possible when - * collection is disabled. 
- */ -void ITTAPI __itt_model_disable_push(__itt_model_disable x); -void ITTAPI __itt_model_disable_pop(void); -void ITTAPI __itt_model_aggregate_task(size_t x); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, model_disable_push, (__itt_model_disable x)) -ITT_STUBV(ITTAPI, void, model_disable_pop, (void)) -ITT_STUBV(ITTAPI, void, model_aggregate_task, (size_t x)) -#define __itt_model_disable_push ITTNOTIFY_VOID(model_disable_push) -#define __itt_model_disable_push_ptr ITTNOTIFY_NAME(model_disable_push) -#define __itt_model_disable_pop ITTNOTIFY_VOID(model_disable_pop) -#define __itt_model_disable_pop_ptr ITTNOTIFY_NAME(model_disable_pop) -#define __itt_model_aggregate_task ITTNOTIFY_VOID(model_aggregate_task) -#define __itt_model_aggregate_task_ptr ITTNOTIFY_NAME(model_aggregate_task) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_model_disable_push(x) -#define __itt_model_disable_push_ptr 0 -#define __itt_model_disable_pop() -#define __itt_model_disable_pop_ptr 0 -#define __itt_model_aggregate_task(x) -#define __itt_model_aggregate_task_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_model_disable_push_ptr 0 -#define __itt_model_disable_pop_ptr 0 -#define __itt_model_aggregate_task_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} model group */ - -/** - * @defgroup heap Heap - * @ingroup public - * Heap group - * @{ - */ - -typedef void* __itt_heap_function; - -/** - * @brief Create an identification for heap function - * @return non-zero identifier or NULL - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -__itt_heap_function ITTAPI __itt_heap_function_createA(const char* name, const char* domain); -__itt_heap_function ITTAPI __itt_heap_function_createW(const wchar_t* name, const wchar_t* domain); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_heap_function_create __itt_heap_function_createW -# define __itt_heap_function_create_ptr __itt_heap_function_createW_ptr -#else -# define __itt_heap_function_create __itt_heap_function_createA -# define __itt_heap_function_create_ptr __itt_heap_function_createA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -__itt_heap_function ITTAPI __itt_heap_function_create(const char* name, const char* domain); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createA, (const char* name, const char* domain)) -ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createW, (const wchar_t* name, const wchar_t* domain)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_heap_function, heap_function_create, (const char* name, const char* domain)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_heap_function_createA ITTNOTIFY_DATA(heap_function_createA) -#define __itt_heap_function_createA_ptr ITTNOTIFY_NAME(heap_function_createA) -#define __itt_heap_function_createW ITTNOTIFY_DATA(heap_function_createW) -#define __itt_heap_function_createW_ptr ITTNOTIFY_NAME(heap_function_createW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_heap_function_create ITTNOTIFY_DATA(heap_function_create) -#define __itt_heap_function_create_ptr ITTNOTIFY_NAME(heap_function_create) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ 
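/* Illustrative usage sketch (editorial addition, not part of the removed
 * header): bracketing a code region with __itt_model_disable_push and
 * __itt_model_disable_pop so the collector ignores it. Assumes the
 * __itt_model_disable enumerators (e.g. __itt_model_disable_collection)
 * declared earlier in this header; push and pop must pair exactly. */
#include "ittnotify.h"

static void warmup(void)
{
    __itt_model_disable_push(__itt_model_disable_collection); /* suspend collection */
    /* ... code whose memory references and annotations should be ignored ... */
    __itt_model_disable_pop();                                /* resume collection */
}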
-#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_heap_function_createA(name, domain) (__itt_heap_function)0 -#define __itt_heap_function_createA_ptr 0 -#define __itt_heap_function_createW(name, domain) (__itt_heap_function)0 -#define __itt_heap_function_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_heap_function_create(name, domain) (__itt_heap_function)0 -#define __itt_heap_function_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_heap_function_createA_ptr 0 -#define __itt_heap_function_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_heap_function_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Record an allocation begin occurrence. - */ -void ITTAPI __itt_heap_allocate_begin(__itt_heap_function h, size_t size, int initialized); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, heap_allocate_begin, (__itt_heap_function h, size_t size, int initialized)) -#define __itt_heap_allocate_begin ITTNOTIFY_VOID(heap_allocate_begin) -#define __itt_heap_allocate_begin_ptr ITTNOTIFY_NAME(heap_allocate_begin) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_heap_allocate_begin(h, size, initialized) -#define __itt_heap_allocate_begin_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_heap_allocate_begin_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Record an allocation end occurrence. - */ -void ITTAPI __itt_heap_allocate_end(__itt_heap_function h, void** addr, size_t size, int initialized); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, heap_allocate_end, (__itt_heap_function h, void** addr, size_t size, int initialized)) -#define __itt_heap_allocate_end ITTNOTIFY_VOID(heap_allocate_end) -#define __itt_heap_allocate_end_ptr ITTNOTIFY_NAME(heap_allocate_end) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_heap_allocate_end(h, addr, size, initialized) -#define __itt_heap_allocate_end_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_heap_allocate_end_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Record a free begin occurrence. - */ -void ITTAPI __itt_heap_free_begin(__itt_heap_function h, void* addr); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, heap_free_begin, (__itt_heap_function h, void* addr)) -#define __itt_heap_free_begin ITTNOTIFY_VOID(heap_free_begin) -#define __itt_heap_free_begin_ptr ITTNOTIFY_NAME(heap_free_begin) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_heap_free_begin(h, addr) -#define __itt_heap_free_begin_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_heap_free_begin_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Record a free end occurrence. 
- */ -void ITTAPI __itt_heap_free_end(__itt_heap_function h, void* addr); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, heap_free_end, (__itt_heap_function h, void* addr)) -#define __itt_heap_free_end ITTNOTIFY_VOID(heap_free_end) -#define __itt_heap_free_end_ptr ITTNOTIFY_NAME(heap_free_end) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_heap_free_end(h, addr) -#define __itt_heap_free_end_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_heap_free_end_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Record a reallocation begin occurrence. - */ -void ITTAPI __itt_heap_reallocate_begin(__itt_heap_function h, void* addr, size_t new_size, int initialized); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, heap_reallocate_begin, (__itt_heap_function h, void* addr, size_t new_size, int initialized)) -#define __itt_heap_reallocate_begin ITTNOTIFY_VOID(heap_reallocate_begin) -#define __itt_heap_reallocate_begin_ptr ITTNOTIFY_NAME(heap_reallocate_begin) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_heap_reallocate_begin(h, addr, new_size, initialized) -#define __itt_heap_reallocate_begin_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_heap_reallocate_begin_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Record a reallocation end occurrence. - */ -void ITTAPI __itt_heap_reallocate_end(__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, heap_reallocate_end, (__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized)) -#define __itt_heap_reallocate_end ITTNOTIFY_VOID(heap_reallocate_end) -#define __itt_heap_reallocate_end_ptr ITTNOTIFY_NAME(heap_reallocate_end) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_heap_reallocate_end(h, addr, new_addr, new_size, initialized) -#define __itt_heap_reallocate_end_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_heap_reallocate_end_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** @brief internal access begin */ -void ITTAPI __itt_heap_internal_access_begin(void); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, heap_internal_access_begin, (void)) -#define __itt_heap_internal_access_begin ITTNOTIFY_VOID(heap_internal_access_begin) -#define __itt_heap_internal_access_begin_ptr ITTNOTIFY_NAME(heap_internal_access_begin) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_heap_internal_access_begin() -#define __itt_heap_internal_access_begin_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_heap_internal_access_begin_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** @brief internal access end */ -void ITTAPI __itt_heap_internal_access_end(void); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, heap_internal_access_end, (void)) -#define __itt_heap_internal_access_end ITTNOTIFY_VOID(heap_internal_access_end) -#define __itt_heap_internal_access_end_ptr ITTNOTIFY_NAME(heap_internal_access_end) -#else /* 
INTEL_NO_ITTNOTIFY_API */ -#define __itt_heap_internal_access_end() -#define __itt_heap_internal_access_end_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_heap_internal_access_end_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** @brief record memory growth begin */ -void ITTAPI __itt_heap_record_memory_growth_begin(void); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, heap_record_memory_growth_begin, (void)) -#define __itt_heap_record_memory_growth_begin ITTNOTIFY_VOID(heap_record_memory_growth_begin) -#define __itt_heap_record_memory_growth_begin_ptr ITTNOTIFY_NAME(heap_record_memory_growth_begin) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_heap_record_memory_growth_begin() -#define __itt_heap_record_memory_growth_begin_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_heap_record_memory_growth_begin_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** @brief record memory growth end */ -void ITTAPI __itt_heap_record_memory_growth_end(void); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, heap_record_memory_growth_end, (void)) -#define __itt_heap_record_memory_growth_end ITTNOTIFY_VOID(heap_record_memory_growth_end) -#define __itt_heap_record_memory_growth_end_ptr ITTNOTIFY_NAME(heap_record_memory_growth_end) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_heap_record_memory_growth_end() -#define __itt_heap_record_memory_growth_end_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_heap_record_memory_growth_end_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Specify the type of heap detection/reporting to modify. - */ -/** - * @hideinitializer - * @brief Report on memory leaks. - */ -#define __itt_heap_leaks 0x00000001 - -/** - * @hideinitializer - * @brief Report on memory growth. 
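/* Illustrative usage sketch (editorial addition, not part of the removed
 * header): wrapping a custom allocator with the heap API above so tools can
 * match allocate/free pairs. The names my_malloc/my_free and the "mylib"
 * domain string are placeholders. */
#include <stdlib.h>
#include "ittnotify.h"

static __itt_heap_function alloc_fn;
static __itt_heap_function free_fn;

void my_heap_init(void)
{
    alloc_fn = __itt_heap_function_create("my_malloc", "mylib");
    free_fn  = __itt_heap_function_create("my_free",   "mylib");
}

void *my_malloc(size_t size)
{
    void *p;
    __itt_heap_allocate_begin(alloc_fn, size, 0 /* not initialized */);
    p = malloc(size);
    __itt_heap_allocate_end(alloc_fn, &p, size, 0);
    return p;
}

void my_free(void *p)
{
    __itt_heap_free_begin(free_fn, p);
    free(p);
    __itt_heap_free_end(free_fn, p);
}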
- */ -#define __itt_heap_growth 0x00000002 - - -/** @brief Reset detection for the heap detection types selected in reset_mask */ -void ITTAPI __itt_heap_reset_detection(unsigned int reset_mask); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, heap_reset_detection, (unsigned int reset_mask)) -#define __itt_heap_reset_detection ITTNOTIFY_VOID(heap_reset_detection) -#define __itt_heap_reset_detection_ptr ITTNOTIFY_NAME(heap_reset_detection) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_heap_reset_detection(reset_mask) -#define __itt_heap_reset_detection_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_heap_reset_detection_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** @brief Record a report for the heap detection types selected in record_mask */ -void ITTAPI __itt_heap_record(unsigned int record_mask); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, heap_record, (unsigned int record_mask)) -#define __itt_heap_record ITTNOTIFY_VOID(heap_record) -#define __itt_heap_record_ptr ITTNOTIFY_NAME(heap_record) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_heap_record(record_mask) -#define __itt_heap_record_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_heap_record_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** @} heap group */ -/** @endcond */ -/* ========================================================================== */ - -/** - * @defgroup domains Domains - * @ingroup public - * Domains group - * @{ - */ - -/** @cond exclude_from_documentation */ -#pragma pack(push, 8) - -typedef struct ___itt_domain -{ - volatile int flags; /*!< Zero if disabled, non-zero if enabled. The meaning of different non-zero values is reserved to the runtime */ - const char* nameA; /*!< Copy of original name in ASCII. */ -#if defined(UNICODE) || defined(_UNICODE) - const wchar_t* nameW; /*!< Copy of original name in UNICODE. */ -#else /* UNICODE || _UNICODE */ - void* nameW; -#endif /* UNICODE || _UNICODE */ - int extra1; /*!< Reserved to the runtime */ - void* extra2; /*!< Reserved to the runtime */ - struct ___itt_domain* next; -} __itt_domain; - -#pragma pack(pop) -/** @endcond */ - -/** - * @ingroup domains - * @brief Create a domain. - * Create domain using some domain name: the URI naming style is recommended. - * Because the set of domains is expected to be static over the application's - * execution time, there is no mechanism to destroy a domain. - * Any domain can be accessed by any thread in the process, regardless of - * which thread created the domain. This call is thread-safe. 
- * @param[in] name The name of the domain - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -__itt_domain* ITTAPI __itt_domain_createA(const char *name); -__itt_domain* ITTAPI __itt_domain_createW(const wchar_t *name); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_domain_create __itt_domain_createW -# define __itt_domain_create_ptr __itt_domain_createW_ptr -#else /* UNICODE */ -# define __itt_domain_create __itt_domain_createA -# define __itt_domain_create_ptr __itt_domain_createA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -__itt_domain* ITTAPI __itt_domain_create(const char *name); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_domain*, domain_createA, (const char *name)) -ITT_STUB(ITTAPI, __itt_domain*, domain_createW, (const wchar_t *name)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_domain*, domain_create, (const char *name)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_domain_createA ITTNOTIFY_DATA(domain_createA) -#define __itt_domain_createA_ptr ITTNOTIFY_NAME(domain_createA) -#define __itt_domain_createW ITTNOTIFY_DATA(domain_createW) -#define __itt_domain_createW_ptr ITTNOTIFY_NAME(domain_createW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_domain_create ITTNOTIFY_DATA(domain_create) -#define __itt_domain_create_ptr ITTNOTIFY_NAME(domain_create) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_domain_createA(name) (__itt_domain*)0 -#define __itt_domain_createA_ptr 0 -#define __itt_domain_createW(name) (__itt_domain*)0 -#define __itt_domain_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_domain_create(name) (__itt_domain*)0 -#define __itt_domain_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_domain_createA_ptr 0 -#define __itt_domain_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_domain_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} domains group */ - -/** - * @defgroup ids IDs - * @ingroup public - * IDs group - * @{ - */ - -/** @cond exclude_from_documentation */ -#pragma pack(push, 8) - -typedef struct ___itt_id -{ - unsigned long long d1, d2, d3; -} __itt_id; - -#pragma pack(pop) -/** @endcond */ - -static const __itt_id __itt_null = { 0, 0, 0 }; - -/** - * @ingroup ids - * @brief A convenience function is provided to create an ID without domain control. - * This is a convenience function to initialize an __itt_id structure. This function - * does not affect the collector runtime in any way. After you make the ID with this - * function, you still must create it with the __itt_id_create function before using the ID - * to identify a named entity. - * @param[in] addr The address of the object; high QWORD of the ID value. - * @param[in] extra The extra data to uniquely identify the object; low QWORD of the ID value. 
- */ - -ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra) ITT_INLINE_ATTRIBUTE; -ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra) -{ - __itt_id id = __itt_null; - id.d1 = (unsigned long long)((uintptr_t)addr); - id.d2 = (unsigned long long)extra; - id.d3 = (unsigned long long)0; /* Reserved. Must be zero */ - return id; -} - -/** - * @ingroup ids - * @brief Create an instance of identifier. - * This establishes the beginning of the lifetime of an instance of - * the given ID in the trace. Once this lifetime starts, the ID - * can be used to tag named entity instances in calls such as - * __itt_task_begin, and to specify relationships among - * identified named entity instances, using the \ref relations APIs. - * Instance IDs are not domain specific! - * @param[in] domain The domain controlling the execution of this call. - * @param[in] id The ID to create. - */ -void ITTAPI __itt_id_create(const __itt_domain *domain, __itt_id id); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, id_create, (const __itt_domain *domain, __itt_id id)) -#define __itt_id_create(d,x) ITTNOTIFY_VOID_D1(id_create,d,x) -#define __itt_id_create_ptr ITTNOTIFY_NAME(id_create) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_id_create(domain,id) -#define __itt_id_create_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_id_create_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @ingroup ids - * @brief Destroy an instance of identifier. - * This ends the lifetime of the current instance of the given ID value in the trace. - * Any relationships that are established after this lifetime ends are invalid. - * This call must be performed before the given ID value can be reused for a different - * named entity instance. - * @param[in] domain The domain controlling the execution of this call. - * @param[in] id The ID to destroy. - */ -void ITTAPI __itt_id_destroy(const __itt_domain *domain, __itt_id id); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, id_destroy, (const __itt_domain *domain, __itt_id id)) -#define __itt_id_destroy(d,x) ITTNOTIFY_VOID_D1(id_destroy,d,x) -#define __itt_id_destroy_ptr ITTNOTIFY_NAME(id_destroy) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_id_destroy(domain,id) -#define __itt_id_destroy_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_id_destroy_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} ids group */ - -/** - * @defgroup handles String Handles - * @ingroup public - * String Handles group - * @{ - */ - -/** @cond exclude_from_documentation */ -#pragma pack(push, 8) - -typedef struct ___itt_string_handle -{ - const char* strA; /*!< Copy of original string in ASCII. */ -#if defined(UNICODE) || defined(_UNICODE) - const wchar_t* strW; /*!< Copy of original string in UNICODE. */ -#else /* UNICODE || _UNICODE */ - void* strW; -#endif /* UNICODE || _UNICODE */ - int extra1; /*!< Reserved. Must be zero */ - void* extra2; /*!< Reserved. Must be zero */ - struct ___itt_string_handle* next; -} __itt_string_handle; - -#pragma pack(pop) -/** @endcond */ - -/** - * @ingroup handles - * @brief Create a string handle. - * Create and return a handle value that can be associated with a string. 
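/* Illustrative usage sketch (editorial addition, not part of the removed
 * header): the typical ID lifetime. An ID is built from an object address
 * with __itt_id_make, registered with __itt_id_create, and retired with
 * __itt_id_destroy before the value may be reused. The domain name is a
 * placeholder in the recommended URI style. */
#include "ittnotify.h"

void trace_object_lifetime(void *obj)
{
    __itt_domain *d = __itt_domain_create("com.example.mylib");
    __itt_id id = __itt_id_make(obj, 0);
    __itt_id_create(d, id);
    /* ... tag named entity instances with this id, add relations, ... */
    __itt_id_destroy(d, id);
}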
- * Consecutive calls to __itt_string_handle_create with the same name - * return the same value. Because the set of string handles is expected to remain - * static during the application's execution time, there is no mechanism to destroy a string handle. - * Any string handle can be accessed by any thread in the process, regardless of which thread created - * the string handle. This call is thread-safe. - * @param[in] name The input string - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -__itt_string_handle* ITTAPI __itt_string_handle_createA(const char *name); -__itt_string_handle* ITTAPI __itt_string_handle_createW(const wchar_t *name); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_string_handle_create __itt_string_handle_createW -# define __itt_string_handle_create_ptr __itt_string_handle_createW_ptr -#else /* UNICODE */ -# define __itt_string_handle_create __itt_string_handle_createA -# define __itt_string_handle_create_ptr __itt_string_handle_createA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -__itt_string_handle* ITTAPI __itt_string_handle_create(const char *name); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createA, (const char *name)) -ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createW, (const wchar_t *name)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_create, (const char *name)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_string_handle_createA ITTNOTIFY_DATA(string_handle_createA) -#define __itt_string_handle_createA_ptr ITTNOTIFY_NAME(string_handle_createA) -#define __itt_string_handle_createW ITTNOTIFY_DATA(string_handle_createW) -#define __itt_string_handle_createW_ptr ITTNOTIFY_NAME(string_handle_createW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_string_handle_create ITTNOTIFY_DATA(string_handle_create) -#define __itt_string_handle_create_ptr ITTNOTIFY_NAME(string_handle_create) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_string_handle_createA(name) (__itt_string_handle*)0 -#define __itt_string_handle_createA_ptr 0 -#define __itt_string_handle_createW(name) (__itt_string_handle*)0 -#define __itt_string_handle_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_string_handle_create(name) (__itt_string_handle*)0 -#define __itt_string_handle_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_string_handle_createA_ptr 0 -#define __itt_string_handle_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_string_handle_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} handles group */ - -/** @cond exclude_from_documentation */ -typedef unsigned long long __itt_timestamp; -/** @endcond */ - -#define __itt_timestamp_none ((__itt_timestamp)-1LL) - -/** @cond exclude_from_gpa_documentation */ - -/** - * @ingroup timestamps - * @brief Return timestamp corresponding to the current moment. 
- * This returns the timestamp in the format that is the most relevant for the current - * host or platform (RDTSC, QPC, and others). You can use the "<" operator to - * compare __itt_timestamp values. - */ -__itt_timestamp ITTAPI __itt_get_timestamp(void); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUB(ITTAPI, __itt_timestamp, get_timestamp, (void)) -#define __itt_get_timestamp ITTNOTIFY_DATA(get_timestamp) -#define __itt_get_timestamp_ptr ITTNOTIFY_NAME(get_timestamp) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_get_timestamp() ((__itt_timestamp)0) -#define __itt_get_timestamp_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_get_timestamp_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} timestamps */ -/** @endcond */ - -/** @cond exclude_from_gpa_documentation */ - -/** - * @defgroup regions Regions - * @ingroup public - * Regions group - * @{ - */ -/** - * @ingroup regions - * @brief Begin a region instance. - * Successive calls to __itt_region_begin with the same ID are ignored - * until a call to __itt_region_end with the same ID - * @param[in] domain The domain for this region instance - * @param[in] id The instance ID for this region instance. Must not be __itt_null - * @param[in] parentid The instance ID for the parent of this region instance, or __itt_null - * @param[in] name The name of this region - */ -void ITTAPI __itt_region_begin(const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name); - -/** - * @ingroup regions - * @brief End a region instance. - * The first call to __itt_region_end with a given ID ends the - * region. Successive calls with the same ID are ignored, as are - * calls that do not have a matching __itt_region_begin call. - * @param[in] domain The domain for this region instance - * @param[in] id The instance ID for this region instance - */ -void ITTAPI __itt_region_end(const __itt_domain *domain, __itt_id id); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, region_begin, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name)) -ITT_STUBV(ITTAPI, void, region_end, (const __itt_domain *domain, __itt_id id)) -#define __itt_region_begin(d,x,y,z) ITTNOTIFY_VOID_D3(region_begin,d,x,y,z) -#define __itt_region_begin_ptr ITTNOTIFY_NAME(region_begin) -#define __itt_region_end(d,x) ITTNOTIFY_VOID_D1(region_end,d,x) -#define __itt_region_end_ptr ITTNOTIFY_NAME(region_end) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_region_begin(d,x,y,z) -#define __itt_region_begin_ptr 0 -#define __itt_region_end(d,x) -#define __itt_region_end_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_region_begin_ptr 0 -#define __itt_region_end_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} regions group */ - -/** - * @defgroup frames Frames - * @ingroup public - * Frames are similar to regions, but are intended to be easier to use and to implement. - * In particular: - * - Frames always represent periods of elapsed time - * - By default, frames have no nesting relationships - * @{ - */ - -/** - * @ingroup frames - * @brief Begin a frame instance. - * Successive calls to __itt_frame_begin with the - * same ID are ignored until a call to __itt_frame_end with the same ID. 
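/* Illustrative usage sketch (editorial addition, not part of the removed
 * header): one region instance. The instance ID must not be __itt_null, and
 * the same ID must be passed to __itt_region_end; names here are
 * placeholders. */
#include "ittnotify.h"

void decode_chunk(void *chunk)
{
    __itt_domain *d = __itt_domain_create("com.example.decoder");
    __itt_string_handle *n = __itt_string_handle_create("decode");
    __itt_id id = __itt_id_make(chunk, 0);
    __itt_region_begin(d, id, __itt_null, n);
    /* ... decode ... */
    __itt_region_end(d, id);
}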
- * @param[in] domain The domain for this frame instance - * @param[in] id The instance ID for this frame instance or NULL - */ -void ITTAPI __itt_frame_begin_v3(const __itt_domain *domain, __itt_id *id); - -/** - * @ingroup frames - * @brief End a frame instance. - * The first call to __itt_frame_end with a given ID - * ends the frame. Successive calls with the same ID are ignored, as are - * calls that do not have a matching __itt_frame_begin call. - * @param[in] domain The domain for this frame instance - * @param[in] id The instance ID for this frame instance or NULL for current - */ -void ITTAPI __itt_frame_end_v3(const __itt_domain *domain, __itt_id *id); - -/** - * @ingroup frames - * @brief Submits a frame instance. - * Successive calls to __itt_frame_begin or __itt_frame_submit with the - * same ID are ignored until a call to __itt_frame_end or __itt_frame_submit - * with the same ID. - * Passing special __itt_timestamp_none value as "end" argument means - * take the current timestamp as the end timestamp. - * @param[in] domain The domain for this frame instance - * @param[in] id The instance ID for this frame instance or NULL - * @param[in] begin Timestamp of the beginning of the frame - * @param[in] end Timestamp of the end of the frame - */ -void ITTAPI __itt_frame_submit_v3(const __itt_domain *domain, __itt_id *id, - __itt_timestamp begin, __itt_timestamp end); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, frame_begin_v3, (const __itt_domain *domain, __itt_id *id)) -ITT_STUBV(ITTAPI, void, frame_end_v3, (const __itt_domain *domain, __itt_id *id)) -ITT_STUBV(ITTAPI, void, frame_submit_v3, (const __itt_domain *domain, __itt_id *id, __itt_timestamp begin, __itt_timestamp end)) -#define __itt_frame_begin_v3(d,x) ITTNOTIFY_VOID_D1(frame_begin_v3,d,x) -#define __itt_frame_begin_v3_ptr ITTNOTIFY_NAME(frame_begin_v3) -#define __itt_frame_end_v3(d,x) ITTNOTIFY_VOID_D1(frame_end_v3,d,x) -#define __itt_frame_end_v3_ptr ITTNOTIFY_NAME(frame_end_v3) -#define __itt_frame_submit_v3(d,x,b,e) ITTNOTIFY_VOID_D3(frame_submit_v3,d,x,b,e) -#define __itt_frame_submit_v3_ptr ITTNOTIFY_NAME(frame_submit_v3) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_frame_begin_v3(domain,id) -#define __itt_frame_begin_v3_ptr 0 -#define __itt_frame_end_v3(domain,id) -#define __itt_frame_end_v3_ptr 0 -#define __itt_frame_submit_v3(domain,id,begin,end) -#define __itt_frame_submit_v3_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_frame_begin_v3_ptr 0 -#define __itt_frame_end_v3_ptr 0 -#define __itt_frame_submit_v3_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} frames group */ -/** @endcond */ - -/** - * @defgroup taskgroup Task Group - * @ingroup public - * Task Group - * @{ - */ -/** - * @ingroup task_groups - * @brief Denotes a task_group instance. - * Successive calls to __itt_task_group with the same ID are ignored. - * @param[in] domain The domain for this task_group instance - * @param[in] id The instance ID for this task_group instance. Must not be __itt_null. - * @param[in] parentid The instance ID for the parent of this task_group instance, or __itt_null. 
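/* Illustrative usage sketch (editorial addition, not part of the removed
 * header): two ways to mark a frame. A begin/end pair measures elapsed time
 * implicitly; frame_submit_v3 reports explicit timestamps, with
 * __itt_timestamp_none as the "end" argument meaning "now". */
#include <stddef.h>
#include "ittnotify.h"

void render_frame(__itt_domain *d)
{
    __itt_frame_begin_v3(d, NULL);
    /* ... render ... */
    __itt_frame_end_v3(d, NULL);
}

void submit_frame(__itt_domain *d)
{
    __itt_timestamp t0 = __itt_get_timestamp();
    /* ... render ... */
    __itt_frame_submit_v3(d, NULL, t0, __itt_timestamp_none);
}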
- * @param[in] name The name of this task_group - */ -void ITTAPI __itt_task_group(const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, task_group, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name)) -#define __itt_task_group(d,x,y,z) ITTNOTIFY_VOID_D3(task_group,d,x,y,z) -#define __itt_task_group_ptr ITTNOTIFY_NAME(task_group) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_task_group(d,x,y,z) -#define __itt_task_group_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_task_group_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} taskgroup group */ - -/** - * @defgroup tasks Tasks - * @ingroup public - * A task instance represents a piece of work performed by a particular - * thread for a period of time. A call to __itt_task_begin creates a - * task instance. This becomes the current instance for that task on that - * thread. A following call to __itt_task_end on the same thread ends the - * instance. There may be multiple simultaneous instances of tasks with the - * same name on different threads. If an ID is specified, the task instance - * receives that ID. Nested tasks are allowed. - * - * Note: The task is defined by the bracketing of __itt_task_begin and - * __itt_task_end on the same thread. If some scheduling mechanism causes - * task switching (the thread executes a different user task) or thread - * switching (the user task switches to a different thread) then this breaks - * the notion of current instance. Additional API calls are required to - * deal with that possibility. - * @{ - */ - -/** - * @ingroup tasks - * @brief Begin a task instance. - * @param[in] domain The domain for this task - * @param[in] taskid The instance ID for this task instance, or __itt_null - * @param[in] parentid The parent instance to which this task instance belongs, or __itt_null - * @param[in] name The name of this task - */ -void ITTAPI __itt_task_begin(const __itt_domain *domain, __itt_id taskid, __itt_id parentid, __itt_string_handle *name); - -/** - * @ingroup tasks - * @brief Begin a task instance. - * @param[in] domain The domain for this task - * @param[in] taskid The identifier for this task instance (may be 0) - * @param[in] parentid The parent of this task (may be 0) - * @param[in] fn The pointer to the function you are tracing - */ -void ITTAPI __itt_task_begin_fn(const __itt_domain *domain, __itt_id taskid, __itt_id parentid, void* fn); - -/** - * @ingroup tasks - * @brief End the current task instance. - * @param[in] domain The domain for this task - */ -void ITTAPI __itt_task_end(const __itt_domain *domain); - -/** - * @ingroup tasks - * @brief Begin an overlapped task instance. - * @param[in] domain The domain for this task. - * @param[in] taskid The identifier for this task instance, *cannot* be __itt_null. - * @param[in] parentid The parent of this task, or __itt_null. - * @param[in] name The name of this task. - */ -void ITTAPI __itt_task_begin_overlapped(const __itt_domain* domain, __itt_id taskid, __itt_id parentid, __itt_string_handle* name); - -/** - * @ingroup tasks - * @brief End an overlapped task instance. 
- * @param[in] domain The domain for this task - * @param[in] taskid Explicit ID of finished task - */ -void ITTAPI __itt_task_end_overlapped(const __itt_domain *domain, __itt_id taskid); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, task_begin, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name)) -ITT_STUBV(ITTAPI, void, task_begin_fn, (const __itt_domain *domain, __itt_id id, __itt_id parentid, void* fn)) -ITT_STUBV(ITTAPI, void, task_end, (const __itt_domain *domain)) -ITT_STUBV(ITTAPI, void, task_begin_overlapped, (const __itt_domain *domain, __itt_id taskid, __itt_id parentid, __itt_string_handle *name)) -ITT_STUBV(ITTAPI, void, task_end_overlapped, (const __itt_domain *domain, __itt_id taskid)) -#define __itt_task_begin(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin,d,x,y,z) -#define __itt_task_begin_ptr ITTNOTIFY_NAME(task_begin) -#define __itt_task_begin_fn(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin_fn,d,x,y,z) -#define __itt_task_begin_fn_ptr ITTNOTIFY_NAME(task_begin_fn) -#define __itt_task_end(d) ITTNOTIFY_VOID_D0(task_end,d) -#define __itt_task_end_ptr ITTNOTIFY_NAME(task_end) -#define __itt_task_begin_overlapped(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin_overlapped,d,x,y,z) -#define __itt_task_begin_overlapped_ptr ITTNOTIFY_NAME(task_begin_overlapped) -#define __itt_task_end_overlapped(d,x) ITTNOTIFY_VOID_D1(task_end_overlapped,d,x) -#define __itt_task_end_overlapped_ptr ITTNOTIFY_NAME(task_end_overlapped) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_task_begin(domain,id,parentid,name) -#define __itt_task_begin_ptr 0 -#define __itt_task_begin_fn(domain,id,parentid,fn) -#define __itt_task_begin_fn_ptr 0 -#define __itt_task_end(domain) -#define __itt_task_end_ptr 0 -#define __itt_task_begin_overlapped(domain,taskid,parentid,name) -#define __itt_task_begin_overlapped_ptr 0 -#define __itt_task_end_overlapped(domain,taskid) -#define __itt_task_end_overlapped_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_task_begin_ptr 0 -#define __itt_task_begin_fn_ptr 0 -#define __itt_task_end_ptr 0 -#define __itt_task_begin_overlapped_ptr 0 -#define __itt_task_end_overlapped_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} tasks group */ - - -/** - * @defgroup markers Markers - * Markers represent a single discrete event in time. Markers have a scope, - * described by an enumerated type __itt_scope. Markers are created by - * the API call __itt_marker. A marker instance can be given an ID for use in - * adding metadata. - * @{ - */ - -/** - * @brief Describes the scope of an event object in the trace. 
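/* Illustrative usage sketch (editorial addition, not part of the removed
 * header): nested task instances on a single thread. Each __itt_task_begin
 * is closed by a __itt_task_end on the same thread, innermost first. */
#include "ittnotify.h"

void process_item(__itt_domain *d)
{
    __itt_string_handle *outer = __itt_string_handle_create("process_item");
    __itt_string_handle *inner = __itt_string_handle_create("step");
    __itt_task_begin(d, __itt_null, __itt_null, outer);
    __itt_task_begin(d, __itt_null, __itt_null, inner); /* nested task */
    /* ... work ... */
    __itt_task_end(d); /* ends "step" */
    __itt_task_end(d); /* ends "process_item" */
}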
- */ -typedef enum -{ - __itt_scope_unknown = 0, - __itt_scope_global, - __itt_scope_track_group, - __itt_scope_track, - __itt_scope_task, - __itt_scope_marker -} __itt_scope; - -/** @cond exclude_from_documentation */ -#define __itt_marker_scope_unknown __itt_scope_unknown -#define __itt_marker_scope_global __itt_scope_global -#define __itt_marker_scope_process __itt_scope_track_group -#define __itt_marker_scope_thread __itt_scope_track -#define __itt_marker_scope_task __itt_scope_task -/** @endcond */ - -/** - * @ingroup markers - * @brief Create a marker instance - * @param[in] domain The domain for this marker - * @param[in] id The instance ID for this marker or __itt_null - * @param[in] name The name for this marker - * @param[in] scope The scope for this marker - */ -void ITTAPI __itt_marker(const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, marker, (const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope)) -#define __itt_marker(d,x,y,z) ITTNOTIFY_VOID_D3(marker,d,x,y,z) -#define __itt_marker_ptr ITTNOTIFY_NAME(marker) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_marker(domain,id,name,scope) -#define __itt_marker_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_marker_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} markers group */ - -/** - * @defgroup metadata Metadata - * The metadata API is used to attach extra information to named - * entities. Metadata can be attached to an identified named entity by ID, - * or to the current entity (which is always a task). - * - * Conceptually metadata has a type (what kind of metadata), a key (the - * name of the metadata), and a value (the actual data). The encoding of - * the value depends on the type of the metadata. - * - * The type of metadata is specified by an enumerated type __itt_metadata_type. - * @{ - */ - -/** - * @ingroup parameters - * @brief Describes the type of metadata - */ -typedef enum { - __itt_metadata_unknown = 0, - __itt_metadata_u64, /**< Unsigned 64-bit integer */ - __itt_metadata_s64, /**< Signed 64-bit integer */ - __itt_metadata_u32, /**< Unsigned 32-bit integer */ - __itt_metadata_s32, /**< Signed 32-bit integer */ - __itt_metadata_u16, /**< Unsigned 16-bit integer */ - __itt_metadata_s16, /**< Signed 16-bit integer */ - __itt_metadata_float, /**< Signed 32-bit floating-point */ - __itt_metadata_double /**< Signed 64-bit floating-point */ -} __itt_metadata_type; - -/** - * @ingroup parameters - * @brief Add metadata to an instance of a named entity. - * @param[in] domain The domain controlling the call - * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task - * @param[in] key The name of the metadata - * @param[in] type The type of the metadata - * @param[in] count The number of elements of the given type. If count == 0, no metadata will be added. 
- * @param[in] data The metadata itself -*/ -void ITTAPI __itt_metadata_add(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, metadata_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data)) -#define __itt_metadata_add(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(metadata_add,d,x,y,z,a,b) -#define __itt_metadata_add_ptr ITTNOTIFY_NAME(metadata_add) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_metadata_add(d,x,y,z,a,b) -#define __itt_metadata_add_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_metadata_add_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @ingroup parameters - * @brief Add string metadata to an instance of a named entity. - * @param[in] domain The domain controlling the call - * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task - * @param[in] key The name of the metadata - * @param[in] data The metadata itself - * @param[in] length The number of characters in the string, or -1 if the length is unknown but the string is null-terminated -*/ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -void ITTAPI __itt_metadata_str_addA(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length); -void ITTAPI __itt_metadata_str_addW(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t *data, size_t length); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_metadata_str_add __itt_metadata_str_addW -# define __itt_metadata_str_add_ptr __itt_metadata_str_addW_ptr -#else /* UNICODE */ -# define __itt_metadata_str_add __itt_metadata_str_addA -# define __itt_metadata_str_add_ptr __itt_metadata_str_addA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -void ITTAPI __itt_metadata_str_add(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length); -#endif - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUBV(ITTAPI, void, metadata_str_addA, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length)) -ITT_STUBV(ITTAPI, void, metadata_str_addW, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t *data, size_t length)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, metadata_str_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_metadata_str_addA(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_addA,d,x,y,z,a) -#define __itt_metadata_str_addA_ptr ITTNOTIFY_NAME(metadata_str_addA) -#define __itt_metadata_str_addW(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_addW,d,x,y,z,a) -#define __itt_metadata_str_addW_ptr ITTNOTIFY_NAME(metadata_str_addW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_metadata_str_add(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add,d,x,y,z,a) -#define __itt_metadata_str_add_ptr ITTNOTIFY_NAME(metadata_str_add) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN 
-#define __itt_metadata_str_addA(d,x,y,z,a) -#define __itt_metadata_str_addA_ptr 0 -#define __itt_metadata_str_addW(d,x,y,z,a) -#define __itt_metadata_str_addW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_metadata_str_add(d,x,y,z,a) -#define __itt_metadata_str_add_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_metadata_str_addA_ptr 0 -#define __itt_metadata_str_addW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_metadata_str_add_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @ingroup parameters - * @brief Add metadata to an instance of a named entity. - * @param[in] domain The domain controlling the call - * @param[in] scope The scope of the instance to which the metadata is to be added - * @param[in] key The name of the metadata - * @param[in] type The type of the metadata - * @param[in] count The number of elements of the given type. If count == 0, no metadata will be added. - * @param[in] data The metadata itself -*/ -void ITTAPI __itt_metadata_add_with_scope(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, metadata_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data)) -#define __itt_metadata_add_with_scope(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(metadata_add_with_scope,d,x,y,z,a,b) -#define __itt_metadata_add_with_scope_ptr ITTNOTIFY_NAME(metadata_add_with_scope) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_metadata_add_with_scope(d,x,y,z,a,b) -#define __itt_metadata_add_with_scope_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_metadata_add_with_scope_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @ingroup parameters - * @brief Add string metadata to an instance of a named entity. 
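/* Illustrative usage sketch (editorial addition, not part of the removed
 * header): attaching scalar and string metadata to the current task
 * (id == __itt_null). Keys and values are placeholders; a scalar passes
 * count == 1 and a pointer to the value. */
#include <string.h>
#include "ittnotify.h"

void annotate_current_task(__itt_domain *d)
{
    __itt_string_handle *bytes_key = __itt_string_handle_create("bytes");
    __itt_string_handle *file_key  = __itt_string_handle_create("file");
    unsigned long long bytes = 4096ULL;
    const char *path = "input.dat";
    __itt_metadata_add(d, __itt_null, bytes_key, __itt_metadata_u64, 1, &bytes);
    __itt_metadata_str_add(d, __itt_null, file_key, path, strlen(path));
}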
- * @param[in] domain The domain controlling the call - * @param[in] scope The scope of the instance to which the metadata is to be added - * @param[in] key The name of the metadata - * @param[in] data The metadata itself - * @param[in] length The number of characters in the string, or -1 if the length is unknown but the string is null-terminated -*/ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -void ITTAPI __itt_metadata_str_add_with_scopeA(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length); -void ITTAPI __itt_metadata_str_add_with_scopeW(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_metadata_str_add_with_scope __itt_metadata_str_add_with_scopeW -# define __itt_metadata_str_add_with_scope_ptr __itt_metadata_str_add_with_scopeW_ptr -#else /* UNICODE */ -# define __itt_metadata_str_add_with_scope __itt_metadata_str_add_with_scopeA -# define __itt_metadata_str_add_with_scope_ptr __itt_metadata_str_add_with_scopeA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -void ITTAPI __itt_metadata_str_add_with_scope(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length); -#endif - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeA, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length)) -ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeW, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, metadata_str_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_metadata_str_add_with_scopeA(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scopeA,d,x,y,z,a) -#define __itt_metadata_str_add_with_scopeA_ptr ITTNOTIFY_NAME(metadata_str_add_with_scopeA) -#define __itt_metadata_str_add_with_scopeW(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scopeW,d,x,y,z,a) -#define __itt_metadata_str_add_with_scopeW_ptr ITTNOTIFY_NAME(metadata_str_add_with_scopeW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_metadata_str_add_with_scope(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scope,d,x,y,z,a) -#define __itt_metadata_str_add_with_scope_ptr ITTNOTIFY_NAME(metadata_str_add_with_scope) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_metadata_str_add_with_scopeA(d,x,y,z,a) -#define __itt_metadata_str_add_with_scopeA_ptr 0 -#define __itt_metadata_str_add_with_scopeW(d,x,y,z,a) -#define __itt_metadata_str_add_with_scopeW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_metadata_str_add_with_scope(d,x,y,z,a) -#define __itt_metadata_str_add_with_scope_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_metadata_str_add_with_scopeA_ptr 0 
-#define __itt_metadata_str_add_with_scopeW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_metadata_str_add_with_scope_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** @} metadata group */ - -/** - * @defgroup relations Relations - * Instances of named entities can be explicitly associated with other - * instances using instance IDs and the relationship API calls. - * - * @{ - */ - -/** - * @ingroup relations - * @brief The kind of relation between two instances is specified by the enumerated type __itt_relation. - * Relations between instances can be added with an API call. The relation - * API uses instance IDs. Relations can be added before or after the actual - * instances are created and persist independently of the instances. This - * is the motivation for having different lifetimes for instance IDs and - * the actual instances. - */ -typedef enum -{ - __itt_relation_is_unknown = 0, - __itt_relation_is_dependent_on, /**< "A is dependent on B" means that A cannot start until B completes */ - __itt_relation_is_sibling_of, /**< "A is sibling of B" means that A and B were created as a group */ - __itt_relation_is_parent_of, /**< "A is parent of B" means that A created B */ - __itt_relation_is_continuation_of, /**< "A is continuation of B" means that A assumes the dependencies of B */ - __itt_relation_is_child_of, /**< "A is child of B" means that A was created by B (inverse of is_parent_of) */ - __itt_relation_is_continued_by, /**< "A is continued by B" means that B assumes the dependencies of A (inverse of is_continuation_of) */ - __itt_relation_is_predecessor_to /**< "A is predecessor to B" means that B cannot start until A completes (inverse of is_dependent_on) */ -} __itt_relation; - -/** - * @ingroup relations - * @brief Add a relation to the current task instance. - * The current task instance is the head of the relation. - * @param[in] domain The domain controlling this call - * @param[in] relation The kind of relation - * @param[in] tail The ID for the tail of the relation - */ -void ITTAPI __itt_relation_add_to_current(const __itt_domain *domain, __itt_relation relation, __itt_id tail); - -/** - * @ingroup relations - * @brief Add a relation between two instance identifiers. 
- * @param[in] domain The domain controlling this call - * @param[in] head The ID for the head of the relation - * @param[in] relation The kind of relation - * @param[in] tail The ID for the tail of the relation - */ -void ITTAPI __itt_relation_add(const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, relation_add_to_current, (const __itt_domain *domain, __itt_relation relation, __itt_id tail)) -ITT_STUBV(ITTAPI, void, relation_add, (const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail)) -#define __itt_relation_add_to_current(d,x,y) ITTNOTIFY_VOID_D2(relation_add_to_current,d,x,y) -#define __itt_relation_add_to_current_ptr ITTNOTIFY_NAME(relation_add_to_current) -#define __itt_relation_add(d,x,y,z) ITTNOTIFY_VOID_D3(relation_add,d,x,y,z) -#define __itt_relation_add_ptr ITTNOTIFY_NAME(relation_add) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_relation_add_to_current(d,x,y) -#define __itt_relation_add_to_current_ptr 0 -#define __itt_relation_add(d,x,y,z) -#define __itt_relation_add_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_relation_add_to_current_ptr 0 -#define __itt_relation_add_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} relations group */ - -/** @cond exclude_from_documentation */ -#pragma pack(push, 8) - -typedef struct ___itt_clock_info -{ - unsigned long long clock_freq; /*!< Clock domain frequency */ - unsigned long long clock_base; /*!< Clock domain base timestamp */ -} __itt_clock_info; - -#pragma pack(pop) -/** @endcond */ - -/** @cond exclude_from_documentation */ -typedef void (ITTAPI *__itt_get_clock_info_fn)(__itt_clock_info* clock_info, void* data); -/** @endcond */ - -/** @cond exclude_from_documentation */ -#pragma pack(push, 8) - -typedef struct ___itt_clock_domain -{ - __itt_clock_info info; /*!< Most recent clock domain info */ - __itt_get_clock_info_fn fn; /*!< Callback function pointer */ - void* fn_data; /*!< Input argument for the callback function */ - int extra1; /*!< Reserved. Must be zero */ - void* extra2; /*!< Reserved. Must be zero */ - struct ___itt_clock_domain* next; -} __itt_clock_domain; - -#pragma pack(pop) -/** @endcond */ - -/** - * @ingroup clockdomains - * @brief Create a clock domain. - * Certain applications require the capability to trace their application using - * a clock domain different than the CPU, for instance the instrumentation of events - * that occur on a GPU. - * Because the set of domains is expected to be static over the application's execution time, - * there is no mechanism to destroy a domain. - * Any domain can be accessed by any thread in the process, regardless of which thread created - * the domain. This call is thread-safe. 
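/* Illustrative usage sketch (editorial addition, not part of the removed
 * header): recording that one task cannot start until another completes.
 * The two IDs are placeholders and may be related before or after the
 * corresponding instances exist. */
#include "ittnotify.h"

void link_dependency(__itt_domain *d, __itt_id producer, __itt_id consumer)
{
    __itt_relation_add(d, consumer, __itt_relation_is_dependent_on, producer);
    /* equivalently, from inside the consumer task itself: */
    __itt_relation_add_to_current(d, __itt_relation_is_dependent_on, producer);
}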
- * @param[in] fn A pointer to a callback function which retrieves alternative CPU timestamps - * @param[in] fn_data Argument for a callback function; may be NULL - */ -__itt_clock_domain* ITTAPI __itt_clock_domain_create(__itt_get_clock_info_fn fn, void* fn_data); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUB(ITTAPI, __itt_clock_domain*, clock_domain_create, (__itt_get_clock_info_fn fn, void* fn_data)) -#define __itt_clock_domain_create ITTNOTIFY_DATA(clock_domain_create) -#define __itt_clock_domain_create_ptr ITTNOTIFY_NAME(clock_domain_create) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_clock_domain_create(fn,fn_data) (__itt_clock_domain*)0 -#define __itt_clock_domain_create_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_clock_domain_create_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @ingroup clockdomains - * @brief Recalculate clock domain frequencies and clock base timestamps. - */ -void ITTAPI __itt_clock_domain_reset(void); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, clock_domain_reset, (void)) -#define __itt_clock_domain_reset ITTNOTIFY_VOID(clock_domain_reset) -#define __itt_clock_domain_reset_ptr ITTNOTIFY_NAME(clock_domain_reset) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_clock_domain_reset() -#define __itt_clock_domain_reset_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_clock_domain_reset_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @ingroup clockdomain - * @brief Create an instance of an identifier. This establishes the beginning of the lifetime of - * an instance of the given ID in the trace. Once this lifetime starts, the ID can be used to - * tag named entity instances in calls such as __itt_task_begin, and to specify relationships among - * identified named entity instances, using the \ref relations APIs. - * @param[in] domain The domain controlling the execution of this call. - * @param[in] clock_domain The clock domain controlling the execution of this call. - * @param[in] timestamp The user defined timestamp. - * @param[in] id The ID to create. - */ -void ITTAPI __itt_id_create_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id); - -/** - * @ingroup clockdomain - * @brief Destroy an instance of an identifier. This ends the lifetime of the current instance of the - * given ID value in the trace. Any relationships that are established after this lifetime ends are - * invalid. This call must be performed before the given ID value can be reused for a different - * named entity instance. - * @param[in] domain The domain controlling the execution of this call. - * @param[in] clock_domain The clock domain controlling the execution of this call. - * @param[in] timestamp The user defined timestamp. - * @param[in] id The ID to destroy.
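For illustration, a minimal sketch of the clock-domain and ID-lifetime calls documented above (editor's example, not part of the original header); gpu_now(), domain, and buffer are hypothetical stand-ins supplied by the instrumented application:

    /* Hypothetical time source; the 1 GHz frequency is an assumption. */
    static void ITTAPI clock_cb(__itt_clock_info* info, void* data)
    {
        info->clock_freq = 1000000000ULL;
        info->clock_base = gpu_now();
    }

    __itt_clock_domain* cd = __itt_clock_domain_create(clock_cb, NULL);
    __itt_id id = __itt_id_make(&buffer, 0); /* helper declared earlier in this header */
    __itt_id_create_ex(domain, cd, gpu_now(), id);
    /* ... tag named entity instances with id ... */
    __itt_id_destroy_ex(domain, cd, gpu_now(), id);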
- */ -void ITTAPI __itt_id_destroy_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, id_create_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id)) -ITT_STUBV(ITTAPI, void, id_destroy_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id)) -#define __itt_id_create_ex(d,x,y,z) ITTNOTIFY_VOID_D3(id_create_ex,d,x,y,z) -#define __itt_id_create_ex_ptr ITTNOTIFY_NAME(id_create_ex) -#define __itt_id_destroy_ex(d,x,y,z) ITTNOTIFY_VOID_D3(id_destroy_ex,d,x,y,z) -#define __itt_id_destroy_ex_ptr ITTNOTIFY_NAME(id_destroy_ex) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_id_create_ex(domain,clock_domain,timestamp,id) -#define __itt_id_create_ex_ptr 0 -#define __itt_id_destroy_ex(domain,clock_domain,timestamp,id) -#define __itt_id_destroy_ex_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_id_create_ex_ptr 0 -#define __itt_id_destroy_ex_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @ingroup clockdomain - * @brief Begin a task instance. - * @param[in] domain The domain for this task - * @param[in] clock_domain The clock domain controlling the execution of this call. - * @param[in] timestamp The user defined timestamp. - * @param[in] taskid The instance ID for this task instance, or __itt_null - * @param[in] parentid The parent instance to which this task instance belongs, or __itt_null - * @param[in] name The name of this task - */ -void ITTAPI __itt_task_begin_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name); - -/** - * @ingroup clockdomain - * @brief Begin a task instance. - * @param[in] domain The domain for this task - * @param[in] clock_domain The clock domain controlling the execution of this call. - * @param[in] timestamp The user defined timestamp. - * @param[in] taskid The identifier for this task instance, or __itt_null - * @param[in] parentid The parent of this task, or __itt_null - * @param[in] fn The pointer to the function you are tracing - */ -void ITTAPI __itt_task_begin_fn_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, void* fn); - -/** - * @ingroup clockdomain - * @brief End the current task instance. - * @param[in] domain The domain for this task - * @param[in] clock_domain The clock domain controlling the execution of this call. - * @param[in] timestamp The user defined timestamp. 
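A minimal sketch of the timestamped begin/end pair described here (editor's example; domain, cd, and the tick values t0 and t1 are assumed to come from the surrounding application code):

    __itt_string_handle* h = __itt_string_handle_create("gpu_kernel");
    __itt_task_begin_ex(domain, cd, t0, __itt_null, __itt_null, h);
    /* ... the interval [t0, t1] on the cd clock is attributed to this task ... */
    __itt_task_end_ex(domain, cd, t1);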
- */ -void ITTAPI __itt_task_end_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, task_begin_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, __itt_string_handle *name)) -ITT_STUBV(ITTAPI, void, task_begin_fn_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, void* fn)) -ITT_STUBV(ITTAPI, void, task_end_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp)) -#define __itt_task_begin_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_ex,d,x,y,z,a,b) -#define __itt_task_begin_ex_ptr ITTNOTIFY_NAME(task_begin_ex) -#define __itt_task_begin_fn_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_fn_ex,d,x,y,z,a,b) -#define __itt_task_begin_fn_ex_ptr ITTNOTIFY_NAME(task_begin_fn_ex) -#define __itt_task_end_ex(d,x,y) ITTNOTIFY_VOID_D2(task_end_ex,d,x,y) -#define __itt_task_end_ex_ptr ITTNOTIFY_NAME(task_end_ex) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_task_begin_ex(domain,clock_domain,timestamp,id,parentid,name) -#define __itt_task_begin_ex_ptr 0 -#define __itt_task_begin_fn_ex(domain,clock_domain,timestamp,id,parentid,fn) -#define __itt_task_begin_fn_ex_ptr 0 -#define __itt_task_end_ex(domain,clock_domain,timestamp) -#define __itt_task_end_ex_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_task_begin_ex_ptr 0 -#define __itt_task_begin_fn_ex_ptr 0 -#define __itt_task_end_ex_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @defgroup counters Counters - * @ingroup public - * Counters are user-defined objects whose 64-bit unsigned integer - * values can be incremented, decremented, or set. - * Counters have names that can be displayed in - * the tools.
- * @{ - */ - -/** - * @brief opaque structure for counter identification - */ -/** @cond exclude_from_documentation */ - -typedef struct ___itt_counter* __itt_counter; - -/** - * @brief Create an unsigned 64-bit integer counter with the given name/domain - * - * After __itt_counter_create() is called, __itt_counter_inc(id), __itt_counter_inc_delta(id, delta), - * __itt_counter_set_value(id, value_ptr) or __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr) - * can be used to change the value of the counter, where value_ptr is a pointer to an unsigned 64-bit integer - * - * The call is equivalent to __itt_counter_create_typed(name, domain, __itt_metadata_u64) - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -__itt_counter ITTAPI __itt_counter_createA(const char *name, const char *domain); -__itt_counter ITTAPI __itt_counter_createW(const wchar_t *name, const wchar_t *domain); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_counter_create __itt_counter_createW -# define __itt_counter_create_ptr __itt_counter_createW_ptr -#else /* UNICODE */ -# define __itt_counter_create __itt_counter_createA -# define __itt_counter_create_ptr __itt_counter_createA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -__itt_counter ITTAPI __itt_counter_create(const char *name, const char *domain); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_counter, counter_createA, (const char *name, const char *domain)) -ITT_STUB(ITTAPI, __itt_counter, counter_createW, (const wchar_t *name, const wchar_t *domain)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_counter, counter_create, (const char *name, const char *domain)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_counter_createA ITTNOTIFY_DATA(counter_createA) -#define __itt_counter_createA_ptr ITTNOTIFY_NAME(counter_createA) -#define __itt_counter_createW ITTNOTIFY_DATA(counter_createW) -#define __itt_counter_createW_ptr ITTNOTIFY_NAME(counter_createW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_counter_create ITTNOTIFY_DATA(counter_create) -#define __itt_counter_create_ptr ITTNOTIFY_NAME(counter_create) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_counter_createA(name, domain) -#define __itt_counter_createA_ptr 0 -#define __itt_counter_createW(name, domain) -#define __itt_counter_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_counter_create(name, domain) -#define __itt_counter_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_counter_createA_ptr 0 -#define __itt_counter_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_counter_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Increment the unsigned 64-bit integer counter value - * - * Calling this function on a counter that is not an unsigned 64-bit integer has no effect - */ -void ITTAPI __itt_counter_inc(__itt_counter id); - -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, counter_inc, (__itt_counter id)) -#define __itt_counter_inc ITTNOTIFY_VOID(counter_inc) -#define __itt_counter_inc_ptr ITTNOTIFY_NAME(counter_inc)
-#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_counter_inc(id) -#define __itt_counter_inc_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_counter_inc_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** - * @brief Increment the unsigned 64-bit integer counter value by the specified value - * - * Calling this function on a counter that is not an unsigned 64-bit integer has no effect - */ -void ITTAPI __itt_counter_inc_delta(__itt_counter id, unsigned long long value); - -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, counter_inc_delta, (__itt_counter id, unsigned long long value)) -#define __itt_counter_inc_delta ITTNOTIFY_VOID(counter_inc_delta) -#define __itt_counter_inc_delta_ptr ITTNOTIFY_NAME(counter_inc_delta) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_counter_inc_delta(id, value) -#define __itt_counter_inc_delta_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_counter_inc_delta_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Decrement the unsigned 64-bit integer counter value - * - * Calling this function on a counter that is not an unsigned 64-bit integer has no effect - */ -void ITTAPI __itt_counter_dec(__itt_counter id); - -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, counter_dec, (__itt_counter id)) -#define __itt_counter_dec ITTNOTIFY_VOID(counter_dec) -#define __itt_counter_dec_ptr ITTNOTIFY_NAME(counter_dec) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_counter_dec(id) -#define __itt_counter_dec_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_counter_dec_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** - * @brief Decrement the unsigned 64-bit integer counter value by the specified value - * - * Calling this function on a counter that is not an unsigned 64-bit integer has no effect - */ -void ITTAPI __itt_counter_dec_delta(__itt_counter id, unsigned long long value); - -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, counter_dec_delta, (__itt_counter id, unsigned long long value)) -#define __itt_counter_dec_delta ITTNOTIFY_VOID(counter_dec_delta) -#define __itt_counter_dec_delta_ptr ITTNOTIFY_NAME(counter_dec_delta) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_counter_dec_delta(id, value) -#define __itt_counter_dec_delta_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_counter_dec_delta_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @ingroup counters - * @brief Increment a counter by one. - * The first call with a given name creates a counter by that name and sets its - * value to zero. Successive calls increment the counter value. - * @param[in] domain The domain controlling the call. Counter names are not domain specific. - * The domain argument is used only to enable or disable the API calls. - * @param[in] name The name of the counter - */ -void ITTAPI __itt_counter_inc_v3(const __itt_domain *domain, __itt_string_handle *name); - -/** - * @ingroup counters - * @brief Increment a counter by the value specified in delta. - * @param[in] domain The domain controlling the call. Counter names are not domain specific. - * The domain argument is used only to enable or disable the API calls.
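As an illustration of the two counter flavors documented here, a sketch (editor's example; the counter and domain names are placeholders, and domain is assumed to exist):

    /* Handle-based counter: created once, updated from anywhere. */
    __itt_counter c = __itt_counter_create("ring_depth", "MyApp");
    __itt_counter_inc(c);
    __itt_counter_inc_delta(c, 16);
    unsigned long long v = 42;
    __itt_counter_set_value(c, &v);   /* declared later in this header */
    __itt_counter_destroy(c);         /* declared later in this header */

    /* Name-based variant: the first call creates the counter. */
    __itt_counter_inc_v3(domain, __itt_string_handle_create("packets"));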
- * @param[in] name The name of the counter - * @param[in] delta The amount by which to increment the counter - */ -void ITTAPI __itt_counter_inc_delta_v3(const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta); - -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, counter_inc_v3, (const __itt_domain *domain, __itt_string_handle *name)) -ITT_STUBV(ITTAPI, void, counter_inc_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta)) -#define __itt_counter_inc_v3(d,x) ITTNOTIFY_VOID_D1(counter_inc_v3,d,x) -#define __itt_counter_inc_v3_ptr ITTNOTIFY_NAME(counter_inc_v3) -#define __itt_counter_inc_delta_v3(d,x,y) ITTNOTIFY_VOID_D2(counter_inc_delta_v3,d,x,y) -#define __itt_counter_inc_delta_v3_ptr ITTNOTIFY_NAME(counter_inc_delta_v3) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_counter_inc_v3(domain,name) -#define __itt_counter_inc_v3_ptr 0 -#define __itt_counter_inc_delta_v3(domain,name,delta) -#define __itt_counter_inc_delta_v3_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_counter_inc_v3_ptr 0 -#define __itt_counter_inc_delta_v3_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - - -/** - * @ingroup counters - * @brief Decrement a counter by one. - * The first call with a given name creates a counter by that name and sets its - * value to zero. Successive calls decrement the counter value. - * @param[in] domain The domain controlling the call. Counter names are not domain specific. - * The domain argument is used only to enable or disable the API calls. - * @param[in] name The name of the counter - */ -void ITTAPI __itt_counter_dec_v3(const __itt_domain *domain, __itt_string_handle *name); - -/** - * @ingroup counters - * @brief Decrement a counter by the value specified in delta. - * @param[in] domain The domain controlling the call. Counter names are not domain specific. - * The domain argument is used only to enable or disable the API calls. 
- * @param[in] name The name of the counter - * @param[in] delta The amount by which to decrement the counter - */ -void ITTAPI __itt_counter_dec_delta_v3(const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta); - -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, counter_dec_v3, (const __itt_domain *domain, __itt_string_handle *name)) -ITT_STUBV(ITTAPI, void, counter_dec_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta)) -#define __itt_counter_dec_v3(d,x) ITTNOTIFY_VOID_D1(counter_dec_v3,d,x) -#define __itt_counter_dec_v3_ptr ITTNOTIFY_NAME(counter_dec_v3) -#define __itt_counter_dec_delta_v3(d,x,y) ITTNOTIFY_VOID_D2(counter_dec_delta_v3,d,x,y) -#define __itt_counter_dec_delta_v3_ptr ITTNOTIFY_NAME(counter_dec_delta_v3) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_counter_dec_v3(domain,name) -#define __itt_counter_dec_v3_ptr 0 -#define __itt_counter_dec_delta_v3(domain,name,delta) -#define __itt_counter_dec_delta_v3_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_counter_dec_v3_ptr 0 -#define __itt_counter_dec_delta_v3_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** @} counters group */ - - -/** - * @brief Set the counter value - */ -void ITTAPI __itt_counter_set_value(__itt_counter id, void *value_ptr); - -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, counter_set_value, (__itt_counter id, void *value_ptr)) -#define __itt_counter_set_value ITTNOTIFY_VOID(counter_set_value) -#define __itt_counter_set_value_ptr ITTNOTIFY_NAME(counter_set_value) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_counter_set_value(id, value_ptr) -#define __itt_counter_set_value_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_counter_set_value_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Set the counter value at the given timestamp in the given clock domain - */ -void ITTAPI __itt_counter_set_value_ex(__itt_counter id, __itt_clock_domain *clock_domain, unsigned long long timestamp, void *value_ptr); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, counter_set_value_ex, (__itt_counter id, __itt_clock_domain *clock_domain, unsigned long long timestamp, void *value_ptr)) -#define __itt_counter_set_value_ex ITTNOTIFY_VOID(counter_set_value_ex) -#define __itt_counter_set_value_ex_ptr ITTNOTIFY_NAME(counter_set_value_ex) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr) -#define __itt_counter_set_value_ex_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_counter_set_value_ex_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Create a typed counter with the given name/domain - * - * After __itt_counter_create_typed() is called, __itt_counter_inc(id), __itt_counter_inc_delta(id, delta), - * __itt_counter_set_value(id, value_ptr) or __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr) - * can be used to change the value of the counter - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -__itt_counter ITTAPI __itt_counter_create_typedA(const char *name, const char *domain, __itt_metadata_type type); -__itt_counter ITTAPI __itt_counter_create_typedW(const wchar_t *name, const wchar_t *domain, __itt_metadata_type type); -#if defined(UNICODE) || defined(_UNICODE)
-# define __itt_counter_create_typed __itt_counter_create_typedW -# define __itt_counter_create_typed_ptr __itt_counter_create_typedW_ptr -#else /* UNICODE */ -# define __itt_counter_create_typed __itt_counter_create_typedA -# define __itt_counter_create_typed_ptr __itt_counter_create_typedA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -__itt_counter ITTAPI __itt_counter_create_typed(const char *name, const char *domain, __itt_metadata_type type); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_counter, counter_create_typedA, (const char *name, const char *domain, __itt_metadata_type type)) -ITT_STUB(ITTAPI, __itt_counter, counter_create_typedW, (const wchar_t *name, const wchar_t *domain, __itt_metadata_type type)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_counter, counter_create_typed, (const char *name, const char *domain, __itt_metadata_type type)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_counter_create_typedA ITTNOTIFY_DATA(counter_create_typedA) -#define __itt_counter_create_typedA_ptr ITTNOTIFY_NAME(counter_create_typedA) -#define __itt_counter_create_typedW ITTNOTIFY_DATA(counter_create_typedW) -#define __itt_counter_create_typedW_ptr ITTNOTIFY_NAME(counter_create_typedW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_counter_create_typed ITTNOTIFY_DATA(counter_create_typed) -#define __itt_counter_create_typed_ptr ITTNOTIFY_NAME(counter_create_typed) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_counter_create_typedA(name, domain, type) -#define __itt_counter_create_typedA_ptr 0 -#define __itt_counter_create_typedW(name, domain, type) -#define __itt_counter_create_typedW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_counter_create_typed(name, domain, type) -#define __itt_counter_create_typed_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_counter_create_typedA_ptr 0 -#define __itt_counter_create_typedW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_counter_create_typed_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Destroy the counter identified by the pointer previously returned by __itt_counter_create() or - * __itt_counter_create_typed() - */ -void ITTAPI __itt_counter_destroy(__itt_counter id); - -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, counter_destroy, (__itt_counter id)) -#define __itt_counter_destroy ITTNOTIFY_VOID(counter_destroy) -#define __itt_counter_destroy_ptr ITTNOTIFY_NAME(counter_destroy) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_counter_destroy(id) -#define __itt_counter_destroy_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_counter_destroy_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} counters group */ - -/** - * @ingroup markers - * @brief Create a marker instance. - * @param[in] domain The domain for this marker - * @param[in] clock_domain The clock domain controlling the execution of this call. - * @param[in] timestamp The user defined timestamp.
- * @param[in] id The instance ID for this marker, or __itt_null - * @param[in] name The name for this marker - * @param[in] scope The scope for this marker - */ -void ITTAPI __itt_marker_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, marker_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope)) -#define __itt_marker_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(marker_ex,d,x,y,z,a,b) -#define __itt_marker_ex_ptr ITTNOTIFY_NAME(marker_ex) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_marker_ex(domain,clock_domain,timestamp,id,name,scope) -#define __itt_marker_ex_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_marker_ex_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @ingroup clockdomain - * @brief Add a relation to the current task instance. - * The current task instance is the head of the relation. - * @param[in] domain The domain controlling this call - * @param[in] clock_domain The clock domain controlling the execution of this call. - * @param[in] timestamp The user defined timestamp. - * @param[in] relation The kind of relation - * @param[in] tail The ID for the tail of the relation - */ -void ITTAPI __itt_relation_add_to_current_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail); - -/** - * @ingroup clockdomain - * @brief Add a relation between two instance identifiers. - * @param[in] domain The domain controlling this call - * @param[in] clock_domain The clock domain controlling the execution of this call. - * @param[in] timestamp The user defined timestamp. 
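For illustration, one call tying two timestamped task instances together (editor's sketch; domain, cd, ts, id_a, and id_b are assumed from the surrounding code), recording that id_b cannot start until id_a completes:

    __itt_relation_add_ex(domain, cd, ts, id_b,
                          __itt_relation_is_dependent_on, id_a);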
- * @param[in] head The ID for the head of the relation - * @param[in] relation The kind of relation - * @param[in] tail The ID for the tail of the relation - */ -void ITTAPI __itt_relation_add_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, relation_add_to_current_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail)) -ITT_STUBV(ITTAPI, void, relation_add_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail)) -#define __itt_relation_add_to_current_ex(d,x,y,z,a) ITTNOTIFY_VOID_D4(relation_add_to_current_ex,d,x,y,z,a) -#define __itt_relation_add_to_current_ex_ptr ITTNOTIFY_NAME(relation_add_to_current_ex) -#define __itt_relation_add_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(relation_add_ex,d,x,y,z,a,b) -#define __itt_relation_add_ex_ptr ITTNOTIFY_NAME(relation_add_ex) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_relation_add_to_current_ex(domain,clock_domain,timestamp,relation,tail) -#define __itt_relation_add_to_current_ex_ptr 0 -#define __itt_relation_add_ex(domain,clock_domain,timestamp,head,relation,tail) -#define __itt_relation_add_ex_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_relation_add_to_current_ex_ptr 0 -#define __itt_relation_add_ex_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** @cond exclude_from_documentation */ -typedef enum ___itt_track_group_type -{ - __itt_track_group_type_normal = 0 -} __itt_track_group_type; -/** @endcond */ - -/** @cond exclude_from_documentation */ -#pragma pack(push, 8) - -typedef struct ___itt_track_group -{ - __itt_string_handle* name; /*!< Name of the track group */ - struct ___itt_track* track; /*!< List of child tracks */ - __itt_track_group_type tgtype; /*!< Type of the track group */ - int extra1; /*!< Reserved. Must be zero */ - void* extra2; /*!< Reserved. Must be zero */ - struct ___itt_track_group* next; -} __itt_track_group; - -#pragma pack(pop) -/** @endcond */ - -/** - * @brief Placeholder for custom track types. Currently, "normal" custom track - * is the only available track type. - */ -typedef enum ___itt_track_type -{ - __itt_track_type_normal = 0 -#ifdef INTEL_ITTNOTIFY_API_PRIVATE - , __itt_track_type_queue -#endif /* INTEL_ITTNOTIFY_API_PRIVATE */ -} __itt_track_type; - -/** @cond exclude_from_documentation */ -#pragma pack(push, 8) - -typedef struct ___itt_track -{ - __itt_string_handle* name; /*!< Name of the track */ - __itt_track_group* group; /*!< Parent group to a track */ - __itt_track_type ttype; /*!< Type of the track */ - int extra1; /*!< Reserved. Must be zero */ - void* extra2; /*!< Reserved. Must be zero */ - struct ___itt_track* next; -} __itt_track; - -#pragma pack(pop) -/** @endcond */ - -/** - * @brief Create logical track group.
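A sketch of the track API declared just below (editor's example; the group and track names are placeholders). Events issued after __itt_set_track() are routed onto the chosen track:

    __itt_track_group* tg = __itt_track_group_create(
        __itt_string_handle_create("GPU queues"), __itt_track_group_type_normal);
    __itt_track* tr = __itt_track_create(tg,
        __itt_string_handle_create("queue 0"), __itt_track_type_normal);
    __itt_set_track(tr);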
- */ -__itt_track_group* ITTAPI __itt_track_group_create(__itt_string_handle* name, __itt_track_group_type track_group_type); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUB(ITTAPI, __itt_track_group*, track_group_create, (__itt_string_handle* name, __itt_track_group_type track_group_type)) -#define __itt_track_group_create ITTNOTIFY_DATA(track_group_create) -#define __itt_track_group_create_ptr ITTNOTIFY_NAME(track_group_create) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_track_group_create(name,track_group_type) (__itt_track_group*)0 -#define __itt_track_group_create_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_track_group_create_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Create logical track. - */ -__itt_track* ITTAPI __itt_track_create(__itt_track_group* track_group, __itt_string_handle* name, __itt_track_type track_type); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUB(ITTAPI, __itt_track*, track_create, (__itt_track_group* track_group,__itt_string_handle* name, __itt_track_type track_type)) -#define __itt_track_create ITTNOTIFY_DATA(track_create) -#define __itt_track_create_ptr ITTNOTIFY_NAME(track_create) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_track_create(track_group,name,track_type) (__itt_track*)0 -#define __itt_track_create_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_track_create_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Set the logical track. - */ -void ITTAPI __itt_set_track(__itt_track* track); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, set_track, (__itt_track *track)) -#define __itt_set_track ITTNOTIFY_VOID(set_track) -#define __itt_set_track_ptr ITTNOTIFY_NAME(set_track) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_set_track(track) -#define __itt_set_track_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_set_track_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/* ========================================================================== */ -/** @cond exclude_from_gpa_documentation */ -/** - * @defgroup events Events - * @ingroup public - * Events group - * @{ - */ -/** @brief user event type */ -typedef int __itt_event; - -/** - * @brief Create an event notification - * @note Fails if name is null, if namelen does not match the length of name, or if the user event feature is not enabled - * @return non-zero event identifier upon success and __itt_err otherwise - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -__itt_event LIBITTAPI __itt_event_createA(const char *name, int namelen); -__itt_event LIBITTAPI __itt_event_createW(const wchar_t *name, int namelen); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_event_create __itt_event_createW -# define __itt_event_create_ptr __itt_event_createW_ptr -#else -# define __itt_event_create __itt_event_createA -# define __itt_event_create_ptr __itt_event_createA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -__itt_event LIBITTAPI __itt_event_create(const char *name, int namelen); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUB(LIBITTAPI, __itt_event, event_createA, (const char *name, int namelen)) -ITT_STUB(LIBITTAPI, __itt_event, event_createW, (const wchar_t *name, int namelen)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(LIBITTAPI, __itt_event, event_create, (const char *name, int namelen)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_event_createA ITTNOTIFY_DATA(event_createA) -#define __itt_event_createA_ptr ITTNOTIFY_NAME(event_createA) -#define __itt_event_createW ITTNOTIFY_DATA(event_createW) -#define __itt_event_createW_ptr ITTNOTIFY_NAME(event_createW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_event_create ITTNOTIFY_DATA(event_create) -#define __itt_event_create_ptr ITTNOTIFY_NAME(event_create) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_event_createA(name, namelen) (__itt_event)0 -#define __itt_event_createA_ptr 0 -#define __itt_event_createW(name, namelen) (__itt_event)0 -#define __itt_event_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_event_create(name, namelen) (__itt_event)0 -#define __itt_event_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_event_createA_ptr 0 -#define __itt_event_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_event_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Record an event occurrence. - * @return __itt_err upon failure (invalid event id or user event feature not enabled) - */ -int LIBITTAPI __itt_event_start(__itt_event event); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUB(LIBITTAPI, int, event_start, (__itt_event event)) -#define __itt_event_start ITTNOTIFY_DATA(event_start) -#define __itt_event_start_ptr ITTNOTIFY_NAME(event_start) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_event_start(event) (int)0 -#define __itt_event_start_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_event_start_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Record an event end occurrence. - * @note This call is optional if events do not have durations.
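The typical event sequence, as a sketch (editor's example; the event name is a placeholder):

    __itt_event e = __itt_event_create("frame", 5); /* 5 == strlen("frame") */
    __itt_event_start(e);
    /* ... duration of interest ... */
    __itt_event_end(e); /* optional for durationless events */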
- * @return __itt_err upon failure (invalid event id or user event feature not enabled) - */ -int LIBITTAPI __itt_event_end(__itt_event event); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUB(LIBITTAPI, int, event_end, (__itt_event event)) -#define __itt_event_end ITTNOTIFY_DATA(event_end) -#define __itt_event_end_ptr ITTNOTIFY_NAME(event_end) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_event_end(event) (int)0 -#define __itt_event_end_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_event_end_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} events group */ - - -/** - * @defgroup arrays Arrays Visualizer - * @ingroup public - * Visualize arrays - * @{ - */ - -/** - * @enum __itt_av_data_type - * @brief Defines types of arrays data (for C/C++ intrinsic types) - */ -typedef enum -{ - __itt_e_first = 0, - __itt_e_char = 0, /* 1-byte integer */ - __itt_e_uchar, /* 1-byte unsigned integer */ - __itt_e_int16, /* 2-byte integer */ - __itt_e_uint16, /* 2-byte unsigned integer */ - __itt_e_int32, /* 4-byte integer */ - __itt_e_uint32, /* 4-byte unsigned integer */ - __itt_e_int64, /* 8-byte integer */ - __itt_e_uint64, /* 8-byte unsigned integer */ - __itt_e_float, /* 4-byte floating */ - __itt_e_double, /* 8-byte floating */ - __itt_e_last = __itt_e_double -} __itt_av_data_type; - -/** - * @brief Save array data to a file. - * Output format is defined by the file extension. The csv and bmp formats are supported (bmp is for 2-dimensional arrays only). - * @param[in] data - pointer to the array data - * @param[in] rank - the rank of the array - * @param[in] dimensions - pointer to an array of integers, which specifies the array dimensions. - * The size of dimensions must be equal to the rank - * @param[in] type - the type of the array, specified as one of the __itt_av_data_type values (for intrinsic types) - * @param[in] filePath - the file path; the output format is defined by the file extension - * @param[in] columnOrder - defines how the array is stored in the linear memory. - * It should be 1 for column-major order (e.g. in FORTRAN) or 0 for row-major order (e.g. in C).
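For illustration, saving a two-dimensional row-major float array to CSV (editor's sketch, shown with the non-Windows, char-path variant):

    float grid[64][128];
    int dims[2] = { 64, 128 };
    __itt_av_save(grid, 2, dims, __itt_e_float, "grid.csv", 0 /* row-major */);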
- */ - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -int ITTAPI __itt_av_saveA(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder); -int ITTAPI __itt_av_saveW(void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_av_save __itt_av_saveW -# define __itt_av_save_ptr __itt_av_saveW_ptr -#else /* UNICODE */ -# define __itt_av_save __itt_av_saveA -# define __itt_av_save_ptr __itt_av_saveA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -int ITTAPI __itt_av_save(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, int, av_saveA, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder)) -ITT_STUB(ITTAPI, int, av_saveW, (void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, int, av_save, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_av_saveA ITTNOTIFY_DATA(av_saveA) -#define __itt_av_saveA_ptr ITTNOTIFY_NAME(av_saveA) -#define __itt_av_saveW ITTNOTIFY_DATA(av_saveW) -#define __itt_av_saveW_ptr ITTNOTIFY_NAME(av_saveW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_av_save ITTNOTIFY_DATA(av_save) -#define __itt_av_save_ptr ITTNOTIFY_NAME(av_save) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_av_saveA(data, rank, dimensions, type, filePath, columnOrder) (int)0 -#define __itt_av_saveA_ptr 0 -#define __itt_av_saveW(data, rank, dimensions, type, filePath, columnOrder) (int)0 -#define __itt_av_saveW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_av_save(data, rank, dimensions, type, filePath, columnOrder) (int)0 -#define __itt_av_save_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_av_saveA_ptr 0 -#define __itt_av_saveW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_av_save_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -void ITTAPI __itt_enable_attach(void); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, enable_attach, (void)) -#define __itt_enable_attach ITTNOTIFY_VOID(enable_attach) -#define __itt_enable_attach_ptr ITTNOTIFY_NAME(enable_attach) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_enable_attach() -#define __itt_enable_attach_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_enable_attach_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** @cond exclude_from_gpa_documentation */ - -/** @} arrays group */ - -/** @endcond */ - -/** - * @brief Module load info - * This API is used to report necessary information in case of module relocation - * @param[in] start_addr - relocated module start address - * @param[in] end_addr - relocated module end address - * @param[in] path - file system path to the module - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -void ITTAPI __itt_module_loadA(void *start_addr, void *end_addr, const char *path);
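A sketch of reporting a relocated module via the generic __itt_module_load entry point declared here and just below (editor's example; base, size, and the path are hypothetical values obtained from the application's loader):

    __itt_module_load(base, (char*)base + size, "/opt/app/libfoo.so");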
-void ITTAPI __itt_module_loadW(void *start_addr, void *end_addr, const wchar_t *path); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_module_load __itt_module_loadW -# define __itt_module_load_ptr __itt_module_loadW_ptr -#else /* UNICODE */ -# define __itt_module_load __itt_module_loadA -# define __itt_module_load_ptr __itt_module_loadA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -void ITTAPI __itt_module_load(void *start_addr, void *end_addr, const char *path); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, void, module_loadA, (void *start_addr, void *end_addr, const char *path)) -ITT_STUB(ITTAPI, void, module_loadW, (void *start_addr, void *end_addr, const wchar_t *path)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, void, module_load, (void *start_addr, void *end_addr, const char *path)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_module_loadA ITTNOTIFY_VOID(module_loadA) -#define __itt_module_loadA_ptr ITTNOTIFY_NAME(module_loadA) -#define __itt_module_loadW ITTNOTIFY_VOID(module_loadW) -#define __itt_module_loadW_ptr ITTNOTIFY_NAME(module_loadW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_module_load ITTNOTIFY_VOID(module_load) -#define __itt_module_load_ptr ITTNOTIFY_NAME(module_load) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_module_loadA(start_addr, end_addr, path) -#define __itt_module_loadA_ptr 0 -#define __itt_module_loadW(start_addr, end_addr, path) -#define __itt_module_loadW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_module_load(start_addr, end_addr, path) -#define __itt_module_load_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_module_loadA_ptr 0 -#define __itt_module_loadW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_module_load_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - - - -#ifdef __cplusplus -} -#endif /* __cplusplus */ - -#endif /* _ITTNOTIFY_H_ */ - -#ifdef INTEL_ITTNOTIFY_API_PRIVATE - -#ifndef _ITTNOTIFY_PRIVATE_ -#define _ITTNOTIFY_PRIVATE_ - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -/** - * @ingroup clockdomain - * @brief Begin an overlapped task instance. - * @param[in] domain The domain for this task - * @param[in] clock_domain The clock domain controlling the execution of this call. - * @param[in] timestamp The user defined timestamp. - * @param[in] taskid The identifier for this task instance, *cannot* be __itt_null. - * @param[in] parentid The parent of this task, or __itt_null. - * @param[in] name The name of this task. - */ -void ITTAPI __itt_task_begin_overlapped_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name); - -/** - * @ingroup clockdomain - * @brief End an overlapped task instance. - * @param[in] domain The domain for this task - * @param[in] clock_domain The clock domain controlling the execution of this call. - * @param[in] timestamp The user defined timestamp. 
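For illustration, two overlapped task instances ended out of order (editor's sketch; domain, cd, the t* timestamps, IDs, and names are assumed from context). Unlike __itt_task_begin_ex, the task ID here must not be __itt_null:

    __itt_task_begin_overlapped_ex(domain, cd, t0, id_a, __itt_null, name_a);
    __itt_task_begin_overlapped_ex(domain, cd, t1, id_b, __itt_null, name_b);
    __itt_task_end_overlapped_ex(domain, cd, t2, id_a); /* ends the first task */
    __itt_task_end_overlapped_ex(domain, cd, t3, id_b);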
- * @param[in] taskid Explicit ID of finished task - */ -void ITTAPI __itt_task_end_overlapped_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, task_begin_overlapped_ex, (const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name)) -ITT_STUBV(ITTAPI, void, task_end_overlapped_ex, (const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid)) -#define __itt_task_begin_overlapped_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_overlapped_ex,d,x,y,z,a,b) -#define __itt_task_begin_overlapped_ex_ptr ITTNOTIFY_NAME(task_begin_overlapped_ex) -#define __itt_task_end_overlapped_ex(d,x,y,z) ITTNOTIFY_VOID_D3(task_end_overlapped_ex,d,x,y,z) -#define __itt_task_end_overlapped_ex_ptr ITTNOTIFY_NAME(task_end_overlapped_ex) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_task_begin_overlapped_ex(domain,clock_domain,timestamp,taskid,parentid,name) -#define __itt_task_begin_overlapped_ex_ptr 0 -#define __itt_task_end_overlapped_ex(domain,clock_domain,timestamp,taskid) -#define __itt_task_end_overlapped_ex_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_task_begin_overlapped_ex_ptr 0 -#define __itt_task_end_overlapped_ptr 0 -#define __itt_task_end_overlapped_ex_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @defgroup marks_internal Marks - * @ingroup internal - * Marks group - * @warning Internal API: - * - It is not shipped to outside of Intel - * - It is delivered to internal Intel teams using e-mail or SVN access only - * @{ - */ -/** @brief user mark type */ -typedef int __itt_mark_type; - -/** - * @brief Creates a user mark type with the specified name using char or Unicode string.
- * @param[in] name - name of mark to create - * @return Returns a handle to the mark type - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -__itt_mark_type ITTAPI __itt_mark_createA(const char *name); -__itt_mark_type ITTAPI __itt_mark_createW(const wchar_t *name); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_mark_create __itt_mark_createW -# define __itt_mark_create_ptr __itt_mark_createW_ptr -#else /* UNICODE */ -# define __itt_mark_create __itt_mark_createA -# define __itt_mark_create_ptr __itt_mark_createA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -__itt_mark_type ITTAPI __itt_mark_create(const char *name); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_mark_type, mark_createA, (const char *name)) -ITT_STUB(ITTAPI, __itt_mark_type, mark_createW, (const wchar_t *name)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_mark_type, mark_create, (const char *name)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_mark_createA ITTNOTIFY_DATA(mark_createA) -#define __itt_mark_createA_ptr ITTNOTIFY_NAME(mark_createA) -#define __itt_mark_createW ITTNOTIFY_DATA(mark_createW) -#define __itt_mark_createW_ptr ITTNOTIFY_NAME(mark_createW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_mark_create ITTNOTIFY_DATA(mark_create) -#define __itt_mark_create_ptr ITTNOTIFY_NAME(mark_create) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_mark_createA(name) (__itt_mark_type)0 -#define __itt_mark_createA_ptr 0 -#define __itt_mark_createW(name) (__itt_mark_type)0 -#define __itt_mark_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_mark_create(name) (__itt_mark_type)0 -#define __itt_mark_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_mark_createA_ptr 0 -#define __itt_mark_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_mark_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Places a "discrete" user mark of the specified type with an optional string parameter, using a char or Unicode string. - * - * - The mark of "discrete" type is placed into collection results in case of success. It appears in overtime view(s) as a special tick sign. - * - The call is "synchronous" - the function returns after the mark is actually added to the results. - * - This function is useful, for example, to mark different phases of an application - * (beginning of the next mark automatically means end of the current region). - * - Can be used together with "continuous" marks (see below) in the same collection session - * @param[in] mt - mark, created by __itt_mark_create(const char* name) function - * @param[in] parameter - string parameter of mark - * @return Returns zero value in case of success, non-zero value otherwise.
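A sketch of a discrete mark separating two application phases (editor's example; the names are placeholders):

    __itt_mark_type phase = __itt_mark_create("phase");
    __itt_mark(phase, "initialization done");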
- */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -int ITTAPI __itt_markA(__itt_mark_type mt, const char *parameter); -int ITTAPI __itt_markW(__itt_mark_type mt, const wchar_t *parameter); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_mark __itt_markW -# define __itt_mark_ptr __itt_markW_ptr -#else /* UNICODE */ -# define __itt_mark __itt_markA -# define __itt_mark_ptr __itt_markA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -int ITTAPI __itt_mark(__itt_mark_type mt, const char *parameter); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, int, markA, (__itt_mark_type mt, const char *parameter)) -ITT_STUB(ITTAPI, int, markW, (__itt_mark_type mt, const wchar_t *parameter)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, int, mark, (__itt_mark_type mt, const char *parameter)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_markA ITTNOTIFY_DATA(markA) -#define __itt_markA_ptr ITTNOTIFY_NAME(markA) -#define __itt_markW ITTNOTIFY_DATA(markW) -#define __itt_markW_ptr ITTNOTIFY_NAME(markW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_mark ITTNOTIFY_DATA(mark) -#define __itt_mark_ptr ITTNOTIFY_NAME(mark) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_markA(mt, parameter) (int)0 -#define __itt_markA_ptr 0 -#define __itt_markW(mt, parameter) (int)0 -#define __itt_markW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_mark(mt, parameter) (int)0 -#define __itt_mark_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_markA_ptr 0 -#define __itt_markW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_mark_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Use this if necessary to create a "discrete" user event type (mark) for a process - * rather than for one thread - * @see int __itt_mark(__itt_mark_type mt, const char* parameter); - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -int ITTAPI __itt_mark_globalA(__itt_mark_type mt, const char *parameter); -int ITTAPI __itt_mark_globalW(__itt_mark_type mt, const wchar_t *parameter); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_mark_global __itt_mark_globalW -# define __itt_mark_global_ptr __itt_mark_globalW_ptr -#else /* UNICODE */ -# define __itt_mark_global __itt_mark_globalA -# define __itt_mark_global_ptr __itt_mark_globalA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -int ITTAPI __itt_mark_global(__itt_mark_type mt, const char *parameter); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, int, mark_globalA, (__itt_mark_type mt, const char *parameter)) -ITT_STUB(ITTAPI, int, mark_globalW, (__itt_mark_type mt, const wchar_t *parameter)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, int, mark_global, (__itt_mark_type mt, const char *parameter)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_mark_globalA ITTNOTIFY_DATA(mark_globalA)
-#define __itt_mark_globalA_ptr ITTNOTIFY_NAME(mark_globalA) -#define __itt_mark_globalW ITTNOTIFY_DATA(mark_globalW) -#define __itt_mark_globalW_ptr ITTNOTIFY_NAME(mark_globalW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_mark_global ITTNOTIFY_DATA(mark_global) -#define __itt_mark_global_ptr ITTNOTIFY_NAME(mark_global) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_mark_globalA(mt, parameter) (int)0 -#define __itt_mark_globalA_ptr 0 -#define __itt_mark_globalW(mt, parameter) (int)0 -#define __itt_mark_globalW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_mark_global(mt, parameter) (int)0 -#define __itt_mark_global_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_mark_globalA_ptr 0 -#define __itt_mark_globalW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_mark_global_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Creates an "end" point for a "continuous" mark with the specified name. - * - * - Returns zero value in case of success, non-zero value otherwise. - * It also returns a non-zero value when the preceding "begin" point for the - * mark with the same name was not created or failed to be created. - * - The mark of "continuous" type is placed into collection results in - * case of success. It appears in overtime view(s) as a special tick - * sign (different from the "discrete" mark) together with a line from the - * corresponding "begin" mark to the "end" mark. - * @note Continuous marks can overlap and be nested inside each other. - * A discrete mark can be nested inside a marked region - * @param[in] mt - mark, created by __itt_mark_create(const char* name) function - * @return Returns zero value in case of success, non-zero value otherwise.
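For illustration, a continuous mark spanning a region (editor's sketch, assuming, per the description above, that the preceding __itt_mark() call with the same mark type serves as the "begin" point):

    __itt_mark_type burst = __itt_mark_create("io-burst");
    __itt_mark(burst, "begin");
    /* ... region of interest ... */
    __itt_mark_off(burst);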
- */ -int ITTAPI __itt_mark_off(__itt_mark_type mt); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUB(ITTAPI, int, mark_off, (__itt_mark_type mt)) -#define __itt_mark_off ITTNOTIFY_DATA(mark_off) -#define __itt_mark_off_ptr ITTNOTIFY_NAME(mark_off) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_mark_off(mt) (int)0 -#define __itt_mark_off_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_mark_off_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Use this if necessary to create an "end" point for a mark of a process - * @see int __itt_mark_off(__itt_mark_type mt); - */ -int ITTAPI __itt_mark_global_off(__itt_mark_type mt); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUB(ITTAPI, int, mark_global_off, (__itt_mark_type mt)) -#define __itt_mark_global_off ITTNOTIFY_DATA(mark_global_off) -#define __itt_mark_global_off_ptr ITTNOTIFY_NAME(mark_global_off) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_mark_global_off(mt) (int)0 -#define __itt_mark_global_off_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_mark_global_off_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} marks group */ - -/** - * @defgroup counters_internal Counters - * @ingroup internal - * Counters group - * @{ - */ - - -/** - * @defgroup stitch Stack Stitching - * @ingroup internal - * Stack Stitching group - * @{ - */ -/** - * @brief opaque structure for stitch point identification - */ -typedef struct ___itt_caller *__itt_caller; - -/** - * @brief Create a stitch point, i.e. a point in the call stack where other stacks should be stitched to. - * The function returns a unique identifier which is used to match the cut points with corresponding stitch points. - */ -__itt_caller ITTAPI __itt_stack_caller_create(void); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUB(ITTAPI, __itt_caller, stack_caller_create, (void)) -#define __itt_stack_caller_create ITTNOTIFY_DATA(stack_caller_create) -#define __itt_stack_caller_create_ptr ITTNOTIFY_NAME(stack_caller_create) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_stack_caller_create() (__itt_caller)0 -#define __itt_stack_caller_create_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_stack_caller_create_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Destroy the information about the stitch point identified by the pointer previously returned by __itt_stack_caller_create() - */ -void ITTAPI __itt_stack_caller_destroy(__itt_caller id); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, stack_caller_destroy, (__itt_caller id)) -#define __itt_stack_caller_destroy ITTNOTIFY_VOID(stack_caller_destroy) -#define __itt_stack_caller_destroy_ptr ITTNOTIFY_NAME(stack_caller_destroy) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_stack_caller_destroy(id) -#define __itt_stack_caller_destroy_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_stack_caller_destroy_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Sets the cut point.
- -/** - * @brief Sets the cut point. The stack of each event that occurs after this call will be cut - * at the stack level at which the function was called, and stitched to the corresponding stitch point. - */ -void ITTAPI __itt_stack_callee_enter(__itt_caller id); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, stack_callee_enter, (__itt_caller id)) -#define __itt_stack_callee_enter ITTNOTIFY_VOID(stack_callee_enter) -#define __itt_stack_callee_enter_ptr ITTNOTIFY_NAME(stack_callee_enter) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_stack_callee_enter(id) -#define __itt_stack_callee_enter_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_stack_callee_enter_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief This function eliminates the cut point which was set by the latest __itt_stack_callee_enter(). - */ -void ITTAPI __itt_stack_callee_leave(__itt_caller id); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, stack_callee_leave, (__itt_caller id)) -#define __itt_stack_callee_leave ITTNOTIFY_VOID(stack_callee_leave) -#define __itt_stack_callee_leave_ptr ITTNOTIFY_NAME(stack_callee_leave) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_stack_callee_leave(id) -#define __itt_stack_callee_leave_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_stack_callee_leave_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** @} stitch group */ - -/* ***************************************************************************************************************************** */ - -#include <stdarg.h> - -/** @cond exclude_from_documentation */ -typedef enum __itt_error_code -{ - __itt_error_success = 0, /*!< no error */ - __itt_error_no_module = 1, /*!< module can't be loaded */ - /* %1$s -- library name; win: %2$d -- system error code; unx: %2$s -- system error message. */ - __itt_error_no_symbol = 2, /*!< symbol not found */ - /* %1$s -- library name, %2$s -- symbol name. */ - __itt_error_unknown_group = 3, /*!< unknown group specified */ - /* %1$s -- env var name, %2$s -- group name. */ - __itt_error_cant_read_env = 4, /*!< GetEnvironmentVariable() failed */ - /* %1$s -- env var name, %2$d -- system error. */ - __itt_error_env_too_long = 5, /*!< variable value too long */ - /* %1$s -- env var name, %2$d -- actual length of the var, %3$d -- max allowed length. */ - __itt_error_system = 6 /*!< pthread_mutexattr_init or pthread_mutex_init failed */ - /* %1$s -- function name, %2$d -- errno.
*/ -} __itt_error_code; - -typedef void (__itt_error_handler_t)(__itt_error_code code, va_list); -__itt_error_handler_t* __itt_set_error_handler(__itt_error_handler_t*); - -const char* ITTAPI __itt_api_version(void); -/** @endcond */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#define __itt_error_handler ITT_JOIN(INTEL_ITTNOTIFY_PREFIX, error_handler) -void __itt_error_handler(__itt_error_code code, va_list args); -extern const int ITTNOTIFY_NAME(err); -#define __itt_err ITTNOTIFY_NAME(err) -ITT_STUB(ITTAPI, const char*, api_version, (void)) -#define __itt_api_version ITTNOTIFY_DATA(api_version) -#define __itt_api_version_ptr ITTNOTIFY_NAME(api_version) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_api_version() (const char*)0 -#define __itt_api_version_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_api_version_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -#ifdef __cplusplus -} -#endif /* __cplusplus */ - -#endif /* _ITTNOTIFY_PRIVATE_ */ - -#endif /* INTEL_ITTNOTIFY_API_PRIVATE */ Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/thirdparty/ittnotify/ittnotify.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/thirdparty/ittnotify/ittnotify_config.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/thirdparty/ittnotify/ittnotify_config.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/thirdparty/ittnotify/ittnotify_config.h (nonexistent) @@ -1,588 +0,0 @@ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
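The private block that just closed also defines the error-reporting hook. A sketch of installing a custom handler, assuming the translation unit defines INTEL_ITTNOTIFY_API_PRIVATE so that this block is visible:

    #include <stdarg.h>
    #include <stdio.h>
    #define INTEL_ITTNOTIFY_API_PRIVATE
    #include "ittnotify.h"

    /* Receives one of the __itt_error_code values plus the printf-style
       arguments documented next to each enumerator. */
    static void log_itt_error(__itt_error_code code, va_list args)
    {
        (void)args;
        fprintf(stderr, "ittnotify error %d\n", (int)code);
    }

    void install_itt_error_handler(void)
    {
        __itt_error_handler_t *prev = __itt_set_error_handler(log_itt_error);
        (void)prev;   /* the previous handler is returned and could be chained */
    }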
-// -//===----------------------------------------------------------------------===// - -#ifndef _ITTNOTIFY_CONFIG_H_ -#define _ITTNOTIFY_CONFIG_H_ - -/** @cond exclude_from_documentation */ -#ifndef ITT_OS_WIN -# define ITT_OS_WIN 1 -#endif /* ITT_OS_WIN */ - -#ifndef ITT_OS_LINUX -# define ITT_OS_LINUX 2 -#endif /* ITT_OS_LINUX */ - -#ifndef ITT_OS_MAC -# define ITT_OS_MAC 3 -#endif /* ITT_OS_MAC */ - -#ifndef ITT_OS_FREEBSD -# define ITT_OS_FREEBSD 4 -#endif /* ITT_OS_FREEBSD */ - -#ifndef ITT_OS -# if defined WIN32 || defined _WIN32 -# define ITT_OS ITT_OS_WIN -# elif defined( __APPLE__ ) && defined( __MACH__ ) -# define ITT_OS ITT_OS_MAC -# elif defined( __FreeBSD__ ) -# define ITT_OS ITT_OS_FREEBSD -# else -# define ITT_OS ITT_OS_LINUX -# endif -#endif /* ITT_OS */ - -#ifndef ITT_PLATFORM_WIN -# define ITT_PLATFORM_WIN 1 -#endif /* ITT_PLATFORM_WIN */ - -#ifndef ITT_PLATFORM_POSIX -# define ITT_PLATFORM_POSIX 2 -#endif /* ITT_PLATFORM_POSIX */ - -#ifndef ITT_PLATFORM_MAC -# define ITT_PLATFORM_MAC 3 -#endif /* ITT_PLATFORM_MAC */ - -#ifndef ITT_PLATFORM_FREEBSD -# define ITT_PLATFORM_FREEBSD 4 -#endif /* ITT_PLATFORM_FREEBSD */ - -#ifndef ITT_PLATFORM -# if ITT_OS==ITT_OS_WIN -# define ITT_PLATFORM ITT_PLATFORM_WIN -# elif ITT_OS==ITT_OS_MAC -# define ITT_PLATFORM ITT_PLATFORM_MAC -# elif ITT_OS==ITT_OS_FREEBSD -# define ITT_PLATFORM ITT_PLATFORM_FREEBSD -# else -# define ITT_PLATFORM ITT_PLATFORM_POSIX -# endif -#endif /* ITT_PLATFORM */ - -#if defined(_UNICODE) && !defined(UNICODE) -#define UNICODE -#endif - -#include -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#include -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#include -#if defined(UNICODE) || defined(_UNICODE) -#include -#endif /* UNICODE || _UNICODE */ -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -#ifndef ITTAPI_CDECL -# if ITT_PLATFORM==ITT_PLATFORM_WIN -# define ITTAPI_CDECL __cdecl -# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -# if defined _M_IX86 || defined __i386__ -# define ITTAPI_CDECL __attribute__ ((cdecl)) -# else /* _M_IX86 || __i386__ */ -# define ITTAPI_CDECL /* actual only on x86 platform */ -# endif /* _M_IX86 || __i386__ */ -# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* ITTAPI_CDECL */ - -#ifndef STDCALL -# if ITT_PLATFORM==ITT_PLATFORM_WIN -# define STDCALL __stdcall -# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -# if defined _M_IX86 || defined __i386__ -# define STDCALL __attribute__ ((stdcall)) -# else /* _M_IX86 || __i386__ */ -# define STDCALL /* supported only on x86 platform */ -# endif /* _M_IX86 || __i386__ */ -# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* STDCALL */ - -#define ITTAPI ITTAPI_CDECL -#define LIBITTAPI ITTAPI_CDECL - -/* TODO: Temporary for compatibility! */ -#define ITTAPI_CALL ITTAPI_CDECL -#define LIBITTAPI_CALL ITTAPI_CDECL - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -/* use __forceinline (VC++ specific) */ -#define ITT_INLINE __forceinline -#define ITT_INLINE_ATTRIBUTE /* nothing */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -/* - * Generally, functions are not inlined unless optimization is specified. - * For functions declared inline, this attribute inlines the function even - * if no optimization level was specified. 
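A compile-time probe showing how the detection cascade above resolves; nothing here is specific to the runtime, it only echoes the ITT_PLATFORM value the header chooses:

    #include <stdio.h>
    #include "ittnotify_config.h"

    int main(void)
    {
    #if ITT_PLATFORM == ITT_PLATFORM_WIN
        puts("ITT_PLATFORM_WIN");
    #elif ITT_PLATFORM == ITT_PLATFORM_MAC
        puts("ITT_PLATFORM_MAC");
    #elif ITT_PLATFORM == ITT_PLATFORM_FREEBSD
        puts("ITT_PLATFORM_FREEBSD");
    #else
        puts("ITT_PLATFORM_POSIX");
    #endif
        return 0;
    }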
- */ -#ifdef __STRICT_ANSI__ -#define ITT_INLINE static -#define ITT_INLINE_ATTRIBUTE __attribute__((unused)) -#else /* __STRICT_ANSI__ */ -#define ITT_INLINE static inline -#define ITT_INLINE_ATTRIBUTE __attribute__((always_inline, unused)) -#endif /* __STRICT_ANSI__ */ -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -/** @endcond */ - -#ifndef ITT_ARCH_IA32 -# define ITT_ARCH_IA32 1 -#endif /* ITT_ARCH_IA32 */ - -#ifndef ITT_ARCH_IA32E -# define ITT_ARCH_IA32E 2 -#endif /* ITT_ARCH_IA32E */ - -/* Was there a magical reason we didn't have 3 here before? */ -#ifndef ITT_ARCH_AARCH64 -# define ITT_ARCH_AARCH64 3 -#endif /* ITT_ARCH_AARCH64 */ - -#ifndef ITT_ARCH_ARM -# define ITT_ARCH_ARM 4 -#endif /* ITT_ARCH_ARM */ - -#ifndef ITT_ARCH_PPC64 -# define ITT_ARCH_PPC64 5 -#endif /* ITT_ARCH_PPC64 */ - -#ifndef ITT_ARCH_MIPS -# define ITT_ARCH_MIPS 6 -#endif /* ITT_ARCH_MIPS */ - -#ifndef ITT_ARCH_MIPS64 -# define ITT_ARCH_MIPS64 6 -#endif /* ITT_ARCH_MIPS64 */ - -#ifndef ITT_ARCH -# if defined _M_IX86 || defined __i386__ -# define ITT_ARCH ITT_ARCH_IA32 -# elif defined _M_X64 || defined _M_AMD64 || defined __x86_64__ -# define ITT_ARCH ITT_ARCH_IA32E -# elif defined _M_IA64 || defined __ia64__ -# define ITT_ARCH ITT_ARCH_IA64 -# elif defined _M_ARM || defined __arm__ -# define ITT_ARCH ITT_ARCH_ARM -# elif defined __powerpc64__ -# define ITT_ARCH ITT_ARCH_PPC64 -# elif defined __aarch64__ -# define ITT_ARCH ITT_ARCH_AARCH64 -# elif defined __mips__ && !defined __mips64 -# define ITT_ARCH ITT_ARCH_MIPS -# elif defined __mips__ && defined __mips64 -# define ITT_ARCH ITT_ARCH_MIPS64 -# endif -#endif - -#ifdef __cplusplus -# define ITT_EXTERN_C extern "C" -# define ITT_EXTERN_C_BEGIN extern "C" { -# define ITT_EXTERN_C_END } -#else -# define ITT_EXTERN_C /* nothing */ -# define ITT_EXTERN_C_BEGIN /* nothing */ -# define ITT_EXTERN_C_END /* nothing */ -#endif /* __cplusplus */ - -#define ITT_TO_STR_AUX(x) #x -#define ITT_TO_STR(x) ITT_TO_STR_AUX(x) - -#define __ITT_BUILD_ASSERT(expr, suffix) do { \ - static char __itt_build_check_##suffix[(expr) ? 1 : -1]; \ - __itt_build_check_##suffix[0] = 0; \ -} while(0) -#define _ITT_BUILD_ASSERT(expr, suffix) __ITT_BUILD_ASSERT((expr), suffix) -#define ITT_BUILD_ASSERT(expr) _ITT_BUILD_ASSERT((expr), __LINE__) - -#define ITT_MAGIC { 0xED, 0xAB, 0xAB, 0xEC, 0x0D, 0xEE, 0xDA, 0x30 } - -/* Replace with snapshot date YYYYMMDD for promotion build. 
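ITT_BUILD_ASSERT above is a pre-C11 static assertion: a false predicate produces a negative array size and the build fails. A sketch of its use, in statement context as the do/while body requires. Two quirks of the arch block are worth noting in passing: ITT_ARCH_MIPS64 is defined to 6, the same value as ITT_ARCH_MIPS, and the detection chain assigns ITT_ARCH_IA64, a macro this header never defines (in #if context an undefined name simply evaluates to 0):

    #include "ittnotify_config.h"

    void build_time_checks(void)
    {
        ITT_BUILD_ASSERT(sizeof(void*) >= 4);      /* compiles */
        /* ITT_BUILD_ASSERT(sizeof(char) == 2); */ /* would fail to compile */
    }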
*/ -#define API_VERSION_BUILD 20151119 - -#ifndef API_VERSION_NUM -#define API_VERSION_NUM 0.0.0 -#endif /* API_VERSION_NUM */ - -#define API_VERSION "ITT-API-Version " ITT_TO_STR(API_VERSION_NUM) \ - " (" ITT_TO_STR(API_VERSION_BUILD) ")" - -/* OS communication functions */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#include -typedef HMODULE lib_t; -typedef DWORD TIDT; -typedef CRITICAL_SECTION mutex_t; -#define MUTEX_INITIALIZER { 0 } -#define strong_alias(name, aliasname) /* empty for Windows */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#include -#if defined(UNICODE) || defined(_UNICODE) -#include -#endif /* UNICODE */ -#ifndef _GNU_SOURCE -#define _GNU_SOURCE 1 /* need for PTHREAD_MUTEX_RECURSIVE */ -#endif /* _GNU_SOURCE */ -#ifndef __USE_UNIX98 -#define __USE_UNIX98 1 /* need for PTHREAD_MUTEX_RECURSIVE, on SLES11.1 with gcc 4.3.4 wherein pthread.h missing dependency on __USE_XOPEN2K8 */ -#endif /*__USE_UNIX98*/ -#include -typedef void* lib_t; -typedef pthread_t TIDT; -typedef pthread_mutex_t mutex_t; -#define MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER -#define _strong_alias(name, aliasname) \ - extern __typeof (name) aliasname __attribute__ ((alias (#name))); -#define strong_alias(name, aliasname) _strong_alias(name, aliasname) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_get_proc(lib, name) GetProcAddress(lib, name) -#define __itt_mutex_init(mutex) InitializeCriticalSection(mutex) -#define __itt_mutex_lock(mutex) EnterCriticalSection(mutex) -#define __itt_mutex_unlock(mutex) LeaveCriticalSection(mutex) -#define __itt_load_lib(name) LoadLibraryA(name) -#define __itt_unload_lib(handle) FreeLibrary(handle) -#define __itt_system_error() (int)GetLastError() -#define __itt_fstrcmp(s1, s2) lstrcmpA(s1, s2) -#define __itt_fstrnlen(s, l) strnlen_s(s, l) -#define __itt_fstrcpyn(s1, b, s2, l) strncpy_s(s1, b, s2, l) -#define __itt_fstrdup(s) _strdup(s) -#define __itt_thread_id() GetCurrentThreadId() -#define __itt_thread_yield() SwitchToThread() -#ifndef ITT_SIMPLE_INIT -ITT_INLINE long -__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE; -ITT_INLINE long __itt_interlocked_increment(volatile long* ptr) -{ - return InterlockedIncrement(ptr); -} -#endif /* ITT_SIMPLE_INIT */ - -#define DL_SYMBOLS (1) -#define PTHREAD_SYMBOLS (1) - -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -#define __itt_get_proc(lib, name) dlsym(lib, name) -#define __itt_mutex_init(mutex) {\ - pthread_mutexattr_t mutex_attr; \ - int error_code = pthread_mutexattr_init(&mutex_attr); \ - if (error_code) \ - __itt_report_error(__itt_error_system, "pthread_mutexattr_init", \ - error_code); \ - error_code = pthread_mutexattr_settype(&mutex_attr, \ - PTHREAD_MUTEX_RECURSIVE); \ - if (error_code) \ - __itt_report_error(__itt_error_system, "pthread_mutexattr_settype", \ - error_code); \ - error_code = pthread_mutex_init(mutex, &mutex_attr); \ - if (error_code) \ - __itt_report_error(__itt_error_system, "pthread_mutex_init", \ - error_code); \ - error_code = pthread_mutexattr_destroy(&mutex_attr); \ - if (error_code) \ - __itt_report_error(__itt_error_system, "pthread_mutexattr_destroy", \ - error_code); \ -} -#define __itt_mutex_lock(mutex) pthread_mutex_lock(mutex) -#define __itt_mutex_unlock(mutex) pthread_mutex_unlock(mutex) -#define __itt_load_lib(name) dlopen(name, RTLD_LAZY) -#define __itt_unload_lib(handle) dlclose(handle) -#define __itt_system_error() errno -#define __itt_fstrcmp(s1, s2) strcmp(s1, s2) - -/* makes customer code define safe APIs for 
SDL_STRNLEN_S and SDL_STRNCPY_S */ -#ifdef SDL_STRNLEN_S -#define __itt_fstrnlen(s, l) SDL_STRNLEN_S(s, l) -#else -#define __itt_fstrnlen(s, l) strlen(s) -#endif /* SDL_STRNLEN_S */ -#ifdef SDL_STRNCPY_S -#define __itt_fstrcpyn(s1, b, s2, l) SDL_STRNCPY_S(s1, b, s2, l) -#else -#define __itt_fstrcpyn(s1, b, s2, l) strncpy(s1, s2, l) -#endif /* SDL_STRNCPY_S */ - -#define __itt_fstrdup(s) strdup(s) -#define __itt_thread_id() pthread_self() -#define __itt_thread_yield() sched_yield() -#if ITT_ARCH==ITT_ARCH_IA64 -#ifdef __INTEL_COMPILER -#define __TBB_machine_fetchadd4(addr, val) __fetchadd4_acq((void *)addr, val) -#else /* __INTEL_COMPILER */ -/* TODO: Add Support for not Intel compilers for IA-64 architecture */ -#endif /* __INTEL_COMPILER */ -#elif ITT_ARCH==ITT_ARCH_IA32 || ITT_ARCH==ITT_ARCH_IA32E /* ITT_ARCH!=ITT_ARCH_IA64 */ -ITT_INLINE long -__TBB_machine_fetchadd4(volatile void* ptr, long addend) ITT_INLINE_ATTRIBUTE; -ITT_INLINE long __TBB_machine_fetchadd4(volatile void* ptr, long addend) -{ - long result; - __asm__ __volatile__("lock\nxadd %0,%1" - : "=r"(result),"=m"(*(volatile int*)ptr) - : "0"(addend), "m"(*(volatile int*)ptr) - : "memory"); - return result; -} -#elif ITT_ARCH==ITT_ARCH_ARM || ITT_ARCH==ITT_ARCH_PPC64 || ITT_ARCH==ITT_ARCH_AARCH64 || ITT_ARCH==ITT_ARCH_MIPS || ITT_ARCH==ITT_ARCH_MIPS64 -#define __TBB_machine_fetchadd4(addr, val) __sync_fetch_and_add(addr, val) -#endif /* ITT_ARCH==ITT_ARCH_IA64 */ -#ifndef ITT_SIMPLE_INIT -ITT_INLINE long -__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE; -ITT_INLINE long __itt_interlocked_increment(volatile long* ptr) -{ - return __TBB_machine_fetchadd4(ptr, 1) + 1L; -} -#endif /* ITT_SIMPLE_INIT */ - -void* dlopen(const char*, int) __attribute__((weak)); -void* dlsym(void*, const char*) __attribute__((weak)); -int dlclose(void*) __attribute__((weak)); -#define DL_SYMBOLS (dlopen && dlsym && dlclose) - -int pthread_mutex_init(pthread_mutex_t*, const pthread_mutexattr_t*) __attribute__((weak)); -int pthread_mutex_lock(pthread_mutex_t*) __attribute__((weak)); -int pthread_mutex_unlock(pthread_mutex_t*) __attribute__((weak)); -int pthread_mutex_destroy(pthread_mutex_t*) __attribute__((weak)); -int pthread_mutexattr_init(pthread_mutexattr_t*) __attribute__((weak)); -int pthread_mutexattr_settype(pthread_mutexattr_t*, int) __attribute__((weak)); -int pthread_mutexattr_destroy(pthread_mutexattr_t*) __attribute__((weak)); -pthread_t pthread_self(void) __attribute__((weak)); -#define PTHREAD_SYMBOLS (pthread_mutex_init && pthread_mutex_lock && pthread_mutex_unlock && pthread_mutex_destroy && pthread_mutexattr_init && pthread_mutexattr_settype && pthread_mutexattr_destroy && pthread_self) - -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -typedef enum { - __itt_collection_normal = 0, - __itt_collection_paused = 1 -} __itt_collection_state; - -typedef enum { - __itt_thread_normal = 0, - __itt_thread_ignored = 1 -} __itt_thread_state; - -#pragma pack(push, 8) - -typedef struct ___itt_thread_info -{ - const char* nameA; /*!< Copy of original name in ASCII. */ -#if defined(UNICODE) || defined(_UNICODE) - const wchar_t* nameW; /*!< Copy of original name in UNICODE. 
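__itt_interlocked_increment is the only atomic the runtime needs: on Windows it wraps InterlockedIncrement(), and elsewhere it adds 1 to the result of the fetch-add defined above. A sketch, assuming ITT_SIMPLE_INIT is not defined (otherwise the helper is compiled out):

    #include "ittnotify_config.h"

    static volatile long g_counter = 0;

    /* Hands out process-unique, increasing ids, in the same way the
       atomic_counter field of __itt_global is meant to be bumped. */
    long next_id(void)
    {
        return __itt_interlocked_increment(&g_counter);
    }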
*/ -#else /* UNICODE || _UNICODE */ - void* nameW; -#endif /* UNICODE || _UNICODE */ - TIDT tid; - __itt_thread_state state; /*!< Thread state (paused or normal) */ - int extra1; /*!< Reserved to the runtime */ - void* extra2; /*!< Reserved to the runtime */ - struct ___itt_thread_info* next; -} __itt_thread_info; - -#include "ittnotify_types.h" /* For __itt_group_id definition */ - -typedef struct ___itt_api_info_20101001 -{ - const char* name; - void** func_ptr; - void* init_func; - __itt_group_id group; -} __itt_api_info_20101001; - -typedef struct ___itt_api_info -{ - const char* name; - void** func_ptr; - void* init_func; - void* null_func; - __itt_group_id group; -} __itt_api_info; - -typedef struct __itt_counter_info -{ - const char* nameA; /*!< Copy of original name in ASCII. */ -#if defined(UNICODE) || defined(_UNICODE) - const wchar_t* nameW; /*!< Copy of original name in UNICODE. */ -#else /* UNICODE || _UNICODE */ - void* nameW; -#endif /* UNICODE || _UNICODE */ - const char* domainA; /*!< Copy of original name in ASCII. */ -#if defined(UNICODE) || defined(_UNICODE) - const wchar_t* domainW; /*!< Copy of original name in UNICODE. */ -#else /* UNICODE || _UNICODE */ - void* domainW; -#endif /* UNICODE || _UNICODE */ - int type; - long index; - int extra1; /*!< Reserved to the runtime */ - void* extra2; /*!< Reserved to the runtime */ - struct __itt_counter_info* next; -} __itt_counter_info_t; - -struct ___itt_domain; -struct ___itt_string_handle; - -typedef struct ___itt_global -{ - unsigned char magic[8]; - unsigned long version_major; - unsigned long version_minor; - unsigned long version_build; - volatile long api_initialized; - volatile long mutex_initialized; - volatile long atomic_counter; - mutex_t mutex; - lib_t lib; - void* error_handler; - const char** dll_path_ptr; - __itt_api_info* api_list_ptr; - struct ___itt_global* next; - /* Joinable structures below */ - __itt_thread_info* thread_list; - struct ___itt_domain* domain_list; - struct ___itt_string_handle* string_list; - __itt_collection_state state; - __itt_counter_info_t* counter_list; -} __itt_global; - -#pragma pack(pop) - -#define NEW_THREAD_INFO_W(gptr,h,h_tail,t,s,n) { \ - h = (__itt_thread_info*)malloc(sizeof(__itt_thread_info)); \ - if (h != NULL) { \ - h->tid = t; \ - h->nameA = NULL; \ - h->nameW = n ? _wcsdup(n) : NULL; \ - h->state = s; \ - h->extra1 = 0; /* reserved */ \ - h->extra2 = NULL; /* reserved */ \ - h->next = NULL; \ - if (h_tail == NULL) \ - (gptr)->thread_list = h; \ - else \ - h_tail->next = h; \ - } \ -} - -#define NEW_THREAD_INFO_A(gptr,h,h_tail,t,s,n) { \ - h = (__itt_thread_info*)malloc(sizeof(__itt_thread_info)); \ - if (h != NULL) { \ - h->tid = t; \ - h->nameA = n ? __itt_fstrdup(n) : NULL; \ - h->nameW = NULL; \ - h->state = s; \ - h->extra1 = 0; /* reserved */ \ - h->extra2 = NULL; /* reserved */ \ - h->next = NULL; \ - if (h_tail == NULL) \ - (gptr)->thread_list = h; \ - else \ - h_tail->next = h; \ - } \ -} - -#define NEW_DOMAIN_W(gptr,h,h_tail,name) { \ - h = (__itt_domain*)malloc(sizeof(__itt_domain)); \ - if (h != NULL) { \ - h->flags = 1; /* domain is enabled by default */ \ - h->nameA = NULL; \ - h->nameW = name ? 
_wcsdup(name) : NULL; \ - h->extra1 = 0; /* reserved */ \ - h->extra2 = NULL; /* reserved */ \ - h->next = NULL; \ - if (h_tail == NULL) \ - (gptr)->domain_list = h; \ - else \ - h_tail->next = h; \ - } \ -} - -#define NEW_DOMAIN_A(gptr,h,h_tail,name) { \ - h = (__itt_domain*)malloc(sizeof(__itt_domain)); \ - if (h != NULL) { \ - h->flags = 1; /* domain is enabled by default */ \ - h->nameA = name ? __itt_fstrdup(name) : NULL; \ - h->nameW = NULL; \ - h->extra1 = 0; /* reserved */ \ - h->extra2 = NULL; /* reserved */ \ - h->next = NULL; \ - if (h_tail == NULL) \ - (gptr)->domain_list = h; \ - else \ - h_tail->next = h; \ - } \ -} - -#define NEW_STRING_HANDLE_W(gptr,h,h_tail,name) { \ - h = (__itt_string_handle*)malloc(sizeof(__itt_string_handle)); \ - if (h != NULL) { \ - h->strA = NULL; \ - h->strW = name ? _wcsdup(name) : NULL; \ - h->extra1 = 0; /* reserved */ \ - h->extra2 = NULL; /* reserved */ \ - h->next = NULL; \ - if (h_tail == NULL) \ - (gptr)->string_list = h; \ - else \ - h_tail->next = h; \ - } \ -} - -#define NEW_STRING_HANDLE_A(gptr,h,h_tail,name) { \ - h = (__itt_string_handle*)malloc(sizeof(__itt_string_handle)); \ - if (h != NULL) { \ - h->strA = name ? __itt_fstrdup(name) : NULL; \ - h->strW = NULL; \ - h->extra1 = 0; /* reserved */ \ - h->extra2 = NULL; /* reserved */ \ - h->next = NULL; \ - if (h_tail == NULL) \ - (gptr)->string_list = h; \ - else \ - h_tail->next = h; \ - } \ -} - -#define NEW_COUNTER_W(gptr,h,h_tail,name,domain,type) { \ - h = (__itt_counter_info_t*)malloc(sizeof(__itt_counter_info_t)); \ - if (h != NULL) { \ - h->nameA = NULL; \ - h->nameW = name ? _wcsdup(name) : NULL; \ - h->domainA = NULL; \ - h->domainW = name ? _wcsdup(domain) : NULL; \ - h->type = type; \ - h->index = 0; \ - h->next = NULL; \ - if (h_tail == NULL) \ - (gptr)->counter_list = h; \ - else \ - h_tail->next = h; \ - } \ -} - -#define NEW_COUNTER_A(gptr,h,h_tail,name,domain,type) { \ - h = (__itt_counter_info_t*)malloc(sizeof(__itt_counter_info_t)); \ - if (h != NULL) { \ - h->nameA = name ? __itt_fstrdup(name) : NULL; \ - h->nameW = NULL; \ - h->domainA = domain ? __itt_fstrdup(domain) : NULL; \ - h->domainW = NULL; \ - h->type = type; \ - h->index = 0; \ - h->next = NULL; \ - if (h_tail == NULL) \ - (gptr)->counter_list = h; \ - else \ - h_tail->next = h; \ - } \ -} - -#endif /* _ITTNOTIFY_CONFIG_H_ */ Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/thirdparty/ittnotify/ittnotify_config.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/thirdparty/ittnotify/ittnotify_types.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/thirdparty/ittnotify/ittnotify_types.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/thirdparty/ittnotify/ittnotify_types.h (nonexistent) @@ -1,68 +0,0 @@ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
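All of the NEW_*_A/W helpers above share one shape: allocate a node, duplicate the strings, and append at the tail of the matching list in __itt_global. Below is a hand-expanded sketch of NEW_COUNTER_A, written as a function for readability; note, separately, that NEW_COUNTER_W above computes domainW as "name ? _wcsdup(domain) : NULL", i.e. the guard tests name where domain would be expected:

    #include <stdlib.h>
    #include "ittnotify_config.h"

    static void new_counter_a_expanded(__itt_global *gptr, __itt_counter_info_t *h_tail,
                                       const char *name, const char *domain, int type)
    {
        __itt_counter_info_t *h =
            (__itt_counter_info_t*)malloc(sizeof(__itt_counter_info_t));
        if (h != NULL) {
            h->nameA   = name   ? __itt_fstrdup(name)   : NULL;
            h->nameW   = NULL;
            h->domainA = domain ? __itt_fstrdup(domain) : NULL;
            h->domainW = NULL;
            h->type    = type;
            h->index   = 0;
            h->next    = NULL;
            if (h_tail == NULL)
                gptr->counter_list = h;   /* empty list: node becomes the head */
            else
                h_tail->next = h;         /* otherwise link after the old tail */
        }
    }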
-// -//===----------------------------------------------------------------------===// - - -#ifndef _ITTNOTIFY_TYPES_H_ -#define _ITTNOTIFY_TYPES_H_ - -typedef enum ___itt_group_id -{ - __itt_group_none = 0, - __itt_group_legacy = 1<<0, - __itt_group_control = 1<<1, - __itt_group_thread = 1<<2, - __itt_group_mark = 1<<3, - __itt_group_sync = 1<<4, - __itt_group_fsync = 1<<5, - __itt_group_jit = 1<<6, - __itt_group_model = 1<<7, - __itt_group_splitter_min = 1<<7, - __itt_group_counter = 1<<8, - __itt_group_frame = 1<<9, - __itt_group_stitch = 1<<10, - __itt_group_heap = 1<<11, - __itt_group_splitter_max = 1<<12, - __itt_group_structure = 1<<12, - __itt_group_suppress = 1<<13, - __itt_group_arrays = 1<<14, - __itt_group_all = -1 -} __itt_group_id; - -#pragma pack(push, 8) - -typedef struct ___itt_group_list -{ - __itt_group_id id; - const char* name; -} __itt_group_list; - -#pragma pack(pop) - -#define ITT_GROUP_LIST(varname) \ - static __itt_group_list varname[] = { \ - { __itt_group_all, "all" }, \ - { __itt_group_control, "control" }, \ - { __itt_group_thread, "thread" }, \ - { __itt_group_mark, "mark" }, \ - { __itt_group_sync, "sync" }, \ - { __itt_group_fsync, "fsync" }, \ - { __itt_group_jit, "jit" }, \ - { __itt_group_model, "model" }, \ - { __itt_group_counter, "counter" }, \ - { __itt_group_frame, "frame" }, \ - { __itt_group_stitch, "stitch" }, \ - { __itt_group_heap, "heap" }, \ - { __itt_group_structure, "structure" }, \ - { __itt_group_suppress, "suppress" }, \ - { __itt_group_arrays, "arrays" }, \ - { __itt_group_none, NULL } \ - } - -#endif /* _ITTNOTIFY_TYPES_H_ */ Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/thirdparty/ittnotify/ittnotify_types.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/thirdparty/ittnotify/legacy/ittnotify.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/thirdparty/ittnotify/legacy/ittnotify.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/thirdparty/ittnotify/legacy/ittnotify.h (nonexistent) @@ -1,992 +0,0 @@ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
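Group ids in the enum above are single bits (the splitter_min/splitter_max values alias model and structure to mark the range boundaries), so a selection of groups is just their bitwise OR. A sketch:

    #include "ittnotify_types.h"

    void group_selection_demo(void)
    {
        __itt_group_id sel = (__itt_group_id)(__itt_group_sync | __itt_group_mark);
        if (sel & __itt_group_sync) {
            /* sync instrumentation was requested */
        }
    }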
-// -//===----------------------------------------------------------------------===// - -#ifndef _LEGACY_ITTNOTIFY_H_ -#define _LEGACY_ITTNOTIFY_H_ - -/** - * @file - * @brief Legacy User API functions and types - */ - -/** @cond exclude_from_documentation */ -#ifndef ITT_OS_WIN -# define ITT_OS_WIN 1 -#endif /* ITT_OS_WIN */ - -#ifndef ITT_OS_LINUX -# define ITT_OS_LINUX 2 -#endif /* ITT_OS_LINUX */ - -#ifndef ITT_OS_MAC -# define ITT_OS_MAC 3 -#endif /* ITT_OS_MAC */ - -#ifndef ITT_OS_FREEBSD -# define ITT_OS_FREEBSD 4 -#endif /* ITT_OS_FREEBSD */ - -#ifndef ITT_OS -# if defined WIN32 || defined _WIN32 -# define ITT_OS ITT_OS_WIN -# elif defined( __APPLE__ ) && defined( __MACH__ ) -# define ITT_OS ITT_OS_MAC -# elif defined( __FreeBSD__ ) -# define ITT_OS ITT_OS_FREEBSD -# else -# define ITT_OS ITT_OS_LINUX -# endif -#endif /* ITT_OS */ - -#ifndef ITT_PLATFORM_WIN -# define ITT_PLATFORM_WIN 1 -#endif /* ITT_PLATFORM_WIN */ - -#ifndef ITT_PLATFORM_POSIX -# define ITT_PLATFORM_POSIX 2 -#endif /* ITT_PLATFORM_POSIX */ - -#ifndef ITT_PLATFORM_MAC -# define ITT_PLATFORM_MAC 3 -#endif /* ITT_PLATFORM_MAC */ - -#ifndef ITT_PLATFORM_FREEBSD -# define ITT_PLATFORM_FREEBSD 4 -#endif /* ITT_PLATFORM_FREEBSD */ - -#ifndef ITT_PLATFORM -# if ITT_OS==ITT_OS_WIN -# define ITT_PLATFORM ITT_PLATFORM_WIN -# elif ITT_OS==ITT_OS_MAC -# define ITT_PLATFORM ITT_PLATFORM_MAC -# elif ITT_OS==ITT_OS_FREEBSD -# define ITT_PLATFORM ITT_PLATFORM_FREEBSD -# else -# define ITT_PLATFORM ITT_PLATFORM_POSIX -# endif -#endif /* ITT_PLATFORM */ - -#if defined(_UNICODE) && !defined(UNICODE) -#define UNICODE -#endif - -#include -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#include -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#include -#if defined(UNICODE) || defined(_UNICODE) -#include -#endif /* UNICODE || _UNICODE */ -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -#ifndef ITTAPI_CDECL -# if ITT_PLATFORM==ITT_PLATFORM_WIN -# define ITTAPI_CDECL __cdecl -# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -# if defined _M_IX86 || defined __i386__ -# define ITTAPI_CDECL __attribute__ ((cdecl)) -# else /* _M_IX86 || __i386__ */ -# define ITTAPI_CDECL /* actual only on x86 platform */ -# endif /* _M_IX86 || __i386__ */ -# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* ITTAPI_CDECL */ - -#ifndef STDCALL -# if ITT_PLATFORM==ITT_PLATFORM_WIN -# define STDCALL __stdcall -# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -# if defined _M_IX86 || defined __i386__ -# define STDCALL __attribute__ ((stdcall)) -# else /* _M_IX86 || __i386__ */ -# define STDCALL /* supported only on x86 platform */ -# endif /* _M_IX86 || __i386__ */ -# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* STDCALL */ - -#define ITTAPI ITTAPI_CDECL -#define LIBITTAPI ITTAPI_CDECL - -/* TODO: Temporary for compatibility! */ -#define ITTAPI_CALL ITTAPI_CDECL -#define LIBITTAPI_CALL ITTAPI_CDECL - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -/* use __forceinline (VC++ specific) */ -#define ITT_INLINE __forceinline -#define ITT_INLINE_ATTRIBUTE /* nothing */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -/* - * Generally, functions are not inlined unless optimization is specified. - * For functions declared inline, this attribute inlines the function even - * if no optimization level was specified. 
- */ -#ifdef __STRICT_ANSI__ -#define ITT_INLINE static -#define ITT_INLINE_ATTRIBUTE __attribute__((unused)) -#else /* __STRICT_ANSI__ */ -#define ITT_INLINE static inline -#define ITT_INLINE_ATTRIBUTE __attribute__((always_inline, unused)) -#endif /* __STRICT_ANSI__ */ -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -/** @endcond */ - -/** @cond exclude_from_documentation */ -/* Helper macro for joining tokens */ -#define ITT_JOIN_AUX(p,n) p##n -#define ITT_JOIN(p,n) ITT_JOIN_AUX(p,n) - -#ifdef ITT_MAJOR -#undef ITT_MAJOR -#endif -#ifdef ITT_MINOR -#undef ITT_MINOR -#endif -#define ITT_MAJOR 3 -#define ITT_MINOR 0 - -/* Standard versioning of a token with major and minor version numbers */ -#define ITT_VERSIONIZE(x) \ - ITT_JOIN(x, \ - ITT_JOIN(_, \ - ITT_JOIN(ITT_MAJOR, \ - ITT_JOIN(_, ITT_MINOR)))) - -#ifndef INTEL_ITTNOTIFY_PREFIX -# define INTEL_ITTNOTIFY_PREFIX __itt_ -#endif /* INTEL_ITTNOTIFY_PREFIX */ -#ifndef INTEL_ITTNOTIFY_POSTFIX -# define INTEL_ITTNOTIFY_POSTFIX _ptr_ -#endif /* INTEL_ITTNOTIFY_POSTFIX */ - -#define ITTNOTIFY_NAME_AUX(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n) -#define ITTNOTIFY_NAME(n) ITT_VERSIONIZE(ITTNOTIFY_NAME_AUX(ITT_JOIN(n,INTEL_ITTNOTIFY_POSTFIX))) - -#define ITTNOTIFY_VOID(n) (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n) -#define ITTNOTIFY_DATA(n) (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n) - -#define ITTNOTIFY_VOID_D0(n,d) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d) -#define ITTNOTIFY_VOID_D1(n,d,x) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x) -#define ITTNOTIFY_VOID_D2(n,d,x,y) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y) -#define ITTNOTIFY_VOID_D3(n,d,x,y,z) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z) -#define ITTNOTIFY_VOID_D4(n,d,x,y,z,a) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a) -#define ITTNOTIFY_VOID_D5(n,d,x,y,z,a,b) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b) -#define ITTNOTIFY_VOID_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c) -#define ITTNOTIFY_DATA_D0(n,d) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d) -#define ITTNOTIFY_DATA_D1(n,d,x) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x) -#define ITTNOTIFY_DATA_D2(n,d,x,y) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y) -#define ITTNOTIFY_DATA_D3(n,d,x,y,z) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z) -#define ITTNOTIFY_DATA_D4(n,d,x,y,z,a) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a) -#define ITTNOTIFY_DATA_D5(n,d,x,y,z,a,b) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b) -#define ITTNOTIFY_DATA_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 
0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c) - -#ifdef ITT_STUB -#undef ITT_STUB -#endif -#ifdef ITT_STUBV -#undef ITT_STUBV -#endif -#define ITT_STUBV(api,type,name,args) \ - typedef type (api* ITT_JOIN(ITTNOTIFY_NAME(name),_t)) args; \ - extern ITT_JOIN(ITTNOTIFY_NAME(name),_t) ITTNOTIFY_NAME(name); -#define ITT_STUB ITT_STUBV -/** @endcond */ - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -/** - * @defgroup legacy Legacy API - * @{ - * @} - */ - -/** - * @defgroup legacy_control Collection Control - * @ingroup legacy - * General behavior: application continues to run, but no profiling information is being collected - * - * Pausing occurs not only for the current thread but for all process as well as spawned processes - * - Intel(R) Parallel Inspector and Intel(R) Inspector XE: - * - Does not analyze or report errors that involve memory access. - * - Other errors are reported as usual. Pausing data collection in - * Intel(R) Parallel Inspector and Intel(R) Inspector XE - * only pauses tracing and analyzing memory access. - * It does not pause tracing or analyzing threading APIs. - * . - * - Intel(R) Parallel Amplifier and Intel(R) VTune(TM) Amplifier XE: - * - Does continue to record when new threads are started. - * . - * - Other effects: - * - Possible reduction of runtime overhead. - * . - * @{ - */ -#ifndef _ITTNOTIFY_H_ -/** @brief Pause collection */ -void ITTAPI __itt_pause(void); -/** @brief Resume collection */ -void ITTAPI __itt_resume(void); -/** @brief Detach collection */ -void ITTAPI __itt_detach(void); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, pause, (void)) -ITT_STUBV(ITTAPI, void, resume, (void)) -ITT_STUBV(ITTAPI, void, detach, (void)) -#define __itt_pause ITTNOTIFY_VOID(pause) -#define __itt_pause_ptr ITTNOTIFY_NAME(pause) -#define __itt_resume ITTNOTIFY_VOID(resume) -#define __itt_resume_ptr ITTNOTIFY_NAME(resume) -#define __itt_detach ITTNOTIFY_VOID(detach) -#define __itt_detach_ptr ITTNOTIFY_NAME(detach) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_pause() -#define __itt_pause_ptr 0 -#define __itt_resume() -#define __itt_resume_ptr 0 -#define __itt_detach() -#define __itt_detach_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_pause_ptr 0 -#define __itt_resume_ptr 0 -#define __itt_detach_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -#endif /* _ITTNOTIFY_H_ */ -/** @} legacy_control group */ - -/** - * @defgroup legacy_threads Threads - * @ingroup legacy - * Threads group - * @warning Legacy API - * @{ - */ -/** - * @deprecated Legacy API - * @brief Set name to be associated with thread in analysis GUI. 
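With the default prefix __itt_ and postfix _ptr_ plus the 3/0 version tokens defined above, the name plumbing expands as follows; this is a worked expansion, not code from the header:

    /* ITT_JOIN(pause, _ptr_)              ->  pause_ptr_
     * ITTNOTIFY_NAME_AUX(pause_ptr_)      ->  __itt_pause_ptr_
     * ITT_VERSIONIZE(__itt_pause_ptr_)    ->  __itt_pause_ptr__3_0
     *
     * So "#define __itt_pause ITTNOTIFY_VOID(pause)" makes a call site
     * __itt_pause() expand to
     *
     *     (!__itt_pause_ptr__3_0) ? (void)0 : __itt_pause_ptr__3_0()
     *
     * i.e. every entry point is a no-op until a collector stores a function
     * pointer into the versioned slot. */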
- * @return __itt_err upon failure (name or namelen being null,name and namelen mismatched) - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -int LIBITTAPI __itt_thr_name_setA(const char *name, int namelen); -int LIBITTAPI __itt_thr_name_setW(const wchar_t *name, int namelen); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_thr_name_set __itt_thr_name_setW -# define __itt_thr_name_set_ptr __itt_thr_name_setW_ptr -#else -# define __itt_thr_name_set __itt_thr_name_setA -# define __itt_thr_name_set_ptr __itt_thr_name_setA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -int LIBITTAPI __itt_thr_name_set(const char *name, int namelen); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(LIBITTAPI, int, thr_name_setA, (const char *name, int namelen)) -ITT_STUB(LIBITTAPI, int, thr_name_setW, (const wchar_t *name, int namelen)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(LIBITTAPI, int, thr_name_set, (const char *name, int namelen)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_thr_name_setA ITTNOTIFY_DATA(thr_name_setA) -#define __itt_thr_name_setA_ptr ITTNOTIFY_NAME(thr_name_setA) -#define __itt_thr_name_setW ITTNOTIFY_DATA(thr_name_setW) -#define __itt_thr_name_setW_ptr ITTNOTIFY_NAME(thr_name_setW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_thr_name_set ITTNOTIFY_DATA(thr_name_set) -#define __itt_thr_name_set_ptr ITTNOTIFY_NAME(thr_name_set) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_thr_name_setA(name, namelen) -#define __itt_thr_name_setA_ptr 0 -#define __itt_thr_name_setW(name, namelen) -#define __itt_thr_name_setW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_thr_name_set(name, namelen) -#define __itt_thr_name_set_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_thr_name_setA_ptr 0 -#define __itt_thr_name_setW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_thr_name_set_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @deprecated Legacy API - * @brief Mark current thread as ignored from this point on, for the duration of its existence. 
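A sketch of naming the current thread through the legacy entry point; the POSIX spelling is shown, and on Windows the UNICODE macro selects the A or W variant:

    #include <string.h>
    #include "legacy/ittnotify.h"

    void name_current_thread(void)
    {
        const char *name = "worker-0";                       /* illustrative name */
        (void)__itt_thr_name_set(name, (int)strlen(name));   /* __itt_err on failure */
    }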
- */ -void LIBITTAPI __itt_thr_ignore(void); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(LIBITTAPI, void, thr_ignore, (void)) -#define __itt_thr_ignore ITTNOTIFY_VOID(thr_ignore) -#define __itt_thr_ignore_ptr ITTNOTIFY_NAME(thr_ignore) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_thr_ignore() -#define __itt_thr_ignore_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_thr_ignore_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} legacy_threads group */ - -/** - * @defgroup legacy_sync Synchronization - * @ingroup legacy - * Synchronization group - * @warning Legacy API - * @{ - */ -/** - * @hideinitializer - * @brief possible value of attribute argument for sync object type - */ -#define __itt_attr_barrier 1 - -/** - * @hideinitializer - * @brief possible value of attribute argument for sync object type - */ -#define __itt_attr_mutex 2 - -/** - * @deprecated Legacy API - * @brief Assign a name to a sync object using char or Unicode string - * @param[in] addr - pointer to the sync object. You should use a real pointer to your object - * to make sure that the values don't clash with other object addresses - * @param[in] objtype - null-terminated object type string. If NULL is passed, the object will - * be assumed to be of generic "User Synchronization" type - * @param[in] objname - null-terminated object name string. If NULL, no name will be assigned - * to the object -- you can use the __itt_sync_rename call later to assign - * the name - * @param[in] attribute - one of [#__itt_attr_barrier, #__itt_attr_mutex] values which defines the - * exact semantics of how prepare/acquired/releasing calls work. - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -void ITTAPI __itt_sync_set_nameA(void *addr, const char *objtype, const char *objname, int attribute); -void ITTAPI __itt_sync_set_nameW(void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_sync_set_name __itt_sync_set_nameW -# define __itt_sync_set_name_ptr __itt_sync_set_nameW_ptr -#else /* UNICODE */ -# define __itt_sync_set_name __itt_sync_set_nameA -# define __itt_sync_set_name_ptr __itt_sync_set_nameA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -void ITTAPI __itt_sync_set_name(void *addr, const char* objtype, const char* objname, int attribute); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUBV(ITTAPI, void, sync_set_nameA, (void *addr, const char *objtype, const char *objname, int attribute)) -ITT_STUBV(ITTAPI, void, sync_set_nameW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, sync_set_name, (void *addr, const char *objtype, const char *objname, int attribute)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_sync_set_nameA ITTNOTIFY_VOID(sync_set_nameA) -#define __itt_sync_set_nameA_ptr ITTNOTIFY_NAME(sync_set_nameA) -#define __itt_sync_set_nameW ITTNOTIFY_VOID(sync_set_nameW) -#define __itt_sync_set_nameW_ptr ITTNOTIFY_NAME(sync_set_nameW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_sync_set_name ITTNOTIFY_VOID(sync_set_name) -#define __itt_sync_set_name_ptr ITTNOTIFY_NAME(sync_set_name) -#endif /* 
ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_sync_set_nameA(addr, objtype, objname, attribute) -#define __itt_sync_set_nameA_ptr 0 -#define __itt_sync_set_nameW(addr, objtype, objname, attribute) -#define __itt_sync_set_nameW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_sync_set_name(addr, objtype, objname, attribute) -#define __itt_sync_set_name_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_sync_set_nameA_ptr 0 -#define __itt_sync_set_nameW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_sync_set_name_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @deprecated Legacy API - * @brief Assign a name and type to a sync object using char or Unicode string - * @param[in] addr - pointer to the sync object. You should use a real pointer to your object - * to make sure that the values don't clash with other object addresses - * @param[in] objtype - null-terminated object type string. If NULL is passed, the object will - * be assumed to be of generic "User Synchronization" type - * @param[in] objname - null-terminated object name string. If NULL, no name will be assigned - * to the object -- you can use the __itt_sync_rename call later to assign - * the name - * @param[in] typelen, namelen - a length of string for appropriate objtype and objname parameter - * @param[in] attribute - one of [#__itt_attr_barrier, #__itt_attr_mutex] values which defines the - * exact semantics of how prepare/acquired/releasing calls work. - * @return __itt_err upon failure (name or namelen being null,name and namelen mismatched) - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -int LIBITTAPI __itt_notify_sync_nameA(void *addr, const char *objtype, int typelen, const char *objname, int namelen, int attribute); -int LIBITTAPI __itt_notify_sync_nameW(void *addr, const wchar_t *objtype, int typelen, const wchar_t *objname, int namelen, int attribute); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_notify_sync_name __itt_notify_sync_nameW -#else -# define __itt_notify_sync_name __itt_notify_sync_nameA -#endif -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -int LIBITTAPI __itt_notify_sync_name(void *addr, const char *objtype, int typelen, const char *objname, int namelen, int attribute); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(LIBITTAPI, int, notify_sync_nameA, (void *addr, const char *objtype, int typelen, const char *objname, int namelen, int attribute)) -ITT_STUB(LIBITTAPI, int, notify_sync_nameW, (void *addr, const wchar_t *objtype, int typelen, const wchar_t *objname, int namelen, int attribute)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(LIBITTAPI, int, notify_sync_name, (void *addr, const char *objtype, int typelen, const char *objname, int namelen, int attribute)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_notify_sync_nameA ITTNOTIFY_DATA(notify_sync_nameA) -#define __itt_notify_sync_nameA_ptr ITTNOTIFY_NAME(notify_sync_nameA) -#define __itt_notify_sync_nameW ITTNOTIFY_DATA(notify_sync_nameW) -#define __itt_notify_sync_nameW_ptr ITTNOTIFY_NAME(notify_sync_nameW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN 
*/ -#define __itt_notify_sync_name ITTNOTIFY_DATA(notify_sync_name) -#define __itt_notify_sync_name_ptr ITTNOTIFY_NAME(notify_sync_name) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_notify_sync_nameA(addr, objtype, typelen, objname, namelen, attribute) -#define __itt_notify_sync_nameA_ptr 0 -#define __itt_notify_sync_nameW(addr, objtype, typelen, objname, namelen, attribute) -#define __itt_notify_sync_nameW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_notify_sync_name(addr, objtype, typelen, objname, namelen, attribute) -#define __itt_notify_sync_name_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_notify_sync_nameA_ptr 0 -#define __itt_notify_sync_nameW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_notify_sync_name_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @deprecated Legacy API - * @brief Enter spin loop on user-defined sync object - */ -void LIBITTAPI __itt_notify_sync_prepare(void* addr); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(LIBITTAPI, void, notify_sync_prepare, (void *addr)) -#define __itt_notify_sync_prepare ITTNOTIFY_VOID(notify_sync_prepare) -#define __itt_notify_sync_prepare_ptr ITTNOTIFY_NAME(notify_sync_prepare) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_notify_sync_prepare(addr) -#define __itt_notify_sync_prepare_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_notify_sync_prepare_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @deprecated Legacy API - * @brief Quit spin loop without acquiring spin object - */ -void LIBITTAPI __itt_notify_sync_cancel(void *addr); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(LIBITTAPI, void, notify_sync_cancel, (void *addr)) -#define __itt_notify_sync_cancel ITTNOTIFY_VOID(notify_sync_cancel) -#define __itt_notify_sync_cancel_ptr ITTNOTIFY_NAME(notify_sync_cancel) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_notify_sync_cancel(addr) -#define __itt_notify_sync_cancel_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_notify_sync_cancel_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @deprecated Legacy API - * @brief Successful spin loop completion (sync object acquired) - */ -void LIBITTAPI __itt_notify_sync_acquired(void *addr); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(LIBITTAPI, void, notify_sync_acquired, (void *addr)) -#define __itt_notify_sync_acquired ITTNOTIFY_VOID(notify_sync_acquired) -#define __itt_notify_sync_acquired_ptr ITTNOTIFY_NAME(notify_sync_acquired) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_notify_sync_acquired(addr) -#define __itt_notify_sync_acquired_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_notify_sync_acquired_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @deprecated Legacy API - * @brief Start sync object releasing code. Is called before the lock release call. 
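Taken together, prepare/cancel/acquired/releasing describe the life cycle of one contended acquisition. A sketch around a homegrown spin lock, where try_lock() and unlock() are hypothetical stand-ins for the application's own primitives and the lock's address identifies the sync object:

    #include "legacy/ittnotify.h"

    extern int  try_lock(void *lock);   /* hypothetical lock primitives */
    extern void unlock(void *lock);

    void with_lock(void *lock)
    {
        __itt_notify_sync_prepare(lock);      /* entering the spin loop */
        while (!try_lock(lock))
            ;                                 /* spinning */
        __itt_notify_sync_acquired(lock);     /* loop ended holding the lock */
        /* ... critical section ... */
        __itt_notify_sync_releasing(lock);    /* called before the release */
        unlock(lock);
    }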
- */ -void LIBITTAPI __itt_notify_sync_releasing(void* addr); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(LIBITTAPI, void, notify_sync_releasing, (void *addr)) -#define __itt_notify_sync_releasing ITTNOTIFY_VOID(notify_sync_releasing) -#define __itt_notify_sync_releasing_ptr ITTNOTIFY_NAME(notify_sync_releasing) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_notify_sync_releasing(addr) -#define __itt_notify_sync_releasing_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_notify_sync_releasing_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} legacy_sync group */ - -#ifndef _ITTNOTIFY_H_ -/** - * @defgroup legacy_events Events - * @ingroup legacy - * Events group - * @{ - */ - -/** @brief user event type */ -typedef int __itt_event; - -/** - * @brief Create an event notification - * @note name or namelen being null/name and namelen not matching, user event feature not enabled - * @return non-zero event identifier upon success and __itt_err otherwise - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -__itt_event LIBITTAPI __itt_event_createA(const char *name, int namelen); -__itt_event LIBITTAPI __itt_event_createW(const wchar_t *name, int namelen); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_event_create __itt_event_createW -# define __itt_event_create_ptr __itt_event_createW_ptr -#else -# define __itt_event_create __itt_event_createA -# define __itt_event_create_ptr __itt_event_createA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -__itt_event LIBITTAPI __itt_event_create(const char *name, int namelen); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(LIBITTAPI, __itt_event, event_createA, (const char *name, int namelen)) -ITT_STUB(LIBITTAPI, __itt_event, event_createW, (const wchar_t *name, int namelen)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(LIBITTAPI, __itt_event, event_create, (const char *name, int namelen)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_event_createA ITTNOTIFY_DATA(event_createA) -#define __itt_event_createA_ptr ITTNOTIFY_NAME(event_createA) -#define __itt_event_createW ITTNOTIFY_DATA(event_createW) -#define __itt_event_createW_ptr ITTNOTIFY_NAME(event_createW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_event_create ITTNOTIFY_DATA(event_create) -#define __itt_event_create_ptr ITTNOTIFY_NAME(event_create) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_event_createA(name, namelen) (__itt_event)0 -#define __itt_event_createA_ptr 0 -#define __itt_event_createW(name, namelen) (__itt_event)0 -#define __itt_event_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_event_create(name, namelen) (__itt_event)0 -#define __itt_event_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_event_createA_ptr 0 -#define __itt_event_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_event_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Record an event occurrence. 
- * @return __itt_err upon failure (invalid event id/user event feature not enabled) - */ -int LIBITTAPI __itt_event_start(__itt_event event); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUB(LIBITTAPI, int, event_start, (__itt_event event)) -#define __itt_event_start ITTNOTIFY_DATA(event_start) -#define __itt_event_start_ptr ITTNOTIFY_NAME(event_start) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_event_start(event) (int)0 -#define __itt_event_start_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_event_start_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Record an event end occurrence. - * @note It is optional if events do not have durations. - * @return __itt_err upon failure (invalid event id/user event feature not enabled) - */ -int LIBITTAPI __itt_event_end(__itt_event event); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUB(LIBITTAPI, int, event_end, (__itt_event event)) -#define __itt_event_end ITTNOTIFY_DATA(event_end) -#define __itt_event_end_ptr ITTNOTIFY_NAME(event_end) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_event_end(event) (int)0 -#define __itt_event_end_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_event_end_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} legacy_events group */ -#endif /* _ITTNOTIFY_H_ */ - -/** - * @defgroup legacy_memory Memory Accesses - * @ingroup legacy - */ - -/** - * @deprecated Legacy API - * @brief Inform the tool of memory accesses on reading - */ -void LIBITTAPI __itt_memory_read(void *addr, size_t size); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(LIBITTAPI, void, memory_read, (void *addr, size_t size)) -#define __itt_memory_read ITTNOTIFY_VOID(memory_read) -#define __itt_memory_read_ptr ITTNOTIFY_NAME(memory_read) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_memory_read(addr, size) -#define __itt_memory_read_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_memory_read_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @deprecated Legacy API - * @brief Inform the tool of memory accesses on writing - */ -void LIBITTAPI __itt_memory_write(void *addr, size_t size); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(LIBITTAPI, void, memory_write, (void *addr, size_t size)) -#define __itt_memory_write ITTNOTIFY_VOID(memory_write) -#define __itt_memory_write_ptr ITTNOTIFY_NAME(memory_write) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_memory_write(addr, size) -#define __itt_memory_write_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_memory_write_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @deprecated Legacy API - * @brief Inform the tool of memory accesses on updating - */ -void LIBITTAPI __itt_memory_update(void *address, size_t size); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(LIBITTAPI, void, memory_update, (void *addr, size_t size)) -#define __itt_memory_update ITTNOTIFY_VOID(memory_update) -#define __itt_memory_update_ptr ITTNOTIFY_NAME(memory_update) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_memory_update(addr, size) 
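A sketch combining the user-event and legacy memory hooks above; caching the event id in a static and treating 0 as "not yet created" are sketch-level assumptions:

    #include <string.h>
    #include "legacy/ittnotify.h"

    void transaction(char *buf, size_t len)
    {
        static __itt_event ev = 0;
        if (ev == 0)
            ev = __itt_event_create("transaction", (int)strlen("transaction"));
        (void)__itt_event_start(ev);
        memset(buf, 0, len);                   /* the work */
        __itt_memory_write(buf, len);          /* report the write to the tool */
        (void)__itt_event_end(ev);             /* optional for duration-less events */
    }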
-#define __itt_memory_update_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_memory_update_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} legacy_memory group */ - -/** - * @defgroup legacy_state Thread and Object States - * @ingroup legacy - */ - -/** @brief state type */ -typedef int __itt_state_t; - -/** @cond exclude_from_documentation */ -typedef enum __itt_obj_state { - __itt_obj_state_err = 0, - __itt_obj_state_clr = 1, - __itt_obj_state_set = 2, - __itt_obj_state_use = 3 -} __itt_obj_state_t; - -typedef enum __itt_thr_state { - __itt_thr_state_err = 0, - __itt_thr_state_clr = 1, - __itt_thr_state_set = 2 -} __itt_thr_state_t; - -typedef enum __itt_obj_prop { - __itt_obj_prop_watch = 1, - __itt_obj_prop_ignore = 2, - __itt_obj_prop_sharable = 3 -} __itt_obj_prop_t; - -typedef enum __itt_thr_prop { - __itt_thr_prop_quiet = 1 -} __itt_thr_prop_t; -/** @endcond */ - -/** - * @deprecated Legacy API - * @brief managing thread and object states - */ -__itt_state_t LIBITTAPI __itt_state_get(void); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUB(ITTAPI, __itt_state_t, state_get, (void)) -#define __itt_state_get ITTNOTIFY_DATA(state_get) -#define __itt_state_get_ptr ITTNOTIFY_NAME(state_get) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_state_get(void) (__itt_state_t)0 -#define __itt_state_get_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_state_get_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @deprecated Legacy API - * @brief managing thread and object states - */ -__itt_state_t LIBITTAPI __itt_state_set(__itt_state_t s); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUB(ITTAPI, __itt_state_t, state_set, (__itt_state_t s)) -#define __itt_state_set ITTNOTIFY_DATA(state_set) -#define __itt_state_set_ptr ITTNOTIFY_NAME(state_set) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_state_set(s) (__itt_state_t)0 -#define __itt_state_set_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_state_set_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @deprecated Legacy API - * @brief managing thread and object modes - */ -__itt_thr_state_t LIBITTAPI __itt_thr_mode_set(__itt_thr_prop_t p, __itt_thr_state_t s); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUB(ITTAPI, __itt_thr_state_t, thr_mode_set, (__itt_thr_prop_t p, __itt_thr_state_t s)) -#define __itt_thr_mode_set ITTNOTIFY_DATA(thr_mode_set) -#define __itt_thr_mode_set_ptr ITTNOTIFY_NAME(thr_mode_set) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_thr_mode_set(p, s) (__itt_thr_state_t)0 -#define __itt_thr_mode_set_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_thr_mode_set_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @deprecated Legacy API - * @brief managing thread and object modes - */ -__itt_obj_state_t LIBITTAPI __itt_obj_mode_set(__itt_obj_prop_t p, __itt_obj_state_t s); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUB(ITTAPI, __itt_obj_state_t, obj_mode_set, (__itt_obj_prop_t p, __itt_obj_state_t s)) -#define __itt_obj_mode_set ITTNOTIFY_DATA(obj_mode_set) -#define __itt_obj_mode_set_ptr ITTNOTIFY_NAME(obj_mode_set) -#else /* 
INTEL_NO_ITTNOTIFY_API */ -#define __itt_obj_mode_set(p, s) (__itt_obj_state_t)0 -#define __itt_obj_mode_set_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_obj_mode_set_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} legacy_state group */ - -/** - * @defgroup frames Frames - * @ingroup legacy - * Frames group - * @{ - */ -/** - * @brief opaque structure for frame identification - */ -typedef struct __itt_frame_t *__itt_frame; - -/** - * @brief Create a global frame with given domain - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -__itt_frame ITTAPI __itt_frame_createA(const char *domain); -__itt_frame ITTAPI __itt_frame_createW(const wchar_t *domain); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_frame_create __itt_frame_createW -# define __itt_frame_create_ptr __itt_frame_createW_ptr -#else /* UNICODE */ -# define __itt_frame_create __itt_frame_createA -# define __itt_frame_create_ptr __itt_frame_createA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -__itt_frame ITTAPI __itt_frame_create(const char *domain); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_frame, frame_createA, (const char *domain)) -ITT_STUB(ITTAPI, __itt_frame, frame_createW, (const wchar_t *domain)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_frame, frame_create, (const char *domain)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_frame_createA ITTNOTIFY_DATA(frame_createA) -#define __itt_frame_createA_ptr ITTNOTIFY_NAME(frame_createA) -#define __itt_frame_createW ITTNOTIFY_DATA(frame_createW) -#define __itt_frame_createW_ptr ITTNOTIFY_NAME(frame_createW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_frame_create ITTNOTIFY_DATA(frame_create) -#define __itt_frame_create_ptr ITTNOTIFY_NAME(frame_create) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_frame_createA(domain) -#define __itt_frame_createA_ptr 0 -#define __itt_frame_createW(domain) -#define __itt_frame_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_frame_create(domain) -#define __itt_frame_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_frame_createA_ptr 0 -#define __itt_frame_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_frame_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** @brief Record a frame begin occurrence. */ -void ITTAPI __itt_frame_begin(__itt_frame frame); -/** @brief Record a frame end occurrence. 
*/ -void ITTAPI __itt_frame_end (__itt_frame frame); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, frame_begin, (__itt_frame frame)) -ITT_STUBV(ITTAPI, void, frame_end, (__itt_frame frame)) -#define __itt_frame_begin ITTNOTIFY_VOID(frame_begin) -#define __itt_frame_begin_ptr ITTNOTIFY_NAME(frame_begin) -#define __itt_frame_end ITTNOTIFY_VOID(frame_end) -#define __itt_frame_end_ptr ITTNOTIFY_NAME(frame_end) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_frame_begin(frame) -#define __itt_frame_begin_ptr 0 -#define __itt_frame_end(frame) -#define __itt_frame_end_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_frame_begin_ptr 0 -#define __itt_frame_end_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} frames group */ - -#ifdef __cplusplus -} -#endif /* __cplusplus */ - -#endif /* _LEGACY_ITTNOTIFY_H_ */ Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/thirdparty/ittnotify/legacy/ittnotify.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/thirdparty/ittnotify/ittnotify_static.c =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/thirdparty/ittnotify/ittnotify_static.c (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/thirdparty/ittnotify/ittnotify_static.c (nonexistent) @@ -1,1202 +0,0 @@ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#include "kmp_config.h" -#include "ittnotify_config.h" - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#if defined(__MINGW32__) -#include -#else -#define PATH_MAX 512 -#endif -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -#include -#include -#include -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#include -#include -#include -#include - -#define INTEL_NO_MACRO_BODY -#define INTEL_ITTNOTIFY_API_PRIVATE -#include "ittnotify.h" -#include "legacy/ittnotify.h" - -#if KMP_MSVC_COMPAT -#include "disable_warnings.h" -#endif - -static const char api_version[] = API_VERSION "\0\n@(#) $Revision: 481659 $\n"; - -#define _N_(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n) - -#if ITT_OS==ITT_OS_WIN -static const char* ittnotify_lib_name = "libittnotify.dll"; -#elif ITT_OS==ITT_OS_LINUX || ITT_OS==ITT_OS_FREEBSD -static const char* ittnotify_lib_name = "libittnotify.so"; -#elif ITT_OS==ITT_OS_MAC -static const char* ittnotify_lib_name = "libittnotify.dylib"; -#else -#error Unsupported or unknown OS. -#endif - -#ifdef __ANDROID__ -#include -#include -#include -#include -#include -#include -#include - -#ifdef ITT_ANDROID_LOG - #define ITT_ANDROID_LOG_TAG "INTEL_VTUNE_USERAPI" - #define ITT_ANDROID_LOGI(...) ((void)__android_log_print(ANDROID_LOG_INFO, ITT_ANDROID_LOG_TAG, __VA_ARGS__)) - #define ITT_ANDROID_LOGW(...) 
((void)__android_log_print(ANDROID_LOG_WARN, ITT_ANDROID_LOG_TAG, __VA_ARGS__)) - #define ITT_ANDROID_LOGE(...) ((void)__android_log_print(ANDROID_LOG_ERROR,ITT_ANDROID_LOG_TAG, __VA_ARGS__)) - #define ITT_ANDROID_LOGD(...) ((void)__android_log_print(ANDROID_LOG_DEBUG,ITT_ANDROID_LOG_TAG, __VA_ARGS__)) -#else - #define ITT_ANDROID_LOGI(...) - #define ITT_ANDROID_LOGW(...) - #define ITT_ANDROID_LOGE(...) - #define ITT_ANDROID_LOGD(...) -#endif - -/* default location of userapi collector on Android */ -#define ANDROID_ITTNOTIFY_DEFAULT_PATH_MASK(x) "/data/data/com.intel.vtune/perfrun/lib" \ - #x "/runtime/libittnotify.so" - -#if ITT_ARCH==ITT_ARCH_IA32 || ITT_ARCH==ITT_ARCH_ARM -#define ANDROID_ITTNOTIFY_DEFAULT_PATH ANDROID_ITTNOTIFY_DEFAULT_PATH_MASK(32) -#else -#define ANDROID_ITTNOTIFY_DEFAULT_PATH ANDROID_ITTNOTIFY_DEFAULT_PATH_MASK(64) -#endif - -#endif - -#ifndef PATH_MAX -#define PATH_MAX 4096 -#endif - - -#ifndef LIB_VAR_NAME -#if ITT_ARCH==ITT_ARCH_IA32 || ITT_ARCH==ITT_ARCH_ARM || ITT_ARCH==ITT_ARCH_MIPS -#define LIB_VAR_NAME INTEL_LIBITTNOTIFY32 -#else -#define LIB_VAR_NAME INTEL_LIBITTNOTIFY64 -#endif -#endif /* LIB_VAR_NAME */ - -#define ITT_MUTEX_INIT_AND_LOCK(p) { \ - if (PTHREAD_SYMBOLS) \ - { \ - if (!p.mutex_initialized) \ - { \ - if (__itt_interlocked_increment(&p.atomic_counter) == 1) \ - { \ - __itt_mutex_init(&p.mutex); \ - p.mutex_initialized = 1; \ - } \ - else \ - while (!p.mutex_initialized) \ - __itt_thread_yield(); \ - } \ - __itt_mutex_lock(&p.mutex); \ - } \ -} - -typedef int (__itt_init_ittlib_t)(const char*, __itt_group_id); - -/* this define used to control initialization function name. */ -#ifndef __itt_init_ittlib_name -ITT_EXTERN_C int _N_(init_ittlib)(const char*, __itt_group_id); -static __itt_init_ittlib_t* __itt_init_ittlib_ptr = _N_(init_ittlib); -#define __itt_init_ittlib_name __itt_init_ittlib_ptr -#endif /* __itt_init_ittlib_name */ - -typedef void (__itt_fini_ittlib_t)(void); - -/* this define used to control finalization function name. 
*/ -#ifndef __itt_fini_ittlib_name -ITT_EXTERN_C void _N_(fini_ittlib)(void); -static __itt_fini_ittlib_t* __itt_fini_ittlib_ptr = _N_(fini_ittlib); -#define __itt_fini_ittlib_name __itt_fini_ittlib_ptr -#endif /* __itt_fini_ittlib_name */ - -/* building pointers to imported funcs */ -#undef ITT_STUBV -#undef ITT_STUB -#define ITT_STUB(api,type,name,args,params,ptr,group,format) \ -static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\ -typedef type api ITT_JOIN(_N_(name),_t) args; \ -ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END \ -static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args \ -{ \ - __itt_init_ittlib_name(NULL, __itt_group_all); \ - if (ITTNOTIFY_NAME(name) && ITTNOTIFY_NAME(name) != ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init))) \ - return ITTNOTIFY_NAME(name) params; \ - else \ - return (type)0; \ -} - -#define ITT_STUBV(api,type,name,args,params,ptr,group,format) \ -static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\ -typedef type api ITT_JOIN(_N_(name),_t) args; \ -ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END \ -static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args \ -{ \ - __itt_init_ittlib_name(NULL, __itt_group_all); \ - if (ITTNOTIFY_NAME(name) && ITTNOTIFY_NAME(name) != ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init))) \ - ITTNOTIFY_NAME(name) params; \ - else \ - return; \ -} - -#undef __ITT_INTERNAL_INIT -#include "ittnotify_static.h" - -#undef ITT_STUB -#undef ITT_STUBV -#define ITT_STUB(api,type,name,args,params,ptr,group,format) \ -static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\ -typedef type api ITT_JOIN(_N_(name),_t) args; \ -ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END - -#define ITT_STUBV(api,type,name,args,params,ptr,group,format) \ -static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\ -typedef type api ITT_JOIN(_N_(name),_t) args; \ -ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END - -#define __ITT_INTERNAL_INIT -#include "ittnotify_static.h" -#undef __ITT_INTERNAL_INIT - -ITT_GROUP_LIST(group_list); - -#pragma pack(push, 8) - -typedef struct ___itt_group_alias -{ - const char* env_var; - __itt_group_id groups; -} __itt_group_alias; - -static __itt_group_alias group_alias[] = { - { "KMP_FOR_TPROFILE", (__itt_group_id)(__itt_group_control | __itt_group_thread | __itt_group_sync | __itt_group_mark) }, - { "KMP_FOR_TCHECK", (__itt_group_id)(__itt_group_control | __itt_group_thread | __itt_group_sync | __itt_group_fsync | __itt_group_mark | __itt_group_suppress) }, - { NULL, (__itt_group_none) }, - { api_version, (__itt_group_none) } /* !!! Just to avoid unused code elimination !!! 
*/ -}; - -#pragma pack(pop) - -#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT -#pragma warning(push) -#pragma warning(disable: 4054) /* warning C4054: 'type cast' : from function pointer 'XXX' to data pointer 'void *' */ -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -static __itt_api_info api_list[] = { -/* Define functions with static implementation */ -#undef ITT_STUB -#undef ITT_STUBV -#define ITT_STUB(api,type,name,args,params,nameindll,group,format) { ITT_TO_STR(ITT_JOIN(__itt_,nameindll)), (void**)(void*)&ITTNOTIFY_NAME(name), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), (__itt_group_id)(group)}, -#define ITT_STUBV ITT_STUB -#define __ITT_INTERNAL_INIT -#include "ittnotify_static.h" -#undef __ITT_INTERNAL_INIT -/* Define functions without static implementation */ -#undef ITT_STUB -#undef ITT_STUBV -#define ITT_STUB(api,type,name,args,params,nameindll,group,format) {ITT_TO_STR(ITT_JOIN(__itt_,nameindll)), (void**)(void*)&ITTNOTIFY_NAME(name), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), NULL, (__itt_group_id)(group)}, -#define ITT_STUBV ITT_STUB -#include "ittnotify_static.h" - {NULL, NULL, NULL, NULL, __itt_group_none} -}; - -#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT -#pragma warning(pop) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -static const char dll_path[PATH_MAX] = { 0 }; - -/* static part descriptor which handles. all notification api attributes. */ -__itt_global _N_(_ittapi_global) = { - ITT_MAGIC, /* identification info */ - ITT_MAJOR, ITT_MINOR, API_VERSION_BUILD, /* version info */ - 0, /* api_initialized */ - 0, /* mutex_initialized */ - 0, /* atomic_counter */ - MUTEX_INITIALIZER, /* mutex */ - NULL, /* dynamic library handle */ - NULL, /* error_handler */ - (const char**)&dll_path, /* dll_path_ptr */ - (__itt_api_info*)&api_list, /* api_list_ptr */ - NULL, /* next __itt_global */ - NULL, /* thread_list */ - NULL, /* domain_list */ - NULL, /* string_list */ - __itt_collection_normal, /* collection state */ - NULL /* counter_list */ -}; - -typedef void (__itt_api_init_t)(__itt_global*, __itt_group_id); -typedef void (__itt_api_fini_t)(__itt_global*); - -/* ========================================================================= */ - -#ifdef ITT_NOTIFY_EXT_REPORT -ITT_EXTERN_C void _N_(error_handler)(__itt_error_code, va_list args); -#endif /* ITT_NOTIFY_EXT_REPORT */ - -#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT -#pragma warning(push) -#pragma warning(disable: 4055) /* warning C4055: 'type cast' : from data pointer 'void *' to function pointer 'XXX' */ -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -static void __itt_report_error(unsigned code_arg, ...) -{ - va_list args; - va_start(args, code_arg); - - // We use unsigned for the code argument and explicitly cast it here to the - // right enumerator because variadic functions are not compatible with - // default promotions. 
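/* Illustrative sketch, kept as a comment so the surrounding deleted code is
 * left intact: the rule the note above relies on is C11 7.16.1.4 -- the last
 * named parameter of a variadic function must have a type that the default
 * argument promotions leave unchanged, and an enumeration type may be
 * implemented as a narrower type. Callers can still pass the enumerator
 * directly, since it converts implicitly to unsigned:
 *
 *     __itt_report_error((unsigned)__itt_error_no_symbol,
 *                        "libittnotify.so", "__itt_pause");
 *
 * and the callee casts the promoted unsigned value back to __itt_error_code,
 * as the next statement does.
 */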
- __itt_error_code code = (__itt_error_code)code_arg; - - if (_N_(_ittapi_global).error_handler != NULL) - { - __itt_error_handler_t* handler = (__itt_error_handler_t*)(size_t)_N_(_ittapi_global).error_handler; - handler(code, args); - } -#ifdef ITT_NOTIFY_EXT_REPORT - _N_(error_handler)(code, args); -#endif /* ITT_NOTIFY_EXT_REPORT */ - va_end(args); -} - -#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT -#pragma warning(pop) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createW),_init))(const wchar_t* name) -{ - __itt_domain *h_tail = NULL, *h = NULL; - - if (name == NULL) - { - return NULL; - } - - ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); - if (_N_(_ittapi_global).api_initialized) - { - if (ITTNOTIFY_NAME(domain_createW) && ITTNOTIFY_NAME(domain_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createW),_init))) - { - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return ITTNOTIFY_NAME(domain_createW)(name); - } - } - for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL; h_tail = h, h = h->next) - { - if (h->nameW != NULL && !wcscmp(h->nameW, name)) break; - } - if (h == NULL) - { - NEW_DOMAIN_W(&_N_(_ittapi_global),h,h_tail,name); - } - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return h; -} - -static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createA),_init))(const char* name) -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_create),_init))(const char* name) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -{ - __itt_domain *h_tail = NULL, *h = NULL; - - if (name == NULL) - { - return NULL; - } - - ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); - if (_N_(_ittapi_global).api_initialized) - { -#if ITT_PLATFORM==ITT_PLATFORM_WIN - if (ITTNOTIFY_NAME(domain_createA) && ITTNOTIFY_NAME(domain_createA) != ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createA),_init))) - { - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return ITTNOTIFY_NAME(domain_createA)(name); - } -#else - if (ITTNOTIFY_NAME(domain_create) && ITTNOTIFY_NAME(domain_create) != ITT_VERSIONIZE(ITT_JOIN(_N_(domain_create),_init))) - { - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return ITTNOTIFY_NAME(domain_create)(name); - } -#endif - } - for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL; h_tail = h, h = h->next) - { - if (h->nameA != NULL && !__itt_fstrcmp(h->nameA, name)) break; - } - if (h == NULL) - { - NEW_DOMAIN_A(&_N_(_ittapi_global),h,h_tail,name); - } - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return h; -} - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createW),_init))(const wchar_t* name) -{ - __itt_string_handle *h_tail = NULL, *h = NULL; - - if (name == NULL) - { - return NULL; - } - - ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); - if (_N_(_ittapi_global).api_initialized) - { - if (ITTNOTIFY_NAME(string_handle_createW) && ITTNOTIFY_NAME(string_handle_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createW),_init))) - { - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return ITTNOTIFY_NAME(string_handle_createW)(name); - } - } - for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL; h_tail = h, h = h->next) - { - if (h->strW != NULL && !wcscmp(h->strW, name)) break; - } - if (h == NULL) - { - 
NEW_STRING_HANDLE_W(&_N_(_ittapi_global),h,h_tail,name); - } - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return h; -} - -static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createA),_init))(const char* name) -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_create),_init))(const char* name) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -{ - __itt_string_handle *h_tail = NULL, *h = NULL; - - if (name == NULL) - { - return NULL; - } - - ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); - if (_N_(_ittapi_global).api_initialized) - { -#if ITT_PLATFORM==ITT_PLATFORM_WIN - if (ITTNOTIFY_NAME(string_handle_createA) && ITTNOTIFY_NAME(string_handle_createA) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createA),_init))) - { - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return ITTNOTIFY_NAME(string_handle_createA)(name); - } -#else - if (ITTNOTIFY_NAME(string_handle_create) && ITTNOTIFY_NAME(string_handle_create) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_create),_init))) - { - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return ITTNOTIFY_NAME(string_handle_create)(name); - } -#endif - } - for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL; h_tail = h, h = h->next) - { - if (h->strA != NULL && !__itt_fstrcmp(h->strA, name)) break; - } - if (h == NULL) - { - NEW_STRING_HANDLE_A(&_N_(_ittapi_global),h,h_tail,name); - } - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return h; -} - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createW),_init))(const wchar_t *name, const wchar_t *domain) -{ - __itt_counter_info_t *h_tail = NULL, *h = NULL; - __itt_metadata_type type = __itt_metadata_u64; - - if (name == NULL) - { - return NULL; - } - - ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); - if (_N_(_ittapi_global).api_initialized) - { - if (ITTNOTIFY_NAME(counter_createW) && ITTNOTIFY_NAME(counter_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createW),_init))) - { - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return ITTNOTIFY_NAME(counter_createW)(name, domain); - } - } - for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) - { - if (h->nameW != NULL && h->type == type && !wcscmp(h->nameW, name) && ((h->domainW == NULL && domain == NULL) || - (h->domainW != NULL && domain != NULL && !wcscmp(h->domainW, domain)))) break; - - } - if (h == NULL) - { - NEW_COUNTER_W(&_N_(_ittapi_global),h,h_tail,name,domain,type); - } - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return (__itt_counter)h; -} - -static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createA),_init))(const char *name, const char *domain) -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create),_init))(const char *name, const char *domain) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -{ - __itt_counter_info_t *h_tail = NULL, *h = NULL; - __itt_metadata_type type = __itt_metadata_u64; - - if (name == NULL) - { - return NULL; - } - - ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); - if (_N_(_ittapi_global).api_initialized) - { -#if ITT_PLATFORM==ITT_PLATFORM_WIN - if (ITTNOTIFY_NAME(counter_createA) && ITTNOTIFY_NAME(counter_createA) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createA),_init))) - { - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return ITTNOTIFY_NAME(counter_createA)(name, domain); - } -#else - 
if (ITTNOTIFY_NAME(counter_create) && ITTNOTIFY_NAME(counter_create) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create),_init))) - { - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return ITTNOTIFY_NAME(counter_create)(name, domain); - } -#endif - } - for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) - { - if (h->nameA != NULL && h->type == type && !__itt_fstrcmp(h->nameA, name) && ((h->domainA == NULL && domain == NULL) || - (h->domainA != NULL && domain != NULL && !__itt_fstrcmp(h->domainA, domain)))) break; - } - if (h == NULL) - { - NEW_COUNTER_A(&_N_(_ittapi_global),h,h_tail,name,domain,type); - } - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return (__itt_counter)h; -} - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedW),_init))(const wchar_t *name, const wchar_t *domain, __itt_metadata_type type) -{ - __itt_counter_info_t *h_tail = NULL, *h = NULL; - - if (name == NULL) - { - return NULL; - } - - ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); - if (_N_(_ittapi_global).api_initialized) - { - if (ITTNOTIFY_NAME(counter_create_typedW) && ITTNOTIFY_NAME(counter_create_typedW) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedW),_init))) - { - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return ITTNOTIFY_NAME(counter_create_typedW)(name, domain, type); - } - } - for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) - { - if (h->nameW != NULL && h->type == type && !wcscmp(h->nameW, name) && ((h->domainW == NULL && domain == NULL) || - (h->domainW != NULL && domain != NULL && !wcscmp(h->domainW, domain)))) break; - - } - if (h == NULL) - { - NEW_COUNTER_W(&_N_(_ittapi_global),h,h_tail,name,domain,type); - } - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return (__itt_counter)h; -} - -static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedA),_init))(const char *name, const char *domain, __itt_metadata_type type) -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typed),_init))(const char *name, const char *domain, __itt_metadata_type type) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -{ - __itt_counter_info_t *h_tail = NULL, *h = NULL; - - if (name == NULL) - { - return NULL; - } - - ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); - if (_N_(_ittapi_global).api_initialized) - { -#if ITT_PLATFORM==ITT_PLATFORM_WIN - if (ITTNOTIFY_NAME(counter_create_typedA) && ITTNOTIFY_NAME(counter_create_typedA) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedA),_init))) - { - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return ITTNOTIFY_NAME(counter_create_typedA)(name, domain, type); - } -#else - if (ITTNOTIFY_NAME(counter_create_typed) && ITTNOTIFY_NAME(counter_create_typed) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typed),_init))) - { - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return ITTNOTIFY_NAME(counter_create_typed)(name, domain, type); - } -#endif - } - for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) - { - if (h->nameA != NULL && h->type == type && !__itt_fstrcmp(h->nameA, name) && ((h->domainA == NULL && domain == NULL) || - (h->domainA != NULL && domain != NULL && !__itt_fstrcmp(h->domainA, domain)))) break; - } - if (h == NULL) - { - NEW_COUNTER_A(&_N_(_ittapi_global),h,h_tail,name,domain,type); - } - if (PTHREAD_SYMBOLS) 
__itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return (__itt_counter)h; -} - -/* -------------------------------------------------------------------------- */ - -static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(pause),_init))(void) -{ - if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL) - { - __itt_init_ittlib_name(NULL, __itt_group_all); - } - if (ITTNOTIFY_NAME(pause) && ITTNOTIFY_NAME(pause) != ITT_VERSIONIZE(ITT_JOIN(_N_(pause),_init))) - { - ITTNOTIFY_NAME(pause)(); - } - else - { - _N_(_ittapi_global).state = __itt_collection_paused; - } -} - -static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(resume),_init))(void) -{ - if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL) - { - __itt_init_ittlib_name(NULL, __itt_group_all); - } - if (ITTNOTIFY_NAME(resume) && ITTNOTIFY_NAME(resume) != ITT_VERSIONIZE(ITT_JOIN(_N_(resume),_init))) - { - ITTNOTIFY_NAME(resume)(); - } - else - { - _N_(_ittapi_global).state = __itt_collection_normal; - } -} - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init))(const wchar_t* name) -{ - if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL) - { - __itt_init_ittlib_name(NULL, __itt_group_all); - } - if (ITTNOTIFY_NAME(thread_set_nameW) && ITTNOTIFY_NAME(thread_set_nameW) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init))) - { - ITTNOTIFY_NAME(thread_set_nameW)(name); - } -} - -static int ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_name_setW),_init))(const wchar_t* name, int namelen) -{ - (void)namelen; - ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init))(name); - return 0; -} - -static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameA),_init))(const char* name) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_name),_init))(const char* name) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -{ - if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL) - { - __itt_init_ittlib_name(NULL, __itt_group_all); - } -#if ITT_PLATFORM==ITT_PLATFORM_WIN - if (ITTNOTIFY_NAME(thread_set_nameA) && ITTNOTIFY_NAME(thread_set_nameA) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameA),_init))) - { - ITTNOTIFY_NAME(thread_set_nameA)(name); - } -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - if (ITTNOTIFY_NAME(thread_set_name) && ITTNOTIFY_NAME(thread_set_name) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_name),_init))) - { - ITTNOTIFY_NAME(thread_set_name)(name); - } -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -} - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -static int ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_name_setA),_init))(const char* name, int namelen) -{ - (void)namelen; - ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameA),_init))(name); - return 0; -} -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -static int ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_name_set),_init))(const char* name, int namelen) -{ - (void)namelen; - ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_name),_init))(name); - return 0; -} -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init))(void) -{ - if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL) - { - __itt_init_ittlib_name(NULL, __itt_group_all); - } - if (ITTNOTIFY_NAME(thread_ignore) && ITTNOTIFY_NAME(thread_ignore) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init))) - { - ITTNOTIFY_NAME(thread_ignore)(); - } -} - -static void ITTAPI 
ITT_VERSIONIZE(ITT_JOIN(_N_(thr_ignore),_init))(void) -{ - ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init))(); -} - -static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(enable_attach),_init))(void) -{ -#ifdef __ANDROID__ - /* - * if the LIB_VAR_NAME env variable was set before, keep the previous value; - * else set the default path - */ - setenv(ITT_TO_STR(LIB_VAR_NAME), ANDROID_ITTNOTIFY_DEFAULT_PATH, 0); -#endif -} - -/* -------------------------------------------------------------------------- */ - -static const char* __itt_fsplit(const char* s, const char* sep, const char** out, int* len) -{ - int i; - int j; - - if (!s || !sep || !out || !len) - return NULL; - - for (i = 0; s[i]; i++) - { - int b = 0; - for (j = 0; sep[j]; j++) - if (s[i] == sep[j]) - { - b = 1; - break; - } - if (!b) - break; - } - - if (!s[i]) - return NULL; - - *len = 0; - *out = &s[i]; - - for (; s[i]; i++, (*len)++) - { - int b = 0; - for (j = 0; sep[j]; j++) - if (s[i] == sep[j]) - { - b = 1; - break; - } - if (b) - break; - } - - for (; s[i]; i++) - { - int b = 0; - for (j = 0; sep[j]; j++) - if (s[i] == sep[j]) - { - b = 1; - break; - } - if (!b) - break; - } - - return &s[i]; -} - -/* This function returns the value of an env variable, placed into a static buffer. - * !!! The same static buffer is used for subsequent calls. !!! - * This was done to avoid dynamic allocation for a few calls. - * Actually we need this function only four times. - */ -static const char* __itt_get_env_var(const char* name) -{ -#define MAX_ENV_VALUE_SIZE 4086 - static char env_buff[MAX_ENV_VALUE_SIZE]; - static char* env_value = (char*)env_buff; - - if (name != NULL) - { -#if ITT_PLATFORM==ITT_PLATFORM_WIN - size_t max_len = MAX_ENV_VALUE_SIZE - (size_t)(env_value - env_buff); - DWORD rc = GetEnvironmentVariableA(name, env_value, (DWORD)max_len); - if (rc >= max_len) - __itt_report_error(__itt_error_env_too_long, name, (size_t)rc - 1, (size_t)(max_len - 1)); - else if (rc > 0) - { - const char* ret = (const char*)env_value; - env_value += rc + 1; - return ret; - } - else - { - /* If the environment variable is empty, GetEnvironmentVariableA() - * returns zero (the number of characters, not including the terminating null), - * and GetLastError() returns ERROR_SUCCESS. 
*/ - DWORD err = GetLastError(); - if (err == ERROR_SUCCESS) - return env_value; - - if (err != ERROR_ENVVAR_NOT_FOUND) - __itt_report_error(__itt_error_cant_read_env, name, (int)err); -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ - char* env = getenv(name); - if (env != NULL) - { - size_t len = __itt_fstrnlen(env, MAX_ENV_VALUE_SIZE); - size_t max_len = MAX_ENV_VALUE_SIZE - (size_t)(env_value - env_buff); - if (len < max_len) - { - const char* ret = (const char*)env_value; - __itt_fstrcpyn(env_value, max_len, env, len + 1); - env_value += len + 1; - return ret; - } else - __itt_report_error(__itt_error_env_too_long, name, (size_t)len, (size_t)(max_len - 1)); - } -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - } - return NULL; -} - -static const char* __itt_get_lib_name(void) -{ - const char* lib_name = __itt_get_env_var(ITT_TO_STR(LIB_VAR_NAME)); - -#ifdef __ANDROID__ - if (lib_name == NULL) - { - -#if ITT_ARCH==ITT_ARCH_IA32 || ITT_ARCH==ITT_ARCH_ARM - const char* const marker_filename = "com.intel.itt.collector_lib_32"; -#else - const char* const marker_filename = "com.intel.itt.collector_lib_64"; -#endif - - char system_wide_marker_filename[PATH_MAX] = {0}; - int itt_marker_file_fd = -1; - ssize_t res = 0; - - res = snprintf(system_wide_marker_filename, PATH_MAX - 1, "%s%s", "/data/local/tmp/", marker_filename); - if (res < 0) - { - ITT_ANDROID_LOGE("Unable to concatenate marker file string."); - return lib_name; - } - itt_marker_file_fd = open(system_wide_marker_filename, O_RDONLY); - - if (itt_marker_file_fd == -1) - { - const pid_t my_pid = getpid(); - char cmdline_path[PATH_MAX] = {0}; - char package_name[PATH_MAX] = {0}; - char app_sandbox_file[PATH_MAX] = {0}; - int cmdline_fd = 0; - - ITT_ANDROID_LOGI("Unable to open system-wide marker file."); - res = snprintf(cmdline_path, PATH_MAX - 1, "/proc/%d/cmdline", my_pid); - if (res < 0) - { - ITT_ANDROID_LOGE("Unable to get cmdline path string."); - return lib_name; - } - - ITT_ANDROID_LOGI("CMD file: %s\n", cmdline_path); - cmdline_fd = open(cmdline_path, O_RDONLY); - if (cmdline_fd == -1) - { - ITT_ANDROID_LOGE("Unable to open %s file!", cmdline_path); - return lib_name; - } - res = read(cmdline_fd, package_name, PATH_MAX - 1); - if (res == -1) - { - ITT_ANDROID_LOGE("Unable to read %s file!", cmdline_path); - res = close(cmdline_fd); - if (res == -1) - { - ITT_ANDROID_LOGE("Unable to close %s file!", cmdline_path); - } - return lib_name; - } - res = close(cmdline_fd); - if (res == -1) - { - ITT_ANDROID_LOGE("Unable to close %s file!", cmdline_path); - return lib_name; - } - ITT_ANDROID_LOGI("Package name: %s\n", package_name); - res = snprintf(app_sandbox_file, PATH_MAX - 1, "/data/data/%s/%s", package_name, marker_filename); - if (res < 0) - { - ITT_ANDROID_LOGE("Unable to concatenate marker file string."); - return lib_name; - } - - ITT_ANDROID_LOGI("Lib marker file name: %s\n", app_sandbox_file); - itt_marker_file_fd = open(app_sandbox_file, O_RDONLY); - if (itt_marker_file_fd == -1) - { - ITT_ANDROID_LOGE("Unable to open app marker file!"); - return lib_name; - } - } - - { - char itt_lib_name[PATH_MAX] = {0}; - - res = read(itt_marker_file_fd, itt_lib_name, PATH_MAX - 1); - if (res == -1) - { - ITT_ANDROID_LOGE("Unable to read marker file (fd=%d)!", itt_marker_file_fd); - res = close(itt_marker_file_fd); - if (res == -1) - { - ITT_ANDROID_LOGE("Unable to close marker file (fd=%d)!", itt_marker_file_fd); - } - return lib_name; - } - ITT_ANDROID_LOGI("ITT Lib path: %s", itt_lib_name); - res = close(itt_marker_file_fd); - if (res == -1) - { - 
ITT_ANDROID_LOGE("Unable to close %s file!", itt_marker_file_fd); - return lib_name; - } - ITT_ANDROID_LOGI("Set env %s to %s", ITT_TO_STR(LIB_VAR_NAME), itt_lib_name); - res = setenv(ITT_TO_STR(LIB_VAR_NAME), itt_lib_name, 0); - if (res == -1) - { - ITT_ANDROID_LOGE("Unable to set env var!"); - return lib_name; - } - lib_name = __itt_get_env_var(ITT_TO_STR(LIB_VAR_NAME)); - ITT_ANDROID_LOGI("ITT Lib path from env: %s", lib_name); - } - } -#endif - - return lib_name; -} - -/* Avoid clashes with std::min, reported by tbb team */ -#define __itt_min(a,b) (a) < (b) ? (a) : (b) - -static __itt_group_id __itt_get_groups(void) -{ - int i; - __itt_group_id res = __itt_group_none; - const char* var_name = "INTEL_ITTNOTIFY_GROUPS"; - const char* group_str = __itt_get_env_var(var_name); - - if (group_str != NULL) - { - int len; - char gr[255]; - const char* chunk; - while ((group_str = __itt_fsplit(group_str, ",; ", &chunk, &len)) != NULL) - { - int min_len = __itt_min(len, (int)(sizeof(gr) - 1)); - __itt_fstrcpyn(gr, sizeof(gr) - 1, chunk, min_len); - gr[min_len] = 0; - - for (i = 0; group_list[i].name != NULL; i++) - { - if (!__itt_fstrcmp(gr, group_list[i].name)) - { - res = (__itt_group_id)(res | group_list[i].id); - break; - } - } - } - /* TODO: !!! Workaround for bug with warning for unknown group !!! - * Should be fixed in new initialization scheme. - * Now the following groups should be set always. */ - for (i = 0; group_list[i].id != __itt_group_none; i++) - if (group_list[i].id != __itt_group_all && - group_list[i].id > __itt_group_splitter_min && - group_list[i].id < __itt_group_splitter_max) - res = (__itt_group_id)(res | group_list[i].id); - return res; - } - else - { - for (i = 0; group_alias[i].env_var != NULL; i++) - if (__itt_get_env_var(group_alias[i].env_var) != NULL) - return group_alias[i].groups; - } - - return res; -} - -#undef __itt_min - -static int __itt_lib_version(lib_t lib) -{ - if (lib == NULL) - return 0; - if (__itt_get_proc(lib, "__itt_api_init")) - return 2; - if (__itt_get_proc(lib, "__itt_api_version")) - return 1; - return 0; -} - -/* It's not used right now! Comment it out to avoid warnings. 
-/* It's not used right now! Commented out to avoid warnings. -static void __itt_reinit_all_pointers(void) -{ - int i; - // Fill all pointers with initial stubs - for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) - *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].init_func; -} -*/ - -static void __itt_nullify_all_pointers(void) -{ - int i; - /* Nullify all pointers except domain_create, string_handle_create and counter_create */ - for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) - *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func; -} - -#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT -#pragma warning(push) -#pragma warning(disable: 4054) /* warning C4054: 'type cast' : from function pointer 'XXX' to data pointer 'void *' */ -#pragma warning(disable: 4055) /* warning C4055: 'type cast' : from data pointer 'void *' to function pointer 'XXX' */ -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -ITT_EXTERN_C void _N_(fini_ittlib)(void) -{ - __itt_api_fini_t* __itt_api_fini_ptr = NULL; - static volatile TIDT current_thread = 0; - - if (_N_(_ittapi_global).api_initialized) - { - ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); - if (_N_(_ittapi_global).api_initialized) - { - if (current_thread == 0) - { - if (PTHREAD_SYMBOLS) current_thread = __itt_thread_id(); - if (_N_(_ittapi_global).lib != NULL) - { - __itt_api_fini_ptr = (__itt_api_fini_t*)(size_t)__itt_get_proc(_N_(_ittapi_global).lib, "__itt_api_fini"); - } - if (__itt_api_fini_ptr) - { - __itt_api_fini_ptr(&_N_(_ittapi_global)); - } - - __itt_nullify_all_pointers(); - - /* TODO: !!! not safe !!! unload is not supported so far. - * if (_N_(_ittapi_global).lib != NULL) - * __itt_unload_lib(_N_(_ittapi_global).lib); - * _N_(_ittapi_global).lib = NULL; - */ - _N_(_ittapi_global).api_initialized = 0; - current_thread = 0; - } - } - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - } -} - -ITT_EXTERN_C int _N_(init_ittlib)(const char* lib_name, __itt_group_id init_groups) -{ - int i; - __itt_group_id groups; -#ifdef ITT_COMPLETE_GROUP - __itt_group_id zero_group = __itt_group_none; -#endif /* ITT_COMPLETE_GROUP */ - static volatile TIDT current_thread = 0; - - if (!_N_(_ittapi_global).api_initialized) - { -#ifndef ITT_SIMPLE_INIT - ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); -#endif /* ITT_SIMPLE_INIT */ - - if (!_N_(_ittapi_global).api_initialized) - { - if (current_thread == 0) - { - if (PTHREAD_SYMBOLS) current_thread = __itt_thread_id(); - if (lib_name == NULL) - { - lib_name = __itt_get_lib_name(); - } - groups = __itt_get_groups(); - if (DL_SYMBOLS && (groups != __itt_group_none || lib_name != NULL)) - { - _N_(_ittapi_global).lib = __itt_load_lib((lib_name == NULL) ? 
ittnotify_lib_name : lib_name); - - if (_N_(_ittapi_global).lib != NULL) - { - __itt_api_init_t* __itt_api_init_ptr; - int lib_version = __itt_lib_version(_N_(_ittapi_global).lib); - - switch (lib_version) { - case 0: - groups = __itt_group_legacy; - case 1: - /* Fill all pointers from dynamic library */ - for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) - { - if (_N_(_ittapi_global).api_list_ptr[i].group & groups & init_groups) - { - *_N_(_ittapi_global).api_list_ptr[i].func_ptr = (void*)__itt_get_proc(_N_(_ittapi_global).lib, _N_(_ittapi_global).api_list_ptr[i].name); - if (*_N_(_ittapi_global).api_list_ptr[i].func_ptr == NULL) - { - /* Restore pointers for function with static implementation */ - *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func; - __itt_report_error(__itt_error_no_symbol, lib_name, _N_(_ittapi_global).api_list_ptr[i].name); -#ifdef ITT_COMPLETE_GROUP - zero_group = (__itt_group_id)(zero_group | _N_(_ittapi_global).api_list_ptr[i].group); -#endif /* ITT_COMPLETE_GROUP */ - } - } - else - *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func; - } - - if (groups == __itt_group_legacy) - { - /* Compatibility with legacy tools */ - ITTNOTIFY_NAME(thread_ignore) = ITTNOTIFY_NAME(thr_ignore); -#if ITT_PLATFORM==ITT_PLATFORM_WIN - ITTNOTIFY_NAME(sync_createA) = ITTNOTIFY_NAME(sync_set_nameA); - ITTNOTIFY_NAME(sync_createW) = ITTNOTIFY_NAME(sync_set_nameW); -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ - ITTNOTIFY_NAME(sync_create) = ITTNOTIFY_NAME(sync_set_name); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - ITTNOTIFY_NAME(sync_prepare) = ITTNOTIFY_NAME(notify_sync_prepare); - ITTNOTIFY_NAME(sync_cancel) = ITTNOTIFY_NAME(notify_sync_cancel); - ITTNOTIFY_NAME(sync_acquired) = ITTNOTIFY_NAME(notify_sync_acquired); - ITTNOTIFY_NAME(sync_releasing) = ITTNOTIFY_NAME(notify_sync_releasing); - } - -#ifdef ITT_COMPLETE_GROUP - for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) - if (_N_(_ittapi_global).api_list_ptr[i].group & zero_group) - *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func; -#endif /* ITT_COMPLETE_GROUP */ - break; - case 2: - __itt_api_init_ptr = (__itt_api_init_t*)(size_t)__itt_get_proc(_N_(_ittapi_global).lib, "__itt_api_init"); - if (__itt_api_init_ptr) - __itt_api_init_ptr(&_N_(_ittapi_global), init_groups); - break; - } - } - else - { - __itt_nullify_all_pointers(); - - __itt_report_error(__itt_error_no_module, lib_name, -#if ITT_PLATFORM==ITT_PLATFORM_WIN - __itt_system_error() -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - dlerror() -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - ); - } - } - else - { - __itt_nullify_all_pointers(); - } - _N_(_ittapi_global).api_initialized = 1; - current_thread = 0; - /* !!! Just to avoid unused code elimination !!! 
*/ - if (__itt_fini_ittlib_ptr == _N_(fini_ittlib)) current_thread = 0; - } - } - -#ifndef ITT_SIMPLE_INIT - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); -#endif /* ITT_SIMPLE_INIT */ - } - - /* Check whether any function pointer differs from its no-op stub and belongs to init_groups */ - for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) - { - if (*_N_(_ittapi_global).api_list_ptr[i].func_ptr != _N_(_ittapi_global).api_list_ptr[i].null_func && - _N_(_ittapi_global).api_list_ptr[i].group & init_groups) - { - return 1; - } - } - return 0; -} - -ITT_EXTERN_C __itt_error_handler_t* _N_(set_error_handler)(__itt_error_handler_t* handler) -{ - __itt_error_handler_t* prev = (__itt_error_handler_t*)(size_t)_N_(_ittapi_global).error_handler; - _N_(_ittapi_global).error_handler = (void*)(size_t)handler; - return prev; -} - -#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT -#pragma warning(pop) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/thirdparty/ittnotify/ittnotify_static.c ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/z_Windows_NT-586_asm.asm =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/z_Windows_NT-586_asm.asm (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/z_Windows_NT-586_asm.asm (nonexistent) @@ -1,1299 +0,0 @@ -; z_Windows_NT-586_asm.asm: - microtasking routines specifically -; written for IA-32 architecture and Intel(R) 64 running Windows* OS - -; -;//===----------------------------------------------------------------------===// -;// -;// The LLVM Compiler Infrastructure -;// -;// This file is dual licensed under the MIT and the University of Illinois Open -;// Source Licenses. See LICENSE.txt for details. 
-;// -;//===----------------------------------------------------------------------===// -; - - TITLE z_Windows_NT-586_asm.asm - -; ============================= IA-32 architecture ========================== -ifdef _M_IA32 - - .586P - -if @Version gt 510 - .model HUGE -else -_TEXT SEGMENT PARA USE32 PUBLIC 'CODE' -_TEXT ENDS -_DATA SEGMENT DWORD USE32 PUBLIC 'DATA' -_DATA ENDS -CONST SEGMENT DWORD USE32 PUBLIC 'CONST' -CONST ENDS -_BSS SEGMENT DWORD USE32 PUBLIC 'BSS' -_BSS ENDS -$$SYMBOLS SEGMENT BYTE USE32 'DEBSYM' -$$SYMBOLS ENDS -$$TYPES SEGMENT BYTE USE32 'DEBTYP' -$$TYPES ENDS -_TLS SEGMENT DWORD USE32 PUBLIC 'TLS' -_TLS ENDS -FLAT GROUP _DATA, CONST, _BSS - ASSUME CS: FLAT, DS: FLAT, SS: FLAT -endif - - -;------------------------------------------------------------------------ -; FUNCTION ___kmp_x86_pause -; -; void -; __kmp_x86_pause( void ) -PUBLIC ___kmp_x86_pause -_p$ = 4 -_d$ = 8 -_TEXT SEGMENT - ALIGN 16 -___kmp_x86_pause PROC NEAR - - db 0f3H - db 090H ;; pause - ret - -___kmp_x86_pause ENDP -_TEXT ENDS - -;------------------------------------------------------------------------ -; FUNCTION ___kmp_x86_cpuid -; -; void -; __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p ); -PUBLIC ___kmp_x86_cpuid -_TEXT SEGMENT - ALIGN 16 -_mode$ = 8 -_mode2$ = 12 -_p$ = 16 -_eax$ = 0 -_ebx$ = 4 -_ecx$ = 8 -_edx$ = 12 - -___kmp_x86_cpuid PROC NEAR - - push ebp - mov ebp, esp - - push edi - push ebx - push ecx - push edx - - mov eax, DWORD PTR _mode$[ebp] - mov ecx, DWORD PTR _mode2$[ebp] - cpuid ; Query the CPUID for the current processor - - mov edi, DWORD PTR _p$[ebp] - mov DWORD PTR _eax$[ edi ], eax - mov DWORD PTR _ebx$[ edi ], ebx - mov DWORD PTR _ecx$[ edi ], ecx - mov DWORD PTR _edx$[ edi ], edx - - pop edx - pop ecx - pop ebx - pop edi - - mov esp, ebp - pop ebp - ret - -___kmp_x86_cpuid ENDP -_TEXT ENDS - -;------------------------------------------------------------------------ -; FUNCTION ___kmp_test_then_add32 -; -; kmp_int32 -; __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d ); -PUBLIC ___kmp_test_then_add32 -_p$ = 4 -_d$ = 8 -_TEXT SEGMENT - ALIGN 16 -___kmp_test_then_add32 PROC NEAR - - mov eax, DWORD PTR _d$[esp] - mov ecx, DWORD PTR _p$[esp] -lock xadd DWORD PTR [ecx], eax - ret - -___kmp_test_then_add32 ENDP -_TEXT ENDS - -;------------------------------------------------------------------------ -; FUNCTION ___kmp_compare_and_store8 -; -; kmp_int8 -; __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); -PUBLIC ___kmp_compare_and_store8 -_TEXT SEGMENT - ALIGN 16 -_p$ = 4 -_cv$ = 8 -_sv$ = 12 - -___kmp_compare_and_store8 PROC NEAR - - mov ecx, DWORD PTR _p$[esp] - mov al, BYTE PTR _cv$[esp] - mov dl, BYTE PTR _sv$[esp] -lock cmpxchg BYTE PTR [ecx], dl - sete al ; if al == [ecx] set al = 1 else set al = 0 - and eax, 1 ; sign extend previous instruction - ret - -___kmp_compare_and_store8 ENDP -_TEXT ENDS - -;------------------------------------------------------------------------ -; FUNCTION ___kmp_compare_and_store16 -; -; kmp_int16 -; __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); -PUBLIC ___kmp_compare_and_store16 -_TEXT SEGMENT - ALIGN 16 -_p$ = 4 -_cv$ = 8 -_sv$ = 12 - -___kmp_compare_and_store16 PROC NEAR - - mov ecx, DWORD PTR _p$[esp] - mov ax, WORD PTR _cv$[esp] - mov dx, WORD PTR _sv$[esp] -lock cmpxchg WORD PTR [ecx], dx - sete al ; if ax == [ecx] set al = 1 else set al = 0 - and eax, 1 ; sign extend previous instruction - ret - -___kmp_compare_and_store16 ENDP -_TEXT ENDS - 
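The contract these compare-and-store routines implement, restated as a portable C11 sketch for reference (illustrative only; the shipped implementation is the hand-written MASM in this file): store sv into *p and report success if and only if *p still held cv, which is exactly what the lock cmpxchg / sete al / and eax, 1 sequence computes.

#include <stdatomic.h>
#include <stdint.h>

static int kmp_compare_and_store32_sketch(_Atomic int32_t *p,
                                          int32_t cv, int32_t sv)
{
    /* Strong CAS: sv is written only when *p == cv; the boolean result
       plays the role of the sete/and pair in the assembly. On failure,
       atomic_compare_exchange_strong writes the observed value back into
       cv, which this sketch deliberately ignores. */
    return atomic_compare_exchange_strong(p, &cv, sv) ? 1 : 0;
}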
-;------------------------------------------------------------------------ -; FUNCTION ___kmp_compare_and_store32 -; -; kmp_int32 -; __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); -PUBLIC ___kmp_compare_and_store32 -_TEXT SEGMENT - ALIGN 16 -_p$ = 4 -_cv$ = 8 -_sv$ = 12 - -___kmp_compare_and_store32 PROC NEAR - - mov ecx, DWORD PTR _p$[esp] - mov eax, DWORD PTR _cv$[esp] - mov edx, DWORD PTR _sv$[esp] -lock cmpxchg DWORD PTR [ecx], edx - sete al ; if eax == [ecx] set al = 1 else set al = 0 - and eax, 1 ; sign extend previous instruction - ret - -___kmp_compare_and_store32 ENDP -_TEXT ENDS - -;------------------------------------------------------------------------ -; FUNCTION ___kmp_compare_and_store64 -; -; kmp_int32 -; __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); -PUBLIC ___kmp_compare_and_store64 -_TEXT SEGMENT - ALIGN 16 -_p$ = 8 -_cv_low$ = 12 -_cv_high$ = 16 -_sv_low$ = 20 -_sv_high$ = 24 - -___kmp_compare_and_store64 PROC NEAR - - push ebp - mov ebp, esp - push ebx - push edi - mov edi, DWORD PTR _p$[ebp] - mov eax, DWORD PTR _cv_low$[ebp] - mov edx, DWORD PTR _cv_high$[ebp] - mov ebx, DWORD PTR _sv_low$[ebp] - mov ecx, DWORD PTR _sv_high$[ebp] -lock cmpxchg8b QWORD PTR [edi] - sete al ; if edx:eax == [edi] set al = 1 else set al = 0 - and eax, 1 ; sign extend previous instruction - pop edi - pop ebx - mov esp, ebp - pop ebp - ret - -___kmp_compare_and_store64 ENDP -_TEXT ENDS - -;------------------------------------------------------------------------ -; FUNCTION ___kmp_xchg_fixed8 -; -; kmp_int8 -; __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d ); -PUBLIC ___kmp_xchg_fixed8 -_TEXT SEGMENT - ALIGN 16 -_p$ = 4 -_d$ = 8 - -___kmp_xchg_fixed8 PROC NEAR - - mov ecx, DWORD PTR _p$[esp] - mov al, BYTE PTR _d$[esp] -lock xchg BYTE PTR [ecx], al - ret - -___kmp_xchg_fixed8 ENDP -_TEXT ENDS - -;------------------------------------------------------------------------ -; FUNCTION ___kmp_xchg_fixed16 -; -; kmp_int16 -; __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d ); -PUBLIC ___kmp_xchg_fixed16 -_TEXT SEGMENT - ALIGN 16 -_p$ = 4 -_d$ = 8 - -___kmp_xchg_fixed16 PROC NEAR - - mov ecx, DWORD PTR _p$[esp] - mov ax, WORD PTR _d$[esp] -lock xchg WORD PTR [ecx], ax - ret - -___kmp_xchg_fixed16 ENDP -_TEXT ENDS - -;------------------------------------------------------------------------ -; FUNCTION ___kmp_xchg_fixed32 -; -; kmp_int32 -; __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d ); -PUBLIC ___kmp_xchg_fixed32 -_TEXT SEGMENT - ALIGN 16 -_p$ = 4 -_d$ = 8 - -___kmp_xchg_fixed32 PROC NEAR - - mov ecx, DWORD PTR _p$[esp] - mov eax, DWORD PTR _d$[esp] -lock xchg DWORD PTR [ecx], eax - ret - -___kmp_xchg_fixed32 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; FUNCTION ___kmp_xchg_real32 -; -; kmp_real32 -; __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 d ); -PUBLIC ___kmp_xchg_real32 -_TEXT SEGMENT - ALIGN 16 -_p$ = 8 -_d$ = 12 -_old_value$ = -4 - -___kmp_xchg_real32 PROC NEAR - - push ebp - mov ebp, esp - sub esp, 4 - push esi - mov esi, DWORD PTR _p$[ebp] - - fld DWORD PTR [esi] - ;; load - fst DWORD PTR _old_value$[ebp] - ;; store into old_value - - mov eax, DWORD PTR _d$[ebp] - -lock xchg DWORD PTR [esi], eax - - fld DWORD PTR _old_value$[ebp] - ;; return old_value - pop esi - mov esp, ebp - pop ebp - ret - -___kmp_xchg_real32 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; FUNCTION 
___kmp_compare_and_store_ret8 -; -; kmp_int8 -; __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); -PUBLIC ___kmp_compare_and_store_ret8 -_TEXT SEGMENT - ALIGN 16 -_p$ = 4 -_cv$ = 8 -_sv$ = 12 - -___kmp_compare_and_store_ret8 PROC NEAR - - mov ecx, DWORD PTR _p$[esp] - mov al, BYTE PTR _cv$[esp] - mov dl, BYTE PTR _sv$[esp] -lock cmpxchg BYTE PTR [ecx], dl - ret - -___kmp_compare_and_store_ret8 ENDP -_TEXT ENDS - -;------------------------------------------------------------------------ -; FUNCTION ___kmp_compare_and_store_ret16 -; -; kmp_int16 -; __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); -PUBLIC ___kmp_compare_and_store_ret16 -_TEXT SEGMENT - ALIGN 16 -_p$ = 4 -_cv$ = 8 -_sv$ = 12 - -___kmp_compare_and_store_ret16 PROC NEAR - - mov ecx, DWORD PTR _p$[esp] - mov ax, WORD PTR _cv$[esp] - mov dx, WORD PTR _sv$[esp] -lock cmpxchg WORD PTR [ecx], dx - ret - -___kmp_compare_and_store_ret16 ENDP -_TEXT ENDS - -;------------------------------------------------------------------------ -; FUNCTION ___kmp_compare_and_store_ret32 -; -; kmp_int32 -; __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); -PUBLIC ___kmp_compare_and_store_ret32 -_TEXT SEGMENT - ALIGN 16 -_p$ = 4 -_cv$ = 8 -_sv$ = 12 - -___kmp_compare_and_store_ret32 PROC NEAR - - mov ecx, DWORD PTR _p$[esp] - mov eax, DWORD PTR _cv$[esp] - mov edx, DWORD PTR _sv$[esp] -lock cmpxchg DWORD PTR [ecx], edx - ret - -___kmp_compare_and_store_ret32 ENDP -_TEXT ENDS - -;------------------------------------------------------------------------ -; FUNCTION ___kmp_compare_and_store_ret64 -; -; kmp_int64 -; __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); -PUBLIC ___kmp_compare_and_store_ret64 -_TEXT SEGMENT - ALIGN 16 -_p$ = 8 -_cv_low$ = 12 -_cv_high$ = 16 -_sv_low$ = 20 -_sv_high$ = 24 - -___kmp_compare_and_store_ret64 PROC NEAR - - push ebp - mov ebp, esp - push ebx - push edi - mov edi, DWORD PTR _p$[ebp] - mov eax, DWORD PTR _cv_low$[ebp] - mov edx, DWORD PTR _cv_high$[ebp] - mov ebx, DWORD PTR _sv_low$[ebp] - mov ecx, DWORD PTR _sv_high$[ebp] -lock cmpxchg8b QWORD PTR [edi] - pop edi - pop ebx - mov esp, ebp - pop ebp - ret - -___kmp_compare_and_store_ret64 ENDP -_TEXT ENDS - -;------------------------------------------------------------------------ -; FUNCTION ___kmp_load_x87_fpu_control_word -; -; void -; __kmp_load_x87_fpu_control_word( kmp_int16 *p ); -; -; parameters: -; p: 4(%esp) -PUBLIC ___kmp_load_x87_fpu_control_word -_TEXT SEGMENT - ALIGN 16 -_p$ = 4 - -___kmp_load_x87_fpu_control_word PROC NEAR - - mov eax, DWORD PTR _p$[esp] - fldcw WORD PTR [eax] - ret - -___kmp_load_x87_fpu_control_word ENDP -_TEXT ENDS - -;------------------------------------------------------------------------ -; FUNCTION ___kmp_store_x87_fpu_control_word -; -; void -; __kmp_store_x87_fpu_control_word( kmp_int16 *p ); -; -; parameters: -; p: 4(%esp) -PUBLIC ___kmp_store_x87_fpu_control_word -_TEXT SEGMENT - ALIGN 16 -_p$ = 4 - -___kmp_store_x87_fpu_control_word PROC NEAR - - mov eax, DWORD PTR _p$[esp] - fstcw WORD PTR [eax] - ret - -___kmp_store_x87_fpu_control_word ENDP -_TEXT ENDS - -;------------------------------------------------------------------------ -; FUNCTION ___kmp_clear_x87_fpu_status_word -; -; void -; __kmp_clear_x87_fpu_status_word(); -PUBLIC ___kmp_clear_x87_fpu_status_word -_TEXT SEGMENT - ALIGN 16 - -___kmp_clear_x87_fpu_status_word PROC NEAR - - fnclex - ret - -___kmp_clear_x87_fpu_status_word ENDP -_TEXT 
ENDS - - -;------------------------------------------------------------------------ -; FUNCTION ___kmp_invoke_microtask -; -; typedef void (*microtask_t)( int *gtid, int *tid, ... ); -; -; int -; __kmp_invoke_microtask( microtask_t pkfn, -; int gtid, int tid, -; int argc, void *p_argv[] ) -PUBLIC ___kmp_invoke_microtask -_TEXT SEGMENT - ALIGN 16 -_pkfn$ = 8 -_gtid$ = 12 -_tid$ = 16 -_argc$ = 20 -_argv$ = 24 -if OMPT_SUPPORT -_exit_frame$ = 28 -endif -_i$ = -8 -_stk_adj$ = -16 -_vptr$ = -12 -_qptr$ = -4 - -___kmp_invoke_microtask PROC NEAR -; Line 102 - push ebp - mov ebp, esp - sub esp, 16 ; 00000010H - push ebx - push esi - push edi -if OMPT_SUPPORT - mov eax, DWORD PTR _exit_frame$[ebp] - mov DWORD PTR [eax], ebp -endif -; Line 114 - mov eax, DWORD PTR _argc$[ebp] - mov DWORD PTR _i$[ebp], eax - -;; ------------------------------------------------------------ - lea edx, DWORD PTR [eax*4+8] - mov ecx, esp ; Save current SP into ECX - mov eax,edx ; Save the size of the args in eax - sub ecx,edx ; esp-((#args+2)*4) -> ecx -- without mods, stack ptr would be this - mov edx,ecx ; Save to edx - and ecx,-128 ; Mask off 7 bits - sub edx,ecx ; Amount to subtract from esp - sub esp,edx ; Prepare stack ptr-- Now it will be aligned on 128-byte boundary at the call - - add edx,eax ; Calculate total size of the stack decrement. - mov DWORD PTR _stk_adj$[ebp], edx -;; ------------------------------------------------------------ - - jmp SHORT $L22237 -$L22238: - mov ecx, DWORD PTR _i$[ebp] - sub ecx, 1 - mov DWORD PTR _i$[ebp], ecx -$L22237: - cmp DWORD PTR _i$[ebp], 0 - jle SHORT $L22239 -; Line 116 - mov edx, DWORD PTR _i$[ebp] - mov eax, DWORD PTR _argv$[ebp] - mov ecx, DWORD PTR [eax+edx*4-4] - mov DWORD PTR _vptr$[ebp], ecx -; Line 123 - mov eax, DWORD PTR _vptr$[ebp] -; Line 124 - push eax -; Line 127 - jmp SHORT $L22238 -$L22239: -; Line 129 - lea edx, DWORD PTR _tid$[ebp] - mov DWORD PTR _vptr$[ebp], edx -; Line 130 - lea eax, DWORD PTR _gtid$[ebp] - mov DWORD PTR _qptr$[ebp], eax -; Line 143 - mov eax, DWORD PTR _vptr$[ebp] -; Line 144 - push eax -; Line 145 - mov eax, DWORD PTR _qptr$[ebp] -; Line 146 - push eax -; Line 147 - call DWORD PTR _pkfn$[ebp] -; Line 148 - add esp, DWORD PTR _stk_adj$[ebp] -; Line 152 - mov eax, 1 -; Line 153 - pop edi - pop esi - pop ebx - mov esp, ebp - pop ebp - ret 0 -___kmp_invoke_microtask ENDP -_TEXT ENDS - -endif - -; ==================================== Intel(R) 64 =================================== - -ifdef _M_AMD64 - -;------------------------------------------------------------------------ -; FUNCTION __kmp_x86_cpuid -; -; void -; __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p ); -; -; parameters: -; mode: ecx -; mode2: edx -; cpuid_buffer: r8 -PUBLIC __kmp_x86_cpuid -_TEXT SEGMENT - ALIGN 16 - -__kmp_x86_cpuid PROC FRAME ;NEAR - - push rbp - .pushreg rbp - mov rbp, rsp - .setframe rbp, 0 - push rbx ; callee-save register - .pushreg rbx - .ENDPROLOG - - mov r10, r8 ; p parameter - mov eax, ecx ; mode parameter - mov ecx, edx ; mode2 parameter - cpuid ; Query the CPUID for the current processor - - mov DWORD PTR 0[ r10 ], eax ; store results into buffer - mov DWORD PTR 4[ r10 ], ebx - mov DWORD PTR 8[ r10 ], ecx - mov DWORD PTR 12[ r10 ], edx - - pop rbx ; callee-save register - mov rsp, rbp - pop rbp - ret - -__kmp_x86_cpuid ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; FUNCTION __kmp_test_then_add32 -; -; kmp_int32 -; __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d ); -; -; 
parameters: -; p: rcx -; d: edx -; -; return: eax -PUBLIC __kmp_test_then_add32 -_TEXT SEGMENT - ALIGN 16 -__kmp_test_then_add32 PROC ;NEAR - - mov eax, edx -lock xadd DWORD PTR [rcx], eax - ret - -__kmp_test_then_add32 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; FUNCTION __kmp_test_then_add64 -; -; kmp_int32 -; __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d ); -; -; parameters: -; p: rcx -; d: rdx -; -; return: rax -PUBLIC __kmp_test_then_add64 -_TEXT SEGMENT - ALIGN 16 -__kmp_test_then_add64 PROC ;NEAR - - mov rax, rdx -lock xadd QWORD PTR [rcx], rax - ret - -__kmp_test_then_add64 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; FUNCTION __kmp_compare_and_store8 -; -; kmp_int8 -; __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); -; parameters: -; p: rcx -; cv: edx -; sv: r8d -; -; return: eax -PUBLIC __kmp_compare_and_store8 -_TEXT SEGMENT - ALIGN 16 - -__kmp_compare_and_store8 PROC ;NEAR - - mov al, dl ; "cv" - mov edx, r8d ; "sv" -lock cmpxchg BYTE PTR [rcx], dl - sete al ; if al == [rcx] set al = 1 else set al = 0 - and rax, 1 ; sign extend previous instruction - ret - -__kmp_compare_and_store8 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; FUNCTION __kmp_compare_and_store16 -; -; kmp_int16 -; __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); -; parameters: -; p: rcx -; cv: edx -; sv: r8d -; -; return: eax -PUBLIC __kmp_compare_and_store16 -_TEXT SEGMENT - ALIGN 16 - -__kmp_compare_and_store16 PROC ;NEAR - - mov ax, dx ; "cv" - mov edx, r8d ; "sv" -lock cmpxchg WORD PTR [rcx], dx - sete al ; if ax == [rcx] set al = 1 else set al = 0 - and rax, 1 ; sign extend previous instruction - ret - -__kmp_compare_and_store16 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; FUNCTION __kmp_compare_and_store32 -; -; kmp_int32 -; __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); -; parameters: -; p: rcx -; cv: edx -; sv: r8d -; -; return: eax -PUBLIC __kmp_compare_and_store32 -_TEXT SEGMENT - ALIGN 16 - -__kmp_compare_and_store32 PROC ;NEAR - - mov eax, edx ; "cv" - mov edx, r8d ; "sv" -lock cmpxchg DWORD PTR [rcx], edx - sete al ; if eax == [rcx] set al = 1 else set al = 0 - and rax, 1 ; sign extend previous instruction - ret - -__kmp_compare_and_store32 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; FUNCTION __kmp_compare_and_store64 -; -; kmp_int32 -; __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); -; parameters: -; p: rcx -; cv: rdx -; sv: r8 -; -; return: eax -PUBLIC __kmp_compare_and_store64 -_TEXT SEGMENT - ALIGN 16 - -__kmp_compare_and_store64 PROC ;NEAR - - mov rax, rdx ; "cv" - mov rdx, r8 ; "sv" -lock cmpxchg QWORD PTR [rcx], rdx - sete al ; if rax == [rcx] set al = 1 else set al = 0 - and rax, 1 ; sign extend previous instruction - ret - -__kmp_compare_and_store64 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; FUNCTION ___kmp_xchg_fixed8 -; -; kmp_int8 -; __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d ); -; -; parameters: -; p: rcx -; d: dl -; -; return: al -PUBLIC __kmp_xchg_fixed8 -_TEXT SEGMENT - ALIGN 16 - -__kmp_xchg_fixed8 PROC ;NEAR - - mov al, dl -lock xchg BYTE PTR [rcx], al - ret - -__kmp_xchg_fixed8 ENDP -_TEXT ENDS - - 
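The routines in this section are the runtime's hand-written atomics for Win64. As a reading aid, here is a minimal C++ sketch (not part of the vendor sources; the _sketch names and the use of std::atomic are illustrative assumptions) of the semantics that __kmp_test_then_add32 and __kmp_compare_and_store32 implement:

    #include <atomic>

    // Illustrative sketch only: the asm routines operate on plain volatile
    // integers via lock-prefixed instructions; std::atomic expresses the
    // same semantics portably.
    static int kmp_test_then_add32_sketch(std::atomic<int> *p, int d) {
      return p->fetch_add(d); // lock xadd: returns the value before the add
    }

    static int kmp_compare_and_store32_sketch(std::atomic<int> *p, int cv,
                                              int sv) {
      // lock cmpxchg + sete: store sv only if *p == cv, report success as 0/1.
      return p->compare_exchange_strong(cv, sv) ? 1 : 0;
    }

The _ret variants further below differ only in returning the old memory contents instead of the 0/1 success flag.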
-;------------------------------------------------------------------------ -; FUNCTION ___kmp_xchg_fixed16 -; -; kmp_int16 -; __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d ); -; -; parameters: -; p: rcx -; d: dx -; -; return: ax -PUBLIC __kmp_xchg_fixed16 -_TEXT SEGMENT - ALIGN 16 - -__kmp_xchg_fixed16 PROC ;NEAR - - mov ax, dx -lock xchg WORD PTR [rcx], ax - ret - -__kmp_xchg_fixed16 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; FUNCTION ___kmp_xchg_fixed32 -; -; kmp_int32 -; __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d ); -; -; parameters: -; p: rcx -; d: edx -; -; return: eax -PUBLIC __kmp_xchg_fixed32 -_TEXT SEGMENT - ALIGN 16 -__kmp_xchg_fixed32 PROC ;NEAR - - mov eax, edx -lock xchg DWORD PTR [rcx], eax - ret - -__kmp_xchg_fixed32 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; FUNCTION ___kmp_xchg_fixed64 -; -; kmp_int64 -; __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d ); -; -; parameters: -; p: rcx -; d: rdx -; -; return: rax -PUBLIC __kmp_xchg_fixed64 -_TEXT SEGMENT - ALIGN 16 -__kmp_xchg_fixed64 PROC ;NEAR - - mov rax, rdx -lock xchg QWORD PTR [rcx], rax - ret - -__kmp_xchg_fixed64 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; FUNCTION __kmp_compare_and_store_ret8 -; -; kmp_int8 -; __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); -; parameters: -; p: rcx -; cv: edx -; sv: r8d -; -; return: eax -PUBLIC __kmp_compare_and_store_ret8 -_TEXT SEGMENT - ALIGN 16 - -__kmp_compare_and_store_ret8 PROC ;NEAR - mov al, dl ; "cv" - mov edx, r8d ; "sv" -lock cmpxchg BYTE PTR [rcx], dl - ; Compare AL with [rcx]. If equal set - ; ZF and exchange DL with [rcx]. Else, clear - ; ZF and load [rcx] into AL. 
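-                ; Unlike __kmp_compare_and_store8 above, the _ret variants
-                ; return the value read from memory (in AL) rather than a
-                ; success flag, so no sete/and sequence follows.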
- ret - -__kmp_compare_and_store_ret8 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; FUNCTION __kmp_compare_and_store_ret16 -; -; kmp_int16 -; __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); -; parameters: -; p: rcx -; cv: edx -; sv: r8d -; -; return: eax -PUBLIC __kmp_compare_and_store_ret16 -_TEXT SEGMENT - ALIGN 16 - -__kmp_compare_and_store_ret16 PROC ;NEAR - - mov ax, dx ; "cv" - mov edx, r8d ; "sv" -lock cmpxchg WORD PTR [rcx], dx - ret - -__kmp_compare_and_store_ret16 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; FUNCTION __kmp_compare_and_store_ret32 -; -; kmp_int32 -; __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); -; parameters: -; p: rcx -; cv: edx -; sv: r8d -; -; return: eax -PUBLIC __kmp_compare_and_store_ret32 -_TEXT SEGMENT - ALIGN 16 - -__kmp_compare_and_store_ret32 PROC ;NEAR - - mov eax, edx ; "cv" - mov edx, r8d ; "sv" -lock cmpxchg DWORD PTR [rcx], edx - ret - -__kmp_compare_and_store_ret32 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; FUNCTION __kmp_compare_and_store_ret64 -; -; kmp_int64 -; __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); -; parameters: -; p: rcx -; cv: rdx -; sv: r8 -; -; return: rax -PUBLIC __kmp_compare_and_store_ret64 -_TEXT SEGMENT - ALIGN 16 - -__kmp_compare_and_store_ret64 PROC ;NEAR - - mov rax, rdx ; "cv" - mov rdx, r8 ; "sv" -lock cmpxchg QWORD PTR [rcx], rdx - ret - -__kmp_compare_and_store_ret64 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; FUNCTION __kmp_compare_and_store_loop8 -; -; kmp_int8 -; __kmp_compare_and_store_loop8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); -; parameters: -; p: rcx -; cv: edx -; sv: r8d -; -; return: al -PUBLIC __kmp_compare_and_store_loop8 -_TEXT SEGMENT - ALIGN 16 - -__kmp_compare_and_store_loop8 PROC ;NEAR -$__kmp_loop: - mov al, dl ; "cv" - mov edx, r8d ; "sv" -lock cmpxchg BYTE PTR [rcx], dl - ; Compare AL with [rcx]. If equal set - ; ZF and exchange DL with [rcx]. Else, clear - ; ZF and load [rcx] into AL. 
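-                ; The loop below keeps retrying the exchange until the CAS
-                ; succeeds; the raw bytes F3 90 encode the PAUSE spin-wait
-                ; hint for assemblers that lack the mnemonic.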
- jz SHORT $__kmp_success
-
- db 0f3H
- db 090H ; pause
-
- jmp SHORT $__kmp_loop
-
-$__kmp_success:
- ret
-
-__kmp_compare_and_store_loop8 ENDP
-_TEXT ENDS
-
-
-;------------------------------------------------------------------------
-; FUNCTION __kmp_xchg_real32
-;
-; kmp_real32
-; __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 d );
-;
-; parameters:
-; p: rcx
-; d: xmm1 (lower 4 bytes)
-;
-; return: xmm0 (lower 4 bytes)
-PUBLIC __kmp_xchg_real32
-_TEXT SEGMENT
- ALIGN 16
-__kmp_xchg_real32 PROC ;NEAR
-
- movd eax, xmm1 ; load d
-
-lock xchg DWORD PTR [rcx], eax
-
- movd xmm0, eax ; load old value into return register
- ret
-
-__kmp_xchg_real32 ENDP
-_TEXT ENDS
-
-
-;------------------------------------------------------------------------
-; FUNCTION __kmp_xchg_real64
-;
-; kmp_real64
-; __kmp_xchg_real64( volatile kmp_real64 *p, kmp_real64 d );
-;
-; parameters:
-; p: rcx
-; d: xmm1 (lower 8 bytes)
-;
-; return: xmm0 (lower 8 bytes)
-PUBLIC __kmp_xchg_real64
-_TEXT SEGMENT
- ALIGN 16
-__kmp_xchg_real64 PROC ;NEAR
-
- movd rax, xmm1 ; load "d"
-
-lock xchg QWORD PTR [rcx], rax
-
- movd xmm0, rax ; load old value into return register
- ret
-
-__kmp_xchg_real64 ENDP
-_TEXT ENDS
-
-;------------------------------------------------------------------------
-; FUNCTION __kmp_load_x87_fpu_control_word
-;
-; void
-; __kmp_load_x87_fpu_control_word( kmp_int16 *p );
-;
-; parameters:
-; p: rcx
-PUBLIC __kmp_load_x87_fpu_control_word
-_TEXT SEGMENT
- ALIGN 16
-__kmp_load_x87_fpu_control_word PROC ;NEAR
-
- fldcw WORD PTR [rcx]
- ret
-
-__kmp_load_x87_fpu_control_word ENDP
-_TEXT ENDS
-
-
-;------------------------------------------------------------------------
-; FUNCTION __kmp_store_x87_fpu_control_word
-;
-; void
-; __kmp_store_x87_fpu_control_word( kmp_int16 *p );
-;
-; parameters:
-; p: rcx
-PUBLIC __kmp_store_x87_fpu_control_word
-_TEXT SEGMENT
- ALIGN 16
-__kmp_store_x87_fpu_control_word PROC ;NEAR
-
- fstcw WORD PTR [rcx]
- ret
-
-__kmp_store_x87_fpu_control_word ENDP
-_TEXT ENDS
-
-
-;------------------------------------------------------------------------
-; FUNCTION __kmp_clear_x87_fpu_status_word
-;
-; void
-; __kmp_clear_x87_fpu_status_word()
-PUBLIC __kmp_clear_x87_fpu_status_word
-_TEXT SEGMENT
- ALIGN 16
-__kmp_clear_x87_fpu_status_word PROC ;NEAR
-
- fnclex
- ret
-
-__kmp_clear_x87_fpu_status_word ENDP
-_TEXT ENDS
-
-
-;------------------------------------------------------------------------
-; FUNCTION __kmp_invoke_microtask
-;
-; typedef void (*microtask_t)( int *gtid, int *tid, ... );
-;
-; int
-; __kmp_invoke_microtask( microtask_t pkfn,
-;                         int gtid, int tid,
-;                         int argc, void *p_argv[] ) {
-;
-;   (*pkfn) ( &gtid, &tid, argv[0], ... );
-;   return 1;
-; }
-;
-; note:
-; just before call to pkfn must have rsp 128-byte aligned for compiler
-;
-; parameters:
-; rcx: pkfn 16[rbp]
-; edx: gtid 24[rbp]
-; r8d: tid 32[rbp]
-; r9d: argc 40[rbp]
-; [st]: p_argv 48[rbp]
-;
-; reg temps:
-; rax: used all over the place
-; rdx: used all over the place
-; rcx: used as argument counter for push parms loop
-; r10: used to hold pkfn function pointer argument
-;
-; return: eax (always 1/TRUE)
-$_pkfn = 16
-$_gtid = 24
-$_tid = 32
-$_argc = 40
-$_p_argv = 48
-if OMPT_SUPPORT
-$_exit_frame = 56
-endif
-
-PUBLIC __kmp_invoke_microtask
-_TEXT SEGMENT
- ALIGN 16
-
-__kmp_invoke_microtask PROC FRAME ;NEAR
- mov QWORD PTR 16[rsp], rdx ; home gtid parameter
- mov QWORD PTR 24[rsp], r8 ; home tid parameter
- push rbp ; save base pointer
- .pushreg rbp
- sub rsp, 0 ; no fixed allocation necessary - end prolog
-
- lea rbp, QWORD PTR [rsp] ; establish the base pointer
- .setframe rbp, 0
- .ENDPROLOG
-if OMPT_SUPPORT
- mov rax, QWORD PTR $_exit_frame[rbp]
- mov QWORD PTR [rax], rbp
-endif
- mov r10, rcx ; save pkfn pointer for later
-
-;; ------------------------------------------------------------
- mov rax, r9 ; rax <= argc
- cmp rax, 2
- jge SHORT $_kmp_invoke_stack_align
- mov rax, 2 ; set 4 homes if less than 2 parms
-$_kmp_invoke_stack_align:
- lea rdx, QWORD PTR [rax*8+16] ; rax <= (argc + 2) * 8
- mov rax, rsp ; Save current SP into rax
- sub rax, rdx ; rsp - ((argc+2)*8) -> rax
- ; without align, rsp would be this
- and rax, -128 ; Mask off 7 bits (128-byte align)
- add rax, rdx ; add space for push's in a loop below
- mov rsp, rax ; Prepare the stack ptr
- ; Now it will align to 128-byte at the call
-;; ------------------------------------------------------------
- ; setup pkfn parameter stack
- mov rax, r9 ; rax <= argc
- shl rax, 3 ; rax <= argc*8
- mov rdx, QWORD PTR $_p_argv[rbp] ; rdx <= p_argv
- add rdx, rax ; rdx <= &p_argv[argc]
- mov rcx, r9 ; rcx <= argc
- jecxz SHORT $_kmp_invoke_pass_parms ; nothing to push if argc=0
- cmp ecx, 1 ; if argc=1 branch ahead
- je SHORT $_kmp_invoke_one_parm
- sub ecx, 2 ; if argc=2 branch ahead, subtract two from
- je SHORT $_kmp_invoke_two_parms
-
-$_kmp_invoke_push_parms: ; push last - 5th parms to pkfn on stack
- sub rdx, 8 ; decrement p_argv pointer to previous parm
- mov r8, QWORD PTR [rdx] ; r8 <= p_argv[rcx-1]
- push r8 ; push p_argv[rcx-1] onto stack (reverse order)
- sub ecx, 1
- jecxz SHORT $_kmp_invoke_two_parms
- jmp SHORT $_kmp_invoke_push_parms
-
-$_kmp_invoke_two_parms:
- sub rdx, 8 ; put 4th parm to pkfn in r9
- mov r9, QWORD PTR [rdx] ; r9 <= p_argv[1]
-
-$_kmp_invoke_one_parm:
- sub rdx, 8 ; put 3rd parm to pkfn in r8
- mov r8, QWORD PTR [rdx] ; r8 <= p_argv[0]
-
-$_kmp_invoke_pass_parms: ; put 1st & 2nd parms to pkfn in registers
- lea rdx, QWORD PTR $_tid[rbp] ; rdx <= &tid (2nd parm to pkfn)
- lea rcx, QWORD PTR $_gtid[rbp] ; rcx <= &gtid (1st parm to pkfn)
- sub rsp, 32 ; add stack space for first four parms
- mov rax, r10 ; rax <= pkfn
- call rax ; call (*pkfn)()
- mov rax, 1 ; move 1 into return register;
-
- lea rsp, QWORD PTR [rbp] ; restore stack pointer
-
-; add rsp, 0 ; no fixed allocation necessary - start epilog
- pop rbp ; restore frame pointer
- ret
-__kmp_invoke_microtask ENDP
-_TEXT ENDS
-
-endif
-
-END
Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_stats_timing.h
===================================================================
--- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_stats_timing.h (revision 348960)
+++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_stats_timing.h (nonexistent)
@@ -1,116 +0,0 @@
-#ifndef KMP_STATS_TIMING_H
-#define KMP_STATS_TIMING_H
-
-/** @file kmp_stats_timing.h
- * Access to real time clock and timers.
- */
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "kmp_os.h"
-#include <limits>
-#include <stdint.h>
-#include <string>
-#if KMP_HAVE_X86INTRIN_H
-#include <x86intrin.h>
-#endif
-
-class tsc_tick_count {
-private:
-  int64_t my_count;
-
-public:
-  class tsc_interval_t {
-    int64_t value;
-    explicit tsc_interval_t(int64_t _value) : value(_value) {}
-
-  public:
-    tsc_interval_t() : value(0) {} // Construct 0 time duration
-#if KMP_HAVE_TICK_TIME
-    double seconds() const; // Return the length of a time interval in seconds
-#endif
-    double ticks() const { return double(value); }
-    int64_t getValue() const { return value; }
-    tsc_interval_t &operator=(int64_t nvalue) {
-      value = nvalue;
-      return *this;
-    }
-
-    friend class tsc_tick_count;
-
-    friend tsc_interval_t operator-(const tsc_tick_count &t1,
-                                    const tsc_tick_count &t0);
-    friend tsc_interval_t operator-(const tsc_tick_count::tsc_interval_t &i1,
-                                    const tsc_tick_count::tsc_interval_t &i0);
-    friend tsc_interval_t &operator+=(tsc_tick_count::tsc_interval_t &i1,
-                                      const tsc_tick_count::tsc_interval_t &i0);
-  };
-
-#if KMP_HAVE___BUILTIN_READCYCLECOUNTER
-  tsc_tick_count()
-      : my_count(static_cast<int64_t>(__builtin_readcyclecounter())) {}
-#elif KMP_HAVE___RDTSC
-  tsc_tick_count() : my_count(static_cast<int64_t>(__rdtsc())) {}
-#else
-#error Must have high resolution timer defined
-#endif
-  tsc_tick_count(int64_t value) : my_count(value) {}
-  int64_t getValue() const { return my_count; }
-  tsc_tick_count later(tsc_tick_count const other) const {
-    return my_count > other.my_count ? (*this) : other;
-  }
-  tsc_tick_count earlier(tsc_tick_count const other) const {
-    return my_count < other.my_count ?
(*this) : other; - } -#if KMP_HAVE_TICK_TIME - static double tick_time(); // returns seconds per cycle (period) of clock -#endif - static tsc_tick_count now() { - return tsc_tick_count(); - } // returns the rdtsc register value - friend tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count &t1, - const tsc_tick_count &t0); -}; - -inline tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count &t1, - const tsc_tick_count &t0) { - return tsc_tick_count::tsc_interval_t(t1.my_count - t0.my_count); -} - -inline tsc_tick_count::tsc_interval_t -operator-(const tsc_tick_count::tsc_interval_t &i1, - const tsc_tick_count::tsc_interval_t &i0) { - return tsc_tick_count::tsc_interval_t(i1.value - i0.value); -} - -inline tsc_tick_count::tsc_interval_t & -operator+=(tsc_tick_count::tsc_interval_t &i1, - const tsc_tick_count::tsc_interval_t &i0) { - i1.value += i0.value; - return i1; -} - -#if KMP_HAVE_TICK_TIME -inline double tsc_tick_count::tsc_interval_t::seconds() const { - return value * tick_time(); -} -#endif - -extern std::string formatSI(double interval, int width, char unit); - -inline std::string formatSeconds(double interval, int width) { - return formatSI(interval, width, 'S'); -} - -inline std::string formatTicks(double interval, int width) { - return formatSI(interval, width, 'T'); -} - -#endif // KMP_STATS_TIMING_H Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_stats_timing.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_environment.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_environment.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_environment.cpp (nonexistent) @@ -1,501 +0,0 @@ -/* - * kmp_environment.cpp -- Handle environment variables OS-independently. - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -/* We use GetEnvironmentVariable for Windows* OS instead of getenv because the - act of loading a DLL on Windows* OS makes any user-set environment variables - (i.e. with putenv()) unavailable. getenv() apparently gets a clean copy of - the env variables as they existed at the start of the run. JH 12/23/2002 - - On Windows* OS, there are two environments (at least, see below): - - 1. Environment maintained by Windows* OS on IA-32 architecture. Accessible - through GetEnvironmentVariable(), SetEnvironmentVariable(), and - GetEnvironmentStrings(). - - 2. Environment maintained by C RTL. Accessible through getenv(), putenv(). - - putenv() function updates both C and Windows* OS on IA-32 architecture. - getenv() function search for variables in C RTL environment only. - Windows* OS on IA-32 architecture functions work *only* with Windows* OS on - IA-32 architecture. - - Windows* OS on IA-32 architecture maintained by OS, so there is always only - one Windows* OS on IA-32 architecture per process. 
   Changes in Windows* OS on
-   IA-32 architecture are process-visible.
-
-   C environment maintained by C RTL. Multiple copies of C RTL may be present
-   in the process, and each C RTL maintains its own environment. :-(
-
-   Thus, proper way to work with environment on Windows* OS is:
-
-   1. Set variables with putenv() function -- both C and Windows* OS on IA-32
-      architecture are being updated. Windows* OS on IA-32 architecture may be
-      considered primary target, while updating C RTL environment is free bonus.
-
-   2. Get variables with GetEnvironmentVariable() -- getenv() does not
-      search Windows* OS on IA-32 architecture, and can not see variables
-      set with SetEnvironmentVariable().
-
-   2007-04-05 -- lev
-*/
-
-#include "kmp_environment.h"
-
-#include "kmp.h" //
-#include "kmp_i18n.h"
-#include "kmp_os.h" // KMP_OS_*.
-#include "kmp_str.h" // __kmp_str_*().
-
-#if KMP_OS_UNIX
-#include <stdlib.h> // getenv, setenv, unsetenv.
-#include <string.h> // strlen, strcpy.
-#if KMP_OS_DARWIN
-#include <crt_externs.h>
-#define environ (*_NSGetEnviron())
-#else
-extern char **environ;
-#endif
-#elif KMP_OS_WINDOWS
-#include <windows.h> // GetEnvironmentVariable, SetEnvironmentVariable,
-// GetLastError.
-#else
-#error Unknown or unsupported OS.
-#endif
-
-// TODO: Eliminate direct memory allocations, use string operations instead.
-
-static inline void *allocate(size_t size) {
-  void *ptr = KMP_INTERNAL_MALLOC(size);
-  if (ptr == NULL) {
-    KMP_FATAL(MemoryAllocFailed);
-  }
-  return ptr;
-} // allocate
-
-char *__kmp_env_get(char const *name) {
-
-  char *result = NULL;
-
-#if KMP_OS_UNIX
-  char const *value = getenv(name);
-  if (value != NULL) {
-    size_t len = KMP_STRLEN(value) + 1;
-    result = (char *)KMP_INTERNAL_MALLOC(len);
-    if (result == NULL) {
-      KMP_FATAL(MemoryAllocFailed);
-    }
-    KMP_STRNCPY_S(result, len, value, len);
-  }
-#elif KMP_OS_WINDOWS
-  /* We use GetEnvironmentVariable for Windows* OS instead of getenv because the
-     act of loading a DLL on Windows* OS makes any user-set environment
-     variables (i.e. with putenv()) unavailable. getenv() apparently gets a
-     clean copy of the env variables as they existed at the start of the run.
-     JH 12/23/2002 */
-  DWORD rc;
-  rc = GetEnvironmentVariable(name, NULL, 0);
-  if (!rc) {
-    DWORD error = GetLastError();
-    if (error != ERROR_ENVVAR_NOT_FOUND) {
-      __kmp_fatal(KMP_MSG(CantGetEnvVar, name), KMP_ERR(error), __kmp_msg_null);
-    }
-    // Variable is not found, it's ok, just continue.
-  } else {
-    DWORD len = rc;
-    result = (char *)KMP_INTERNAL_MALLOC(len);
-    if (result == NULL) {
-      KMP_FATAL(MemoryAllocFailed);
-    }
-    rc = GetEnvironmentVariable(name, result, len);
-    if (!rc) {
-      // GetEnvironmentVariable() may return 0 if variable is empty.
-      // In such a case GetLastError() returns ERROR_SUCCESS.
-      DWORD error = GetLastError();
-      if (error != ERROR_SUCCESS) {
-        // Unexpected error. The variable should be in the environment,
-        // and buffer should be large enough.
-        __kmp_fatal(KMP_MSG(CantGetEnvVar, name), KMP_ERR(error),
-                    __kmp_msg_null);
-        KMP_INTERNAL_FREE((void *)result);
-        result = NULL;
-      }
-    }
-  }
-#else
-#error Unknown or unsupported OS.
-#endif
-
-  return result;
-
-} // func __kmp_env_get
-
-// TODO: Find and replace all regular free() with __kmp_env_free().
-
-void __kmp_env_free(char const **value) {
-
-  KMP_DEBUG_ASSERT(value != NULL);
-  KMP_INTERNAL_FREE(CCAST(char *, *value));
-  *value = NULL;
-
-} // func __kmp_env_free
-
-int __kmp_env_exists(char const *name) {
-
-#if KMP_OS_UNIX
-  char const *value = getenv(name);
-  return ((value == NULL) ?
(0) : (1)); -#elif KMP_OS_WINDOWS - DWORD rc; - rc = GetEnvironmentVariable(name, NULL, 0); - if (rc == 0) { - DWORD error = GetLastError(); - if (error != ERROR_ENVVAR_NOT_FOUND) { - __kmp_fatal(KMP_MSG(CantGetEnvVar, name), KMP_ERR(error), __kmp_msg_null); - } - return 0; - } - return 1; -#else -#error Unknown or unsupported OS. -#endif - -} // func __kmp_env_exists - -void __kmp_env_set(char const *name, char const *value, int overwrite) { - -#if KMP_OS_UNIX - int rc = setenv(name, value, overwrite); - if (rc != 0) { - // Dead code. I tried to put too many variables into Linux* OS - // environment on IA-32 architecture. When application consumes - // more than ~2.5 GB of memory, entire system feels bad. Sometimes - // application is killed (by OS?), sometimes system stops - // responding... But this error message never appears. --ln - __kmp_fatal(KMP_MSG(CantSetEnvVar, name), KMP_HNT(NotEnoughMemory), - __kmp_msg_null); - } -#elif KMP_OS_WINDOWS - BOOL rc; - if (!overwrite) { - rc = GetEnvironmentVariable(name, NULL, 0); - if (rc) { - // Variable exists, do not overwrite. - return; - } - DWORD error = GetLastError(); - if (error != ERROR_ENVVAR_NOT_FOUND) { - __kmp_fatal(KMP_MSG(CantGetEnvVar, name), KMP_ERR(error), __kmp_msg_null); - } - } - rc = SetEnvironmentVariable(name, value); - if (!rc) { - DWORD error = GetLastError(); - __kmp_fatal(KMP_MSG(CantSetEnvVar, name), KMP_ERR(error), __kmp_msg_null); - } -#else -#error Unknown or unsupported OS. -#endif - -} // func __kmp_env_set - -void __kmp_env_unset(char const *name) { - -#if KMP_OS_UNIX - unsetenv(name); -#elif KMP_OS_WINDOWS - BOOL rc = SetEnvironmentVariable(name, NULL); - if (!rc) { - DWORD error = GetLastError(); - __kmp_fatal(KMP_MSG(CantSetEnvVar, name), KMP_ERR(error), __kmp_msg_null); - } -#else -#error Unknown or unsupported OS. -#endif - -} // func __kmp_env_unset - -/* Intel OpenMP RTL string representation of environment: just a string of - characters, variables are separated with vertical bars, e. g.: - - "KMP_WARNINGS=0|KMP_AFFINITY=compact|" - - Empty variables are allowed and ignored: - - "||KMP_WARNINGS=1||" -*/ - -static void -___kmp_env_blk_parse_string(kmp_env_blk_t *block, // M: Env block to fill. - char const *env // I: String to parse. - ) { - - char const chr_delimiter = '|'; - char const str_delimiter[] = {chr_delimiter, 0}; - - char *bulk = NULL; - kmp_env_var_t *vars = NULL; - int count = 0; // Number of used elements in vars array. - int delimiters = 0; // Number of delimiters in input string. - - // Copy original string, we will modify the copy. - bulk = __kmp_str_format("%s", env); - - // Loop thru all the vars in environment block. Count delimiters (maximum - // number of variables is number of delimiters plus one). - { - char const *ptr = bulk; - for (;;) { - ptr = strchr(ptr, chr_delimiter); - if (ptr == NULL) { - break; - } - ++delimiters; - ptr += 1; - } - } - - // Allocate vars array. - vars = (kmp_env_var_t *)allocate((delimiters + 1) * sizeof(kmp_env_var_t)); - - // Loop thru all the variables. - { - char *var; // Pointer to variable (both name and value). - char *name; // Pointer to name of variable. - char *value; // Pointer to value. - char *buf; // Buffer for __kmp_str_token() function. - var = __kmp_str_token(bulk, str_delimiter, &buf); // Get the first var. - while (var != NULL) { - // Save found variable in vars array. 
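-      // __kmp_str_split() cuts "name=value" in place at the first '=',
-      // so both pointers reference the bulk copy; no per-variable
-      // allocation is needed.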
- __kmp_str_split(var, '=', &name, &value); - KMP_DEBUG_ASSERT(count < delimiters + 1); - vars[count].name = name; - vars[count].value = value; - ++count; - // Get the next var. - var = __kmp_str_token(NULL, str_delimiter, &buf); - } - } - - // Fill out result. - block->bulk = bulk; - block->vars = vars; - block->count = count; -} - -/* Windows* OS (actually, DOS) environment block is a piece of memory with - environment variables. Each variable is terminated with zero byte, entire - block is terminated with one extra zero byte, so we have two zero bytes at - the end of environment block, e. g.: - - "HOME=C:\\users\\lev\x00OS=Windows_NT\x00\x00" - - It is not clear how empty environment is represented. "\x00\x00"? -*/ - -#if KMP_OS_WINDOWS -static void ___kmp_env_blk_parse_windows( - kmp_env_blk_t *block, // M: Env block to fill. - char const *env // I: Pointer to Windows* OS (DOS) environment block. - ) { - - char *bulk = NULL; - kmp_env_var_t *vars = NULL; - int count = 0; // Number of used elements in vars array. - int size = 0; // Size of bulk. - - char *name; // Pointer to name of variable. - char *value; // Pointer to value. - - if (env != NULL) { - - // Loop thru all the vars in environment block. Count variables, find size - // of block. - { - char const *var; // Pointer to beginning of var. - int len; // Length of variable. - count = 0; - var = - env; // The first variable starts and beginning of environment block. - len = KMP_STRLEN(var); - while (len != 0) { - ++count; - size = size + len + 1; - var = var + len + - 1; // Move pointer to the beginning of the next variable. - len = KMP_STRLEN(var); - } - size = - size + 1; // Total size of env block, including terminating zero byte. - } - - // Copy original block to bulk, we will modify bulk, not original block. - bulk = (char *)allocate(size); - KMP_MEMCPY_S(bulk, size, env, size); - // Allocate vars array. - vars = (kmp_env_var_t *)allocate(count * sizeof(kmp_env_var_t)); - - // Loop thru all the vars, now in bulk. - { - char *var; // Pointer to beginning of var. - int len; // Length of variable. - count = 0; - var = bulk; - len = KMP_STRLEN(var); - while (len != 0) { - // Save variable in vars array. - __kmp_str_split(var, '=', &name, &value); - vars[count].name = name; - vars[count].value = value; - ++count; - // Get the next var. - var = var + len + 1; - len = KMP_STRLEN(var); - } - } - } - - // Fill out result. - block->bulk = bulk; - block->vars = vars; - block->count = count; -} -#endif - -/* Unix environment block is a array of pointers to variables, last pointer in - array is NULL: - - { "HOME=/home/lev", "TERM=xterm", NULL } -*/ - -static void -___kmp_env_blk_parse_unix(kmp_env_blk_t *block, // M: Env block to fill. - char **env // I: Unix environment to parse. - ) { - - char *bulk = NULL; - kmp_env_var_t *vars = NULL; - int count = 0; - int size = 0; // Size of bulk. - - // Count number of variables and length of required bulk. - { - count = 0; - size = 0; - while (env[count] != NULL) { - size += KMP_STRLEN(env[count]) + 1; - ++count; - } - } - - // Allocate memory. - bulk = (char *)allocate(size); - vars = (kmp_env_var_t *)allocate(count * sizeof(kmp_env_var_t)); - - // Loop thru all the vars. - { - char *var; // Pointer to beginning of var. - char *name; // Pointer to name of variable. - char *value; // Pointer to value. - int len; // Length of variable. - int i; - var = bulk; - for (i = 0; i < count; ++i) { - // Copy variable to bulk. 
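-      // Each "name=value" entry is copied back to back into the single
-      // bulk buffer, so the saved pointers remain valid independently of
-      // the original environ array.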
- len = KMP_STRLEN(env[i]); - KMP_MEMCPY_S(var, size, env[i], len + 1); - // Save found variable in vars array. - __kmp_str_split(var, '=', &name, &value); - vars[i].name = name; - vars[i].value = value; - // Move pointer. - var += len + 1; - } - } - - // Fill out result. - block->bulk = bulk; - block->vars = vars; - block->count = count; -} - -void __kmp_env_blk_init(kmp_env_blk_t *block, // M: Block to initialize. - char const *bulk // I: Initialization string, or NULL. - ) { - - if (bulk != NULL) { - ___kmp_env_blk_parse_string(block, bulk); - } else { -#if KMP_OS_UNIX - ___kmp_env_blk_parse_unix(block, environ); -#elif KMP_OS_WINDOWS - { - char *mem = GetEnvironmentStrings(); - if (mem == NULL) { - DWORD error = GetLastError(); - __kmp_fatal(KMP_MSG(CantGetEnvironment), KMP_ERR(error), - __kmp_msg_null); - } - ___kmp_env_blk_parse_windows(block, mem); - FreeEnvironmentStrings(mem); - } -#else -#error Unknown or unsupported OS. -#endif - } - -} // __kmp_env_blk_init - -static int ___kmp_env_var_cmp( // Comparison function for qsort(). - kmp_env_var_t const *lhs, kmp_env_var_t const *rhs) { - return strcmp(lhs->name, rhs->name); -} - -void __kmp_env_blk_sort( - kmp_env_blk_t *block // M: Block of environment variables to sort. - ) { - - qsort(CCAST(kmp_env_var_t *, block->vars), block->count, - sizeof(kmp_env_var_t), - (int (*)(void const *, void const *)) & ___kmp_env_var_cmp); - -} // __kmp_env_block_sort - -void __kmp_env_blk_free( - kmp_env_blk_t *block // M: Block of environment variables to free. - ) { - - KMP_INTERNAL_FREE(CCAST(kmp_env_var_t *, block->vars)); - __kmp_str_free(&(block->bulk)); - - block->count = 0; - block->vars = NULL; - -} // __kmp_env_blk_free - -char const * // R: Value of variable or NULL if variable does not exist. - __kmp_env_blk_var( - kmp_env_blk_t *block, // I: Block of environment variables. - char const *name // I: Name of variable to find. - ) { - - int i; - for (i = 0; i < block->count; ++i) { - if (strcmp(block->vars[i].name, name) == 0) { - return block->vars[i].value; - } - } - return NULL; - -} // __kmp_env_block_var - -// end of file // Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_environment.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_sched.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_sched.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_sched.cpp (nonexistent) @@ -1,1001 +0,0 @@ -/* - * kmp_sched.cpp -- static scheduling -- iteration initialization - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -/* Static scheduling initialization. - - NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however - it may change values between parallel regions. __kmp_max_nth - is the largest value __kmp_nth may take, 1 is the smallest. 
*/
-
-#include "kmp.h"
-#include "kmp_error.h"
-#include "kmp_i18n.h"
-#include "kmp_itt.h"
-#include "kmp_stats.h"
-#include "kmp_str.h"
-
-#if OMPT_SUPPORT
-#include "ompt-specific.h"
-#endif
-
-#ifdef KMP_DEBUG
-//-------------------------------------------------------------------------
-// template for debug prints specification ( d, u, lld, llu )
-char const *traits_t<signed int>::spec = "d";
-char const *traits_t<unsigned int>::spec = "u";
-char const *traits_t<signed long long>::spec = "lld";
-char const *traits_t<unsigned long long>::spec = "llu";
-char const *traits_t<long>::spec = "ld";
-//-------------------------------------------------------------------------
-#endif
-
-template <typename T>
-static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
-                                  kmp_int32 schedtype, kmp_int32 *plastiter,
-                                  T *plower, T *pupper,
-                                  typename traits_t<T>::signed_t *pstride,
-                                  typename traits_t<T>::signed_t incr,
-                                  typename traits_t<T>::signed_t chunk
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-                                  ,
-                                  void *codeptr
-#endif
-                                  ) {
-  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
-  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
-  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);
-
-  typedef typename traits_t<T>::unsigned_t UT;
-  typedef typename traits_t<T>::signed_t ST;
-  /* this all has to be changed back to TID and such.. */
-  kmp_int32 gtid = global_tid;
-  kmp_uint32 tid;
-  kmp_uint32 nth;
-  UT trip_count;
-  kmp_team_t *team;
-  kmp_info_t *th = __kmp_threads[gtid];
-
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  ompt_team_info_t *team_info = NULL;
-  ompt_task_info_t *task_info = NULL;
-  ompt_work_t ompt_work_type = ompt_work_loop;
-
-  static kmp_int8 warn = 0;
-
-  if (ompt_enabled.ompt_callback_work) {
-    // Only fully initialize variables needed by OMPT if OMPT is enabled.
-    team_info = __ompt_get_teaminfo(0, NULL);
-    task_info = __ompt_get_task_info_object(0);
-    // Determine workshare type
-    if (loc != NULL) {
-      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
-        ompt_work_type = ompt_work_loop;
-      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
-        ompt_work_type = ompt_work_sections;
-      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
-        ompt_work_type = ompt_work_distribute;
-      } else {
-        kmp_int8 bool_res =
-            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
-        if (bool_res)
-          KMP_WARNING(OmptOutdatedWorkshare);
-      }
-      KMP_DEBUG_ASSERT(ompt_work_type);
-    }
-  }
-#endif
-
-  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
-  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
-#ifdef KMP_DEBUG
-  {
-    char *buff;
-    // create format specifiers before the debug output
-    buff = __kmp_str_format(
-        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
-        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
-        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
-        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
-    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
-                   *pstride, incr, chunk));
-    __kmp_str_free(&buff);
-  }
-#endif
-
-  if (__kmp_env_consistency_check) {
-    __kmp_push_workshare(global_tid, ct_pdo, loc);
-    if (incr == 0) {
-      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
-                            loc);
-    }
-  }
-  /* special handling for zero-trip loops */
-  if (incr > 0 ?
(*pupper < *plower) : (*plower < *pupper)) { - if (plastiter != NULL) - *plastiter = FALSE; - /* leave pupper and plower set to entire iteration space */ - *pstride = incr; /* value should never be used */ -// *plower = *pupper - incr; -// let compiler bypass the illegal loop (like for(i=1;i<10;i--)) -// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE -// ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009. -#ifdef KMP_DEBUG - { - char *buff; - // create format specifiers before the debug output - buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d " - "lower=%%%s upper=%%%s stride = %%%s " - "signed?<%s>, loc = %%s\n", - traits_t::spec, traits_t::spec, - traits_t::spec, traits_t::spec); - KD_TRACE(100, - (buff, *plastiter, *plower, *pupper, *pstride, loc->psource)); - __kmp_str_free(&buff); - } -#endif - KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid)); - -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.ompt_callback_work) { - ompt_callbacks.ompt_callback(ompt_callback_work)( - ompt_work_type, ompt_scope_begin, &(team_info->parallel_data), - &(task_info->task_data), 0, codeptr); - } -#endif - return; - } - -#if OMP_40_ENABLED - // Although there are schedule enumerations above kmp_ord_upper which are not - // schedules for "distribute", the only ones which are useful are dynamic, so - // cannot be seen here, since this codepath is only executed for static - // schedules. - if (schedtype > kmp_ord_upper) { - // we are in DISTRIBUTE construct - schedtype += kmp_sch_static - - kmp_distribute_static; // AC: convert to usual schedule type - tid = th->th.th_team->t.t_master_tid; - team = th->th.th_team->t.t_parent; - } else -#endif - { - tid = __kmp_tid_from_gtid(global_tid); - team = th->th.th_team; - } - - /* determine if "for" loop is an active worksharing construct */ - if (team->t.t_serialized) { - /* serialized parallel, each thread executes whole iteration space */ - if (plastiter != NULL) - *plastiter = TRUE; - /* leave pupper and plower set to entire iteration space */ - *pstride = - (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1)); - -#ifdef KMP_DEBUG - { - char *buff; - // create format specifiers before the debug output - buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d " - "lower=%%%s upper=%%%s stride = %%%s\n", - traits_t::spec, traits_t::spec, - traits_t::spec); - KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride)); - __kmp_str_free(&buff); - } -#endif - KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid)); - -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.ompt_callback_work) { - ompt_callbacks.ompt_callback(ompt_callback_work)( - ompt_work_type, ompt_scope_begin, &(team_info->parallel_data), - &(task_info->task_data), *pstride, codeptr); - } -#endif - return; - } - nth = team->t.t_nproc; - if (nth == 1) { - if (plastiter != NULL) - *plastiter = TRUE; - *pstride = - (incr > 0) ? 
(*pupper - *plower + 1) : (-(*plower - *pupper + 1)); -#ifdef KMP_DEBUG - { - char *buff; - // create format specifiers before the debug output - buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d " - "lower=%%%s upper=%%%s stride = %%%s\n", - traits_t::spec, traits_t::spec, - traits_t::spec); - KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride)); - __kmp_str_free(&buff); - } -#endif - KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid)); - -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.ompt_callback_work) { - ompt_callbacks.ompt_callback(ompt_callback_work)( - ompt_work_type, ompt_scope_begin, &(team_info->parallel_data), - &(task_info->task_data), *pstride, codeptr); - } -#endif - return; - } - - /* compute trip count */ - if (incr == 1) { - trip_count = *pupper - *plower + 1; - } else if (incr == -1) { - trip_count = *plower - *pupper + 1; - } else if (incr > 0) { - // upper-lower can exceed the limit of signed type - trip_count = (UT)(*pupper - *plower) / incr + 1; - } else { - trip_count = (UT)(*plower - *pupper) / (-incr) + 1; - } - - if (__kmp_env_consistency_check) { - /* tripcount overflow? */ - if (trip_count == 0 && *pupper != *plower) { - __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, - loc); - } - } - - /* compute remaining parameters */ - switch (schedtype) { - case kmp_sch_static: { - if (trip_count < nth) { - KMP_DEBUG_ASSERT( - __kmp_static == kmp_sch_static_greedy || - __kmp_static == - kmp_sch_static_balanced); // Unknown static scheduling type. - if (tid < trip_count) { - *pupper = *plower = *plower + tid * incr; - } else { - *plower = *pupper + incr; - } - if (plastiter != NULL) - *plastiter = (tid == trip_count - 1); - } else { - if (__kmp_static == kmp_sch_static_balanced) { - UT small_chunk = trip_count / nth; - UT extras = trip_count % nth; - *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras)); - *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr); - if (plastiter != NULL) - *plastiter = (tid == nth - 1); - } else { - T big_chunk_inc_count = - (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr; - T old_upper = *pupper; - - KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy); - // Unknown static scheduling type. 
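-        // Greedy split: each thread takes a contiguous block of
-        // ceil(trip_count / nth) iterations; the checks below clamp the
-        // final block to old_upper and catch arithmetic overflow.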
- - *plower += tid * big_chunk_inc_count; - *pupper = *plower + big_chunk_inc_count - incr; - if (incr > 0) { - if (*pupper < *plower) - *pupper = traits_t::max_value; - if (plastiter != NULL) - *plastiter = *plower <= old_upper && *pupper > old_upper - incr; - if (*pupper > old_upper) - *pupper = old_upper; // tracker C73258 - } else { - if (*pupper > *plower) - *pupper = traits_t::min_value; - if (plastiter != NULL) - *plastiter = *plower >= old_upper && *pupper < old_upper - incr; - if (*pupper < old_upper) - *pupper = old_upper; // tracker C73258 - } - } - } - *pstride = trip_count; - break; - } - case kmp_sch_static_chunked: { - ST span; - if (chunk < 1) { - chunk = 1; - } - span = chunk * incr; - *pstride = span * nth; - *plower = *plower + (span * tid); - *pupper = *plower + span - incr; - if (plastiter != NULL) - *plastiter = (tid == ((trip_count - 1) / (UT)chunk) % nth); - break; - } -#if OMP_45_ENABLED - case kmp_sch_static_balanced_chunked: { - T old_upper = *pupper; - // round up to make sure the chunk is enough to cover all iterations - UT span = (trip_count + nth - 1) / nth; - - // perform chunk adjustment - chunk = (span + chunk - 1) & ~(chunk - 1); - - span = chunk * incr; - *plower = *plower + (span * tid); - *pupper = *plower + span - incr; - if (incr > 0) { - if (*pupper > old_upper) - *pupper = old_upper; - } else if (*pupper < old_upper) - *pupper = old_upper; - - if (plastiter != NULL) - *plastiter = (tid == ((trip_count - 1) / (UT)chunk)); - break; - } -#endif - default: - KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type"); - break; - } - -#if USE_ITT_BUILD - // Report loop metadata - if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && - __kmp_forkjoin_frames_mode == 3 && -#if OMP_40_ENABLED - th->th.th_teams_microtask == NULL && -#endif - team->t.t_active_level == 1) { - kmp_uint64 cur_chunk = chunk; - // Calculate chunk in case it was not specified; it is specified for - // kmp_sch_static_chunked - if (schedtype == kmp_sch_static) { - cur_chunk = trip_count / nth + ((trip_count % nth) ? 
1 : 0); - } - // 0 - "static" schedule - __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk); - } -#endif -#ifdef KMP_DEBUG - { - char *buff; - // create format specifiers before the debug output - buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s " - "upper=%%%s stride = %%%s signed?<%s>\n", - traits_t::spec, traits_t::spec, - traits_t::spec, traits_t::spec); - KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride)); - __kmp_str_free(&buff); - } -#endif - KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid)); - -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.ompt_callback_work) { - ompt_callbacks.ompt_callback(ompt_callback_work)( - ompt_work_type, ompt_scope_begin, &(team_info->parallel_data), - &(task_info->task_data), trip_count, codeptr); - } -#endif - -#if KMP_STATS_ENABLED - { - kmp_int64 t; - kmp_int64 u = (kmp_int64)(*pupper); - kmp_int64 l = (kmp_int64)(*plower); - kmp_int64 i = (kmp_int64)incr; - /* compute trip count */ - if (i == 1) { - t = u - l + 1; - } else if (i == -1) { - t = l - u + 1; - } else if (i > 0) { - t = (u - l) / i + 1; - } else { - t = (l - u) / (-i) + 1; - } - KMP_COUNT_VALUE(OMP_loop_static_iterations, t); - KMP_POP_PARTITIONED_TIMER(); - } -#endif - return; -} - -template -static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid, - kmp_int32 schedule, kmp_int32 *plastiter, - T *plower, T *pupper, T *pupperDist, - typename traits_t::signed_t *pstride, - typename traits_t::signed_t incr, - typename traits_t::signed_t chunk) { - KMP_COUNT_BLOCK(OMP_DISTRIBUTE); - typedef typename traits_t::unsigned_t UT; - typedef typename traits_t::signed_t ST; - kmp_uint32 tid; - kmp_uint32 nth; - kmp_uint32 team_id; - kmp_uint32 nteams; - UT trip_count; - kmp_team_t *team; - kmp_info_t *th; - - KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride); - KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid)); -#ifdef KMP_DEBUG - { - char *buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d " - "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n", - traits_t::spec, traits_t::spec, traits_t::spec, - traits_t::spec, traits_t::spec); - KD_TRACE(100, - (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk)); - __kmp_str_free(&buff); - } -#endif - - if (__kmp_env_consistency_check) { - __kmp_push_workshare(gtid, ct_pdo, loc); - if (incr == 0) { - __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, - loc); - } - if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) { - // The loop is illegal. 
- // Some zero-trip loops maintained by compiler, e.g.: - // for(i=10;i<0;++i) // lower >= upper - run-time check - // for(i=0;i>10;--i) // lower <= upper - run-time check - // for(i=0;i>10;++i) // incr > 0 - compile-time check - // for(i=10;i<0;--i) // incr < 0 - compile-time check - // Compiler does not check the following illegal loops: - // for(i=0;i<10;i+=incr) // where incr<0 - // for(i=10;i>0;i-=incr) // where incr<0 - __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc); - } - } - tid = __kmp_tid_from_gtid(gtid); - th = __kmp_threads[gtid]; - nth = th->th.th_team_nproc; - team = th->th.th_team; -#if OMP_40_ENABLED - KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct - nteams = th->th.th_teams_size.nteams; -#endif - team_id = team->t.t_master_tid; - KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc); - - // compute global trip count - if (incr == 1) { - trip_count = *pupper - *plower + 1; - } else if (incr == -1) { - trip_count = *plower - *pupper + 1; - } else if (incr > 0) { - // upper-lower can exceed the limit of signed type - trip_count = (UT)(*pupper - *plower) / incr + 1; - } else { - trip_count = (UT)(*plower - *pupper) / (-incr) + 1; - } - - *pstride = *pupper - *plower; // just in case (can be unused) - if (trip_count <= nteams) { - KMP_DEBUG_ASSERT( - __kmp_static == kmp_sch_static_greedy || - __kmp_static == - kmp_sch_static_balanced); // Unknown static scheduling type. - // only masters of some teams get single iteration, other threads get - // nothing - if (team_id < trip_count && tid == 0) { - *pupper = *pupperDist = *plower = *plower + team_id * incr; - } else { - *pupperDist = *pupper; - *plower = *pupper + incr; // compiler should skip loop body - } - if (plastiter != NULL) - *plastiter = (tid == 0 && team_id == trip_count - 1); - } else { - // Get the team's chunk first (each team gets at most one chunk) - if (__kmp_static == kmp_sch_static_balanced) { - UT chunkD = trip_count / nteams; - UT extras = trip_count % nteams; - *plower += - incr * (team_id * chunkD + (team_id < extras ? team_id : extras)); - *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr); - if (plastiter != NULL) - *plastiter = (team_id == nteams - 1); - } else { - T chunk_inc_count = - (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr; - T upper = *pupper; - KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy); - // Unknown static scheduling type. 
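-      // Same greedy split across teams: each team claims a contiguous
-      // block of ceil(trip_count / nteams) iterations; a team whose
-      // clamped block is empty jumps to "end" with no iterations.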
- *plower += team_id * chunk_inc_count; - *pupperDist = *plower + chunk_inc_count - incr; - // Check/correct bounds if needed - if (incr > 0) { - if (*pupperDist < *plower) - *pupperDist = traits_t::max_value; - if (plastiter != NULL) - *plastiter = *plower <= upper && *pupperDist > upper - incr; - if (*pupperDist > upper) - *pupperDist = upper; // tracker C73258 - if (*plower > *pupperDist) { - *pupper = *pupperDist; // no iterations available for the team - goto end; - } - } else { - if (*pupperDist > *plower) - *pupperDist = traits_t::min_value; - if (plastiter != NULL) - *plastiter = *plower >= upper && *pupperDist < upper - incr; - if (*pupperDist < upper) - *pupperDist = upper; // tracker C73258 - if (*plower < *pupperDist) { - *pupper = *pupperDist; // no iterations available for the team - goto end; - } - } - } - // Get the parallel loop chunk now (for thread) - // compute trip count for team's chunk - if (incr == 1) { - trip_count = *pupperDist - *plower + 1; - } else if (incr == -1) { - trip_count = *plower - *pupperDist + 1; - } else if (incr > 1) { - // upper-lower can exceed the limit of signed type - trip_count = (UT)(*pupperDist - *plower) / incr + 1; - } else { - trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1; - } - KMP_DEBUG_ASSERT(trip_count); - switch (schedule) { - case kmp_sch_static: { - if (trip_count <= nth) { - KMP_DEBUG_ASSERT( - __kmp_static == kmp_sch_static_greedy || - __kmp_static == - kmp_sch_static_balanced); // Unknown static scheduling type. - if (tid < trip_count) - *pupper = *plower = *plower + tid * incr; - else - *plower = *pupper + incr; // no iterations available - if (plastiter != NULL) - if (*plastiter != 0 && !(tid == trip_count - 1)) - *plastiter = 0; - } else { - if (__kmp_static == kmp_sch_static_balanced) { - UT chunkL = trip_count / nth; - UT extras = trip_count % nth; - *plower += incr * (tid * chunkL + (tid < extras ? tid : extras)); - *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr); - if (plastiter != NULL) - if (*plastiter != 0 && !(tid == nth - 1)) - *plastiter = 0; - } else { - T chunk_inc_count = - (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr; - T upper = *pupperDist; - KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy); - // Unknown static scheduling type. 
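-          // Threads then greedily split the team's block, mirroring the
-          // kmp_sch_static case in __kmp_for_static_init above.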
- *plower += tid * chunk_inc_count; - *pupper = *plower + chunk_inc_count - incr; - if (incr > 0) { - if (*pupper < *plower) - *pupper = traits_t::max_value; - if (plastiter != NULL) - if (*plastiter != 0 && - !(*plower <= upper && *pupper > upper - incr)) - *plastiter = 0; - if (*pupper > upper) - *pupper = upper; // tracker C73258 - } else { - if (*pupper > *plower) - *pupper = traits_t::min_value; - if (plastiter != NULL) - if (*plastiter != 0 && - !(*plower >= upper && *pupper < upper - incr)) - *plastiter = 0; - if (*pupper < upper) - *pupper = upper; // tracker C73258 - } - } - } - break; - } - case kmp_sch_static_chunked: { - ST span; - if (chunk < 1) - chunk = 1; - span = chunk * incr; - *pstride = span * nth; - *plower = *plower + (span * tid); - *pupper = *plower + span - incr; - if (plastiter != NULL) - if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth)) - *plastiter = 0; - break; - } - default: - KMP_ASSERT2(0, - "__kmpc_dist_for_static_init: unknown loop scheduling type"); - break; - } - } -end:; -#ifdef KMP_DEBUG - { - char *buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s " - "stride=%%%s signed?<%s>\n", - traits_t::spec, traits_t::spec, traits_t::spec, - traits_t::spec, traits_t::spec); - KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride)); - __kmp_str_free(&buff); - } -#endif - KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid)); - return; -} - -template -static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid, - kmp_int32 *p_last, T *p_lb, T *p_ub, - typename traits_t::signed_t *p_st, - typename traits_t::signed_t incr, - typename traits_t::signed_t chunk) { - // The routine returns the first chunk distributed to the team and - // stride for next chunks calculation. - // Last iteration flag set for the team that will execute - // the last iteration of the loop. - // The routine is called for dist_schedue(static,chunk) only. - typedef typename traits_t::unsigned_t UT; - typedef typename traits_t::signed_t ST; - kmp_uint32 team_id; - kmp_uint32 nteams; - UT trip_count; - T lower; - T upper; - ST span; - kmp_team_t *team; - kmp_info_t *th; - - KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st); - KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid)); -#ifdef KMP_DEBUG - { - char *buff; - // create format specifiers before the debug output - buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d " - "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n", - traits_t::spec, traits_t::spec, - traits_t::spec, traits_t::spec, - traits_t::spec); - KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk)); - __kmp_str_free(&buff); - } -#endif - - lower = *p_lb; - upper = *p_ub; - if (__kmp_env_consistency_check) { - if (incr == 0) { - __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, - loc); - } - if (incr > 0 ? (upper < lower) : (lower < upper)) { - // The loop is illegal. 
- // Some zero-trip loops maintained by compiler, e.g.: - // for(i=10;i<0;++i) // lower >= upper - run-time check - // for(i=0;i>10;--i) // lower <= upper - run-time check - // for(i=0;i>10;++i) // incr > 0 - compile-time check - // for(i=10;i<0;--i) // incr < 0 - compile-time check - // Compiler does not check the following illegal loops: - // for(i=0;i<10;i+=incr) // where incr<0 - // for(i=10;i>0;i-=incr) // where incr<0 - __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc); - } - } - th = __kmp_threads[gtid]; - team = th->th.th_team; -#if OMP_40_ENABLED - KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct - nteams = th->th.th_teams_size.nteams; -#endif - team_id = team->t.t_master_tid; - KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc); - - // compute trip count - if (incr == 1) { - trip_count = upper - lower + 1; - } else if (incr == -1) { - trip_count = lower - upper + 1; - } else if (incr > 0) { - // upper-lower can exceed the limit of signed type - trip_count = (UT)(upper - lower) / incr + 1; - } else { - trip_count = (UT)(lower - upper) / (-incr) + 1; - } - if (chunk < 1) - chunk = 1; - span = chunk * incr; - *p_st = span * nteams; - *p_lb = lower + (span * team_id); - *p_ub = *p_lb + span - incr; - if (p_last != NULL) - *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams); - // Correct upper bound if needed - if (incr > 0) { - if (*p_ub < *p_lb) // overflow? - *p_ub = traits_t::max_value; - if (*p_ub > upper) - *p_ub = upper; // tracker C73258 - } else { // incr < 0 - if (*p_ub > *p_lb) - *p_ub = traits_t::min_value; - if (*p_ub < upper) - *p_ub = upper; // tracker C73258 - } -#ifdef KMP_DEBUG - { - char *buff; - // create format specifiers before the debug output - buff = - __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d " - "iter=(%%%s, %%%s, %%%s) chunk %%%s\n", - traits_t::spec, traits_t::spec, - traits_t::spec, traits_t::spec); - KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk)); - __kmp_str_free(&buff); - } -#endif -} - -//------------------------------------------------------------------------------ -extern "C" { -/*! -@ingroup WORK_SHARING -@param loc Source code location -@param gtid Global thread id of this thread -@param schedtype Scheduling type -@param plastiter Pointer to the "last iteration" flag -@param plower Pointer to the lower bound -@param pupper Pointer to the upper bound -@param pstride Pointer to the stride -@param incr Loop increment -@param chunk The chunk size - -Each of the four functions here are identical apart from the argument types. - -The functions compute the upper and lower bounds and stride to be used for the -set of iterations to be executed by the current thread from the statically -scheduled loop that is described by the initial values of the bounds, stride, -increment and chunk size. - -@{ -*/ -void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, - kmp_int32 *plastiter, kmp_int32 *plower, - kmp_int32 *pupper, kmp_int32 *pstride, - kmp_int32 incr, kmp_int32 chunk) { - __kmp_for_static_init(loc, gtid, schedtype, plastiter, plower, - pupper, pstride, incr, chunk -#if OMPT_SUPPORT && OMPT_OPTIONAL - , - OMPT_GET_RETURN_ADDRESS(0) -#endif - ); -} - -/*! 
- See @ref __kmpc_for_static_init_4 - */ -void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid, - kmp_int32 schedtype, kmp_int32 *plastiter, - kmp_uint32 *plower, kmp_uint32 *pupper, - kmp_int32 *pstride, kmp_int32 incr, - kmp_int32 chunk) { - __kmp_for_static_init(loc, gtid, schedtype, plastiter, plower, - pupper, pstride, incr, chunk -#if OMPT_SUPPORT && OMPT_OPTIONAL - , - OMPT_GET_RETURN_ADDRESS(0) -#endif - ); -} - -/*! - See @ref __kmpc_for_static_init_4 - */ -void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, - kmp_int32 *plastiter, kmp_int64 *plower, - kmp_int64 *pupper, kmp_int64 *pstride, - kmp_int64 incr, kmp_int64 chunk) { - __kmp_for_static_init(loc, gtid, schedtype, plastiter, plower, - pupper, pstride, incr, chunk -#if OMPT_SUPPORT && OMPT_OPTIONAL - , - OMPT_GET_RETURN_ADDRESS(0) -#endif - ); -} - -/*! - See @ref __kmpc_for_static_init_4 - */ -void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid, - kmp_int32 schedtype, kmp_int32 *plastiter, - kmp_uint64 *plower, kmp_uint64 *pupper, - kmp_int64 *pstride, kmp_int64 incr, - kmp_int64 chunk) { - __kmp_for_static_init(loc, gtid, schedtype, plastiter, plower, - pupper, pstride, incr, chunk -#if OMPT_SUPPORT && OMPT_OPTIONAL - , - OMPT_GET_RETURN_ADDRESS(0) -#endif - ); -} -/*! -@} -*/ - -/*! -@ingroup WORK_SHARING -@param loc Source code location -@param gtid Global thread id of this thread -@param schedule Scheduling type for the parallel loop -@param plastiter Pointer to the "last iteration" flag -@param plower Pointer to the lower bound -@param pupper Pointer to the upper bound of loop chunk -@param pupperD Pointer to the upper bound of dist_chunk -@param pstride Pointer to the stride for parallel loop -@param incr Loop increment -@param chunk The chunk size for the parallel loop - -Each of the four functions here are identical apart from the argument types. - -The functions compute the upper and lower bounds and strides to be used for the -set of iterations to be executed by the current thread from the statically -scheduled loop that is described by the initial values of the bounds, strides, -increment and chunks for parallel loop and distribute constructs. - -@{ -*/ -void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid, - kmp_int32 schedule, kmp_int32 *plastiter, - kmp_int32 *plower, kmp_int32 *pupper, - kmp_int32 *pupperD, kmp_int32 *pstride, - kmp_int32 incr, kmp_int32 chunk) { - __kmp_dist_for_static_init(loc, gtid, schedule, plastiter, plower, - pupper, pupperD, pstride, incr, chunk); -} - -/*! - See @ref __kmpc_dist_for_static_init_4 - */ -void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid, - kmp_int32 schedule, kmp_int32 *plastiter, - kmp_uint32 *plower, kmp_uint32 *pupper, - kmp_uint32 *pupperD, kmp_int32 *pstride, - kmp_int32 incr, kmp_int32 chunk) { - __kmp_dist_for_static_init(loc, gtid, schedule, plastiter, plower, - pupper, pupperD, pstride, incr, chunk); -} - -/*! - See @ref __kmpc_dist_for_static_init_4 - */ -void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid, - kmp_int32 schedule, kmp_int32 *plastiter, - kmp_int64 *plower, kmp_int64 *pupper, - kmp_int64 *pupperD, kmp_int64 *pstride, - kmp_int64 incr, kmp_int64 chunk) { - __kmp_dist_for_static_init(loc, gtid, schedule, plastiter, plower, - pupper, pupperD, pstride, incr, chunk); -} - -/*! 
- See @ref __kmpc_dist_for_static_init_4 - */ -void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid, - kmp_int32 schedule, kmp_int32 *plastiter, - kmp_uint64 *plower, kmp_uint64 *pupper, - kmp_uint64 *pupperD, kmp_int64 *pstride, - kmp_int64 incr, kmp_int64 chunk) { - __kmp_dist_for_static_init(loc, gtid, schedule, plastiter, plower, - pupper, pupperD, pstride, incr, chunk); -} -/*! -@} -*/ - -//------------------------------------------------------------------------------ -// Auxiliary routines for Distribute Parallel Loop construct implementation -// Transfer call to template< type T > -// __kmp_team_static_init( ident_t *loc, int gtid, -// int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk ) - -/*! -@ingroup WORK_SHARING -@{ -@param loc Source location -@param gtid Global thread id -@param p_last pointer to last iteration flag -@param p_lb pointer to Lower bound -@param p_ub pointer to Upper bound -@param p_st Step (or increment if you prefer) -@param incr Loop increment -@param chunk The chunk size to block with - -The functions compute the upper and lower bounds and stride to be used for the -set of iterations to be executed by the current team from the statically -scheduled loop that is described by the initial values of the bounds, stride, -increment and chunk for the distribute construct as part of composite distribute -parallel loop construct. These functions are all identical apart from the types -of the arguments. -*/ - -void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, - kmp_int32 *p_lb, kmp_int32 *p_ub, - kmp_int32 *p_st, kmp_int32 incr, - kmp_int32 chunk) { - KMP_DEBUG_ASSERT(__kmp_init_serial); - __kmp_team_static_init(loc, gtid, p_last, p_lb, p_ub, p_st, incr, - chunk); -} - -/*! - See @ref __kmpc_team_static_init_4 - */ -void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, - kmp_uint32 *p_lb, kmp_uint32 *p_ub, - kmp_int32 *p_st, kmp_int32 incr, - kmp_int32 chunk) { - KMP_DEBUG_ASSERT(__kmp_init_serial); - __kmp_team_static_init(loc, gtid, p_last, p_lb, p_ub, p_st, incr, - chunk); -} - -/*! - See @ref __kmpc_team_static_init_4 - */ -void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, - kmp_int64 *p_lb, kmp_int64 *p_ub, - kmp_int64 *p_st, kmp_int64 incr, - kmp_int64 chunk) { - KMP_DEBUG_ASSERT(__kmp_init_serial); - __kmp_team_static_init(loc, gtid, p_last, p_lb, p_ub, p_st, incr, - chunk); -} - -/*! - See @ref __kmpc_team_static_init_4 - */ -void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, - kmp_uint64 *p_lb, kmp_uint64 *p_ub, - kmp_int64 *p_st, kmp_int64 incr, - kmp_int64 chunk) { - KMP_DEBUG_ASSERT(__kmp_init_serial); - __kmp_team_static_init(loc, gtid, p_last, p_lb, p_ub, p_st, incr, - chunk); -} -/*! 
-@} -*/ - -} // extern "C" Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_sched.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/z_Windows_NT-586_util.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/z_Windows_NT-586_util.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/z_Windows_NT-586_util.cpp (nonexistent) @@ -1,136 +0,0 @@ -/* - * z_Windows_NT-586_util.cpp -- platform specific routines. - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#include "kmp.h" - -#if (KMP_ARCH_X86 || KMP_ARCH_X86_64) -/* Only 32-bit "add-exchange" instruction on IA-32 architecture causes us to - use compare_and_store for these routines */ - -kmp_int8 __kmp_test_then_or8(volatile kmp_int8 *p, kmp_int8 d) { - kmp_int8 old_value, new_value; - - old_value = TCR_1(*p); - new_value = old_value | d; - - while (!__kmp_compare_and_store8(p, old_value, new_value)) { - KMP_CPU_PAUSE(); - old_value = TCR_1(*p); - new_value = old_value | d; - } - return old_value; -} - -kmp_int8 __kmp_test_then_and8(volatile kmp_int8 *p, kmp_int8 d) { - kmp_int8 old_value, new_value; - - old_value = TCR_1(*p); - new_value = old_value & d; - - while (!__kmp_compare_and_store8(p, old_value, new_value)) { - KMP_CPU_PAUSE(); - old_value = TCR_1(*p); - new_value = old_value & d; - } - return old_value; -} - -kmp_uint32 __kmp_test_then_or32(volatile kmp_uint32 *p, kmp_uint32 d) { - kmp_uint32 old_value, new_value; - - old_value = TCR_4(*p); - new_value = old_value | d; - - while (!__kmp_compare_and_store32((volatile kmp_int32 *)p, old_value, - new_value)) { - KMP_CPU_PAUSE(); - old_value = TCR_4(*p); - new_value = old_value | d; - } - return old_value; -} - -kmp_uint32 __kmp_test_then_and32(volatile kmp_uint32 *p, kmp_uint32 d) { - kmp_uint32 old_value, new_value; - - old_value = TCR_4(*p); - new_value = old_value & d; - - while (!__kmp_compare_and_store32((volatile kmp_int32 *)p, old_value, - new_value)) { - KMP_CPU_PAUSE(); - old_value = TCR_4(*p); - new_value = old_value & d; - } - return old_value; -} - -kmp_int8 __kmp_test_then_add8(volatile kmp_int8 *p, kmp_int8 d) { - kmp_int64 old_value, new_value; - - old_value = TCR_1(*p); - new_value = old_value + d; - while (!__kmp_compare_and_store8(p, old_value, new_value)) { - KMP_CPU_PAUSE(); - old_value = TCR_1(*p); - new_value = old_value + d; - } - return old_value; -} - -#if KMP_ARCH_X86 -kmp_int64 __kmp_test_then_add64(volatile kmp_int64 *p, kmp_int64 d) { - kmp_int64 old_value, new_value; - - old_value = TCR_8(*p); - new_value = old_value + d; - while (!__kmp_compare_and_store64(p, old_value, new_value)) { - KMP_CPU_PAUSE(); - old_value = TCR_8(*p); - new_value = old_value + d; - } - return old_value; -} -#endif /* KMP_ARCH_X86 */ - -kmp_uint64 __kmp_test_then_or64(volatile kmp_uint64 *p, kmp_uint64 d) { - kmp_uint64 old_value, 
new_value; - - old_value = TCR_8(*p); - new_value = old_value | d; - while (!__kmp_compare_and_store64((volatile kmp_int64 *)p, old_value, - new_value)) { - KMP_CPU_PAUSE(); - old_value = TCR_8(*p); - new_value = old_value | d; - } - - return old_value; -} - -kmp_uint64 __kmp_test_then_and64(volatile kmp_uint64 *p, kmp_uint64 d) { - kmp_uint64 old_value, new_value; - - old_value = TCR_8(*p); - new_value = old_value & d; - while (!__kmp_compare_and_store64((volatile kmp_int64 *)p, old_value, - new_value)) { - KMP_CPU_PAUSE(); - old_value = TCR_8(*p); - new_value = old_value & d; - } - - return old_value; -} - -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/z_Windows_NT-586_util.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_version.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_version.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_version.cpp (nonexistent) @@ -1,208 +0,0 @@ -/* - * kmp_version.cpp - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#include "kmp.h" -#include "kmp_io.h" -#include "kmp_version.h" - -// Replace with snapshot date YYYYMMDD for promotion build. -#define KMP_VERSION_BUILD 20140926 - -// Helper macros to convert value of macro to string literal. -#define _stringer(x) #x -#define stringer(x) _stringer(x) - -// Detect compiler. -#if KMP_COMPILER_ICC -#if __INTEL_COMPILER == 1010 -#define KMP_COMPILER "Intel(R) C++ Compiler 10.1" -#elif __INTEL_COMPILER == 1100 -#define KMP_COMPILER "Intel(R) C++ Compiler 11.0" -#elif __INTEL_COMPILER == 1110 -#define KMP_COMPILER "Intel(R) C++ Compiler 11.1" -#elif __INTEL_COMPILER == 1200 -#define KMP_COMPILER "Intel(R) C++ Compiler 12.0" -#elif __INTEL_COMPILER == 1210 -#define KMP_COMPILER "Intel(R) C++ Compiler 12.1" -#elif __INTEL_COMPILER == 1300 -#define KMP_COMPILER "Intel(R) C++ Compiler 13.0" -#elif __INTEL_COMPILER == 1310 -#define KMP_COMPILER "Intel(R) C++ Compiler 13.1" -#elif __INTEL_COMPILER == 1400 -#define KMP_COMPILER "Intel(R) C++ Compiler 14.0" -#elif __INTEL_COMPILER == 1410 -#define KMP_COMPILER "Intel(R) C++ Compiler 14.1" -#elif __INTEL_COMPILER == 1500 -#define KMP_COMPILER "Intel(R) C++ Compiler 15.0" -#elif __INTEL_COMPILER == 1600 -#define KMP_COMPILER "Intel(R) C++ Compiler 16.0" -#elif __INTEL_COMPILER == 1700 -#define KMP_COMPILER "Intel(R) C++ Compiler 17.0" -#elif __INTEL_COMPILER == 1800 -#define KMP_COMPILER "Intel(R) C++ Compiler 18.0" -#elif __INTEL_COMPILER == 9998 -#define KMP_COMPILER "Intel(R) C++ Compiler mainline" -#elif __INTEL_COMPILER == 9999 -#define KMP_COMPILER "Intel(R) C++ Compiler mainline" -#endif -#elif KMP_COMPILER_CLANG -#define KMP_COMPILER \ - "Clang " stringer(__clang_major__) "." 
stringer(__clang_minor__) -#elif KMP_COMPILER_GCC -#define KMP_COMPILER "GCC " stringer(__GNUC__) "." stringer(__GNUC_MINOR__) -#elif KMP_COMPILER_MSVC -#define KMP_COMPILER "MSVC " stringer(_MSC_FULL_VER) -#endif -#ifndef KMP_COMPILER -#warning "Unknown compiler" -#define KMP_COMPILER "unknown compiler" -#endif - -// Detect library type (perf, stub). -#ifdef KMP_STUB -#define KMP_LIB_TYPE "stub" -#else -#define KMP_LIB_TYPE "performance" -#endif // KMP_LIB_TYPE - -// Detect link type (static, dynamic). -#if KMP_DYNAMIC_LIB -#define KMP_LINK_TYPE "dynamic" -#else -#define KMP_LINK_TYPE "static" -#endif // KMP_LINK_TYPE - -// Finally, define strings. -#define KMP_LIBRARY KMP_LIB_TYPE " library (" KMP_LINK_TYPE ")" -#define KMP_COPYRIGHT "" - -int const __kmp_version_major = KMP_VERSION_MAJOR; -int const __kmp_version_minor = KMP_VERSION_MINOR; -int const __kmp_version_build = KMP_VERSION_BUILD; -int const __kmp_openmp_version = -#if OMP_50_ENABLED - 201611; -#elif OMP_45_ENABLED - 201511; -#elif OMP_40_ENABLED - 201307; -#else - 201107; -#endif - -/* Do NOT change the format of this string! Intel(R) Thread Profiler checks for - a specific format; some changes in the recognition routine there need to be - made before this is changed. */ -char const __kmp_copyright[] = KMP_VERSION_PREFIX KMP_LIBRARY - " ver. " stringer(KMP_VERSION_MAJOR) "." stringer( - KMP_VERSION_MINOR) "." stringer(KMP_VERSION_BUILD) " " KMP_COPYRIGHT; - -char const __kmp_version_copyright[] = KMP_VERSION_PREFIX KMP_COPYRIGHT; -char const __kmp_version_lib_ver[] = - KMP_VERSION_PREFIX "version: " stringer(KMP_VERSION_MAJOR) "." stringer( - KMP_VERSION_MINOR) "." stringer(KMP_VERSION_BUILD); -char const __kmp_version_lib_type[] = - KMP_VERSION_PREFIX "library type: " KMP_LIB_TYPE; -char const __kmp_version_link_type[] = - KMP_VERSION_PREFIX "link type: " KMP_LINK_TYPE; -char const __kmp_version_build_time[] = KMP_VERSION_PREFIX "build time: " - "no_timestamp"; -#if KMP_MIC2 -char const __kmp_version_target_env[] = - KMP_VERSION_PREFIX "target environment: MIC2"; -#endif -char const __kmp_version_build_compiler[] = - KMP_VERSION_PREFIX "build compiler: " KMP_COMPILER; - -// Called at serial initialization time. -static int __kmp_version_1_printed = FALSE; - -void __kmp_print_version_1(void) { - if (__kmp_version_1_printed) { - return; - } - __kmp_version_1_printed = TRUE; - -#ifndef KMP_STUB - kmp_str_buf_t buffer; - __kmp_str_buf_init(&buffer); - // Print version strings skipping initial magic. - __kmp_str_buf_print(&buffer, "%s\n", - &__kmp_version_lib_ver[KMP_VERSION_MAGIC_LEN]); - __kmp_str_buf_print(&buffer, "%s\n", - &__kmp_version_lib_type[KMP_VERSION_MAGIC_LEN]); - __kmp_str_buf_print(&buffer, "%s\n", - &__kmp_version_link_type[KMP_VERSION_MAGIC_LEN]); - __kmp_str_buf_print(&buffer, "%s\n", - &__kmp_version_build_time[KMP_VERSION_MAGIC_LEN]); -#if KMP_MIC - __kmp_str_buf_print(&buffer, "%s\n", - &__kmp_version_target_env[KMP_VERSION_MAGIC_LEN]); -#endif - __kmp_str_buf_print(&buffer, "%s\n", - &__kmp_version_build_compiler[KMP_VERSION_MAGIC_LEN]); -#if defined(KMP_GOMP_COMPAT) - __kmp_str_buf_print(&buffer, "%s\n", - &__kmp_version_alt_comp[KMP_VERSION_MAGIC_LEN]); -#endif /* defined(KMP_GOMP_COMPAT) */ - __kmp_str_buf_print(&buffer, "%s\n", - &__kmp_version_omp_api[KMP_VERSION_MAGIC_LEN]); - __kmp_str_buf_print(&buffer, "%sdynamic error checking: %s\n", - KMP_VERSION_PREF_STR, - (__kmp_env_consistency_check ? 
"yes" : "no")); -#ifdef KMP_DEBUG - for (int i = bs_plain_barrier; i < bs_last_barrier; ++i) { - __kmp_str_buf_print( - &buffer, "%s%s barrier branch bits: gather=%u, release=%u\n", - KMP_VERSION_PREF_STR, __kmp_barrier_type_name[i], - __kmp_barrier_gather_branch_bits[i], - __kmp_barrier_release_branch_bits[i]); // __kmp_str_buf_print - } - for (int i = bs_plain_barrier; i < bs_last_barrier; ++i) { - __kmp_str_buf_print( - &buffer, "%s%s barrier pattern: gather=%s, release=%s\n", - KMP_VERSION_PREF_STR, __kmp_barrier_type_name[i], - __kmp_barrier_pattern_name[__kmp_barrier_gather_pattern[i]], - __kmp_barrier_pattern_name - [__kmp_barrier_release_pattern[i]]); // __kmp_str_buf_print - } - __kmp_str_buf_print(&buffer, "%s\n", - &__kmp_version_lock[KMP_VERSION_MAGIC_LEN]); -#endif - __kmp_str_buf_print( - &buffer, "%sthread affinity support: %s\n", KMP_VERSION_PREF_STR, -#if KMP_AFFINITY_SUPPORTED - (KMP_AFFINITY_CAPABLE() - ? (__kmp_affinity_type == affinity_none ? "not used" : "yes") - : "no") -#else - "no" -#endif - ); - __kmp_printf("%s", buffer.str); - __kmp_str_buf_free(&buffer); - K_DIAG(1, ("KMP_VERSION is true\n")); -#endif // KMP_STUB -} // __kmp_print_version_1 - -// Called at parallel initialization time. -static int __kmp_version_2_printed = FALSE; - -void __kmp_print_version_2(void) { - if (__kmp_version_2_printed) { - return; - } - __kmp_version_2_printed = TRUE; -} // __kmp_print_version_2 - -// end of file // Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_version.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_debug.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_debug.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_debug.h (nonexistent) @@ -1,180 +0,0 @@ -/* - * kmp_debug.h -- debug / assertion code for Assure library - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#ifndef KMP_DEBUG_H -#define KMP_DEBUG_H - -#include - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -// ----------------------------------------------------------------------------- -// Build-time assertion. - -// New C++11 style build assert -#define KMP_BUILD_ASSERT(expr) static_assert(expr, "Build condition error") - -// ----------------------------------------------------------------------------- -// Run-time assertions. 
- -extern void __kmp_dump_debug_buffer(void); - -#ifdef KMP_USE_ASSERT -extern int __kmp_debug_assert(char const *expr, char const *file, int line); -#ifdef KMP_DEBUG -#define KMP_ASSERT(cond) \ - if (!(cond)) { \ - __kmp_debug_assert(#cond, __FILE__, __LINE__); \ - } -#define KMP_ASSERT2(cond, msg) \ - if (!(cond)) { \ - __kmp_debug_assert((msg), __FILE__, __LINE__); \ - } -#define KMP_DEBUG_ASSERT(cond) KMP_ASSERT(cond) -#define KMP_DEBUG_ASSERT2(cond, msg) KMP_ASSERT2(cond, msg) -#define KMP_DEBUG_USE_VAR(x) /* Nothing (it is used!) */ -#else -// Do not expose condition in release build. Use "assertion failure". -#define KMP_ASSERT(cond) \ - if (!(cond)) { \ - __kmp_debug_assert("assertion failure", __FILE__, __LINE__); \ - } -#define KMP_ASSERT2(cond, msg) KMP_ASSERT(cond) -#define KMP_DEBUG_ASSERT(cond) /* Nothing */ -#define KMP_DEBUG_ASSERT2(cond, msg) /* Nothing */ -#define KMP_DEBUG_USE_VAR(x) ((void)(x)) -#endif // KMP_DEBUG -#else -#define KMP_ASSERT(cond) /* Nothing */ -#define KMP_ASSERT2(cond, msg) /* Nothing */ -#define KMP_DEBUG_ASSERT(cond) /* Nothing */ -#define KMP_DEBUG_ASSERT2(cond, msg) /* Nothing */ -#define KMP_DEBUG_USE_VAR(x) ((void)(x)) -#endif // KMP_USE_ASSERT - -#ifdef KMP_DEBUG -extern void __kmp_debug_printf_stdout(char const *format, ...); -#endif -extern void __kmp_debug_printf(char const *format, ...); - -#ifdef KMP_DEBUG - -extern int kmp_a_debug; -extern int kmp_b_debug; -extern int kmp_c_debug; -extern int kmp_d_debug; -extern int kmp_e_debug; -extern int kmp_f_debug; -extern int kmp_diag; - -#define KA_TRACE(d, x) \ - if (kmp_a_debug >= d) { \ - __kmp_debug_printf x; \ - } -#define KB_TRACE(d, x) \ - if (kmp_b_debug >= d) { \ - __kmp_debug_printf x; \ - } -#define KC_TRACE(d, x) \ - if (kmp_c_debug >= d) { \ - __kmp_debug_printf x; \ - } -#define KD_TRACE(d, x) \ - if (kmp_d_debug >= d) { \ - __kmp_debug_printf x; \ - } -#define KE_TRACE(d, x) \ - if (kmp_e_debug >= d) { \ - __kmp_debug_printf x; \ - } -#define KF_TRACE(d, x) \ - if (kmp_f_debug >= d) { \ - __kmp_debug_printf x; \ - } -#define K_DIAG(d, x) \ - { \ - if (kmp_diag == d) { \ - __kmp_debug_printf_stdout x; \ - } \ - } - -#define KA_DUMP(d, x) \ - if (kmp_a_debug >= d) { \ - int ks; \ - __kmp_disable(&ks); \ - (x); \ - __kmp_enable(ks); \ - } -#define KB_DUMP(d, x) \ - if (kmp_b_debug >= d) { \ - int ks; \ - __kmp_disable(&ks); \ - (x); \ - __kmp_enable(ks); \ - } -#define KC_DUMP(d, x) \ - if (kmp_c_debug >= d) { \ - int ks; \ - __kmp_disable(&ks); \ - (x); \ - __kmp_enable(ks); \ - } -#define KD_DUMP(d, x) \ - if (kmp_d_debug >= d) { \ - int ks; \ - __kmp_disable(&ks); \ - (x); \ - __kmp_enable(ks); \ - } -#define KE_DUMP(d, x) \ - if (kmp_e_debug >= d) { \ - int ks; \ - __kmp_disable(&ks); \ - (x); \ - __kmp_enable(ks); \ - } -#define KF_DUMP(d, x) \ - if (kmp_f_debug >= d) { \ - int ks; \ - __kmp_disable(&ks); \ - (x); \ - __kmp_enable(ks); \ - } - -#else - -#define KA_TRACE(d, x) /* nothing to do */ -#define KB_TRACE(d, x) /* nothing to do */ -#define KC_TRACE(d, x) /* nothing to do */ -#define KD_TRACE(d, x) /* nothing to do */ -#define KE_TRACE(d, x) /* nothing to do */ -#define KF_TRACE(d, x) /* nothing to do */ -#define K_DIAG(d, x) \ - {} /* nothing to do */ - -#define KA_DUMP(d, x) /* nothing to do */ -#define KB_DUMP(d, x) /* nothing to do */ -#define KC_DUMP(d, x) /* nothing to do */ -#define KD_DUMP(d, x) /* nothing to do */ -#define KE_DUMP(d, x) /* nothing to do */ -#define KF_DUMP(d, x) /* nothing to do */ - -#endif // KMP_DEBUG - -#ifdef __cplusplus -} // extern "C" 
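The trace macros above gate their output on per-component verbosity levels, so a disabled KD_TRACE(100, (...)) costs a single integer comparison and its argument list is never formatted. A standalone sketch of the pattern (hypothetical names; the real levels come from environment variables):

    #include <cstdio>
    static int my_debug_level = 20;
    // The extra parentheses at the call site let one macro parameter carry a
    // complete printf argument list.
    #define MY_TRACE(d, x)                                                     \
      if (my_debug_level >= (d)) {                                             \
        std::printf x;                                                         \
      }
    int main() {
      MY_TRACE(10, ("enter: T#%d\n", 0));  // printed: 20 >= 10
      MY_TRACE(100, ("detail: %d\n", 42)); // skipped: 20 < 100
      return 0;
    }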
-#endif // __cplusplus - -#endif /* KMP_DEBUG_H */ Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_debug.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_stub.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_stub.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_stub.cpp (nonexistent) @@ -1,370 +0,0 @@ -/* - * kmp_stub.cpp -- stub versions of user-callable OpenMP RT functions. - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#include -#include -#include - -#define __KMP_IMP -#include "omp.h" // omp_* declarations, must be included before "kmp.h" -#include "kmp.h" // KMP_DEFAULT_STKSIZE -#include "kmp_stub.h" - -#if KMP_OS_WINDOWS -#include -#else -#include -#endif - -// Moved from omp.h -#define omp_set_max_active_levels ompc_set_max_active_levels -#define omp_set_schedule ompc_set_schedule -#define omp_get_ancestor_thread_num ompc_get_ancestor_thread_num -#define omp_get_team_size ompc_get_team_size - -#define omp_set_num_threads ompc_set_num_threads -#define omp_set_dynamic ompc_set_dynamic -#define omp_set_nested ompc_set_nested -#define omp_set_affinity_format ompc_set_affinity_format -#define omp_get_affinity_format ompc_get_affinity_format -#define omp_display_affinity ompc_display_affinity -#define omp_capture_affinity ompc_capture_affinity -#define kmp_set_stacksize kmpc_set_stacksize -#define kmp_set_stacksize_s kmpc_set_stacksize_s -#define kmp_set_blocktime kmpc_set_blocktime -#define kmp_set_library kmpc_set_library -#define kmp_set_defaults kmpc_set_defaults -#define kmp_set_disp_num_buffers kmpc_set_disp_num_buffers -#define kmp_malloc kmpc_malloc -#define kmp_aligned_malloc kmpc_aligned_malloc -#define kmp_calloc kmpc_calloc -#define kmp_realloc kmpc_realloc -#define kmp_free kmpc_free - -#if KMP_OS_WINDOWS -static double frequency = 0.0; -#endif - -// Helper functions. -static size_t __kmps_init() { - static int initialized = 0; - static size_t dummy = 0; - if (!initialized) { - // TODO: Analyze KMP_VERSION environment variable, print - // __kmp_version_copyright and __kmp_version_build_time. - // WARNING: Do not use "fprintf(stderr, ...)" because it will cause - // unresolved "__iob" symbol (see C70080). We need to extract __kmp_printf() - // stuff from kmp_runtime.cpp and use it. - - // Trick with dummy variable forces linker to keep __kmp_version_copyright - // and __kmp_version_build_time strings in executable file (in case of - // static linkage). When KMP_VERSION analysis is implemented, dummy - // variable should be deleted, function should return void. 
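The subtraction just below is the trick the comment above describes: computing a value from the two arrays' addresses makes both symbols reachable, so a static linker cannot drop either string. A self-contained sketch (names are stand-ins; the difference itself is meaningless and only serves to reference both arrays):

    #include <cstddef>
    char const my_copyright[] = "my library copyright";
    char const my_build_time[] = "build time: no_timestamp";
    static std::size_t keep_strings(void) {
      static std::size_t dummy = 0;
      if (dummy == 0) {
        // Referencing both addresses pins both strings into the executable.
        dummy = (std::size_t)(my_copyright - my_build_time);
      }
      return dummy;
    }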
- dummy = __kmp_version_copyright - __kmp_version_build_time; - -#if KMP_OS_WINDOWS - LARGE_INTEGER freq; - BOOL status = QueryPerformanceFrequency(&freq); - if (status) { - frequency = double(freq.QuadPart); - } -#endif - - initialized = 1; - } - return dummy; -} // __kmps_init - -#define i __kmps_init(); - -/* set API functions */ -void omp_set_num_threads(omp_int_t num_threads) { i; } -void omp_set_dynamic(omp_int_t dynamic) { - i; - __kmps_set_dynamic(dynamic); -} -void omp_set_nested(omp_int_t nested) { - i; - __kmps_set_nested(nested); -} -void omp_set_max_active_levels(omp_int_t max_active_levels) { i; } -void omp_set_schedule(omp_sched_t kind, omp_int_t modifier) { - i; - __kmps_set_schedule((kmp_sched_t)kind, modifier); -} -int omp_get_ancestor_thread_num(omp_int_t level) { - i; - return (level) ? (-1) : (0); -} -int omp_get_team_size(omp_int_t level) { - i; - return (level) ? (-1) : (1); -} -int kmpc_set_affinity_mask_proc(int proc, void **mask) { - i; - return -1; -} -int kmpc_unset_affinity_mask_proc(int proc, void **mask) { - i; - return -1; -} -int kmpc_get_affinity_mask_proc(int proc, void **mask) { - i; - return -1; -} - -/* kmp API functions */ -void kmp_set_stacksize(omp_int_t arg) { - i; - __kmps_set_stacksize(arg); -} -void kmp_set_stacksize_s(size_t arg) { - i; - __kmps_set_stacksize(arg); -} -void kmp_set_blocktime(omp_int_t arg) { - i; - __kmps_set_blocktime(arg); -} -void kmp_set_library(omp_int_t arg) { - i; - __kmps_set_library(arg); -} -void kmp_set_defaults(char const *str) { i; } -void kmp_set_disp_num_buffers(omp_int_t arg) { i; } - -/* KMP memory management functions. */ -void *kmp_malloc(size_t size) { - i; - void *res; -#if KMP_OS_WINDOWS - // If successful, returns a pointer to the memory block; otherwise returns - // NULL. - // Sets errno to ENOMEM or EINVAL if memory allocation failed or parameter - // validation failed.
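On Windows the stub allocates even default-alignment blocks with _aligned_malloc(size, 1), as seen just below, so that kmp_free can unconditionally call _aligned_free; pairing plain malloc with _aligned_free would be invalid. A hedged standalone sketch of that pairing (xmalloc/xfree are hypothetical names):

    #include <cstdlib>
    #if defined(_WIN32)
    #include <malloc.h>
    #endif
    void *xmalloc(std::size_t size) {
    #if defined(_WIN32)
      return _aligned_malloc(size, 1); // must be released with _aligned_free
    #else
      return std::malloc(size); // must be released with free
    #endif
    }
    void xfree(void *ptr) {
    #if defined(_WIN32)
      _aligned_free(ptr);
    #else
      std::free(ptr);
    #endif
    }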
- res = _aligned_malloc(size, 1); -#else - res = malloc(size); -#endif - return res; -} -void *kmp_aligned_malloc(size_t sz, size_t a) { - i; - int err; - void *res; -#if KMP_OS_WINDOWS - res = _aligned_malloc(sz, a); -#else - if (err = posix_memalign(&res, a, sz)) { - errno = err; // can be EINVAL or ENOMEM - res = NULL; - } -#endif - return res; -} -void *kmp_calloc(size_t nelem, size_t elsize) { - i; - void *res; -#if KMP_OS_WINDOWS - res = _aligned_recalloc(NULL, nelem, elsize, 1); -#else - res = calloc(nelem, elsize); -#endif - return res; -} -void *kmp_realloc(void *ptr, size_t size) { - i; - void *res; -#if KMP_OS_WINDOWS - res = _aligned_realloc(ptr, size, 1); -#else - res = realloc(ptr, size); -#endif - return res; -} -void kmp_free(void *ptr) { - i; -#if KMP_OS_WINDOWS - _aligned_free(ptr); -#else - free(ptr); -#endif -} - -static int __kmps_blocktime = INT_MAX; - -void __kmps_set_blocktime(int arg) { - i; - __kmps_blocktime = arg; -} // __kmps_set_blocktime - -int __kmps_get_blocktime(void) { - i; - return __kmps_blocktime; -} // __kmps_get_blocktime - -static int __kmps_dynamic = 0; - -void __kmps_set_dynamic(int arg) { - i; - __kmps_dynamic = arg; -} // __kmps_set_dynamic - -int __kmps_get_dynamic(void) { - i; - return __kmps_dynamic; -} // __kmps_get_dynamic - -static int __kmps_library = 1000; - -void __kmps_set_library(int arg) { - i; - __kmps_library = arg; -} // __kmps_set_library - -int __kmps_get_library(void) { - i; - return __kmps_library; -} // __kmps_get_library - -static int __kmps_nested = 0; - -void __kmps_set_nested(int arg) { - i; - __kmps_nested = arg; -} // __kmps_set_nested - -int __kmps_get_nested(void) { - i; - return __kmps_nested; -} // __kmps_get_nested - -static size_t __kmps_stacksize = KMP_DEFAULT_STKSIZE; - -void __kmps_set_stacksize(int arg) { - i; - __kmps_stacksize = arg; -} // __kmps_set_stacksize - -int __kmps_get_stacksize(void) { - i; - return __kmps_stacksize; -} // __kmps_get_stacksize - -static kmp_sched_t __kmps_sched_kind = kmp_sched_default; -static int __kmps_sched_modifier = 0; - -void __kmps_set_schedule(kmp_sched_t kind, int modifier) { - i; - __kmps_sched_kind = kind; - __kmps_sched_modifier = modifier; -} // __kmps_set_schedule - -void __kmps_get_schedule(kmp_sched_t *kind, int *modifier) { - i; - *kind = __kmps_sched_kind; - *modifier = __kmps_sched_modifier; -} // __kmps_get_schedule - -#if OMP_40_ENABLED - -static kmp_proc_bind_t __kmps_proc_bind = proc_bind_false; - -void __kmps_set_proc_bind(kmp_proc_bind_t arg) { - i; - __kmps_proc_bind = arg; -} // __kmps_set_proc_bind - -kmp_proc_bind_t __kmps_get_proc_bind(void) { - i; - return __kmps_proc_bind; -} // __kmps_get_proc_bind - -#endif /* OMP_40_ENABLED */ - -double __kmps_get_wtime(void) { - // Elapsed wall clock time (in second) from "sometime in the past". - double wtime = 0.0; - i; -#if KMP_OS_WINDOWS - if (frequency > 0.0) { - LARGE_INTEGER now; - BOOL status = QueryPerformanceCounter(&now); - if (status) { - wtime = double(now.QuadPart) / frequency; - } - } -#else - // gettimeofday() returns seconds and microseconds since the Epoch. - struct timeval tval; - int rc; - rc = gettimeofday(&tval, NULL); - if (rc == 0) { - wtime = (double)(tval.tv_sec) + 1.0E-06 * (double)(tval.tv_usec); - } else { - // TODO: Assert or abort here. - } -#endif - return wtime; -} // __kmps_get_wtime - -double __kmps_get_wtick(void) { - // Number of seconds between successive clock ticks. 
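__kmps_get_wtime above folds gettimeofday's (seconds, microseconds) pair into one double on POSIX systems, and divides QueryPerformanceCounter ticks by the cached frequency on Windows. A POSIX-only sketch of the conversion (wall_seconds is a hypothetical name):

    #include <sys/time.h>
    static double wall_seconds(void) {
      struct timeval tv;
      if (gettimeofday(&tv, 0) == 0)
        return (double)tv.tv_sec + 1.0e-6 * (double)tv.tv_usec;
      return 0.0; // mirror the stub: no abort on failure
    }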
- double wtick = 0.0; - i; -#if KMP_OS_WINDOWS - { - DWORD increment; - DWORD adjustment; - BOOL disabled; - BOOL rc; - rc = GetSystemTimeAdjustment(&adjustment, &increment, &disabled); - if (rc) { - wtick = 1.0E-07 * (double)(disabled ? increment : adjustment); - } else { - // TODO: Assert or abort here. - wtick = 1.0E-03; - } - } -#else - // TODO: gettimeofday() returns in microseconds, but what the precision? - wtick = 1.0E-06; -#endif - return wtick; -} // __kmps_get_wtick - -#if OMP_50_ENABLED -/* OpenMP 5.0 Memory Management */ -const omp_allocator_t *OMP_NULL_ALLOCATOR = NULL; -const omp_allocator_t *omp_default_mem_alloc = (const omp_allocator_t *)1; -const omp_allocator_t *omp_large_cap_mem_alloc = (const omp_allocator_t *)2; -const omp_allocator_t *omp_const_mem_alloc = (const omp_allocator_t *)3; -const omp_allocator_t *omp_high_bw_mem_alloc = (const omp_allocator_t *)4; -const omp_allocator_t *omp_low_lat_mem_alloc = (const omp_allocator_t *)5; -const omp_allocator_t *omp_cgroup_mem_alloc = (const omp_allocator_t *)6; -const omp_allocator_t *omp_pteam_mem_alloc = (const omp_allocator_t *)7; -const omp_allocator_t *omp_thread_mem_alloc = (const omp_allocator_t *)8; -/* OpenMP 5.0 Affinity Format */ -void omp_set_affinity_format(char const *format) { i; } -size_t omp_get_affinity_format(char *buffer, size_t size) { - i; - return 0; -} -void omp_display_affinity(char const *format) { i; } -size_t omp_capture_affinity(char *buffer, size_t buf_size, char const *format) { - i; - return 0; -} -#endif /* OMP_50_ENABLED */ - -// end of file // Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_stub.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_debugger.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_debugger.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_debugger.cpp (nonexistent) @@ -1,293 +0,0 @@ -#include "kmp_config.h" - -#if USE_DEBUGGER -/* - * kmp_debugger.cpp -- debugger support. - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#include "kmp.h" -#include "kmp_lock.h" -#include "kmp_omp.h" -#include "kmp_str.h" - -// NOTE: All variable names are known to the debugger, do not change! - -#ifdef __cplusplus -extern "C" { -extern kmp_omp_struct_info_t __kmp_omp_debug_struct_info; -} // extern "C" -#endif // __cplusplus - -int __kmp_debugging = FALSE; // Boolean whether currently debugging OpenMP RTL. 
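The offset_and_size_of machinery defined just below publishes an {offset, size} pair for every field the debugger needs, using offsetof plus a sizeof over a null-pointer member access (unevaluated, hence safe). A standalone sketch with hypothetical types:

    #include <cstddef>
    typedef struct { int offset; int size; } field_info_t;
    #define FIELD_INFO(structure, field)                                       \
      { (int)offsetof(structure, field), (int)sizeof(((structure *)0)->field) }
    typedef struct { int tid; double wtime; } demo_t;
    // With this table a debugger can locate demo_t::wtime inside raw process
    // memory without compiling against the structure definition.
    static const field_info_t demo_info[] = {FIELD_INFO(demo_t, tid),
                                             FIELD_INFO(demo_t, wtime)};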
- -#define offset_and_size_of(structure, field) \ - { offsetof(structure, field), sizeof(((structure *)NULL)->field) } - -#define offset_and_size_not_available \ - { -1, -1 } - -#define addr_and_size_of(var) \ - { (kmp_uint64)(&var), sizeof(var) } - -#define nthr_buffer_size 1024 -static kmp_int32 kmp_omp_nthr_info_buffer[nthr_buffer_size] = { - nthr_buffer_size * sizeof(kmp_int32)}; - -/* TODO: Check punctuation for various platforms here */ -static char func_microtask[] = "__kmp_invoke_microtask"; -static char func_fork[] = "__kmpc_fork_call"; -static char func_fork_teams[] = "__kmpc_fork_teams"; - -// Various info about runtime structures: addresses, field offsets, sizes, etc. -kmp_omp_struct_info_t __kmp_omp_debug_struct_info = { - - /* Change this only if you make a fundamental data structure change here */ - KMP_OMP_VERSION, - - /* sanity check. Only should be checked if versions are identical - * This is also used for backward compatibility to get the runtime - * structure size if it the runtime is older than the interface */ - sizeof(kmp_omp_struct_info_t), - - /* OpenMP RTL version info. */ - addr_and_size_of(__kmp_version_major), - addr_and_size_of(__kmp_version_minor), - addr_and_size_of(__kmp_version_build), - addr_and_size_of(__kmp_openmp_version), - {(kmp_uint64)(__kmp_copyright) + KMP_VERSION_MAGIC_LEN, - 0}, // Skip magic prefix. - - /* Various globals. */ - addr_and_size_of(__kmp_threads), - addr_and_size_of(__kmp_root), - addr_and_size_of(__kmp_threads_capacity), -#if KMP_USE_MONITOR - addr_and_size_of(__kmp_monitor), -#endif -#if !KMP_USE_DYNAMIC_LOCK - addr_and_size_of(__kmp_user_lock_table), -#endif - addr_and_size_of(func_microtask), - addr_and_size_of(func_fork), - addr_and_size_of(func_fork_teams), - addr_and_size_of(__kmp_team_counter), - addr_and_size_of(__kmp_task_counter), - addr_and_size_of(kmp_omp_nthr_info_buffer), - sizeof(void *), - OMP_LOCK_T_SIZE < sizeof(void *), - bs_last_barrier, - INITIAL_TASK_DEQUE_SIZE, - - // thread structure information - sizeof(kmp_base_info_t), - offset_and_size_of(kmp_base_info_t, th_info), - offset_and_size_of(kmp_base_info_t, th_team), - offset_and_size_of(kmp_base_info_t, th_root), - offset_and_size_of(kmp_base_info_t, th_serial_team), - offset_and_size_of(kmp_base_info_t, th_ident), - offset_and_size_of(kmp_base_info_t, th_spin_here), - offset_and_size_of(kmp_base_info_t, th_next_waiting), - offset_and_size_of(kmp_base_info_t, th_task_team), - offset_and_size_of(kmp_base_info_t, th_current_task), - offset_and_size_of(kmp_base_info_t, th_task_state), - offset_and_size_of(kmp_base_info_t, th_bar), - offset_and_size_of(kmp_bstate_t, b_worker_arrived), - -#if OMP_40_ENABLED - // teams information - offset_and_size_of(kmp_base_info_t, th_teams_microtask), - offset_and_size_of(kmp_base_info_t, th_teams_level), - offset_and_size_of(kmp_teams_size_t, nteams), - offset_and_size_of(kmp_teams_size_t, nth), -#endif - - // kmp_desc structure (for info field above) - sizeof(kmp_desc_base_t), - offset_and_size_of(kmp_desc_base_t, ds_tid), - offset_and_size_of(kmp_desc_base_t, ds_gtid), -// On Windows* OS, ds_thread contains a thread /handle/, which is not usable, -// while thread /id/ is in ds_thread_id. 
-#if KMP_OS_WINDOWS - offset_and_size_of(kmp_desc_base_t, ds_thread_id), -#else - offset_and_size_of(kmp_desc_base_t, ds_thread), -#endif - - // team structure information - sizeof(kmp_base_team_t), - offset_and_size_of(kmp_base_team_t, t_master_tid), - offset_and_size_of(kmp_base_team_t, t_ident), - offset_and_size_of(kmp_base_team_t, t_parent), - offset_and_size_of(kmp_base_team_t, t_nproc), - offset_and_size_of(kmp_base_team_t, t_threads), - offset_and_size_of(kmp_base_team_t, t_serialized), - offset_and_size_of(kmp_base_team_t, t_id), - offset_and_size_of(kmp_base_team_t, t_pkfn), - offset_and_size_of(kmp_base_team_t, t_task_team), - offset_and_size_of(kmp_base_team_t, t_implicit_task_taskdata), -#if OMP_40_ENABLED - offset_and_size_of(kmp_base_team_t, t_cancel_request), -#endif - offset_and_size_of(kmp_base_team_t, t_bar), - offset_and_size_of(kmp_balign_team_t, b_master_arrived), - offset_and_size_of(kmp_balign_team_t, b_team_arrived), - - // root structure information - sizeof(kmp_base_root_t), - offset_and_size_of(kmp_base_root_t, r_root_team), - offset_and_size_of(kmp_base_root_t, r_hot_team), - offset_and_size_of(kmp_base_root_t, r_uber_thread), - offset_and_size_not_available, - - // ident structure information - sizeof(ident_t), - offset_and_size_of(ident_t, psource), - offset_and_size_of(ident_t, flags), - - // lock structure information - sizeof(kmp_base_queuing_lock_t), - offset_and_size_of(kmp_base_queuing_lock_t, initialized), - offset_and_size_of(kmp_base_queuing_lock_t, location), - offset_and_size_of(kmp_base_queuing_lock_t, tail_id), - offset_and_size_of(kmp_base_queuing_lock_t, head_id), - offset_and_size_of(kmp_base_queuing_lock_t, next_ticket), - offset_and_size_of(kmp_base_queuing_lock_t, now_serving), - offset_and_size_of(kmp_base_queuing_lock_t, owner_id), - offset_and_size_of(kmp_base_queuing_lock_t, depth_locked), - offset_and_size_of(kmp_base_queuing_lock_t, flags), - -#if !KMP_USE_DYNAMIC_LOCK - /* Lock table. */ - sizeof(kmp_lock_table_t), - offset_and_size_of(kmp_lock_table_t, used), - offset_and_size_of(kmp_lock_table_t, allocated), - offset_and_size_of(kmp_lock_table_t, table), -#endif - - // Task team structure information. - sizeof(kmp_base_task_team_t), - offset_and_size_of(kmp_base_task_team_t, tt_threads_data), - offset_and_size_of(kmp_base_task_team_t, tt_found_tasks), - offset_and_size_of(kmp_base_task_team_t, tt_nproc), - offset_and_size_of(kmp_base_task_team_t, tt_unfinished_threads), - offset_and_size_of(kmp_base_task_team_t, tt_active), - - // task_data_t. 
- sizeof(kmp_taskdata_t), - offset_and_size_of(kmp_taskdata_t, td_task_id), - offset_and_size_of(kmp_taskdata_t, td_flags), - offset_and_size_of(kmp_taskdata_t, td_team), - offset_and_size_of(kmp_taskdata_t, td_parent), - offset_and_size_of(kmp_taskdata_t, td_level), - offset_and_size_of(kmp_taskdata_t, td_ident), - offset_and_size_of(kmp_taskdata_t, td_allocated_child_tasks), - offset_and_size_of(kmp_taskdata_t, td_incomplete_child_tasks), - - offset_and_size_of(kmp_taskdata_t, td_taskwait_ident), - offset_and_size_of(kmp_taskdata_t, td_taskwait_counter), - offset_and_size_of(kmp_taskdata_t, td_taskwait_thread), - -#if OMP_40_ENABLED - offset_and_size_of(kmp_taskdata_t, td_taskgroup), - offset_and_size_of(kmp_taskgroup_t, count), - offset_and_size_of(kmp_taskgroup_t, cancel_request), - - offset_and_size_of(kmp_taskdata_t, td_depnode), - offset_and_size_of(kmp_depnode_list_t, node), - offset_and_size_of(kmp_depnode_list_t, next), - offset_and_size_of(kmp_base_depnode_t, successors), - offset_and_size_of(kmp_base_depnode_t, task), - offset_and_size_of(kmp_base_depnode_t, npredecessors), - offset_and_size_of(kmp_base_depnode_t, nrefs), -#endif - offset_and_size_of(kmp_task_t, routine), - - // thread_data_t. - sizeof(kmp_thread_data_t), - offset_and_size_of(kmp_base_thread_data_t, td_deque), - offset_and_size_of(kmp_base_thread_data_t, td_deque_size), - offset_and_size_of(kmp_base_thread_data_t, td_deque_head), - offset_and_size_of(kmp_base_thread_data_t, td_deque_tail), - offset_and_size_of(kmp_base_thread_data_t, td_deque_ntasks), - offset_and_size_of(kmp_base_thread_data_t, td_deque_last_stolen), - - // The last field. - KMP_OMP_VERSION, - -}; // __kmp_omp_debug_struct_info - -#undef offset_and_size_of -#undef addr_and_size_of - -/* Intel compiler on IA-32 architecture issues a warning "conversion - from "unsigned long long" to "char *" may lose significant bits" - when 64-bit value is assigned to 32-bit pointer. Use this function - to suppress the warning. */ -static inline void *__kmp_convert_to_ptr(kmp_uint64 addr) { -#if KMP_COMPILER_ICC -#pragma warning(push) -#pragma warning(disable : 810) // conversion from "unsigned long long" to "char -// *" may lose significant bits -#pragma warning(disable : 1195) // conversion from integer to smaller pointer -#endif // KMP_COMPILER_ICC - return (void *)addr; -#if KMP_COMPILER_ICC -#pragma warning(pop) -#endif // KMP_COMPILER_ICC -} // __kmp_convert_to_ptr - -static int kmp_location_match(kmp_str_loc_t *loc, kmp_omp_nthr_item_t *item) { - - int file_match = 0; - int func_match = 0; - int line_match = 0; - - char *file = (char *)__kmp_convert_to_ptr(item->file); - char *func = (char *)__kmp_convert_to_ptr(item->func); - file_match = __kmp_str_fname_match(&loc->fname, file); - func_match = - item->func == 0 // If item->func is NULL, it allows any func name. - || strcmp(func, "*") == 0 || - (loc->func != NULL && strcmp(loc->func, func) == 0); - line_match = - item->begin <= loc->line && - (item->end <= 0 || - loc->line <= item->end); // if item->end <= 0, it means "end of file". 
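Restating the three predicates above as a standalone sketch before they are combined below (simplified, hypothetical helpers): a NULL or "*" function pattern matches any function, and a non-positive end line means "to end of file".

    #include <cstring>
    static bool func_matches(const char *pattern, const char *func) {
      return pattern == 0 || std::strcmp(pattern, "*") == 0 ||
             (func != 0 && std::strcmp(pattern, func) == 0);
    }
    static bool line_matches(int begin, int end, int line) {
      return begin <= line && (end <= 0 || line <= end);
    }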
- - return (file_match && func_match && line_match); - -} // kmp_location_match - -int __kmp_omp_num_threads(ident_t const *ident) { - - int num_threads = 0; - - kmp_omp_nthr_info_t *info = (kmp_omp_nthr_info_t *)__kmp_convert_to_ptr( - __kmp_omp_debug_struct_info.nthr_info.addr); - if (info->num > 0 && info->array != 0) { - kmp_omp_nthr_item_t *items = - (kmp_omp_nthr_item_t *)__kmp_convert_to_ptr(info->array); - kmp_str_loc_t loc = __kmp_str_loc_init(ident->psource, 1); - int i; - for (i = 0; i < info->num; ++i) { - if (kmp_location_match(&loc, &items[i])) { - num_threads = items[i].num_threads; - } - } - __kmp_str_loc_free(&loc); - } - - return num_threads; - ; - -} // __kmp_omp_num_threads -#endif /* USE_DEBUGGER */ Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_debugger.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_dispatch.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_dispatch.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_dispatch.cpp (nonexistent) @@ -1,2595 +0,0 @@ -/* - * kmp_dispatch.cpp: dynamic scheduling - iteration initialization and dispatch. - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -/* Dynamic scheduling initialization and dispatch. - * - * NOTE: __kmp_nth is a constant inside of any dispatch loop, however - * it may change values between parallel regions. __kmp_max_nth - * is the largest value __kmp_nth may take, 1 is the smallest. 
- */ - -#include "kmp.h" -#include "kmp_error.h" -#include "kmp_i18n.h" -#include "kmp_itt.h" -#include "kmp_stats.h" -#include "kmp_str.h" -#if KMP_USE_X87CONTROL -#include -#endif -#include "kmp_lock.h" -#include "kmp_dispatch.h" -#if KMP_USE_HIER_SCHED -#include "kmp_dispatch_hier.h" -#endif - -#if OMPT_SUPPORT -#include "ompt-specific.h" -#endif - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -void __kmp_dispatch_deo_error(int *gtid_ref, int *cid_ref, ident_t *loc_ref) { - kmp_info_t *th; - - KMP_DEBUG_ASSERT(gtid_ref); - - if (__kmp_env_consistency_check) { - th = __kmp_threads[*gtid_ref]; - if (th->th.th_root->r.r_active && - (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none)) { -#if KMP_USE_DYNAMIC_LOCK - __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL, 0); -#else - __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL); -#endif - } - } -} - -void __kmp_dispatch_dxo_error(int *gtid_ref, int *cid_ref, ident_t *loc_ref) { - kmp_info_t *th; - - if (__kmp_env_consistency_check) { - th = __kmp_threads[*gtid_ref]; - if (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none) { - __kmp_pop_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref); - } - } -} - -// Initialize a dispatch_private_info_template buffer for a particular -// type of schedule,chunk. The loop description is found in lb (lower bound), -// ub (upper bound), and st (stride). nproc is the number of threads relevant -// to the scheduling (often the number of threads in a team, but not always if -// hierarchical scheduling is used). tid is the id of the thread calling -// the function within the group of nproc threads. It will have a value -// between 0 and nproc - 1. This is often just the thread id within a team, but -// is not necessarily the case when using hierarchical scheduling. 
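One subtlety in the initialization that follows: the trip count must be computed in the unsigned companion type, since for a loop like (i = -2B; i < 2B; i += 1) the difference ub - lb overflows the signed type. A sketch mirroring the arithmetic performed later in __kmp_dispatch_init_algorithm (trip_count64 is a hypothetical stand-in for the templated code):

    #include <cstdint>
    static std::uint64_t trip_count64(std::int64_t lb, std::int64_t ub,
                                      std::int64_t st) {
      if (st == 1) // most common case
        return (ub >= lb) ? (std::uint64_t)(ub - lb) + 1 : 0;
      if (st > 0)
        return (ub >= lb) ? (std::uint64_t)(ub - lb) / (std::uint64_t)st + 1 : 0;
      // st < 0: count downward
      return (lb >= ub) ? (std::uint64_t)(lb - ub) / (std::uint64_t)(-st) + 1 : 0;
    }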
-// loc is the source file location of the corresponding loop -// gtid is the global thread id -template -void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid, - dispatch_private_info_template *pr, - enum sched_type schedule, T lb, T ub, - typename traits_t::signed_t st, -#if USE_ITT_BUILD - kmp_uint64 *cur_chunk, -#endif - typename traits_t::signed_t chunk, - T nproc, T tid) { - typedef typename traits_t::unsigned_t UT; - typedef typename traits_t::floating_t DBL; - - int active; - T tc; - kmp_info_t *th; - kmp_team_t *team; - -#ifdef KMP_DEBUG - typedef typename traits_t::signed_t ST; - { - char *buff; - // create format specifiers before the debug output - buff = __kmp_str_format("__kmp_dispatch_init_algorithm: T#%%d called " - "pr:%%p lb:%%%s ub:%%%s st:%%%s " - "schedule:%%d chunk:%%%s nproc:%%%s tid:%%%s\n", - traits_t::spec, traits_t::spec, - traits_t::spec, traits_t::spec, - traits_t::spec, traits_t::spec); - KD_TRACE(10, (buff, gtid, pr, lb, ub, st, schedule, chunk, nproc, tid)); - __kmp_str_free(&buff); - } -#endif - /* setup data */ - th = __kmp_threads[gtid]; - team = th->th.th_team; - active = !team->t.t_serialized; - -#if USE_ITT_BUILD - int itt_need_metadata_reporting = __itt_metadata_add_ptr && - __kmp_forkjoin_frames_mode == 3 && - KMP_MASTER_GTID(gtid) && -#if OMP_40_ENABLED - th->th.th_teams_microtask == NULL && -#endif - team->t.t_active_level == 1; -#endif -#if (KMP_STATIC_STEAL_ENABLED) - if (SCHEDULE_HAS_NONMONOTONIC(schedule)) - // AC: we now have only one implementation of stealing, so use it - schedule = kmp_sch_static_steal; - else -#endif - schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule); - - /* Pick up the nomerge/ordered bits from the scheduling type */ - if ((schedule >= kmp_nm_lower) && (schedule < kmp_nm_upper)) { - pr->flags.nomerge = TRUE; - schedule = - (enum sched_type)(((int)schedule) - (kmp_nm_lower - kmp_sch_lower)); - } else { - pr->flags.nomerge = FALSE; - } - pr->type_size = traits_t::type_size; // remember the size of variables - if (kmp_ord_lower & schedule) { - pr->flags.ordered = TRUE; - schedule = - (enum sched_type)(((int)schedule) - (kmp_ord_lower - kmp_sch_lower)); - } else { - pr->flags.ordered = FALSE; - } - - if (schedule == kmp_sch_static) { - schedule = __kmp_static; - } else { - if (schedule == kmp_sch_runtime) { - // Use the scheduling specified by OMP_SCHEDULE (or __kmp_sch_default if - // not specified) - schedule = team->t.t_sched.r_sched_type; - // Detail the schedule if needed (global controls are differentiated - // appropriately) - if (schedule == kmp_sch_guided_chunked) { - schedule = __kmp_guided; - } else if (schedule == kmp_sch_static) { - schedule = __kmp_static; - } - // Use the chunk size specified by OMP_SCHEDULE (or default if not - // specified) - chunk = team->t.t_sched.chunk; -#if USE_ITT_BUILD - if (cur_chunk) - *cur_chunk = chunk; -#endif -#ifdef KMP_DEBUG - { - char *buff; - // create format specifiers before the debug output - buff = __kmp_str_format("__kmp_dispatch_init_algorithm: T#%%d new: " - "schedule:%%d chunk:%%%s\n", - traits_t::spec); - KD_TRACE(10, (buff, gtid, schedule, chunk)); - __kmp_str_free(&buff); - } -#endif - } else { - if (schedule == kmp_sch_guided_chunked) { - schedule = __kmp_guided; - } - if (chunk <= 0) { - chunk = KMP_DEFAULT_CHUNK; - } - } - - if (schedule == kmp_sch_auto) { - // mapping and differentiation: in the __kmp_do_serial_initialize() - schedule = __kmp_auto; -#ifdef KMP_DEBUG - { - char *buff; - // create format specifiers before the debug output - buff = 
__kmp_str_format( - "__kmp_dispatch_init_algorithm: kmp_sch_auto: T#%%d new: " - "schedule:%%d chunk:%%%s\n", - traits_t::spec); - KD_TRACE(10, (buff, gtid, schedule, chunk)); - __kmp_str_free(&buff); - } -#endif - } - - /* guided analytical not safe for too many threads */ - if (schedule == kmp_sch_guided_analytical_chunked && nproc > 1 << 20) { - schedule = kmp_sch_guided_iterative_chunked; - KMP_WARNING(DispatchManyThreads); - } -#if OMP_45_ENABLED - if (schedule == kmp_sch_runtime_simd) { - // compiler provides simd_width in the chunk parameter - schedule = team->t.t_sched.r_sched_type; - // Detail the schedule if needed (global controls are differentiated - // appropriately) - if (schedule == kmp_sch_static || schedule == kmp_sch_auto || - schedule == __kmp_static) { - schedule = kmp_sch_static_balanced_chunked; - } else { - if (schedule == kmp_sch_guided_chunked || schedule == __kmp_guided) { - schedule = kmp_sch_guided_simd; - } - chunk = team->t.t_sched.chunk * chunk; - } -#if USE_ITT_BUILD - if (cur_chunk) - *cur_chunk = chunk; -#endif -#ifdef KMP_DEBUG - { - char *buff; - // create format specifiers before the debug output - buff = __kmp_str_format("__kmp_dispatch_init: T#%%d new: schedule:%%d" - " chunk:%%%s\n", - traits_t::spec); - KD_TRACE(10, (buff, gtid, schedule, chunk)); - __kmp_str_free(&buff); - } -#endif - } -#endif // OMP_45_ENABLED - pr->u.p.parm1 = chunk; - } - KMP_ASSERT2((kmp_sch_lower < schedule && schedule < kmp_sch_upper), - "unknown scheduling type"); - - pr->u.p.count = 0; - - if (__kmp_env_consistency_check) { - if (st == 0) { - __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, - (pr->flags.ordered ? ct_pdo_ordered : ct_pdo), loc); - } - } - // compute trip count - if (st == 1) { // most common case - if (ub >= lb) { - tc = ub - lb + 1; - } else { // ub < lb - tc = 0; // zero-trip - } - } else if (st < 0) { - if (lb >= ub) { - // AC: cast to unsigned is needed for loops like (i=2B; i>-2B; i-=1B), - // where the division needs to be unsigned regardless of the result type - tc = (UT)(lb - ub) / (-st) + 1; - } else { // lb < ub - tc = 0; // zero-trip - } - } else { // st > 0 - if (ub >= lb) { - // AC: cast to unsigned is needed for loops like (i=-2B; i<2B; i+=1B), - // where the division needs to be unsigned regardless of the result type - tc = (UT)(ub - lb) / st + 1; - } else { // ub < lb - tc = 0; // zero-trip - } - } - - pr->u.p.lb = lb; - pr->u.p.ub = ub; - pr->u.p.st = st; - pr->u.p.tc = tc; - -#if KMP_OS_WINDOWS - pr->u.p.last_upper = ub + st; -#endif /* KMP_OS_WINDOWS */ - - /* NOTE: only the active parallel region(s) has active ordered sections */ - - if (active) { - if (pr->flags.ordered) { - pr->ordered_bumped = 0; - pr->u.p.ordered_lower = 1; - pr->u.p.ordered_upper = 0; - } - } - - switch (schedule) { -#if (KMP_STATIC_STEAL_ENABLED) - case kmp_sch_static_steal: { - T ntc, init; - - KD_TRACE(100, - ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_steal case\n", - gtid)); - - ntc = (tc % chunk ? 1 : 0) + tc / chunk; - if (nproc > 1 && ntc >= nproc) { - KMP_COUNT_BLOCK(OMP_LOOP_STATIC_STEAL); - T id = tid; - T small_chunk, extras; - - small_chunk = ntc / nproc; - extras = ntc % nproc; - - init = id * small_chunk + (id < extras ? id : extras); - pr->u.p.count = init; - pr->u.p.ub = init + small_chunk + (id < extras ? 
1 : 0); - - pr->u.p.parm2 = lb; - // pr->pfields.parm3 = 0; // it's not used in static_steal - pr->u.p.parm4 = (id + 1) % nproc; // remember neighbour tid - pr->u.p.st = st; - if (traits_t::type_size > 4) { - // AC: TODO: check if 16-byte CAS available and use it to - // improve performance (probably wait for explicit request - // before spending time on this). - // For now use dynamically allocated per-thread lock, - // free memory in __kmp_dispatch_next when status==0. - KMP_DEBUG_ASSERT(th->th.th_dispatch->th_steal_lock == NULL); - th->th.th_dispatch->th_steal_lock = - (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t)); - __kmp_init_lock(th->th.th_dispatch->th_steal_lock); - } - break; - } else { - KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d falling-through to " - "kmp_sch_static_balanced\n", - gtid)); - schedule = kmp_sch_static_balanced; - /* too few iterations: fall-through to kmp_sch_static_balanced */ - } // if - /* FALL-THROUGH to static balanced */ - } // case -#endif - case kmp_sch_static_balanced: { - T init, limit; - - KD_TRACE( - 100, - ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_balanced case\n", - gtid)); - - if (nproc > 1) { - T id = tid; - - if (tc < nproc) { - if (id < tc) { - init = id; - limit = id; - pr->u.p.parm1 = (id == tc - 1); /* parm1 stores *plastiter */ - } else { - pr->u.p.count = 1; /* means no more chunks to execute */ - pr->u.p.parm1 = FALSE; - break; - } - } else { - T small_chunk = tc / nproc; - T extras = tc % nproc; - init = id * small_chunk + (id < extras ? id : extras); - limit = init + small_chunk - (id < extras ? 0 : 1); - pr->u.p.parm1 = (id == nproc - 1); - } - } else { - if (tc > 0) { - init = 0; - limit = tc - 1; - pr->u.p.parm1 = TRUE; - } else { - // zero trip count - pr->u.p.count = 1; /* means no more chunks to execute */ - pr->u.p.parm1 = FALSE; - break; - } - } -#if USE_ITT_BUILD - // Calculate chunk for metadata report - if (itt_need_metadata_reporting) - if (cur_chunk) - *cur_chunk = limit - init + 1; -#endif - if (st == 1) { - pr->u.p.lb = lb + init; - pr->u.p.ub = lb + limit; - } else { - // calculated upper bound, "ub" is user-defined upper bound - T ub_tmp = lb + limit * st; - pr->u.p.lb = lb + init * st; - // adjust upper bound to "ub" if needed, so that MS lastprivate will match - // it exactly - if (st > 0) { - pr->u.p.ub = (ub_tmp + st > ub ? ub : ub_tmp); - } else { - pr->u.p.ub = (ub_tmp + st < ub ? 
ub : ub_tmp); - } - } - if (pr->flags.ordered) { - pr->u.p.ordered_lower = init; - pr->u.p.ordered_upper = limit; - } - break; - } // case -#if OMP_45_ENABLED - case kmp_sch_static_balanced_chunked: { - // similar to balanced, but chunk adjusted to multiple of simd width - T nth = nproc; - KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d runtime(simd:static)" - " -> falling-through to static_greedy\n", - gtid)); - schedule = kmp_sch_static_greedy; - if (nth > 1) - pr->u.p.parm1 = ((tc + nth - 1) / nth + chunk - 1) & ~(chunk - 1); - else - pr->u.p.parm1 = tc; - break; - } // case - case kmp_sch_guided_simd: -#endif // OMP_45_ENABLED - case kmp_sch_guided_iterative_chunked: { - KD_TRACE( - 100, - ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked" - " case\n", - gtid)); - - if (nproc > 1) { - if ((2L * chunk + 1) * nproc >= tc) { - /* chunk size too large, switch to dynamic */ - schedule = kmp_sch_dynamic_chunked; - } else { - // when remaining iters become less than parm2 - switch to dynamic - pr->u.p.parm2 = guided_int_param * nproc * (chunk + 1); - *(double *)&pr->u.p.parm3 = - guided_flt_param / nproc; // may occupy parm3 and parm4 - } - } else { - KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d falling-through to " - "kmp_sch_static_greedy\n", - gtid)); - schedule = kmp_sch_static_greedy; - /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */ - KD_TRACE( - 100, - ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n", - gtid)); - pr->u.p.parm1 = tc; - } // if - } // case - break; - case kmp_sch_guided_analytical_chunked: { - KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d " - "kmp_sch_guided_analytical_chunked case\n", - gtid)); - - if (nproc > 1) { - if ((2L * chunk + 1) * nproc >= tc) { - /* chunk size too large, switch to dynamic */ - schedule = kmp_sch_dynamic_chunked; - } else { - /* commonly used term: (2 nproc - 1)/(2 nproc) */ - DBL x; - -#if KMP_USE_X87CONTROL - /* Linux* OS already has 64-bit computation by default for long double, - and on Windows* OS on Intel(R) 64, /Qlong_double doesn't work. On - Windows* OS on IA-32 architecture, we need to set precision to 64-bit - instead of the default 53-bit. Even though long double doesn't work - on Windows* OS on Intel(R) 64, the resulting lack of precision is not - expected to impact the correctness of the algorithm, but this has not - been mathematically proven. 
*/ - // save original FPCW and set precision to 64-bit, as - // Windows* OS on IA-32 architecture defaults to 53-bit - unsigned int oldFpcw = _control87(0, 0); - _control87(_PC_64, _MCW_PC); // 0,0x30000 -#endif - /* value used for comparison in solver for cross-over point */ - long double target = ((long double)chunk * 2 + 1) * nproc / tc; - - /* crossover point--chunk indexes equal to or greater than - this point switch to dynamic-style scheduling */ - UT cross; - - /* commonly used term: (2 nproc - 1)/(2 nproc) */ - x = (long double)1.0 - (long double)0.5 / nproc; - -#ifdef KMP_DEBUG - { // test natural alignment - struct _test_a { - char a; - union { - char b; - DBL d; - }; - } t; - ptrdiff_t natural_alignment = - (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1; - //__kmp_warn( " %llx %llx %lld", (long long)&t.d, (long long)&t, (long - // long)natural_alignment ); - KMP_DEBUG_ASSERT( - (((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment)) == 0); - } -#endif // KMP_DEBUG - - /* save the term in thread private dispatch structure */ - *(DBL *)&pr->u.p.parm3 = x; - - /* solve for the crossover point to the nearest integer i for which C_i - <= chunk */ - { - UT left, right, mid; - long double p; - - /* estimate initial upper and lower bound */ - - /* doesn't matter what value right is as long as it is positive, but - it affects performance of the solver */ - right = 229; - p = __kmp_pow(x, right); - if (p > target) { - do { - p *= p; - right <<= 1; - } while (p > target && right < (1 << 27)); - /* lower bound is previous (failed) estimate of upper bound */ - left = right >> 1; - } else { - left = 0; - } - - /* bisection root-finding method */ - while (left + 1 < right) { - mid = (left + right) / 2; - if (__kmp_pow(x, mid) > target) { - left = mid; - } else { - right = mid; - } - } // while - cross = right; - } - /* assert sanity of computed crossover point */ - KMP_ASSERT(cross && __kmp_pow(x, cross - 1) > target && - __kmp_pow(x, cross) <= target); - - /* save the crossover point in thread private dispatch structure */ - pr->u.p.parm2 = cross; - -// C75803 -#if ((KMP_OS_LINUX || KMP_OS_WINDOWS) && KMP_ARCH_X86) && (!defined(KMP_I8)) -#define GUIDED_ANALYTICAL_WORKAROUND (*(DBL *)&pr->u.p.parm3) -#else -#define GUIDED_ANALYTICAL_WORKAROUND (x) -#endif - /* dynamic-style scheduling offset */ - pr->u.p.count = tc - __kmp_dispatch_guided_remaining( - tc, GUIDED_ANALYTICAL_WORKAROUND, cross) - - cross * chunk; -#if KMP_USE_X87CONTROL - // restore FPCW - _control87(oldFpcw, _MCW_PC); -#endif - } // if - } else { - KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d falling-through to " - "kmp_sch_static_greedy\n", - gtid)); - schedule = kmp_sch_static_greedy; - /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */ - pr->u.p.parm1 = tc; - } // if - } // case - break; - case kmp_sch_static_greedy: - KD_TRACE( - 100, - ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n", - gtid)); - pr->u.p.parm1 = (nproc > 1) ? 
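/* [Editor's sketch] The solver above finds the crossover chunk index: the
   smallest k with x^k <= target, where x = 1 - 0.5/nproc. It first doubles a
   candidate exponent until the power drops below the target, then bisects.
   A standalone rendering under those assumptions (names are mine):

#include <cmath>
#include <cstdint>
#include <cstdio>

// Smallest k such that x^k <= target, for 0 < x < 1 and 0 < target < 1.
static uint64_t crossover(long double x, long double target) {
  uint64_t left = 0, right = 229; // any positive seed works; 229 as above
  long double p = std::pow(x, (long double)right);
  if (p > target) {
    do { // squaring the power doubles the exponent
      p *= p;
      right <<= 1;
    } while (p > target && right < (1ULL << 27));
    left = right >> 1; // previous (failed) estimate becomes the lower bound
  }
  // invariant: x^left > target and x^right <= target; bisect the boundary
  while (left + 1 < right) {
    uint64_t mid = (left + right) / 2;
    if (std::pow(x, (long double)mid) > target)
      left = mid;
    else
      right = mid;
  }
  return right;
}

int main() {
  long double x = 1.0L - 0.5L / 8; // nproc = 8
  printf("cross = %llu\n", (unsigned long long)crossover(x, 0.01L));
  return 0;
}

   The runtime then asserts exactly this invariant: x^(cross-1) > target and
   x^cross <= target. */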
(tc + nproc - 1) / nproc : tc; - break; - case kmp_sch_static_chunked: - case kmp_sch_dynamic_chunked: - if (pr->u.p.parm1 <= 0) { - pr->u.p.parm1 = KMP_DEFAULT_CHUNK; - } - KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d " - "kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n", - gtid)); - break; - case kmp_sch_trapezoidal: { - /* TSS: trapezoid self-scheduling, minimum chunk_size = parm1 */ - - T parm1, parm2, parm3, parm4; - KD_TRACE(100, - ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_trapezoidal case\n", - gtid)); - - parm1 = chunk; - - /* F : size of the first cycle */ - parm2 = (tc / (2 * nproc)); - - if (parm2 < 1) { - parm2 = 1; - } - - /* L : size of the last cycle. Make sure the last cycle is not larger - than the first cycle. */ - if (parm1 < 1) { - parm1 = 1; - } else if (parm1 > parm2) { - parm1 = parm2; - } - - /* N : number of cycles */ - parm3 = (parm2 + parm1); - parm3 = (2 * tc + parm3 - 1) / parm3; - - if (parm3 < 2) { - parm3 = 2; - } - - /* sigma : decreasing incr of the trapezoid */ - parm4 = (parm3 - 1); - parm4 = (parm2 - parm1) / parm4; - - // pointless check, because parm4 >= 0 always - // if ( parm4 < 0 ) { - // parm4 = 0; - //} - - pr->u.p.parm1 = parm1; - pr->u.p.parm2 = parm2; - pr->u.p.parm3 = parm3; - pr->u.p.parm4 = parm4; - } // case - break; - - default: { - __kmp_fatal(KMP_MSG(UnknownSchedTypeDetected), // Primary message - KMP_HNT(GetNewerLibrary), // Hint - __kmp_msg_null // Variadic argument list terminator - ); - } break; - } // switch - pr->schedule = schedule; -} - -#if KMP_USE_HIER_SCHED -template -inline void __kmp_dispatch_init_hier_runtime(ident_t *loc, T lb, T ub, - typename traits_t::signed_t st); -template <> -inline void -__kmp_dispatch_init_hier_runtime(ident_t *loc, kmp_int32 lb, - kmp_int32 ub, kmp_int32 st) { - __kmp_dispatch_init_hierarchy( - loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers, - __kmp_hier_scheds.scheds, __kmp_hier_scheds.small_chunks, lb, ub, st); -} -template <> -inline void -__kmp_dispatch_init_hier_runtime(ident_t *loc, kmp_uint32 lb, - kmp_uint32 ub, kmp_int32 st) { - __kmp_dispatch_init_hierarchy( - loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers, - __kmp_hier_scheds.scheds, __kmp_hier_scheds.small_chunks, lb, ub, st); -} -template <> -inline void -__kmp_dispatch_init_hier_runtime(ident_t *loc, kmp_int64 lb, - kmp_int64 ub, kmp_int64 st) { - __kmp_dispatch_init_hierarchy( - loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers, - __kmp_hier_scheds.scheds, __kmp_hier_scheds.large_chunks, lb, ub, st); -} -template <> -inline void -__kmp_dispatch_init_hier_runtime(ident_t *loc, kmp_uint64 lb, - kmp_uint64 ub, kmp_int64 st) { - __kmp_dispatch_init_hierarchy( - loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers, - __kmp_hier_scheds.scheds, __kmp_hier_scheds.large_chunks, lb, ub, st); -} - -// free all the hierarchy scheduling memory associated with the team -void __kmp_dispatch_free_hierarchies(kmp_team_t *team) { - int num_disp_buff = team->t.t_max_nproc > 1 ? 
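/* [Editor's sketch] Trapezoid self-scheduling, as configured in the case
   above: chunks shrink linearly from a first-cycle size F = tc/(2*nproc)
   down to the requested minimum L over N cycles, decreasing by
   sigma = (F-L)/(N-1) each cycle. A standalone sketch of the resulting chunk
   sequence (integer division mirrors the parm1..parm4 computation; the
   constants are mine):

#include <cstdio>

int main() {
  unsigned tc = 100, nproc = 4, min_chunk = 1;
  unsigned F = tc / (2 * nproc);                 // first cycle
  if (F < 1) F = 1;
  unsigned L = (min_chunk > F) ? F : min_chunk;  // last cycle, clamped to F
  unsigned N = (2 * tc + (F + L) - 1) / (F + L); // number of cycles
  if (N < 2) N = 2;
  unsigned sigma = (F - L) / (N - 1);            // per-cycle decrement
  unsigned handed = 0;
  for (unsigned i = 0; i < N && handed < tc; ++i) {
    unsigned chunk = (F > i * sigma) ? F - i * sigma : L;
    if (chunk < L) chunk = L;
    if (handed + chunk > tc) chunk = tc - handed; // clip the tail
    printf("cycle %u: %u iterations\n", i, chunk);
    handed += chunk;
  }
  return 0;
}

   The 2*tc/(F+L) rounding for N comes from the area of a trapezoid with
   parallel sides F and L covering tc iterations. */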
__kmp_dispatch_num_buffers : 2; - for (int i = 0; i < num_disp_buff; ++i) { - // type does not matter here so use kmp_int32 - auto sh = - reinterpret_cast volatile *>( - &team->t.t_disp_buffer[i]); - if (sh->hier) { - sh->hier->deallocate(); - __kmp_free(sh->hier); - } - } -} -#endif - -// UT - unsigned flavor of T, ST - signed flavor of T, -// DBL - double if sizeof(T)==4, or long double if sizeof(T)==8 -template -static void -__kmp_dispatch_init(ident_t *loc, int gtid, enum sched_type schedule, T lb, - T ub, typename traits_t::signed_t st, - typename traits_t::signed_t chunk, int push_ws) { - typedef typename traits_t::unsigned_t UT; - - int active; - kmp_info_t *th; - kmp_team_t *team; - kmp_uint32 my_buffer_index; - dispatch_private_info_template *pr; - dispatch_shared_info_template volatile *sh; - - KMP_BUILD_ASSERT(sizeof(dispatch_private_info_template) == - sizeof(dispatch_private_info)); - KMP_BUILD_ASSERT(sizeof(dispatch_shared_info_template) == - sizeof(dispatch_shared_info)); - - if (!TCR_4(__kmp_init_parallel)) - __kmp_parallel_initialize(); - -#if INCLUDE_SSC_MARKS - SSC_MARK_DISPATCH_INIT(); -#endif -#ifdef KMP_DEBUG - typedef typename traits_t::signed_t ST; - { - char *buff; - // create format specifiers before the debug output - buff = __kmp_str_format("__kmp_dispatch_init: T#%%d called: schedule:%%d " - "chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n", - traits_t::spec, traits_t::spec, - traits_t::spec, traits_t::spec); - KD_TRACE(10, (buff, gtid, schedule, chunk, lb, ub, st)); - __kmp_str_free(&buff); - } -#endif - /* setup data */ - th = __kmp_threads[gtid]; - team = th->th.th_team; - active = !team->t.t_serialized; - th->th.th_ident = loc; - - // Any half-decent optimizer will remove this test when the blocks are empty - // since the macros expand to nothing - // when statistics are disabled. - if (schedule == __kmp_static) { - KMP_COUNT_BLOCK(OMP_LOOP_STATIC); - } else { - KMP_COUNT_BLOCK(OMP_LOOP_DYNAMIC); - } - -#if KMP_USE_HIER_SCHED - // Initialize the scheduling hierarchy if requested in OMP_SCHEDULE envirable - // Hierarchical scheduling does not work with ordered, so if ordered is - // detected, then revert back to threaded scheduling. - bool ordered; - enum sched_type my_sched = schedule; - my_buffer_index = th->th.th_dispatch->th_disp_index; - pr = reinterpret_cast *>( - &th->th.th_dispatch - ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]); - my_sched = SCHEDULE_WITHOUT_MODIFIERS(my_sched); - if ((my_sched >= kmp_nm_lower) && (my_sched < kmp_nm_upper)) - my_sched = - (enum sched_type)(((int)my_sched) - (kmp_nm_lower - kmp_sch_lower)); - ordered = (kmp_ord_lower & my_sched); - if (pr->flags.use_hier) { - if (ordered) { - KD_TRACE(100, ("__kmp_dispatch_init: T#%d ordered loop detected. 
" - "Disabling hierarchical scheduling.\n", - gtid)); - pr->flags.use_hier = FALSE; - } - } - if (schedule == kmp_sch_runtime && __kmp_hier_scheds.size > 0) { - // Don't use hierarchical for ordered parallel loops and don't - // use the runtime hierarchy if one was specified in the program - if (!ordered && !pr->flags.use_hier) - __kmp_dispatch_init_hier_runtime(loc, lb, ub, st); - } -#endif // KMP_USE_HIER_SCHED - -#if USE_ITT_BUILD - kmp_uint64 cur_chunk = chunk; - int itt_need_metadata_reporting = __itt_metadata_add_ptr && - __kmp_forkjoin_frames_mode == 3 && - KMP_MASTER_GTID(gtid) && -#if OMP_40_ENABLED - th->th.th_teams_microtask == NULL && -#endif - team->t.t_active_level == 1; -#endif - if (!active) { - pr = reinterpret_cast *>( - th->th.th_dispatch->th_disp_buffer); /* top of the stack */ - } else { - KMP_DEBUG_ASSERT(th->th.th_dispatch == - &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]); - - my_buffer_index = th->th.th_dispatch->th_disp_index++; - - /* What happens when number of threads changes, need to resize buffer? */ - pr = reinterpret_cast *>( - &th->th.th_dispatch - ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]); - sh = reinterpret_cast volatile *>( - &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]); - KD_TRACE(10, ("__kmp_dispatch_init: T#%d my_buffer_index:%d\n", gtid, - my_buffer_index)); - } - - __kmp_dispatch_init_algorithm(loc, gtid, pr, schedule, lb, ub, st, -#if USE_ITT_BUILD - &cur_chunk, -#endif - chunk, (T)th->th.th_team_nproc, - (T)th->th.th_info.ds.ds_tid); - if (active) { - if (pr->flags.ordered == 0) { - th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo_error; - th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo_error; - } else { - th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo; - th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo; - } - } - - if (active) { - /* The name of this buffer should be my_buffer_index when it's free to use - * it */ - - KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d " - "sh->buffer_index:%d\n", - gtid, my_buffer_index, sh->buffer_index)); - __kmp_wait_yield(&sh->buffer_index, my_buffer_index, - __kmp_eq USE_ITT_BUILD_ARG(NULL)); - // Note: KMP_WAIT_YIELD() cannot be used there: buffer index and - // my_buffer_index are *always* 32-bit integers. - KMP_MB(); /* is this necessary? */ - KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d " - "sh->buffer_index:%d\n", - gtid, my_buffer_index, sh->buffer_index)); - - th->th.th_dispatch->th_dispatch_pr_current = (dispatch_private_info_t *)pr; - th->th.th_dispatch->th_dispatch_sh_current = - CCAST(dispatch_shared_info_t *, (volatile dispatch_shared_info_t *)sh); -#if USE_ITT_BUILD - if (pr->flags.ordered) { - __kmp_itt_ordered_init(gtid); - } - // Report loop metadata - if (itt_need_metadata_reporting) { - // Only report metadata by master of active team at level 1 - kmp_uint64 schedtype = 0; - switch (schedule) { - case kmp_sch_static_chunked: - case kmp_sch_static_balanced: // Chunk is calculated in the switch above - break; - case kmp_sch_static_greedy: - cur_chunk = pr->u.p.parm1; - break; - case kmp_sch_dynamic_chunked: - schedtype = 1; - break; - case kmp_sch_guided_iterative_chunked: - case kmp_sch_guided_analytical_chunked: -#if OMP_45_ENABLED - case kmp_sch_guided_simd: -#endif - schedtype = 2; - break; - default: - // Should we put this case under "static"? 
-        // case kmp_sch_static_steal:
-        schedtype = 3;
-        break;
-      }
-      __kmp_itt_metadata_loop(loc, schedtype, pr->u.p.tc, cur_chunk);
-    }
-#if KMP_USE_HIER_SCHED
-    if (pr->flags.use_hier) {
-      pr->u.p.count = 0;
-      pr->u.p.ub = pr->u.p.lb = pr->u.p.st = pr->u.p.tc = 0;
-    }
-#endif // KMP_USE_HIER_SCHED
-#endif /* USE_ITT_BUILD */
-  }
-
-#ifdef KMP_DEBUG
-  {
-    char *buff;
-    // create format specifiers before the debug output
-    buff = __kmp_str_format(
-        "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s "
-        "lb:%%%s ub:%%%s"
-        " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s"
-        " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n",
-        traits_t<UT>::spec, traits_t<T>::spec, traits_t<T>::spec,
-        traits_t<ST>::spec, traits_t<UT>::spec, traits_t<UT>::spec,
-        traits_t<UT>::spec, traits_t<UT>::spec, traits_t<T>::spec,
-        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec);
-    KD_TRACE(10, (buff, gtid, pr->schedule, pr->flags.ordered, pr->u.p.lb,
-                  pr->u.p.ub, pr->u.p.st, pr->u.p.tc, pr->u.p.count,
-                  pr->u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1,
-                  pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4));
-    __kmp_str_free(&buff);
-  }
-#endif
-#if (KMP_STATIC_STEAL_ENABLED)
-  // It cannot be guaranteed that after execution of a loop with some other
-  // schedule kind all the parm3 variables will contain the same value.
-  // Even if all parm3 values did match, a bad case could still arise, e.g.
-  // toggling between 0 and 1 instead of a program-lifetime increment. So a
-  // dedicated variable is required; the 'static_steal_counter' is used.
-  if (schedule == kmp_sch_static_steal) {
-    // Other threads will inspect this variable when searching for a victim.
-    // This is a flag showing that other threads may steal from this thread
-    // since then.
-    volatile T *p = &pr->u.p.static_steal_counter;
-    *p = *p + 1;
-  }
-#endif // ( KMP_STATIC_STEAL_ENABLED )
-
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  if (ompt_enabled.ompt_callback_work) {
-    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
-    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
-    ompt_callbacks.ompt_callback(ompt_callback_work)(
-        ompt_work_loop, ompt_scope_begin, &(team_info->parallel_data),
-        &(task_info->task_data), pr->u.p.tc, OMPT_LOAD_RETURN_ADDRESS(gtid));
-  }
-#endif
-  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_dynamic);
-}
-
-/* For ordered loops, either __kmp_dispatch_finish() should be called after
- * every iteration, or __kmp_dispatch_finish_chunk() should be called after
- * every chunk of iterations. If the ordered section(s) were not executed
- * for this iteration (or every iteration in this chunk), we need to set the
- * ordered iteration counters so that the next thread can proceed.
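 *
 * [Editor's sketch] The routine below is essentially a ticket protocol on a
 * shared counter: wait until the ordered counter reaches the lower bound of
 * our chunk, then bump it so the next thread can go. A hypothetical
 * standalone rendering (names are mine):
 *
 *   #include <atomic>
 *
 *   static std::atomic<unsigned> ordered_iteration{0};
 *
 *   // Called for an iteration `lower` whose ordered section did not run:
 *   // wait for our turn, then pass the ticket on.
 *   void finish_ordered(unsigned lower) {
 *     while (ordered_iteration.load(std::memory_order_acquire) < lower) {
 *       // the real code spins in __kmp_wait_yield with the __kmp_ge predicate
 *     }
 *     ordered_iteration.fetch_add(1, std::memory_order_release);
 *   }
 *
 * The real routine also short-circuits via ordered_bumped when the ordered
 * section already ran for this chunk.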
*/ -template -static void __kmp_dispatch_finish(int gtid, ident_t *loc) { - typedef typename traits_t::signed_t ST; - kmp_info_t *th = __kmp_threads[gtid]; - - KD_TRACE(100, ("__kmp_dispatch_finish: T#%d called\n", gtid)); - if (!th->th.th_team->t.t_serialized) { - - dispatch_private_info_template *pr = - reinterpret_cast *>( - th->th.th_dispatch->th_dispatch_pr_current); - dispatch_shared_info_template volatile *sh = - reinterpret_cast volatile *>( - th->th.th_dispatch->th_dispatch_sh_current); - KMP_DEBUG_ASSERT(pr); - KMP_DEBUG_ASSERT(sh); - KMP_DEBUG_ASSERT(th->th.th_dispatch == - &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]); - - if (pr->ordered_bumped) { - KD_TRACE( - 1000, - ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n", - gtid)); - pr->ordered_bumped = 0; - } else { - UT lower = pr->u.p.ordered_lower; - -#ifdef KMP_DEBUG - { - char *buff; - // create format specifiers before the debug output - buff = __kmp_str_format("__kmp_dispatch_finish: T#%%d before wait: " - "ordered_iteration:%%%s lower:%%%s\n", - traits_t::spec, traits_t::spec); - KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower)); - __kmp_str_free(&buff); - } -#endif - - __kmp_wait_yield(&sh->u.s.ordered_iteration, lower, - __kmp_ge USE_ITT_BUILD_ARG(NULL)); - KMP_MB(); /* is this necessary? */ -#ifdef KMP_DEBUG - { - char *buff; - // create format specifiers before the debug output - buff = __kmp_str_format("__kmp_dispatch_finish: T#%%d after wait: " - "ordered_iteration:%%%s lower:%%%s\n", - traits_t::spec, traits_t::spec); - KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower)); - __kmp_str_free(&buff); - } -#endif - - test_then_inc((volatile ST *)&sh->u.s.ordered_iteration); - } // if - } // if - KD_TRACE(100, ("__kmp_dispatch_finish: T#%d returned\n", gtid)); -} - -#ifdef KMP_GOMP_COMPAT - -template -static void __kmp_dispatch_finish_chunk(int gtid, ident_t *loc) { - typedef typename traits_t::signed_t ST; - kmp_info_t *th = __kmp_threads[gtid]; - - KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d called\n", gtid)); - if (!th->th.th_team->t.t_serialized) { - // int cid; - dispatch_private_info_template *pr = - reinterpret_cast *>( - th->th.th_dispatch->th_dispatch_pr_current); - dispatch_shared_info_template volatile *sh = - reinterpret_cast volatile *>( - th->th.th_dispatch->th_dispatch_sh_current); - KMP_DEBUG_ASSERT(pr); - KMP_DEBUG_ASSERT(sh); - KMP_DEBUG_ASSERT(th->th.th_dispatch == - &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]); - - // for (cid = 0; cid < KMP_MAX_ORDERED; ++cid) { - UT lower = pr->u.p.ordered_lower; - UT upper = pr->u.p.ordered_upper; - UT inc = upper - lower + 1; - - if (pr->ordered_bumped == inc) { - KD_TRACE( - 1000, - ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n", - gtid)); - pr->ordered_bumped = 0; - } else { - inc -= pr->ordered_bumped; - -#ifdef KMP_DEBUG - { - char *buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_finish_chunk: T#%%d before wait: " - "ordered_iteration:%%%s lower:%%%s upper:%%%s\n", - traits_t::spec, traits_t::spec, traits_t::spec); - KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower, upper)); - __kmp_str_free(&buff); - } -#endif - - __kmp_wait_yield(&sh->u.s.ordered_iteration, lower, - __kmp_ge USE_ITT_BUILD_ARG(NULL)); - - KMP_MB(); /* is this necessary? */ - KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting " - "ordered_bumped to zero\n", - gtid)); - pr->ordered_bumped = 0; -//!!!!! 
TODO check if the inc should be unsigned, or signed??? -#ifdef KMP_DEBUG - { - char *buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_finish_chunk: T#%%d after wait: " - "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n", - traits_t::spec, traits_t::spec, traits_t::spec, - traits_t::spec); - KD_TRACE(1000, - (buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper)); - __kmp_str_free(&buff); - } -#endif - - test_then_add((volatile ST *)&sh->u.s.ordered_iteration, inc); - } - // } - } - KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d returned\n", gtid)); -} - -#endif /* KMP_GOMP_COMPAT */ - -template -int __kmp_dispatch_next_algorithm(int gtid, - dispatch_private_info_template *pr, - dispatch_shared_info_template volatile *sh, - kmp_int32 *p_last, T *p_lb, T *p_ub, - typename traits_t::signed_t *p_st, T nproc, - T tid) { - typedef typename traits_t::unsigned_t UT; - typedef typename traits_t::signed_t ST; - typedef typename traits_t::floating_t DBL; - int status = 0; - kmp_int32 last = 0; - T start; - ST incr; - UT limit, trip, init; - kmp_info_t *th = __kmp_threads[gtid]; - kmp_team_t *team = th->th.th_team; - - KMP_DEBUG_ASSERT(th->th.th_dispatch == - &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]); - KMP_DEBUG_ASSERT(pr); - KMP_DEBUG_ASSERT(sh); - KMP_DEBUG_ASSERT(tid >= 0 && tid < nproc); -#ifdef KMP_DEBUG - { - char *buff; - // create format specifiers before the debug output - buff = - __kmp_str_format("__kmp_dispatch_next_algorithm: T#%%d called pr:%%p " - "sh:%%p nproc:%%%s tid:%%%s\n", - traits_t::spec, traits_t::spec); - KD_TRACE(10, (buff, gtid, pr, sh, nproc, tid)); - __kmp_str_free(&buff); - } -#endif - - // zero trip count - if (pr->u.p.tc == 0) { - KD_TRACE(10, - ("__kmp_dispatch_next_algorithm: T#%d early exit trip count is " - "zero status:%d\n", - gtid, status)); - return 0; - } - - switch (pr->schedule) { -#if (KMP_STATIC_STEAL_ENABLED) - case kmp_sch_static_steal: { - T chunk = pr->u.p.parm1; - - KD_TRACE(100, - ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_steal case\n", - gtid)); - - trip = pr->u.p.tc - 1; - - if (traits_t::type_size > 4) { - // use lock for 8-byte and CAS for 4-byte induction - // variable. TODO (optional): check and use 16-byte CAS - kmp_lock_t *lck = th->th.th_dispatch->th_steal_lock; - KMP_DEBUG_ASSERT(lck != NULL); - if (pr->u.p.count < (UT)pr->u.p.ub) { - __kmp_acquire_lock(lck, gtid); - // try to get own chunk of iterations - init = (pr->u.p.count)++; - status = (init < (UT)pr->u.p.ub); - __kmp_release_lock(lck, gtid); - } else { - status = 0; // no own chunks - } - if (!status) { // try to steal - kmp_info_t **other_threads = team->t.t_threads; - int while_limit = nproc; // nproc attempts to find a victim - int while_index = 0; - // TODO: algorithm of searching for a victim - // should be cleaned up and measured - while ((!status) && (while_limit != ++while_index)) { - T remaining; - T victimIdx = pr->u.p.parm4; - T oldVictimIdx = victimIdx ? 
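/* [Editor's sketch] Victim selection here is a round-robin walk that starts
   at the remembered index (parm4) and makes at most one full lap, skipping
   threads whose dispatch buffer belongs to a different loop instance. A
   simplified standalone model (has_work stands in for the
   static_steal_counter match; names are mine):

#include <cstdio>

static int find_victim(int start, int nproc, const int *has_work) {
  int idx = start;
  int stop = start ? start - 1 : nproc - 1; // exactly one lap around the team
  while (!has_work[idx] && idx != stop)
    idx = (idx + 1) % nproc;
  return has_work[idx] ? idx : -1; // -1: retry later (up to nproc attempts)
}

int main() {
  const int has_work[4] = {0, 0, 1, 0};
  printf("victim: %d\n", find_victim(3, 4, has_work)); // prints 2
  return 0;
}

   The `start ? start - 1 : nproc - 1` stop index is the same wrap-around
   guard computed into oldVictimIdx in both stealing paths here. */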
victimIdx - 1 : nproc - 1;
-          dispatch_private_info_template<T> *victim =
-              reinterpret_cast<dispatch_private_info_template<T> *>(
-                  other_threads[victimIdx]
-                      ->th.th_dispatch->th_dispatch_pr_current);
-          while ((victim == NULL || victim == pr ||
-                  (*(volatile T *)&victim->u.p.static_steal_counter !=
-                   *(volatile T *)&pr->u.p.static_steal_counter)) &&
-                 oldVictimIdx != victimIdx) {
-            victimIdx = (victimIdx + 1) % nproc;
-            victim = reinterpret_cast<dispatch_private_info_template<T> *>(
-                other_threads[victimIdx]
-                    ->th.th_dispatch->th_dispatch_pr_current);
-          }
-          if (!victim || (*(volatile T *)&victim->u.p.static_steal_counter !=
-                          *(volatile T *)&pr->u.p.static_steal_counter)) {
-            continue; // try once more (nproc attempts in total)
-            // no victim is ready yet to participate in stealing
-            // because all victims are still in kmp_init_dispatch
-          }
-          if (victim->u.p.count + 2 > (UT)victim->u.p.ub) {
-            pr->u.p.parm4 = (victimIdx + 1) % nproc; // shift start tid
-            continue; // not enough chunks to steal, goto next victim
-          }
-
-          lck = other_threads[victimIdx]->th.th_dispatch->th_steal_lock;
-          KMP_ASSERT(lck != NULL);
-          __kmp_acquire_lock(lck, gtid);
-          limit = victim->u.p.ub; // keep initial ub
-          if (victim->u.p.count >= limit ||
-              (remaining = limit - victim->u.p.count) < 2) {
-            __kmp_release_lock(lck, gtid);
-            pr->u.p.parm4 = (victimIdx + 1) % nproc; // next victim
-            continue; // not enough chunks to steal
-          }
-          // stealing succeeded, reduce victim's ub by 1/4 of undone chunks
-          // or by 1
-          if (remaining > 3) {
-            // steal 1/4 of remaining
-            KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen, remaining >> 2);
-            init = (victim->u.p.ub -= (remaining >> 2));
-          } else {
-            // steal 1 chunk of 2 or 3 remaining
-            KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen, 1);
-            init = (victim->u.p.ub -= 1);
-          }
-          __kmp_release_lock(lck, gtid);
-
-          KMP_DEBUG_ASSERT(init + 1 <= limit);
-          pr->u.p.parm4 = victimIdx; // remember victim to steal from
-          status = 1;
-          while_index = 0;
-          // now update own count and ub with stolen range but init chunk
-          __kmp_acquire_lock(th->th.th_dispatch->th_steal_lock, gtid);
-          pr->u.p.count = init + 1;
-          pr->u.p.ub = limit;
-          __kmp_release_lock(th->th.th_dispatch->th_steal_lock, gtid);
-        } // while (search for victim)
-      } // if (try to find victim and steal)
-    } else {
-      // 4-byte induction variable, use 8-byte CAS for pair (count, ub)
-      typedef union {
-        struct {
-          UT count;
-          T ub;
-        } p;
-        kmp_int64 b;
-      } union_i4;
-      // All operations on 'count' or 'ub' must be combined atomically
-      // together.
-      {
-        union_i4 vold, vnew;
-        vold.b = *(volatile kmp_int64 *)(&pr->u.p.count);
-        vnew = vold;
-        vnew.p.count++;
-        while (!KMP_COMPARE_AND_STORE_ACQ64(
-            (volatile kmp_int64 *)&pr->u.p.count,
-            *VOLATILE_CAST(kmp_int64 *) & vold.b,
-            *VOLATILE_CAST(kmp_int64 *) & vnew.b)) {
-          KMP_CPU_PAUSE();
-          vold.b = *(volatile kmp_int64 *)(&pr->u.p.count);
-          vnew = vold;
-          vnew.p.count++;
-        }
-        vnew = vold;
-        init = vnew.p.count;
-        status = (init < (UT)vnew.p.ub);
-      }
-
-      if (!status) {
-        kmp_info_t **other_threads = team->t.t_threads;
-        int while_limit = nproc; // nproc attempts to find a victim
-        int while_index = 0;
-
-        // TODO: algorithm of searching for a victim
-        // should be cleaned up and measured
-        while ((!status) && (while_limit != ++while_index)) {
-          union_i4 vold, vnew;
-          kmp_int32 remaining;
-          T victimIdx = pr->u.p.parm4;
-          T oldVictimIdx = victimIdx ? victimIdx - 1 : nproc - 1;
-          dispatch_private_info_template<T> *victim =
-              reinterpret_cast<dispatch_private_info_template<T> *>(
-                  other_threads[victimIdx]
-                      ->th.th_dispatch->th_dispatch_pr_current);
-          while ((victim == NULL || victim == pr ||
-                  (*(volatile T *)&victim->u.p.static_steal_counter !=
-                   *(volatile T *)&pr->u.p.static_steal_counter)) &&
-                 oldVictimIdx != victimIdx) {
-            victimIdx = (victimIdx + 1) % nproc;
-            victim = reinterpret_cast<dispatch_private_info_template<T> *>(
-                other_threads[victimIdx]
-                    ->th.th_dispatch->th_dispatch_pr_current);
-          }
-          if (!victim || (*(volatile T *)&victim->u.p.static_steal_counter !=
-                          *(volatile T *)&pr->u.p.static_steal_counter)) {
-            continue; // try once more (nproc attempts in total)
-            // no victim is ready yet to participate in stealing
-            // because all victims are still in kmp_init_dispatch
-          }
-          pr->u.p.parm4 = victimIdx; // new victim found
-          while (1) { // CAS loop if victim has enough chunks to steal
-            vold.b = *(volatile kmp_int64 *)(&victim->u.p.count);
-            vnew = vold;
-
-            KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip);
-            if (vnew.p.count >= (UT)vnew.p.ub ||
-                (remaining = vnew.p.ub - vnew.p.count) < 2) {
-              pr->u.p.parm4 = (victimIdx + 1) % nproc; // shift start victim id
-              break; // not enough chunks to steal, goto next victim
-            }
-            if (remaining > 3) {
-              vnew.p.ub -= (remaining >> 2); // try to steal 1/4 of remaining
-            } else {
-              vnew.p.ub -= 1; // steal 1 chunk of 2 or 3 remaining
-            }
-            KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip);
-            // TODO: Should this be acquire or release?
-            if (KMP_COMPARE_AND_STORE_ACQ64(
-                    (volatile kmp_int64 *)&victim->u.p.count,
-                    *VOLATILE_CAST(kmp_int64 *) & vold.b,
-                    *VOLATILE_CAST(kmp_int64 *) & vnew.b)) {
-              // stealing succeeded
-              KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen,
-                                        vold.p.ub - vnew.p.ub);
-              status = 1;
-              while_index = 0;
-              // now update own count and ub
-              init = vnew.p.ub;
-              vold.p.count = init + 1;
-#if KMP_ARCH_X86
-              KMP_XCHG_FIXED64((volatile kmp_int64 *)(&pr->u.p.count), vold.b);
-#else
-              *(volatile kmp_int64 *)(&pr->u.p.count) = vold.b;
-#endif
-              break;
-            } // if (check CAS result)
-            KMP_CPU_PAUSE(); // CAS failed, repeat the attempt
-          } // while (try to steal from particular victim)
-        } // while (search for victim)
-      } // if (try to find victim and steal)
-    } // if (4-byte induction variable)
-    if (!status) {
-      *p_lb = 0;
-      *p_ub = 0;
-      if (p_st != NULL)
-        *p_st = 0;
-    } else {
-      start = pr->u.p.parm2;
-      init *= chunk;
-      limit = chunk + init - 1;
-      incr = pr->u.p.st;
-      KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_chunks, 1);
-
-      KMP_DEBUG_ASSERT(init <= trip);
-      if ((last = (limit >= trip)) != 0)
-        limit = trip;
-      if (p_st != NULL)
-        *p_st = incr;
-
-      if (incr == 1) {
-        *p_lb = start + init;
-        *p_ub = start + limit;
-      } else {
-        *p_lb = start + init * incr;
-        *p_ub = start + limit * incr;
-      }
-
-      if (pr->flags.ordered) {
-        pr->u.p.ordered_lower = init;
-        pr->u.p.ordered_upper = limit;
-      } // if
-    } // if
-    break;
-  } // case
-#endif // ( KMP_STATIC_STEAL_ENABLED )
-  case kmp_sch_static_balanced: {
-    KD_TRACE(
-        10,
-        ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_balanced case\n",
-         gtid));
-    /* check if thread has any iteration to do */
-    if ((status = !pr->u.p.count) != 0) {
-      pr->u.p.count = 1;
-      *p_lb = pr->u.p.lb;
-      *p_ub = pr->u.p.ub;
-      last = pr->u.p.parm1;
-      if (p_st != NULL)
-        *p_st = pr->u.p.st;
-    } else { /* no iterations to do */
-      pr->u.p.lb = pr->u.p.ub + pr->u.p.st;
-    }
-  } // case
-  break;
-  case kmp_sch_static_greedy: /* original code for kmp_sch_static_greedy was
-                                 merged here */
-  case kmp_sch_static_chunked: {
-
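/* [Editor's sketch] The 4-byte static_steal path above packs the pair
   (count, ub) into one 64-bit word so that the owner's increments and a
   thief's ub decrements are resolved by a single compare-and-swap. A
   standalone sketch using std::atomic in place of
   KMP_COMPARE_AND_STORE_ACQ64 (the union punning mirrors the runtime's
   union_i4 idiom; names are mine):

#include <atomic>
#include <cstdint>
#include <cstdio>

union packed { // (count, ub) in one CAS-able word
  struct {
    uint32_t count, ub;
  } p;
  uint64_t b;
};

static std::atomic<uint64_t> shared{0};

// Owner side: claim the next chunk by bumping count, retrying on races.
static bool claim_chunk(uint32_t *out) {
  packed vold, vnew;
  vold.b = shared.load();
  do {
    vnew = vold;
    if (vnew.p.count >= vnew.p.ub)
      return false; // nothing left (a thief may have lowered ub meanwhile)
    vnew.p.count++;
  } while (!shared.compare_exchange_weak(vold.b, vnew.b));
  *out = vnew.p.count - 1;
  return true;
}

int main() {
  packed init;
  init.p.count = 0;
  init.p.ub = 4;
  shared.store(init.b);
  uint32_t c;
  while (claim_chunk(&c))
    printf("chunk %u\n", c);
  return 0;
}

   A thief would perform the symmetric CAS that lowers ub, exactly as in the
   while(1) loop above. */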
T parm1; - - KD_TRACE(100, ("__kmp_dispatch_next_algorithm: T#%d " - "kmp_sch_static_[affinity|chunked] case\n", - gtid)); - parm1 = pr->u.p.parm1; - - trip = pr->u.p.tc - 1; - init = parm1 * (pr->u.p.count + tid); - - if ((status = (init <= trip)) != 0) { - start = pr->u.p.lb; - incr = pr->u.p.st; - limit = parm1 + init - 1; - - if ((last = (limit >= trip)) != 0) - limit = trip; - - if (p_st != NULL) - *p_st = incr; - - pr->u.p.count += nproc; - - if (incr == 1) { - *p_lb = start + init; - *p_ub = start + limit; - } else { - *p_lb = start + init * incr; - *p_ub = start + limit * incr; - } - - if (pr->flags.ordered) { - pr->u.p.ordered_lower = init; - pr->u.p.ordered_upper = limit; - } // if - } // if - } // case - break; - - case kmp_sch_dynamic_chunked: { - T chunk = pr->u.p.parm1; - - KD_TRACE( - 100, - ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n", - gtid)); - - init = chunk * test_then_inc_acq((volatile ST *)&sh->u.s.iteration); - trip = pr->u.p.tc - 1; - - if ((status = (init <= trip)) == 0) { - *p_lb = 0; - *p_ub = 0; - if (p_st != NULL) - *p_st = 0; - } else { - start = pr->u.p.lb; - limit = chunk + init - 1; - incr = pr->u.p.st; - - if ((last = (limit >= trip)) != 0) - limit = trip; - - if (p_st != NULL) - *p_st = incr; - - if (incr == 1) { - *p_lb = start + init; - *p_ub = start + limit; - } else { - *p_lb = start + init * incr; - *p_ub = start + limit * incr; - } - - if (pr->flags.ordered) { - pr->u.p.ordered_lower = init; - pr->u.p.ordered_upper = limit; - } // if - } // if - } // case - break; - - case kmp_sch_guided_iterative_chunked: { - T chunkspec = pr->u.p.parm1; - KD_TRACE(100, ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_chunked " - "iterative case\n", - gtid)); - trip = pr->u.p.tc; - // Start atomic part of calculations - while (1) { - ST remaining; // signed, because can be < 0 - init = sh->u.s.iteration; // shared value - remaining = trip - init; - if (remaining <= 0) { // AC: need to compare with 0 first - // nothing to do, don't try atomic op - status = 0; - break; - } - if ((T)remaining < - pr->u.p.parm2) { // compare with K*nproc*(chunk+1), K=2 by default - // use dynamic-style shcedule - // atomically inrement iterations, get old value - init = test_then_add(RCAST(volatile ST *, &sh->u.s.iteration), - (ST)chunkspec); - remaining = trip - init; - if (remaining <= 0) { - status = 0; // all iterations got by other threads - } else { - // got some iterations to work on - status = 1; - if ((T)remaining > chunkspec) { - limit = init + chunkspec - 1; - } else { - last = 1; // the last chunk - limit = init + remaining - 1; - } // if - } // if - break; - } // if - limit = init + - (UT)(remaining * *(double *)&pr->u.p.parm3); // divide by K*nproc - if (compare_and_swap(RCAST(volatile ST *, &sh->u.s.iteration), - (ST)init, (ST)limit)) { - // CAS was successful, chunk obtained - status = 1; - --limit; - break; - } // if - } // while - if (status != 0) { - start = pr->u.p.lb; - incr = pr->u.p.st; - if (p_st != NULL) - *p_st = incr; - *p_lb = start + init * incr; - *p_ub = start + limit * incr; - if (pr->flags.ordered) { - pr->u.p.ordered_lower = init; - pr->u.p.ordered_upper = limit; - } // if - } else { - *p_lb = 0; - *p_ub = 0; - if (p_st != NULL) - *p_st = 0; - } // if - } // case - break; - -#if OMP_45_ENABLED - case kmp_sch_guided_simd: { - // same as iterative but curr-chunk adjusted to be multiple of given - // chunk - T chunk = pr->u.p.parm1; - KD_TRACE(100, - ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_simd case\n", - gtid)); 
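/* [Editor's sketch] The guided logic above in miniature: each claim takes a
   fixed fraction of the remaining iterations via CAS on the shared iteration
   counter, and falls back to plain dynamic chunks near the end. The
   0.5/nproc fraction and the 2*nproc*(chunk+1) threshold follow the defaults
   mentioned above; all names are mine:

#include <atomic>
#include <cstdio>

static std::atomic<long> iteration{0};

static bool next_guided(long trip, long nproc, long chunk, long *lo,
                        long *hi) {
  for (;;) {
    long init = iteration.load();
    long remaining = trip - init;
    if (remaining <= 0)
      return false; // loop exhausted
    if (remaining < 2 * nproc * (chunk + 1)) { // dynamic-style tail
      init = iteration.fetch_add(chunk);
      if (trip - init <= 0)
        return false; // other threads took the rest
      *lo = init;
      *hi = init + ((trip - init > chunk) ? chunk : trip - init) - 1;
      return true;
    }
    long limit = init + (long)(remaining * (0.5 / nproc)); // grab a fraction
    if (iteration.compare_exchange_strong(init, limit)) {
      *lo = init;
      *hi = limit - 1;
      return true;
    } // else the counter moved under us; retry
  }
}

int main() {
  long lo, hi;
  while (next_guided(1000, 4, 8, &lo, &hi))
    printf("[%ld, %ld]\n", lo, hi);
  return 0;
}

   The runtime keeps the per-loop fraction bit-copied into parm3 rather than
   recomputing it, and guided_simd additionally rounds each span up to a
   multiple of the simd chunk. */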
- trip = pr->u.p.tc; - // Start atomic part of calculations - while (1) { - ST remaining; // signed, because can be < 0 - init = sh->u.s.iteration; // shared value - remaining = trip - init; - if (remaining <= 0) { // AC: need to compare with 0 first - status = 0; // nothing to do, don't try atomic op - break; - } - KMP_DEBUG_ASSERT(init % chunk == 0); - // compare with K*nproc*(chunk+1), K=2 by default - if ((T)remaining < pr->u.p.parm2) { - // use dynamic-style shcedule - // atomically inrement iterations, get old value - init = test_then_add(RCAST(volatile ST *, &sh->u.s.iteration), - (ST)chunk); - remaining = trip - init; - if (remaining <= 0) { - status = 0; // all iterations got by other threads - } else { - // got some iterations to work on - status = 1; - if ((T)remaining > chunk) { - limit = init + chunk - 1; - } else { - last = 1; // the last chunk - limit = init + remaining - 1; - } // if - } // if - break; - } // if - // divide by K*nproc - UT span = remaining * (*(double *)&pr->u.p.parm3); - UT rem = span % chunk; - if (rem) // adjust so that span%chunk == 0 - span += chunk - rem; - limit = init + span; - if (compare_and_swap(RCAST(volatile ST *, &sh->u.s.iteration), - (ST)init, (ST)limit)) { - // CAS was successful, chunk obtained - status = 1; - --limit; - break; - } // if - } // while - if (status != 0) { - start = pr->u.p.lb; - incr = pr->u.p.st; - if (p_st != NULL) - *p_st = incr; - *p_lb = start + init * incr; - *p_ub = start + limit * incr; - if (pr->flags.ordered) { - pr->u.p.ordered_lower = init; - pr->u.p.ordered_upper = limit; - } // if - } else { - *p_lb = 0; - *p_ub = 0; - if (p_st != NULL) - *p_st = 0; - } // if - } // case - break; -#endif // OMP_45_ENABLED - - case kmp_sch_guided_analytical_chunked: { - T chunkspec = pr->u.p.parm1; - UT chunkIdx; -#if KMP_USE_X87CONTROL - /* for storing original FPCW value for Windows* OS on - IA-32 architecture 8-byte version */ - unsigned int oldFpcw; - unsigned int fpcwSet = 0; -#endif - KD_TRACE(100, ("__kmp_dispatch_next_algorithm: T#%d " - "kmp_sch_guided_analytical_chunked case\n", - gtid)); - - trip = pr->u.p.tc; - - KMP_DEBUG_ASSERT(nproc > 1); - KMP_DEBUG_ASSERT((2UL * chunkspec + 1) * (UT)nproc < trip); - - while (1) { /* this while loop is a safeguard against unexpected zero - chunk sizes */ - chunkIdx = test_then_inc_acq((volatile ST *)&sh->u.s.iteration); - if (chunkIdx >= (UT)pr->u.p.parm2) { - --trip; - /* use dynamic-style scheduling */ - init = chunkIdx * chunkspec + pr->u.p.count; - /* need to verify init > 0 in case of overflow in the above - * calculation */ - if ((status = (init > 0 && init <= trip)) != 0) { - limit = init + chunkspec - 1; - - if ((last = (limit >= trip)) != 0) - limit = trip; - } - break; - } else { -/* use exponential-style scheduling */ -/* The following check is to workaround the lack of long double precision on - Windows* OS. - This check works around the possible effect that init != 0 for chunkIdx == 0. 
- */ -#if KMP_USE_X87CONTROL - /* If we haven't already done so, save original - FPCW and set precision to 64-bit, as Windows* OS - on IA-32 architecture defaults to 53-bit */ - if (!fpcwSet) { - oldFpcw = _control87(0, 0); - _control87(_PC_64, _MCW_PC); - fpcwSet = 0x30000; - } -#endif - if (chunkIdx) { - init = __kmp_dispatch_guided_remaining( - trip, *(DBL *)&pr->u.p.parm3, chunkIdx); - KMP_DEBUG_ASSERT(init); - init = trip - init; - } else - init = 0; - limit = trip - __kmp_dispatch_guided_remaining( - trip, *(DBL *)&pr->u.p.parm3, chunkIdx + 1); - KMP_ASSERT(init <= limit); - if (init < limit) { - KMP_DEBUG_ASSERT(limit <= trip); - --limit; - status = 1; - break; - } // if - } // if - } // while (1) -#if KMP_USE_X87CONTROL - /* restore FPCW if necessary - AC: check fpcwSet flag first because oldFpcw can be uninitialized here - */ - if (fpcwSet && (oldFpcw & fpcwSet)) - _control87(oldFpcw, _MCW_PC); -#endif - if (status != 0) { - start = pr->u.p.lb; - incr = pr->u.p.st; - if (p_st != NULL) - *p_st = incr; - *p_lb = start + init * incr; - *p_ub = start + limit * incr; - if (pr->flags.ordered) { - pr->u.p.ordered_lower = init; - pr->u.p.ordered_upper = limit; - } - } else { - *p_lb = 0; - *p_ub = 0; - if (p_st != NULL) - *p_st = 0; - } - } // case - break; - - case kmp_sch_trapezoidal: { - UT index; - T parm2 = pr->u.p.parm2; - T parm3 = pr->u.p.parm3; - T parm4 = pr->u.p.parm4; - KD_TRACE(100, - ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_trapezoidal case\n", - gtid)); - - index = test_then_inc((volatile ST *)&sh->u.s.iteration); - - init = (index * ((2 * parm2) - (index - 1) * parm4)) / 2; - trip = pr->u.p.tc - 1; - - if ((status = ((T)index < parm3 && init <= trip)) == 0) { - *p_lb = 0; - *p_ub = 0; - if (p_st != NULL) - *p_st = 0; - } else { - start = pr->u.p.lb; - limit = ((index + 1) * (2 * parm2 - index * parm4)) / 2 - 1; - incr = pr->u.p.st; - - if ((last = (limit >= trip)) != 0) - limit = trip; - - if (p_st != NULL) - *p_st = incr; - - if (incr == 1) { - *p_lb = start + init; - *p_ub = start + limit; - } else { - *p_lb = start + init * incr; - *p_ub = start + limit * incr; - } - - if (pr->flags.ordered) { - pr->u.p.ordered_lower = init; - pr->u.p.ordered_upper = limit; - } // if - } // if - } // case - break; - default: { - status = 0; // to avoid complaints on uninitialized variable use - __kmp_fatal(KMP_MSG(UnknownSchedTypeDetected), // Primary message - KMP_HNT(GetNewerLibrary), // Hint - __kmp_msg_null // Variadic argument list terminator - ); - } break; - } // switch - if (p_last) - *p_last = last; -#ifdef KMP_DEBUG - if (pr->flags.ordered) { - char *buff; - // create format specifiers before the debug output - buff = __kmp_str_format("__kmp_dispatch_next_algorithm: T#%%d " - "ordered_lower:%%%s ordered_upper:%%%s\n", - traits_t::spec, traits_t::spec); - KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper)); - __kmp_str_free(&buff); - } - { - char *buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_next_algorithm: T#%%d exit status:%%d p_last:%%d " - "p_lb:%%%s p_ub:%%%s p_st:%%%s\n", - traits_t::spec, traits_t::spec, traits_t::spec); - KD_TRACE(10, (buff, gtid, status, *p_last, *p_lb, *p_ub, *p_st)); - __kmp_str_free(&buff); - } -#endif - return status; -} - -/* Define a macro for exiting __kmp_dispatch_next(). If status is 0 (no more - work), then tell OMPT the loop is over. In some cases kmp_dispatch_fini() - is not called. 
*/
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-#define OMPT_LOOP_END                                                         \
-  if (status == 0) {                                                          \
-    if (ompt_enabled.ompt_callback_work) {                                    \
-      ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);             \
-      ompt_task_info_t *task_info = __ompt_get_task_info_object(0);           \
-      ompt_callbacks.ompt_callback(ompt_callback_work)(                       \
-          ompt_work_loop, ompt_scope_end, &(team_info->parallel_data),        \
-          &(task_info->task_data), 0, codeptr);                               \
-    }                                                                         \
-  }
-// TODO: implement count
-#else
-#define OMPT_LOOP_END // no-op
-#endif
-
-#if KMP_STATS_ENABLED
-#define KMP_STATS_LOOP_END                                                    \
-  {                                                                           \
-    kmp_int64 u, l, t, i;                                                     \
-    l = (kmp_int64)(*p_lb);                                                   \
-    u = (kmp_int64)(*p_ub);                                                   \
-    i = (kmp_int64)(pr->u.p.st);                                              \
-    if (status == 0) {                                                        \
-      t = 0;                                                                  \
-      KMP_POP_PARTITIONED_TIMER();                                            \
-    } else if (i == 1) {                                                      \
-      if (u >= l)                                                             \
-        t = u - l + 1;                                                        \
-      else                                                                    \
-        t = 0;                                                                \
-    } else if (i < 0) {                                                       \
-      if (l >= u)                                                             \
-        t = (l - u) / (-i) + 1;                                               \
-      else                                                                    \
-        t = 0;                                                                \
-    } else {                                                                  \
-      if (u >= l)                                                             \
-        t = (u - l) / i + 1;                                                  \
-      else                                                                    \
-        t = 0;                                                                \
-    }                                                                         \
-    KMP_COUNT_VALUE(OMP_loop_dynamic_iterations, t);                          \
-  }
-#else
-#define KMP_STATS_LOOP_END /* Nothing */
-#endif
-
-template <typename T>
-static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last,
-                               T *p_lb, T *p_ub,
-                               typename traits_t<T>::signed_t *p_st
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-                               ,
-                               void *codeptr
-#endif
-                               ) {
-
-  typedef typename traits_t<T>::unsigned_t UT;
-  typedef typename traits_t<T>::signed_t ST;
-  // This is potentially slightly misleading, schedule(runtime) will appear
-  // here even if the actual runtime schedule is static. (Which points out a
-  // disadvantage of schedule(runtime): even when static scheduling is used
-  // it costs more than a compile-time choice to use static scheduling
-  // would.)
-  KMP_TIME_PARTITIONED_BLOCK(OMP_loop_dynamic_scheduling);
-
-  int status;
-  dispatch_private_info_template<T> *pr;
-  kmp_info_t *th = __kmp_threads[gtid];
-  kmp_team_t *team = th->th.th_team;
-
-  KMP_DEBUG_ASSERT(p_lb && p_ub && p_st); // AC: these cannot be NULL
-  KD_TRACE(
-      1000,
-      ("__kmp_dispatch_next: T#%d called p_lb:%p p_ub:%p p_st:%p p_last: %p\n",
-       gtid, p_lb, p_ub, p_st, p_last));
-
-  if (team->t.t_serialized) {
-    /* NOTE: serialize this dispatch because we are not at the active level */
-    pr = reinterpret_cast<dispatch_private_info_template<T> *>(
-        th->th.th_dispatch->th_disp_buffer); /* top of the stack */
-    KMP_DEBUG_ASSERT(pr);
-
-    if ((status = (pr->u.p.tc != 0)) == 0) {
-      *p_lb = 0;
-      *p_ub = 0;
-      // if ( p_last != NULL )
-      //   *p_last = 0;
-      if (p_st != NULL)
-        *p_st = 0;
-      if (__kmp_env_consistency_check) {
-        if (pr->pushed_ws != ct_none) {
-          pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
-        }
-      }
-    } else if (pr->flags.nomerge) {
-      kmp_int32 last;
-      T start;
-      UT limit, trip, init;
-      ST incr;
-      T chunk = pr->u.p.parm1;
-
-      KD_TRACE(100,
-               ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n",
-                gtid));
-
-      init = chunk * pr->u.p.count++;
-      trip = pr->u.p.tc - 1;
-
-      if ((status = (init <= trip)) == 0) {
-        *p_lb = 0;
-        *p_ub = 0;
-        // if ( p_last != NULL )
-        //   *p_last = 0;
-        if (p_st != NULL)
-          *p_st = 0;
-        if (__kmp_env_consistency_check) {
-          if (pr->pushed_ws != ct_none) {
-            pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
-          }
-        }
-      } else {
-        start = pr->u.p.lb;
-        limit = chunk + init - 1;
-        incr = pr->u.p.st;
-
-        if ((last = (limit >= trip)) != 0) {
-          limit = trip;
-#if KMP_OS_WINDOWS
-          pr->u.p.last_upper = pr->u.p.ub;
-#endif /* KMP_OS_WINDOWS */
-        }
-        if (p_last != NULL)
-          *p_last = last;
-        if (p_st != NULL)
-          *p_st = incr;
-        if (incr == 1) {
-          *p_lb = start + init;
-          *p_ub = start +
limit; - } else { - *p_lb = start + init * incr; - *p_ub = start + limit * incr; - } - - if (pr->flags.ordered) { - pr->u.p.ordered_lower = init; - pr->u.p.ordered_upper = limit; -#ifdef KMP_DEBUG - { - char *buff; - // create format specifiers before the debug output - buff = __kmp_str_format("__kmp_dispatch_next: T#%%d " - "ordered_lower:%%%s ordered_upper:%%%s\n", - traits_t::spec, traits_t::spec); - KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower, - pr->u.p.ordered_upper)); - __kmp_str_free(&buff); - } -#endif - } // if - } // if - } else { - pr->u.p.tc = 0; - *p_lb = pr->u.p.lb; - *p_ub = pr->u.p.ub; -#if KMP_OS_WINDOWS - pr->u.p.last_upper = *p_ub; -#endif /* KMP_OS_WINDOWS */ - if (p_last != NULL) - *p_last = TRUE; - if (p_st != NULL) - *p_st = pr->u.p.st; - } // if -#ifdef KMP_DEBUG - { - char *buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s " - "p_ub:%%%s p_st:%%%s p_last:%%p %%d returning:%%d\n", - traits_t::spec, traits_t::spec, traits_t::spec); - KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, *p_st, p_last, *p_last, status)); - __kmp_str_free(&buff); - } -#endif -#if INCLUDE_SSC_MARKS - SSC_MARK_DISPATCH_NEXT(); -#endif - OMPT_LOOP_END; - KMP_STATS_LOOP_END; - return status; - } else { - kmp_int32 last = 0; - dispatch_shared_info_template volatile *sh; - - KMP_DEBUG_ASSERT(th->th.th_dispatch == - &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]); - - pr = reinterpret_cast *>( - th->th.th_dispatch->th_dispatch_pr_current); - KMP_DEBUG_ASSERT(pr); - sh = reinterpret_cast volatile *>( - th->th.th_dispatch->th_dispatch_sh_current); - KMP_DEBUG_ASSERT(sh); - -#if KMP_USE_HIER_SCHED - if (pr->flags.use_hier) - status = sh->hier->next(loc, gtid, pr, &last, p_lb, p_ub, p_st); - else -#endif // KMP_USE_HIER_SCHED - status = __kmp_dispatch_next_algorithm(gtid, pr, sh, &last, p_lb, p_ub, - p_st, th->th.th_team_nproc, - th->th.th_info.ds.ds_tid); - // status == 0: no more iterations to execute - if (status == 0) { - UT num_done; - - num_done = test_then_inc((volatile ST *)&sh->u.s.num_done); -#ifdef KMP_DEBUG - { - char *buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n", - traits_t::spec); - KD_TRACE(10, (buff, gtid, sh->u.s.num_done)); - __kmp_str_free(&buff); - } -#endif - -#if KMP_USE_HIER_SCHED - pr->flags.use_hier = FALSE; -#endif - if ((ST)num_done == th->th.th_team_nproc - 1) { -#if (KMP_STATIC_STEAL_ENABLED) - if (pr->schedule == kmp_sch_static_steal && - traits_t::type_size > 4) { - int i; - kmp_info_t **other_threads = team->t.t_threads; - // loop complete, safe to destroy locks used for stealing - for (i = 0; i < th->th.th_team_nproc; ++i) { - kmp_lock_t *lck = other_threads[i]->th.th_dispatch->th_steal_lock; - KMP_ASSERT(lck != NULL); - __kmp_destroy_lock(lck); - __kmp_free(lck); - other_threads[i]->th.th_dispatch->th_steal_lock = NULL; - } - } -#endif - /* NOTE: release this buffer to be reused */ - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - sh->u.s.num_done = 0; - sh->u.s.iteration = 0; - - /* TODO replace with general release procedure? */ - if (pr->flags.ordered) { - sh->u.s.ordered_iteration = 0; - } - - KMP_MB(); /* Flush all pending memory write invalidates. 
*/ - - sh->buffer_index += __kmp_dispatch_num_buffers; - KD_TRACE(100, ("__kmp_dispatch_next: T#%d change buffer_index:%d\n", - gtid, sh->buffer_index)); - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - } // if - if (__kmp_env_consistency_check) { - if (pr->pushed_ws != ct_none) { - pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc); - } - } - - th->th.th_dispatch->th_deo_fcn = NULL; - th->th.th_dispatch->th_dxo_fcn = NULL; - th->th.th_dispatch->th_dispatch_sh_current = NULL; - th->th.th_dispatch->th_dispatch_pr_current = NULL; - } // if (status == 0) -#if KMP_OS_WINDOWS - else if (last) { - pr->u.p.last_upper = pr->u.p.ub; - } -#endif /* KMP_OS_WINDOWS */ - if (p_last != NULL && status != 0) - *p_last = last; - } // if - -#ifdef KMP_DEBUG - { - char *buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_next: T#%%d normal case: " - "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p (%%d) returning:%%d\n", - traits_t::spec, traits_t::spec, traits_t::spec); - KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last, - (p_last ? *p_last : 0), status)); - __kmp_str_free(&buff); - } -#endif -#if INCLUDE_SSC_MARKS - SSC_MARK_DISPATCH_NEXT(); -#endif - OMPT_LOOP_END; - KMP_STATS_LOOP_END; - return status; -} - -template -static void __kmp_dist_get_bounds(ident_t *loc, kmp_int32 gtid, - kmp_int32 *plastiter, T *plower, T *pupper, - typename traits_t::signed_t incr) { - typedef typename traits_t::unsigned_t UT; - kmp_uint32 team_id; - kmp_uint32 nteams; - UT trip_count; - kmp_team_t *team; - kmp_info_t *th; - - KMP_DEBUG_ASSERT(plastiter && plower && pupper); - KE_TRACE(10, ("__kmpc_dist_get_bounds called (%d)\n", gtid)); -#ifdef KMP_DEBUG - typedef typename traits_t::signed_t ST; - { - char *buff; - // create format specifiers before the debug output - buff = __kmp_str_format("__kmpc_dist_get_bounds: T#%%d liter=%%d " - "iter=(%%%s, %%%s, %%%s) signed?<%s>\n", - traits_t::spec, traits_t::spec, - traits_t::spec, traits_t::spec); - KD_TRACE(100, (buff, gtid, *plastiter, *plower, *pupper, incr)); - __kmp_str_free(&buff); - } -#endif - - if (__kmp_env_consistency_check) { - if (incr == 0) { - __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, - loc); - } - if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) { - // The loop is illegal. 
- // Some zero-trip loops maintained by compiler, e.g.: - // for(i=10;i<0;++i) // lower >= upper - run-time check - // for(i=0;i>10;--i) // lower <= upper - run-time check - // for(i=0;i>10;++i) // incr > 0 - compile-time check - // for(i=10;i<0;--i) // incr < 0 - compile-time check - // Compiler does not check the following illegal loops: - // for(i=0;i<10;i+=incr) // where incr<0 - // for(i=10;i>0;i-=incr) // where incr<0 - __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc); - } - } - th = __kmp_threads[gtid]; - team = th->th.th_team; -#if OMP_40_ENABLED - KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct - nteams = th->th.th_teams_size.nteams; -#endif - team_id = team->t.t_master_tid; - KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc); - - // compute global trip count - if (incr == 1) { - trip_count = *pupper - *plower + 1; - } else if (incr == -1) { - trip_count = *plower - *pupper + 1; - } else if (incr > 0) { - // upper-lower can exceed the limit of signed type - trip_count = (UT)(*pupper - *plower) / incr + 1; - } else { - trip_count = (UT)(*plower - *pupper) / (-incr) + 1; - } - - if (trip_count <= nteams) { - KMP_DEBUG_ASSERT( - __kmp_static == kmp_sch_static_greedy || - __kmp_static == - kmp_sch_static_balanced); // Unknown static scheduling type. - // only some teams get single iteration, others get nothing - if (team_id < trip_count) { - *pupper = *plower = *plower + team_id * incr; - } else { - *plower = *pupper + incr; // zero-trip loop - } - if (plastiter != NULL) - *plastiter = (team_id == trip_count - 1); - } else { - if (__kmp_static == kmp_sch_static_balanced) { - UT chunk = trip_count / nteams; - UT extras = trip_count % nteams; - *plower += - incr * (team_id * chunk + (team_id < extras ? team_id : extras)); - *pupper = *plower + chunk * incr - (team_id < extras ? 0 : incr); - if (plastiter != NULL) - *plastiter = (team_id == nteams - 1); - } else { - T chunk_inc_count = - (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr; - T upper = *pupper; - KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy); - // Unknown static scheduling type. - *plower += team_id * chunk_inc_count; - *pupper = *plower + chunk_inc_count - incr; - // Check/correct bounds if needed - if (incr > 0) { - if (*pupper < *plower) - *pupper = traits_t::max_value; - if (plastiter != NULL) - *plastiter = *plower <= upper && *pupper > upper - incr; - if (*pupper > upper) - *pupper = upper; // tracker C73258 - } else { - if (*pupper > *plower) - *pupper = traits_t::min_value; - if (plastiter != NULL) - *plastiter = *plower >= upper && *pupper < upper - incr; - if (*pupper < upper) - *pupper = upper; // tracker C73258 - } - } - } -} - -//----------------------------------------------------------------------------- -// Dispatch routines -// Transfer call to template< type T > -// __kmp_dispatch_init( ident_t *loc, int gtid, enum sched_type schedule, -// T lb, T ub, ST st, ST chunk ) -extern "C" { - -/*! -@ingroup WORK_SHARING -@{ -@param loc Source location -@param gtid Global thread id -@param schedule Schedule type -@param lb Lower bound -@param ub Upper bound -@param st Step (or increment if you prefer) -@param chunk The chunk size to block with - -This function prepares the runtime to start a dynamically scheduled for loop, -saving the loop arguments. -These functions are all identical apart from the types of the arguments. 
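
[Editor's aside on __kmp_dist_get_bounds above] For the balanced case, the
global iteration space is split across nteams with the familiar chunk/extras
arithmetic. A standalone sketch (signed 32-bit bounds, incr == 1; names are
mine):

    #include <cstdio>

    static void team_bounds(int team_id, int nteams, int lower, int upper,
                            int *tlo, int *thi) {
      unsigned trip = (unsigned)(upper - lower) + 1;
      unsigned chunk = trip / (unsigned)nteams;
      unsigned extras = trip % (unsigned)nteams;
      unsigned lo = team_id * chunk + ((unsigned)team_id < extras
                                           ? (unsigned)team_id
                                           : extras);
      *tlo = lower + (int)lo;
      *thi = *tlo + (int)chunk - (((unsigned)team_id < extras) ? 0 : 1);
    }

    int main() {
      for (int t = 0; t < 3; ++t) {
        int lo, hi;
        team_bounds(t, 3, 0, 9, &lo, &hi);
        printf("team %d: [%d, %d]\n", t, lo, hi);
      }
      return 0;
    }

With 10 iterations over 3 teams this prints [0,3] [4,6] [7,9], i.e. the first
trip%nteams teams absorb one extra iteration each, as in the code above.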
-*/
-
-void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
-                            enum sched_type schedule, kmp_int32 lb,
-                            kmp_int32 ub, kmp_int32 st, kmp_int32 chunk) {
-  KMP_DEBUG_ASSERT(__kmp_init_serial);
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
-  __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true);
-}
-/*!
-See @ref __kmpc_dispatch_init_4
-*/
-void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
-                             enum sched_type schedule, kmp_uint32 lb,
-                             kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk) {
-  KMP_DEBUG_ASSERT(__kmp_init_serial);
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
-  __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk,
-                                  true);
-}
-
-/*!
-See @ref __kmpc_dispatch_init_4
-*/
-void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
-                            enum sched_type schedule, kmp_int64 lb,
-                            kmp_int64 ub, kmp_int64 st, kmp_int64 chunk) {
-  KMP_DEBUG_ASSERT(__kmp_init_serial);
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
-  __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true);
-}
-
-/*!
-See @ref __kmpc_dispatch_init_4
-*/
-void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
-                             enum sched_type schedule, kmp_uint64 lb,
-                             kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk) {
-  KMP_DEBUG_ASSERT(__kmp_init_serial);
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
-  __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk,
-                                  true);
-}
-
-/*!
-See @ref __kmpc_dispatch_init_4
-
-These differ from the __kmpc_dispatch_init set of functions in that they are
-called for the composite distribute parallel for construct, so before the
-regular iterations are dispatched, the per-team iteration space has to be
-calculated.
-
-These functions are all identical apart from the types of the arguments.
-*/
-void __kmpc_dist_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
-                                 enum sched_type schedule, kmp_int32 *p_last,
-                                 kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
-                                 kmp_int32 chunk) {
-  KMP_DEBUG_ASSERT(__kmp_init_serial);
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
-  __kmp_dist_get_bounds<kmp_int32>(loc, gtid, p_last, &lb, &ub, st);
-  __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true);
-}
-
-void __kmpc_dist_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
-                                  enum sched_type schedule, kmp_int32 *p_last,
-                                  kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
-                                  kmp_int32 chunk) {
-  KMP_DEBUG_ASSERT(__kmp_init_serial);
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
-  __kmp_dist_get_bounds<kmp_uint32>(loc, gtid, p_last, &lb, &ub, st);
-  __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk,
-                                  true);
-}
-
-void __kmpc_dist_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
-                                 enum sched_type schedule, kmp_int32 *p_last,
-                                 kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
-                                 kmp_int64 chunk) {
-  KMP_DEBUG_ASSERT(__kmp_init_serial);
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
-  __kmp_dist_get_bounds<kmp_int64>(loc, gtid, p_last, &lb, &ub, st);
-  __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true);
-}
-
-void __kmpc_dist_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
-                                  enum sched_type schedule, kmp_int32 *p_last,
-                                  kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
-                                  kmp_int64 chunk) {
-  KMP_DEBUG_ASSERT(__kmp_init_serial);
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
-  __kmp_dist_get_bounds<kmp_uint64>(loc, gtid, p_last, &lb, &ub, st);
-  __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk,
-                                  true);
-}
-
-/*!
-/*!
-@param loc Source code location
-@param gtid Global thread id
-@param p_last Pointer to a flag set to one if this is the last chunk or zero
-otherwise
-@param p_lb Pointer to the lower bound for the next chunk of work
-@param p_ub Pointer to the upper bound for the next chunk of work
-@param p_st Pointer to the stride for the next chunk of work
-@return one if there is work to be done, zero otherwise
-
-Get the next dynamically allocated chunk of work for this thread.
-If there is no more work, then the lb, ub and stride need not be modified.
-*/
-int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
-                           kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st) {
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
-  return __kmp_dispatch_next<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-                                        ,
-                                        OMPT_LOAD_RETURN_ADDRESS(gtid)
-#endif
-                                            );
-}
-
-/*!
-See @ref __kmpc_dispatch_next_4
-*/
-int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
-                            kmp_uint32 *p_lb, kmp_uint32 *p_ub,
-                            kmp_int32 *p_st) {
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
-  return __kmp_dispatch_next<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-                                         ,
-                                         OMPT_LOAD_RETURN_ADDRESS(gtid)
-#endif
-                                             );
-}
-
-/*!
-See @ref __kmpc_dispatch_next_4
-*/
-int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
-                           kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st) {
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
-  return __kmp_dispatch_next<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-                                        ,
-                                        OMPT_LOAD_RETURN_ADDRESS(gtid)
-#endif
-                                            );
-}
-
-/*!
-See @ref __kmpc_dispatch_next_4
-*/
-int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
-                            kmp_uint64 *p_lb, kmp_uint64 *p_ub,
-                            kmp_int64 *p_st) {
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
-  return __kmp_dispatch_next<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-                                         ,
-                                         OMPT_LOAD_RETURN_ADDRESS(gtid)
-#endif
-                                             );
-}
-
-/*!
-@param loc Source code location
-@param gtid Global thread id
-
-Mark the end of a dynamic loop.
-*/
-void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid) {
-  __kmp_dispatch_finish<kmp_uint32>(gtid, loc);
-}
-
-/*!
-See @ref __kmpc_dispatch_fini_4
-*/
-void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid) {
-  __kmp_dispatch_finish<kmp_uint64>(gtid, loc);
-}
-
-/*!
-See @ref __kmpc_dispatch_fini_4
-*/
-void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid) {
-  __kmp_dispatch_finish<kmp_uint32>(gtid, loc);
-}
-
-/*!
-See @ref __kmpc_dispatch_fini_4
-*/
-void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid) {
-  __kmp_dispatch_finish<kmp_uint64>(gtid, loc);
-}
-/*! @} */
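A note on the OMPT_STORE_RETURN_ADDRESS / OMPT_LOAD_RETURN_ADDRESS pairing visible above: the extern "C" entry point captures its caller's code address in a per-thread slot on entry, and the internal template routine retrieves it later so OMPT tool callbacks can report where in user code the dispatch originated. A conceptual sketch of the pattern (simplified; the real macros live in the OMPT support headers and additionally guard on ompt_enabled and a valid gtid):

    // At the API boundary:
    //   OMPT_STORE_RETURN_ADDRESS(gtid)
    //     ~> __kmp_threads[gtid]->th.ompt_thread_info.return_address =
    //            __builtin_return_address(0);
    // Inside the runtime, when invoking a tool callback:
    //   OMPT_LOAD_RETURN_ADDRESS(gtid)
    //     ~> reads back that per-thread slot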
-
-//-----------------------------------------------------------------------------
-// Non-template routines from kmp_dispatch.cpp used in other sources
-
-kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker) {
-  return value == checker;
-}
-
-kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker) {
-  return value != checker;
-}
-
-kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker) {
-  return value < checker;
-}
-
-kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker) {
-  return value >= checker;
-}
-
-kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker) {
-  return value <= checker;
-}
-
-kmp_uint32
-__kmp_wait_yield_4(volatile kmp_uint32 *spinner, kmp_uint32 checker,
-                   kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
-                   void *obj // Higher-level synchronization object, or NULL.
-                   ) {
-  // note: we may not belong to a team at this point
-  volatile kmp_uint32 *spin = spinner;
-  kmp_uint32 check = checker;
-  kmp_uint32 spins;
-  kmp_uint32 (*f)(kmp_uint32, kmp_uint32) = pred;
-  kmp_uint32 r;
-
-  KMP_FSYNC_SPIN_INIT(obj, CCAST(kmp_uint32 *, spin));
-  KMP_INIT_YIELD(spins);
-  // main wait spin loop
-  while (!f(r = TCR_4(*spin), check)) {
-    KMP_FSYNC_SPIN_PREPARE(obj);
-    /* GEH - remove this since it was accidentally introduced when kmp_wait was
-       split. It causes problems with infinite recursion because of exit lock */
-    /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort)
-        __kmp_abort_thread(); */
-
-    /* if we have waited a bit, or are oversubscribed, yield */
-    /* pause is in the following code */
-    KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
-    KMP_YIELD_SPIN(spins);
-  }
-  KMP_FSYNC_SPIN_ACQUIRED(obj);
-  return r;
-}
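For context, a hypothetical caller (not from this file) passes one of the small predicates above together with an address to spin on; for example, to block until another thread publishes a flag value of 1:

    volatile kmp_uint32 go = 0; // set to 1 by some other thread
    // ...
    kmp_uint32 seen = __kmp_wait_yield_4(&go, 1, __kmp_eq_4, NULL);
    // returns once __kmp_eq_4(go, 1) holds; yields while oversubscribed

The predicate indirection lets the same spin loop implement ==, !=, <, >= and <= waits without duplicating the yield/backoff logic.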
-void __kmp_wait_yield_4_ptr(
-    void *spinner, kmp_uint32 checker, kmp_uint32 (*pred)(void *, kmp_uint32),
-    void *obj // Higher-level synchronization object, or NULL.
-    ) {
-  // note: we may not belong to a team at this point
-  void *spin = spinner;
-  kmp_uint32 check = checker;
-  kmp_uint32 spins;
-  kmp_uint32 (*f)(void *, kmp_uint32) = pred;
-
-  KMP_FSYNC_SPIN_INIT(obj, spin);
-  KMP_INIT_YIELD(spins);
-  // main wait spin loop
-  while (!f(spin, check)) {
-    KMP_FSYNC_SPIN_PREPARE(obj);
-    /* if we have waited a bit, or are oversubscribed, yield */
-    /* pause is in the following code */
-    KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
-    KMP_YIELD_SPIN(spins);
-  }
-  KMP_FSYNC_SPIN_ACQUIRED(obj);
-}
-
-} // extern "C"
-
-#ifdef KMP_GOMP_COMPAT
-
-void __kmp_aux_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
-                               enum sched_type schedule, kmp_int32 lb,
-                               kmp_int32 ub, kmp_int32 st, kmp_int32 chunk,
-                               int push_ws) {
-  __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk,
-                                 push_ws);
-}
-
-void __kmp_aux_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
-                                enum sched_type schedule, kmp_uint32 lb,
-                                kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk,
-                                int push_ws) {
-  __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk,
-                                  push_ws);
-}
-
-void __kmp_aux_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
-                               enum sched_type schedule, kmp_int64 lb,
-                               kmp_int64 ub, kmp_int64 st, kmp_int64 chunk,
-                               int push_ws) {
-  __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk,
-                                 push_ws);
-}
-
-void __kmp_aux_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
-                                enum sched_type schedule, kmp_uint64 lb,
-                                kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk,
-                                int push_ws) {
-  __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk,
-                                  push_ws);
-}
-
-void __kmp_aux_dispatch_fini_chunk_4(ident_t *loc, kmp_int32 gtid) {
-  __kmp_dispatch_finish_chunk<kmp_uint32>(gtid, loc);
-}
-
-void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid) {
-  __kmp_dispatch_finish_chunk<kmp_uint64>(gtid, loc);
-}
-
-void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid) {
-  __kmp_dispatch_finish_chunk<kmp_uint32>(gtid, loc);
-}
-
-void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid) {
-  __kmp_dispatch_finish_chunk<kmp_uint64>(gtid, loc);
-}
-
-#endif /* KMP_GOMP_COMPAT */
-
-/* ------------------------------------------------------------------------ */

Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_dispatch.cpp
___________________________________________________________________
Deleted: svn:eol-style
## -1 +0,0 ##
-native
\ No newline at end of property
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Deleted: svn:mime-type
## -1 +0,0 ##
-text/plain
\ No newline at end of property
Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_platform.h
===================================================================
--- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_platform.h	(revision 348960)
+++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_platform.h	(nonexistent)
@@ -1,207 +0,0 @@
-/*
- * kmp_platform.h -- header for determining operating system and architecture
- */
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-// -//===----------------------------------------------------------------------===// - -#ifndef KMP_PLATFORM_H -#define KMP_PLATFORM_H - -/* ---------------------- Operating system recognition ------------------- */ - -#define KMP_OS_LINUX 0 -#define KMP_OS_DRAGONFLY 0 -#define KMP_OS_FREEBSD 0 -#define KMP_OS_NETBSD 0 -#define KMP_OS_OPENBSD 0 -#define KMP_OS_DARWIN 0 -#define KMP_OS_WINDOWS 0 -#define KMP_OS_CNK 0 -#define KMP_OS_HURD 0 -#define KMP_OS_UNIX 0 /* disjunction of KMP_OS_LINUX, KMP_OS_DARWIN etc. */ - -#ifdef _WIN32 -#undef KMP_OS_WINDOWS -#define KMP_OS_WINDOWS 1 -#endif - -#if (defined __APPLE__ && defined __MACH__) -#undef KMP_OS_DARWIN -#define KMP_OS_DARWIN 1 -#endif - -// in some ppc64 linux installations, only the second condition is met -#if (defined __linux) -#undef KMP_OS_LINUX -#define KMP_OS_LINUX 1 -#elif (defined __linux__) -#undef KMP_OS_LINUX -#define KMP_OS_LINUX 1 -#else -#endif - -#if (defined __DragonFly__) -#undef KMP_OS_DRAGONFLY -#define KMP_OS_DRAGONFLY 1 -#endif - -#if (defined __FreeBSD__) -#undef KMP_OS_FREEBSD -#define KMP_OS_FREEBSD 1 -#endif - -#if (defined __NetBSD__) -#undef KMP_OS_NETBSD -#define KMP_OS_NETBSD 1 -#endif - -#if (defined __OpenBSD__) -#undef KMP_OS_OPENBSD -#define KMP_OS_OPENBSD 1 -#endif - -#if (defined __bgq__) -#undef KMP_OS_CNK -#define KMP_OS_CNK 1 -#endif - -#if (defined __GNU__) -#undef KMP_OS_HURD -#define KMP_OS_HURD 1 -#endif - -#if (1 != \ - KMP_OS_LINUX + KMP_OS_DRAGONFLY + KMP_OS_FREEBSD + KMP_OS_NETBSD + \ - KMP_OS_OPENBSD + KMP_OS_DARWIN + KMP_OS_WINDOWS + KMP_OS_HURD) -#error Unknown OS -#endif - -#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ - KMP_OS_OPENBSD || KMP_OS_DARWIN || KMP_OS_HURD -#undef KMP_OS_UNIX -#define KMP_OS_UNIX 1 -#endif - -/* ---------------------- Architecture recognition ------------------- */ - -#define KMP_ARCH_X86 0 -#define KMP_ARCH_X86_64 0 -#define KMP_ARCH_AARCH64 0 -#define KMP_ARCH_PPC64_BE 0 -#define KMP_ARCH_PPC64_LE 0 -#define KMP_ARCH_PPC64 (KMP_ARCH_PPC64_LE || KMP_ARCH_PPC64_BE) -#define KMP_ARCH_MIPS 0 -#define KMP_ARCH_MIPS64 0 - -#if KMP_OS_WINDOWS -#if defined(_M_AMD64) || defined(__x86_64) -#undef KMP_ARCH_X86_64 -#define KMP_ARCH_X86_64 1 -#else -#undef KMP_ARCH_X86 -#define KMP_ARCH_X86 1 -#endif -#endif - -#if KMP_OS_UNIX -#if defined __x86_64 -#undef KMP_ARCH_X86_64 -#define KMP_ARCH_X86_64 1 -#elif defined __i386 -#undef KMP_ARCH_X86 -#define KMP_ARCH_X86 1 -#elif defined __powerpc64__ -#if defined __LITTLE_ENDIAN__ -#undef KMP_ARCH_PPC64_LE -#define KMP_ARCH_PPC64_LE 1 -#else -#undef KMP_ARCH_PPC64_BE -#define KMP_ARCH_PPC64_BE 1 -#endif -#elif defined __aarch64__ -#undef KMP_ARCH_AARCH64 -#define KMP_ARCH_AARCH64 1 -#elif defined __mips__ -#if defined __mips64 -#undef KMP_ARCH_MIPS64 -#define KMP_ARCH_MIPS64 1 -#else -#undef KMP_ARCH_MIPS -#define KMP_ARCH_MIPS 1 -#endif -#endif -#endif - -#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7R__) || \ - defined(__ARM_ARCH_7A__) -#define KMP_ARCH_ARMV7 1 -#endif - -#if defined(KMP_ARCH_ARMV7) || defined(__ARM_ARCH_6__) || \ - defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || \ - defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6T2__) || \ - defined(__ARM_ARCH_6ZK__) -#define KMP_ARCH_ARMV6 1 -#endif - -#if defined(KMP_ARCH_ARMV6) || defined(__ARM_ARCH_5T__) || \ - defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) || \ - defined(__ARM_ARCH_5TEJ__) -#define KMP_ARCH_ARMV5 1 -#endif - -#if defined(KMP_ARCH_ARMV5) || defined(__ARM_ARCH_4__) || \ - defined(__ARM_ARCH_4T__) 
-#define KMP_ARCH_ARMV4 1 -#endif - -#if defined(KMP_ARCH_ARMV4) || defined(__ARM_ARCH_3__) || \ - defined(__ARM_ARCH_3M__) -#define KMP_ARCH_ARMV3 1 -#endif - -#if defined(KMP_ARCH_ARMV3) || defined(__ARM_ARCH_2__) -#define KMP_ARCH_ARMV2 1 -#endif - -#if defined(KMP_ARCH_ARMV2) -#define KMP_ARCH_ARM 1 -#endif - -#if defined(__MIC__) || defined(__MIC2__) -#define KMP_MIC 1 -#if __MIC2__ || __KNC__ -#define KMP_MIC1 0 -#define KMP_MIC2 1 -#else -#define KMP_MIC1 1 -#define KMP_MIC2 0 -#endif -#else -#define KMP_MIC 0 -#define KMP_MIC1 0 -#define KMP_MIC2 0 -#endif - -/* Specify 32 bit architectures here */ -#define KMP_32_BIT_ARCH (KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS) - -// Platforms which support Intel(R) Many Integrated Core Architecture -#define KMP_MIC_SUPPORTED \ - ((KMP_ARCH_X86 || KMP_ARCH_X86_64) && (KMP_OS_LINUX || KMP_OS_WINDOWS)) - -// TODO: Fixme - This is clever, but really fugly -#if (1 != \ - KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64 + \ - KMP_ARCH_AARCH64 + KMP_ARCH_MIPS + KMP_ARCH_MIPS64) -#error Unknown or unsupported architecture -#endif - -#endif // KMP_PLATFORM_H Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_platform.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_atomic.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_atomic.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_atomic.h (nonexistent) @@ -1,1776 +0,0 @@ -/* - * kmp_atomic.h - ATOMIC header file - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#ifndef KMP_ATOMIC_H -#define KMP_ATOMIC_H - -#include "kmp_lock.h" -#include "kmp_os.h" - -#if OMPT_SUPPORT -#include "ompt-specific.h" -#endif - -// C++ build port. -// Intel compiler does not support _Complex datatype on win. -// Intel compiler supports _Complex datatype on lin and mac. -// On the other side, there is a problem of stack alignment on lin_32 and mac_32 -// if the rhs is cmplx80 or cmplx128 typedef'ed datatype. -// The decision is: to use compiler supported _Complex type on lin and mac, -// to use typedef'ed types on win. -// Condition for WIN64 was modified in anticipation of 10.1 build compiler. - -#if defined(__cplusplus) && (KMP_OS_WINDOWS) -// create shortcuts for c99 complex types - -// Visual Studio cannot have function parameters that have the -// align __declspec attribute, so we must remove it. (Compiler Error C2719) -#if KMP_COMPILER_MSVC -#undef KMP_DO_ALIGN -#define KMP_DO_ALIGN(alignment) /* Nothing */ -#endif - -#if (_MSC_VER < 1600) && defined(_DEBUG) -// Workaround for the problem of _DebugHeapTag unresolved external. 
-// This problem prevented us from using our static debug library for C tests
-// compiled with the /MDd option (the library itself is built with /MTd).
-#undef _DEBUG
-#define _DEBUG_TEMPORARILY_UNSET_
-#endif
-
-#include <complex>
-
-template <typename type_lhs, typename type_rhs>
-std::complex<type_lhs> __kmp_lhs_div_rhs(const std::complex<type_lhs> &lhs,
-                                         const std::complex<type_rhs> &rhs) {
-  type_lhs a = lhs.real();
-  type_lhs b = lhs.imag();
-  type_rhs c = rhs.real();
-  type_rhs d = rhs.imag();
-  type_rhs den = c * c + d * d;
-  type_rhs r = (a * c + b * d);
-  type_rhs i = (b * c - a * d);
-  std::complex<type_lhs> ret(r / den, i / den);
-  return ret;
-}
-
-// complex8
-struct __kmp_cmplx64_t : std::complex<double> {
-
-  __kmp_cmplx64_t() : std::complex<double>() {}
-
-  __kmp_cmplx64_t(const std::complex<double> &cd) : std::complex<double>(cd) {}
-
-  void operator/=(const __kmp_cmplx64_t &rhs) {
-    std::complex<double> lhs = *this;
-    *this = __kmp_lhs_div_rhs(lhs, rhs);
-  }
-
-  __kmp_cmplx64_t operator/(const __kmp_cmplx64_t &rhs) {
-    std::complex<double> lhs = *this;
-    return __kmp_lhs_div_rhs(lhs, rhs);
-  }
-};
-typedef struct __kmp_cmplx64_t kmp_cmplx64;
-
-// complex4
-struct __kmp_cmplx32_t : std::complex<float> {
-
-  __kmp_cmplx32_t() : std::complex<float>() {}
-
-  __kmp_cmplx32_t(const std::complex<float> &cf) : std::complex<float>(cf) {}
-
-  __kmp_cmplx32_t operator+(const __kmp_cmplx32_t &b) {
-    std::complex<float> lhs = *this;
-    std::complex<float> rhs = b;
-    return (lhs + rhs);
-  }
-  __kmp_cmplx32_t operator-(const __kmp_cmplx32_t &b) {
-    std::complex<float> lhs = *this;
-    std::complex<float> rhs = b;
-    return (lhs - rhs);
-  }
-  __kmp_cmplx32_t operator*(const __kmp_cmplx32_t &b) {
-    std::complex<float> lhs = *this;
-    std::complex<float> rhs = b;
-    return (lhs * rhs);
-  }
-
-  __kmp_cmplx32_t operator+(const kmp_cmplx64 &b) {
-    kmp_cmplx64 t = kmp_cmplx64(*this) + b;
-    std::complex<double> d(t);
-    std::complex<float> f(d);
-    __kmp_cmplx32_t r(f);
-    return r;
-  }
-  __kmp_cmplx32_t operator-(const kmp_cmplx64 &b) {
-    kmp_cmplx64 t = kmp_cmplx64(*this) - b;
-    std::complex<double> d(t);
-    std::complex<float> f(d);
-    __kmp_cmplx32_t r(f);
-    return r;
-  }
-  __kmp_cmplx32_t operator*(const kmp_cmplx64 &b) {
-    kmp_cmplx64 t = kmp_cmplx64(*this) * b;
-    std::complex<double> d(t);
-    std::complex<float> f(d);
-    __kmp_cmplx32_t r(f);
-    return r;
-  }
-
-  void operator/=(const __kmp_cmplx32_t &rhs) {
-    std::complex<float> lhs = *this;
-    *this = __kmp_lhs_div_rhs(lhs, rhs);
-  }
-
-  __kmp_cmplx32_t operator/(const __kmp_cmplx32_t &rhs) {
-    std::complex<float> lhs = *this;
-    return __kmp_lhs_div_rhs(lhs, rhs);
-  }
-
-  void operator/=(const kmp_cmplx64 &rhs) {
-    std::complex<float> lhs = *this;
-    *this = __kmp_lhs_div_rhs(lhs, rhs);
-  }
-
-  __kmp_cmplx32_t operator/(const kmp_cmplx64 &rhs) {
-    std::complex<float> lhs = *this;
-    return __kmp_lhs_div_rhs(lhs, rhs);
-  }
-};
-typedef struct __kmp_cmplx32_t kmp_cmplx32;
-
-// complex10
-struct KMP_DO_ALIGN(16) __kmp_cmplx80_t : std::complex<long double> {
-
-  __kmp_cmplx80_t() : std::complex<long double>() {}
-
-  __kmp_cmplx80_t(const std::complex<long double> &cld)
-      : std::complex<long double>(cld) {}
-
-  void operator/=(const __kmp_cmplx80_t &rhs) {
-    std::complex<long double> lhs = *this;
-    *this = __kmp_lhs_div_rhs(lhs, rhs);
-  }
-
-  __kmp_cmplx80_t operator/(const __kmp_cmplx80_t &rhs) {
-    std::complex<long double> lhs = *this;
-    return __kmp_lhs_div_rhs(lhs, rhs);
-  }
-};
-typedef KMP_DO_ALIGN(16) struct __kmp_cmplx80_t kmp_cmplx80;
-
-// complex16
-#if KMP_HAVE_QUAD
-struct __kmp_cmplx128_t : std::complex<_Quad> {
-
-  __kmp_cmplx128_t() : std::complex<_Quad>() {}
-
-  __kmp_cmplx128_t(const std::complex<_Quad> &cq) : std::complex<_Quad>(cq) {}
-
-  void operator/=(const __kmp_cmplx128_t &rhs) {
-    std::complex<_Quad> lhs = *this;
-    *this = __kmp_lhs_div_rhs(lhs, rhs);
-  }
-
-  __kmp_cmplx128_t operator/(const
__kmp_cmplx128_t &rhs) { - std::complex<_Quad> lhs = *this; - return __kmp_lhs_div_rhs(lhs, rhs); - } -}; -typedef struct __kmp_cmplx128_t kmp_cmplx128; -#endif /* KMP_HAVE_QUAD */ - -#ifdef _DEBUG_TEMPORARILY_UNSET_ -#undef _DEBUG_TEMPORARILY_UNSET_ -// Set it back now -#define _DEBUG 1 -#endif - -#else -// create shortcuts for c99 complex types -typedef float _Complex kmp_cmplx32; -typedef double _Complex kmp_cmplx64; -typedef long double _Complex kmp_cmplx80; -#if KMP_HAVE_QUAD -typedef _Quad _Complex kmp_cmplx128; -#endif -#endif - -// Compiler 12.0 changed alignment of 16 and 32-byte arguments (like _Quad -// and kmp_cmplx128) on IA-32 architecture. The following aligned structures -// are implemented to support the old alignment in 10.1, 11.0, 11.1 and -// introduce the new alignment in 12.0. See CQ88405. -#if KMP_ARCH_X86 && KMP_HAVE_QUAD - -// 4-byte aligned structures for backward compatibility. - -#pragma pack(push, 4) - -struct KMP_DO_ALIGN(4) Quad_a4_t { - _Quad q; - - Quad_a4_t() : q() {} - Quad_a4_t(const _Quad &cq) : q(cq) {} - - Quad_a4_t operator+(const Quad_a4_t &b) { - _Quad lhs = (*this).q; - _Quad rhs = b.q; - return (Quad_a4_t)(lhs + rhs); - } - - Quad_a4_t operator-(const Quad_a4_t &b) { - _Quad lhs = (*this).q; - _Quad rhs = b.q; - return (Quad_a4_t)(lhs - rhs); - } - Quad_a4_t operator*(const Quad_a4_t &b) { - _Quad lhs = (*this).q; - _Quad rhs = b.q; - return (Quad_a4_t)(lhs * rhs); - } - - Quad_a4_t operator/(const Quad_a4_t &b) { - _Quad lhs = (*this).q; - _Quad rhs = b.q; - return (Quad_a4_t)(lhs / rhs); - } -}; - -struct KMP_DO_ALIGN(4) kmp_cmplx128_a4_t { - kmp_cmplx128 q; - - kmp_cmplx128_a4_t() : q() {} - - kmp_cmplx128_a4_t(const kmp_cmplx128 &c128) : q(c128) {} - - kmp_cmplx128_a4_t operator+(const kmp_cmplx128_a4_t &b) { - kmp_cmplx128 lhs = (*this).q; - kmp_cmplx128 rhs = b.q; - return (kmp_cmplx128_a4_t)(lhs + rhs); - } - kmp_cmplx128_a4_t operator-(const kmp_cmplx128_a4_t &b) { - kmp_cmplx128 lhs = (*this).q; - kmp_cmplx128 rhs = b.q; - return (kmp_cmplx128_a4_t)(lhs - rhs); - } - kmp_cmplx128_a4_t operator*(const kmp_cmplx128_a4_t &b) { - kmp_cmplx128 lhs = (*this).q; - kmp_cmplx128 rhs = b.q; - return (kmp_cmplx128_a4_t)(lhs * rhs); - } - - kmp_cmplx128_a4_t operator/(const kmp_cmplx128_a4_t &b) { - kmp_cmplx128 lhs = (*this).q; - kmp_cmplx128 rhs = b.q; - return (kmp_cmplx128_a4_t)(lhs / rhs); - } -}; - -#pragma pack(pop) - -// New 16-byte aligned structures for 12.0 compiler. 
-struct KMP_DO_ALIGN(16) Quad_a16_t { - _Quad q; - - Quad_a16_t() : q() {} - Quad_a16_t(const _Quad &cq) : q(cq) {} - - Quad_a16_t operator+(const Quad_a16_t &b) { - _Quad lhs = (*this).q; - _Quad rhs = b.q; - return (Quad_a16_t)(lhs + rhs); - } - - Quad_a16_t operator-(const Quad_a16_t &b) { - _Quad lhs = (*this).q; - _Quad rhs = b.q; - return (Quad_a16_t)(lhs - rhs); - } - Quad_a16_t operator*(const Quad_a16_t &b) { - _Quad lhs = (*this).q; - _Quad rhs = b.q; - return (Quad_a16_t)(lhs * rhs); - } - - Quad_a16_t operator/(const Quad_a16_t &b) { - _Quad lhs = (*this).q; - _Quad rhs = b.q; - return (Quad_a16_t)(lhs / rhs); - } -}; - -struct KMP_DO_ALIGN(16) kmp_cmplx128_a16_t { - kmp_cmplx128 q; - - kmp_cmplx128_a16_t() : q() {} - - kmp_cmplx128_a16_t(const kmp_cmplx128 &c128) : q(c128) {} - - kmp_cmplx128_a16_t operator+(const kmp_cmplx128_a16_t &b) { - kmp_cmplx128 lhs = (*this).q; - kmp_cmplx128 rhs = b.q; - return (kmp_cmplx128_a16_t)(lhs + rhs); - } - kmp_cmplx128_a16_t operator-(const kmp_cmplx128_a16_t &b) { - kmp_cmplx128 lhs = (*this).q; - kmp_cmplx128 rhs = b.q; - return (kmp_cmplx128_a16_t)(lhs - rhs); - } - kmp_cmplx128_a16_t operator*(const kmp_cmplx128_a16_t &b) { - kmp_cmplx128 lhs = (*this).q; - kmp_cmplx128 rhs = b.q; - return (kmp_cmplx128_a16_t)(lhs * rhs); - } - - kmp_cmplx128_a16_t operator/(const kmp_cmplx128_a16_t &b) { - kmp_cmplx128 lhs = (*this).q; - kmp_cmplx128 rhs = b.q; - return (kmp_cmplx128_a16_t)(lhs / rhs); - } -}; - -#endif - -#if (KMP_ARCH_X86) -#define QUAD_LEGACY Quad_a4_t -#define CPLX128_LEG kmp_cmplx128_a4_t -#else -#define QUAD_LEGACY _Quad -#define CPLX128_LEG kmp_cmplx128 -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -extern int __kmp_atomic_mode; - -// Atomic locks can easily become contended, so we use queuing locks for them. 
-typedef kmp_queuing_lock_t kmp_atomic_lock_t; - -static inline void __kmp_acquire_atomic_lock(kmp_atomic_lock_t *lck, - kmp_int32 gtid) { -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.ompt_callback_mutex_acquire) { - ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( - ompt_mutex_atomic, 0, kmp_mutex_impl_queuing, (ompt_wait_id_t)lck, - OMPT_GET_RETURN_ADDRESS(0)); - } -#endif - - __kmp_acquire_queuing_lock(lck, gtid); - -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.ompt_callback_mutex_acquired) { - ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( - ompt_mutex_atomic, (ompt_wait_id_t)lck, OMPT_GET_RETURN_ADDRESS(0)); - } -#endif -} - -static inline int __kmp_test_atomic_lock(kmp_atomic_lock_t *lck, - kmp_int32 gtid) { - return __kmp_test_queuing_lock(lck, gtid); -} - -static inline void __kmp_release_atomic_lock(kmp_atomic_lock_t *lck, - kmp_int32 gtid) { - __kmp_release_queuing_lock(lck, gtid); -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.ompt_callback_mutex_released) { - ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( - ompt_mutex_atomic, (ompt_wait_id_t)lck, OMPT_GET_RETURN_ADDRESS(0)); - } -#endif -} - -static inline void __kmp_init_atomic_lock(kmp_atomic_lock_t *lck) { - __kmp_init_queuing_lock(lck); -} - -static inline void __kmp_destroy_atomic_lock(kmp_atomic_lock_t *lck) { - __kmp_destroy_queuing_lock(lck); -} - -// Global Locks -extern kmp_atomic_lock_t __kmp_atomic_lock; /* Control access to all user coded - atomics in Gnu compat mode */ -extern kmp_atomic_lock_t __kmp_atomic_lock_1i; /* Control access to all user - coded atomics for 1-byte fixed - data types */ -extern kmp_atomic_lock_t __kmp_atomic_lock_2i; /* Control access to all user - coded atomics for 2-byte fixed - data types */ -extern kmp_atomic_lock_t __kmp_atomic_lock_4i; /* Control access to all user - coded atomics for 4-byte fixed - data types */ -extern kmp_atomic_lock_t __kmp_atomic_lock_4r; /* Control access to all user - coded atomics for kmp_real32 - data type */ -extern kmp_atomic_lock_t __kmp_atomic_lock_8i; /* Control access to all user - coded atomics for 8-byte fixed - data types */ -extern kmp_atomic_lock_t __kmp_atomic_lock_8r; /* Control access to all user - coded atomics for kmp_real64 - data type */ -extern kmp_atomic_lock_t - __kmp_atomic_lock_8c; /* Control access to all user coded atomics for - complex byte data type */ -extern kmp_atomic_lock_t - __kmp_atomic_lock_10r; /* Control access to all user coded atomics for long - double data type */ -extern kmp_atomic_lock_t __kmp_atomic_lock_16r; /* Control access to all user - coded atomics for _Quad data - type */ -extern kmp_atomic_lock_t __kmp_atomic_lock_16c; /* Control access to all user - coded atomics for double - complex data type*/ -extern kmp_atomic_lock_t - __kmp_atomic_lock_20c; /* Control access to all user coded atomics for long - double complex type*/ -extern kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user - coded atomics for _Quad - complex data type */ - -// Below routines for atomic UPDATE are listed - -// 1-byte -void __kmpc_atomic_fixed1_add(ident_t *id_ref, int gtid, char *lhs, char rhs); -void __kmpc_atomic_fixed1_andb(ident_t *id_ref, int gtid, char *lhs, char rhs); -void __kmpc_atomic_fixed1_div(ident_t *id_ref, int gtid, char *lhs, char rhs); -void __kmpc_atomic_fixed1u_div(ident_t *id_ref, int gtid, unsigned char *lhs, - unsigned char rhs); -void __kmpc_atomic_fixed1_mul(ident_t *id_ref, int gtid, char *lhs, char rhs); -void 
__kmpc_atomic_fixed1_orb(ident_t *id_ref, int gtid, char *lhs, char rhs); -void __kmpc_atomic_fixed1_shl(ident_t *id_ref, int gtid, char *lhs, char rhs); -void __kmpc_atomic_fixed1_shr(ident_t *id_ref, int gtid, char *lhs, char rhs); -void __kmpc_atomic_fixed1u_shr(ident_t *id_ref, int gtid, unsigned char *lhs, - unsigned char rhs); -void __kmpc_atomic_fixed1_sub(ident_t *id_ref, int gtid, char *lhs, char rhs); -void __kmpc_atomic_fixed1_xor(ident_t *id_ref, int gtid, char *lhs, char rhs); -// 2-byte -void __kmpc_atomic_fixed2_add(ident_t *id_ref, int gtid, short *lhs, short rhs); -void __kmpc_atomic_fixed2_andb(ident_t *id_ref, int gtid, short *lhs, - short rhs); -void __kmpc_atomic_fixed2_div(ident_t *id_ref, int gtid, short *lhs, short rhs); -void __kmpc_atomic_fixed2u_div(ident_t *id_ref, int gtid, unsigned short *lhs, - unsigned short rhs); -void __kmpc_atomic_fixed2_mul(ident_t *id_ref, int gtid, short *lhs, short rhs); -void __kmpc_atomic_fixed2_orb(ident_t *id_ref, int gtid, short *lhs, short rhs); -void __kmpc_atomic_fixed2_shl(ident_t *id_ref, int gtid, short *lhs, short rhs); -void __kmpc_atomic_fixed2_shr(ident_t *id_ref, int gtid, short *lhs, short rhs); -void __kmpc_atomic_fixed2u_shr(ident_t *id_ref, int gtid, unsigned short *lhs, - unsigned short rhs); -void __kmpc_atomic_fixed2_sub(ident_t *id_ref, int gtid, short *lhs, short rhs); -void __kmpc_atomic_fixed2_xor(ident_t *id_ref, int gtid, short *lhs, short rhs); -// 4-byte add / sub fixed -void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid, kmp_int32 *lhs, - kmp_int32 rhs); -void __kmpc_atomic_fixed4_sub(ident_t *id_ref, int gtid, kmp_int32 *lhs, - kmp_int32 rhs); -// 4-byte add / sub float -void __kmpc_atomic_float4_add(ident_t *id_ref, int gtid, kmp_real32 *lhs, - kmp_real32 rhs); -void __kmpc_atomic_float4_sub(ident_t *id_ref, int gtid, kmp_real32 *lhs, - kmp_real32 rhs); -// 8-byte add / sub fixed -void __kmpc_atomic_fixed8_add(ident_t *id_ref, int gtid, kmp_int64 *lhs, - kmp_int64 rhs); -void __kmpc_atomic_fixed8_sub(ident_t *id_ref, int gtid, kmp_int64 *lhs, - kmp_int64 rhs); -// 8-byte add / sub float -void __kmpc_atomic_float8_add(ident_t *id_ref, int gtid, kmp_real64 *lhs, - kmp_real64 rhs); -void __kmpc_atomic_float8_sub(ident_t *id_ref, int gtid, kmp_real64 *lhs, - kmp_real64 rhs); -// 4-byte fixed -void __kmpc_atomic_fixed4_andb(ident_t *id_ref, int gtid, kmp_int32 *lhs, - kmp_int32 rhs); -void __kmpc_atomic_fixed4_div(ident_t *id_ref, int gtid, kmp_int32 *lhs, - kmp_int32 rhs); -void __kmpc_atomic_fixed4u_div(ident_t *id_ref, int gtid, kmp_uint32 *lhs, - kmp_uint32 rhs); -void __kmpc_atomic_fixed4_mul(ident_t *id_ref, int gtid, kmp_int32 *lhs, - kmp_int32 rhs); -void __kmpc_atomic_fixed4_orb(ident_t *id_ref, int gtid, kmp_int32 *lhs, - kmp_int32 rhs); -void __kmpc_atomic_fixed4_shl(ident_t *id_ref, int gtid, kmp_int32 *lhs, - kmp_int32 rhs); -void __kmpc_atomic_fixed4_shr(ident_t *id_ref, int gtid, kmp_int32 *lhs, - kmp_int32 rhs); -void __kmpc_atomic_fixed4u_shr(ident_t *id_ref, int gtid, kmp_uint32 *lhs, - kmp_uint32 rhs); -void __kmpc_atomic_fixed4_xor(ident_t *id_ref, int gtid, kmp_int32 *lhs, - kmp_int32 rhs); -// 8-byte fixed -void __kmpc_atomic_fixed8_andb(ident_t *id_ref, int gtid, kmp_int64 *lhs, - kmp_int64 rhs); -void __kmpc_atomic_fixed8_div(ident_t *id_ref, int gtid, kmp_int64 *lhs, - kmp_int64 rhs); -void __kmpc_atomic_fixed8u_div(ident_t *id_ref, int gtid, kmp_uint64 *lhs, - kmp_uint64 rhs); -void __kmpc_atomic_fixed8_mul(ident_t *id_ref, int gtid, kmp_int64 *lhs, - kmp_int64 rhs); -void 
__kmpc_atomic_fixed8_orb(ident_t *id_ref, int gtid, kmp_int64 *lhs, - kmp_int64 rhs); -void __kmpc_atomic_fixed8_shl(ident_t *id_ref, int gtid, kmp_int64 *lhs, - kmp_int64 rhs); -void __kmpc_atomic_fixed8_shr(ident_t *id_ref, int gtid, kmp_int64 *lhs, - kmp_int64 rhs); -void __kmpc_atomic_fixed8u_shr(ident_t *id_ref, int gtid, kmp_uint64 *lhs, - kmp_uint64 rhs); -void __kmpc_atomic_fixed8_xor(ident_t *id_ref, int gtid, kmp_int64 *lhs, - kmp_int64 rhs); -// 4-byte float -void __kmpc_atomic_float4_div(ident_t *id_ref, int gtid, kmp_real32 *lhs, - kmp_real32 rhs); -void __kmpc_atomic_float4_mul(ident_t *id_ref, int gtid, kmp_real32 *lhs, - kmp_real32 rhs); -// 8-byte float -void __kmpc_atomic_float8_div(ident_t *id_ref, int gtid, kmp_real64 *lhs, - kmp_real64 rhs); -void __kmpc_atomic_float8_mul(ident_t *id_ref, int gtid, kmp_real64 *lhs, - kmp_real64 rhs); -// 1-, 2-, 4-, 8-byte logical (&&, ||) -void __kmpc_atomic_fixed1_andl(ident_t *id_ref, int gtid, char *lhs, char rhs); -void __kmpc_atomic_fixed1_orl(ident_t *id_ref, int gtid, char *lhs, char rhs); -void __kmpc_atomic_fixed2_andl(ident_t *id_ref, int gtid, short *lhs, - short rhs); -void __kmpc_atomic_fixed2_orl(ident_t *id_ref, int gtid, short *lhs, short rhs); -void __kmpc_atomic_fixed4_andl(ident_t *id_ref, int gtid, kmp_int32 *lhs, - kmp_int32 rhs); -void __kmpc_atomic_fixed4_orl(ident_t *id_ref, int gtid, kmp_int32 *lhs, - kmp_int32 rhs); -void __kmpc_atomic_fixed8_andl(ident_t *id_ref, int gtid, kmp_int64 *lhs, - kmp_int64 rhs); -void __kmpc_atomic_fixed8_orl(ident_t *id_ref, int gtid, kmp_int64 *lhs, - kmp_int64 rhs); -// MIN / MAX -void __kmpc_atomic_fixed1_max(ident_t *id_ref, int gtid, char *lhs, char rhs); -void __kmpc_atomic_fixed1_min(ident_t *id_ref, int gtid, char *lhs, char rhs); -void __kmpc_atomic_fixed2_max(ident_t *id_ref, int gtid, short *lhs, short rhs); -void __kmpc_atomic_fixed2_min(ident_t *id_ref, int gtid, short *lhs, short rhs); -void __kmpc_atomic_fixed4_max(ident_t *id_ref, int gtid, kmp_int32 *lhs, - kmp_int32 rhs); -void __kmpc_atomic_fixed4_min(ident_t *id_ref, int gtid, kmp_int32 *lhs, - kmp_int32 rhs); -void __kmpc_atomic_fixed8_max(ident_t *id_ref, int gtid, kmp_int64 *lhs, - kmp_int64 rhs); -void __kmpc_atomic_fixed8_min(ident_t *id_ref, int gtid, kmp_int64 *lhs, - kmp_int64 rhs); -void __kmpc_atomic_float4_max(ident_t *id_ref, int gtid, kmp_real32 *lhs, - kmp_real32 rhs); -void __kmpc_atomic_float4_min(ident_t *id_ref, int gtid, kmp_real32 *lhs, - kmp_real32 rhs); -void __kmpc_atomic_float8_max(ident_t *id_ref, int gtid, kmp_real64 *lhs, - kmp_real64 rhs); -void __kmpc_atomic_float8_min(ident_t *id_ref, int gtid, kmp_real64 *lhs, - kmp_real64 rhs); -#if KMP_HAVE_QUAD -void __kmpc_atomic_float16_max(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs, - QUAD_LEGACY rhs); -void __kmpc_atomic_float16_min(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs, - QUAD_LEGACY rhs); -#if (KMP_ARCH_X86) -// Routines with 16-byte arguments aligned to 16-byte boundary; IA-32 -// architecture only -void __kmpc_atomic_float16_max_a16(ident_t *id_ref, int gtid, Quad_a16_t *lhs, - Quad_a16_t rhs); -void __kmpc_atomic_float16_min_a16(ident_t *id_ref, int gtid, Quad_a16_t *lhs, - Quad_a16_t rhs); -#endif -#endif -// .NEQV. 
(same as xor) -void __kmpc_atomic_fixed1_neqv(ident_t *id_ref, int gtid, char *lhs, char rhs); -void __kmpc_atomic_fixed2_neqv(ident_t *id_ref, int gtid, short *lhs, - short rhs); -void __kmpc_atomic_fixed4_neqv(ident_t *id_ref, int gtid, kmp_int32 *lhs, - kmp_int32 rhs); -void __kmpc_atomic_fixed8_neqv(ident_t *id_ref, int gtid, kmp_int64 *lhs, - kmp_int64 rhs); -// .EQV. (same as ~xor) -void __kmpc_atomic_fixed1_eqv(ident_t *id_ref, int gtid, char *lhs, char rhs); -void __kmpc_atomic_fixed2_eqv(ident_t *id_ref, int gtid, short *lhs, short rhs); -void __kmpc_atomic_fixed4_eqv(ident_t *id_ref, int gtid, kmp_int32 *lhs, - kmp_int32 rhs); -void __kmpc_atomic_fixed8_eqv(ident_t *id_ref, int gtid, kmp_int64 *lhs, - kmp_int64 rhs); -// long double type -void __kmpc_atomic_float10_add(ident_t *id_ref, int gtid, long double *lhs, - long double rhs); -void __kmpc_atomic_float10_sub(ident_t *id_ref, int gtid, long double *lhs, - long double rhs); -void __kmpc_atomic_float10_mul(ident_t *id_ref, int gtid, long double *lhs, - long double rhs); -void __kmpc_atomic_float10_div(ident_t *id_ref, int gtid, long double *lhs, - long double rhs); -// _Quad type -#if KMP_HAVE_QUAD -void __kmpc_atomic_float16_add(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs, - QUAD_LEGACY rhs); -void __kmpc_atomic_float16_sub(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs, - QUAD_LEGACY rhs); -void __kmpc_atomic_float16_mul(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs, - QUAD_LEGACY rhs); -void __kmpc_atomic_float16_div(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs, - QUAD_LEGACY rhs); -#if (KMP_ARCH_X86) -// Routines with 16-byte arguments aligned to 16-byte boundary -void __kmpc_atomic_float16_add_a16(ident_t *id_ref, int gtid, Quad_a16_t *lhs, - Quad_a16_t rhs); -void __kmpc_atomic_float16_sub_a16(ident_t *id_ref, int gtid, Quad_a16_t *lhs, - Quad_a16_t rhs); -void __kmpc_atomic_float16_mul_a16(ident_t *id_ref, int gtid, Quad_a16_t *lhs, - Quad_a16_t rhs); -void __kmpc_atomic_float16_div_a16(ident_t *id_ref, int gtid, Quad_a16_t *lhs, - Quad_a16_t rhs); -#endif -#endif -// routines for complex types -void __kmpc_atomic_cmplx4_add(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs, - kmp_cmplx32 rhs); -void __kmpc_atomic_cmplx4_sub(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs, - kmp_cmplx32 rhs); -void __kmpc_atomic_cmplx4_mul(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs, - kmp_cmplx32 rhs); -void __kmpc_atomic_cmplx4_div(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs, - kmp_cmplx32 rhs); -void __kmpc_atomic_cmplx8_add(ident_t *id_ref, int gtid, kmp_cmplx64 *lhs, - kmp_cmplx64 rhs); -void __kmpc_atomic_cmplx8_sub(ident_t *id_ref, int gtid, kmp_cmplx64 *lhs, - kmp_cmplx64 rhs); -void __kmpc_atomic_cmplx8_mul(ident_t *id_ref, int gtid, kmp_cmplx64 *lhs, - kmp_cmplx64 rhs); -void __kmpc_atomic_cmplx8_div(ident_t *id_ref, int gtid, kmp_cmplx64 *lhs, - kmp_cmplx64 rhs); -void __kmpc_atomic_cmplx10_add(ident_t *id_ref, int gtid, kmp_cmplx80 *lhs, - kmp_cmplx80 rhs); -void __kmpc_atomic_cmplx10_sub(ident_t *id_ref, int gtid, kmp_cmplx80 *lhs, - kmp_cmplx80 rhs); -void __kmpc_atomic_cmplx10_mul(ident_t *id_ref, int gtid, kmp_cmplx80 *lhs, - kmp_cmplx80 rhs); -void __kmpc_atomic_cmplx10_div(ident_t *id_ref, int gtid, kmp_cmplx80 *lhs, - kmp_cmplx80 rhs); -#if KMP_HAVE_QUAD -void __kmpc_atomic_cmplx16_add(ident_t *id_ref, int gtid, CPLX128_LEG *lhs, - CPLX128_LEG rhs); -void __kmpc_atomic_cmplx16_sub(ident_t *id_ref, int gtid, CPLX128_LEG *lhs, - CPLX128_LEG rhs); -void __kmpc_atomic_cmplx16_mul(ident_t *id_ref, int gtid, CPLX128_LEG *lhs, - CPLX128_LEG 
rhs); -void __kmpc_atomic_cmplx16_div(ident_t *id_ref, int gtid, CPLX128_LEG *lhs, - CPLX128_LEG rhs); -#if (KMP_ARCH_X86) -// Routines with 16-byte arguments aligned to 16-byte boundary -void __kmpc_atomic_cmplx16_add_a16(ident_t *id_ref, int gtid, - kmp_cmplx128_a16_t *lhs, - kmp_cmplx128_a16_t rhs); -void __kmpc_atomic_cmplx16_sub_a16(ident_t *id_ref, int gtid, - kmp_cmplx128_a16_t *lhs, - kmp_cmplx128_a16_t rhs); -void __kmpc_atomic_cmplx16_mul_a16(ident_t *id_ref, int gtid, - kmp_cmplx128_a16_t *lhs, - kmp_cmplx128_a16_t rhs); -void __kmpc_atomic_cmplx16_div_a16(ident_t *id_ref, int gtid, - kmp_cmplx128_a16_t *lhs, - kmp_cmplx128_a16_t rhs); -#endif -#endif - -#if OMP_40_ENABLED - -// OpenMP 4.0: x = expr binop x for non-commutative operations. -// Supported only on IA-32 architecture and Intel(R) 64 -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - -void __kmpc_atomic_fixed1_sub_rev(ident_t *id_ref, int gtid, char *lhs, - char rhs); -void __kmpc_atomic_fixed1_div_rev(ident_t *id_ref, int gtid, char *lhs, - char rhs); -void __kmpc_atomic_fixed1u_div_rev(ident_t *id_ref, int gtid, - unsigned char *lhs, unsigned char rhs); -void __kmpc_atomic_fixed1_shl_rev(ident_t *id_ref, int gtid, char *lhs, - char rhs); -void __kmpc_atomic_fixed1_shr_rev(ident_t *id_ref, int gtid, char *lhs, - char rhs); -void __kmpc_atomic_fixed1u_shr_rev(ident_t *id_ref, int gtid, - unsigned char *lhs, unsigned char rhs); -void __kmpc_atomic_fixed2_sub_rev(ident_t *id_ref, int gtid, short *lhs, - short rhs); -void __kmpc_atomic_fixed2_div_rev(ident_t *id_ref, int gtid, short *lhs, - short rhs); -void __kmpc_atomic_fixed2u_div_rev(ident_t *id_ref, int gtid, - unsigned short *lhs, unsigned short rhs); -void __kmpc_atomic_fixed2_shl_rev(ident_t *id_ref, int gtid, short *lhs, - short rhs); -void __kmpc_atomic_fixed2_shr_rev(ident_t *id_ref, int gtid, short *lhs, - short rhs); -void __kmpc_atomic_fixed2u_shr_rev(ident_t *id_ref, int gtid, - unsigned short *lhs, unsigned short rhs); -void __kmpc_atomic_fixed4_sub_rev(ident_t *id_ref, int gtid, kmp_int32 *lhs, - kmp_int32 rhs); -void __kmpc_atomic_fixed4_div_rev(ident_t *id_ref, int gtid, kmp_int32 *lhs, - kmp_int32 rhs); -void __kmpc_atomic_fixed4u_div_rev(ident_t *id_ref, int gtid, kmp_uint32 *lhs, - kmp_uint32 rhs); -void __kmpc_atomic_fixed4_shl_rev(ident_t *id_ref, int gtid, kmp_int32 *lhs, - kmp_int32 rhs); -void __kmpc_atomic_fixed4_shr_rev(ident_t *id_ref, int gtid, kmp_int32 *lhs, - kmp_int32 rhs); -void __kmpc_atomic_fixed4u_shr_rev(ident_t *id_ref, int gtid, kmp_uint32 *lhs, - kmp_uint32 rhs); -void __kmpc_atomic_fixed8_sub_rev(ident_t *id_ref, int gtid, kmp_int64 *lhs, - kmp_int64 rhs); -void __kmpc_atomic_fixed8_div_rev(ident_t *id_ref, int gtid, kmp_int64 *lhs, - kmp_int64 rhs); -void __kmpc_atomic_fixed8u_div_rev(ident_t *id_ref, int gtid, kmp_uint64 *lhs, - kmp_uint64 rhs); -void __kmpc_atomic_fixed8_shl_rev(ident_t *id_ref, int gtid, kmp_int64 *lhs, - kmp_int64 rhs); -void __kmpc_atomic_fixed8_shr_rev(ident_t *id_ref, int gtid, kmp_int64 *lhs, - kmp_int64 rhs); -void __kmpc_atomic_fixed8u_shr_rev(ident_t *id_ref, int gtid, kmp_uint64 *lhs, - kmp_uint64 rhs); -void __kmpc_atomic_float4_sub_rev(ident_t *id_ref, int gtid, float *lhs, - float rhs); -void __kmpc_atomic_float4_div_rev(ident_t *id_ref, int gtid, float *lhs, - float rhs); -void __kmpc_atomic_float8_sub_rev(ident_t *id_ref, int gtid, double *lhs, - double rhs); -void __kmpc_atomic_float8_div_rev(ident_t *id_ref, int gtid, double *lhs, - double rhs); -void __kmpc_atomic_float10_sub_rev(ident_t *id_ref, int 
gtid, long double *lhs, - long double rhs); -void __kmpc_atomic_float10_div_rev(ident_t *id_ref, int gtid, long double *lhs, - long double rhs); -#if KMP_HAVE_QUAD -void __kmpc_atomic_float16_sub_rev(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs, - QUAD_LEGACY rhs); -void __kmpc_atomic_float16_div_rev(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs, - QUAD_LEGACY rhs); -#endif -void __kmpc_atomic_cmplx4_sub_rev(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs, - kmp_cmplx32 rhs); -void __kmpc_atomic_cmplx4_div_rev(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs, - kmp_cmplx32 rhs); -void __kmpc_atomic_cmplx8_sub_rev(ident_t *id_ref, int gtid, kmp_cmplx64 *lhs, - kmp_cmplx64 rhs); -void __kmpc_atomic_cmplx8_div_rev(ident_t *id_ref, int gtid, kmp_cmplx64 *lhs, - kmp_cmplx64 rhs); -void __kmpc_atomic_cmplx10_sub_rev(ident_t *id_ref, int gtid, kmp_cmplx80 *lhs, - kmp_cmplx80 rhs); -void __kmpc_atomic_cmplx10_div_rev(ident_t *id_ref, int gtid, kmp_cmplx80 *lhs, - kmp_cmplx80 rhs); -#if KMP_HAVE_QUAD -void __kmpc_atomic_cmplx16_sub_rev(ident_t *id_ref, int gtid, CPLX128_LEG *lhs, - CPLX128_LEG rhs); -void __kmpc_atomic_cmplx16_div_rev(ident_t *id_ref, int gtid, CPLX128_LEG *lhs, - CPLX128_LEG rhs); -#if (KMP_ARCH_X86) -// Routines with 16-byte arguments aligned to 16-byte boundary -void __kmpc_atomic_float16_sub_a16_rev(ident_t *id_ref, int gtid, - Quad_a16_t *lhs, Quad_a16_t rhs); -void __kmpc_atomic_float16_div_a16_rev(ident_t *id_ref, int gtid, - Quad_a16_t *lhs, Quad_a16_t rhs); -void __kmpc_atomic_cmplx16_sub_a16_rev(ident_t *id_ref, int gtid, - kmp_cmplx128_a16_t *lhs, - kmp_cmplx128_a16_t rhs); -void __kmpc_atomic_cmplx16_div_a16_rev(ident_t *id_ref, int gtid, - kmp_cmplx128_a16_t *lhs, - kmp_cmplx128_a16_t rhs); -#endif -#endif // KMP_HAVE_QUAD - -#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 - -#endif // OMP_40_ENABLED - -// routines for mixed types - -// RHS=float8 -void __kmpc_atomic_fixed1_mul_float8(ident_t *id_ref, int gtid, char *lhs, - kmp_real64 rhs); -void __kmpc_atomic_fixed1_div_float8(ident_t *id_ref, int gtid, char *lhs, - kmp_real64 rhs); -void __kmpc_atomic_fixed2_mul_float8(ident_t *id_ref, int gtid, short *lhs, - kmp_real64 rhs); -void __kmpc_atomic_fixed2_div_float8(ident_t *id_ref, int gtid, short *lhs, - kmp_real64 rhs); -void __kmpc_atomic_fixed4_mul_float8(ident_t *id_ref, int gtid, kmp_int32 *lhs, - kmp_real64 rhs); -void __kmpc_atomic_fixed4_div_float8(ident_t *id_ref, int gtid, kmp_int32 *lhs, - kmp_real64 rhs); -void __kmpc_atomic_fixed8_mul_float8(ident_t *id_ref, int gtid, kmp_int64 *lhs, - kmp_real64 rhs); -void __kmpc_atomic_fixed8_div_float8(ident_t *id_ref, int gtid, kmp_int64 *lhs, - kmp_real64 rhs); -void __kmpc_atomic_float4_add_float8(ident_t *id_ref, int gtid, kmp_real32 *lhs, - kmp_real64 rhs); -void __kmpc_atomic_float4_sub_float8(ident_t *id_ref, int gtid, kmp_real32 *lhs, - kmp_real64 rhs); -void __kmpc_atomic_float4_mul_float8(ident_t *id_ref, int gtid, kmp_real32 *lhs, - kmp_real64 rhs); -void __kmpc_atomic_float4_div_float8(ident_t *id_ref, int gtid, kmp_real32 *lhs, - kmp_real64 rhs); - -// RHS=float16 (deprecated, to be removed when we are sure the compiler does not -// use them) -#if KMP_HAVE_QUAD -void __kmpc_atomic_fixed1_add_fp(ident_t *id_ref, int gtid, char *lhs, - _Quad rhs); -void __kmpc_atomic_fixed1u_add_fp(ident_t *id_ref, int gtid, unsigned char *lhs, - _Quad rhs); -void __kmpc_atomic_fixed1_sub_fp(ident_t *id_ref, int gtid, char *lhs, - _Quad rhs); -void __kmpc_atomic_fixed1u_sub_fp(ident_t *id_ref, int gtid, unsigned char *lhs, - _Quad rhs); 
-void __kmpc_atomic_fixed1_mul_fp(ident_t *id_ref, int gtid, char *lhs, - _Quad rhs); -void __kmpc_atomic_fixed1u_mul_fp(ident_t *id_ref, int gtid, unsigned char *lhs, - _Quad rhs); -void __kmpc_atomic_fixed1_div_fp(ident_t *id_ref, int gtid, char *lhs, - _Quad rhs); -void __kmpc_atomic_fixed1u_div_fp(ident_t *id_ref, int gtid, unsigned char *lhs, - _Quad rhs); - -void __kmpc_atomic_fixed2_add_fp(ident_t *id_ref, int gtid, short *lhs, - _Quad rhs); -void __kmpc_atomic_fixed2u_add_fp(ident_t *id_ref, int gtid, - unsigned short *lhs, _Quad rhs); -void __kmpc_atomic_fixed2_sub_fp(ident_t *id_ref, int gtid, short *lhs, - _Quad rhs); -void __kmpc_atomic_fixed2u_sub_fp(ident_t *id_ref, int gtid, - unsigned short *lhs, _Quad rhs); -void __kmpc_atomic_fixed2_mul_fp(ident_t *id_ref, int gtid, short *lhs, - _Quad rhs); -void __kmpc_atomic_fixed2u_mul_fp(ident_t *id_ref, int gtid, - unsigned short *lhs, _Quad rhs); -void __kmpc_atomic_fixed2_div_fp(ident_t *id_ref, int gtid, short *lhs, - _Quad rhs); -void __kmpc_atomic_fixed2u_div_fp(ident_t *id_ref, int gtid, - unsigned short *lhs, _Quad rhs); - -void __kmpc_atomic_fixed4_add_fp(ident_t *id_ref, int gtid, kmp_int32 *lhs, - _Quad rhs); -void __kmpc_atomic_fixed4u_add_fp(ident_t *id_ref, int gtid, kmp_uint32 *lhs, - _Quad rhs); -void __kmpc_atomic_fixed4_sub_fp(ident_t *id_ref, int gtid, kmp_int32 *lhs, - _Quad rhs); -void __kmpc_atomic_fixed4u_sub_fp(ident_t *id_ref, int gtid, kmp_uint32 *lhs, - _Quad rhs); -void __kmpc_atomic_fixed4_mul_fp(ident_t *id_ref, int gtid, kmp_int32 *lhs, - _Quad rhs); -void __kmpc_atomic_fixed4u_mul_fp(ident_t *id_ref, int gtid, kmp_uint32 *lhs, - _Quad rhs); -void __kmpc_atomic_fixed4_div_fp(ident_t *id_ref, int gtid, kmp_int32 *lhs, - _Quad rhs); -void __kmpc_atomic_fixed4u_div_fp(ident_t *id_ref, int gtid, kmp_uint32 *lhs, - _Quad rhs); - -void __kmpc_atomic_fixed8_add_fp(ident_t *id_ref, int gtid, kmp_int64 *lhs, - _Quad rhs); -void __kmpc_atomic_fixed8u_add_fp(ident_t *id_ref, int gtid, kmp_uint64 *lhs, - _Quad rhs); -void __kmpc_atomic_fixed8_sub_fp(ident_t *id_ref, int gtid, kmp_int64 *lhs, - _Quad rhs); -void __kmpc_atomic_fixed8u_sub_fp(ident_t *id_ref, int gtid, kmp_uint64 *lhs, - _Quad rhs); -void __kmpc_atomic_fixed8_mul_fp(ident_t *id_ref, int gtid, kmp_int64 *lhs, - _Quad rhs); -void __kmpc_atomic_fixed8u_mul_fp(ident_t *id_ref, int gtid, kmp_uint64 *lhs, - _Quad rhs); -void __kmpc_atomic_fixed8_div_fp(ident_t *id_ref, int gtid, kmp_int64 *lhs, - _Quad rhs); -void __kmpc_atomic_fixed8u_div_fp(ident_t *id_ref, int gtid, kmp_uint64 *lhs, - _Quad rhs); - -void __kmpc_atomic_float4_add_fp(ident_t *id_ref, int gtid, kmp_real32 *lhs, - _Quad rhs); -void __kmpc_atomic_float4_sub_fp(ident_t *id_ref, int gtid, kmp_real32 *lhs, - _Quad rhs); -void __kmpc_atomic_float4_mul_fp(ident_t *id_ref, int gtid, kmp_real32 *lhs, - _Quad rhs); -void __kmpc_atomic_float4_div_fp(ident_t *id_ref, int gtid, kmp_real32 *lhs, - _Quad rhs); - -void __kmpc_atomic_float8_add_fp(ident_t *id_ref, int gtid, kmp_real64 *lhs, - _Quad rhs); -void __kmpc_atomic_float8_sub_fp(ident_t *id_ref, int gtid, kmp_real64 *lhs, - _Quad rhs); -void __kmpc_atomic_float8_mul_fp(ident_t *id_ref, int gtid, kmp_real64 *lhs, - _Quad rhs); -void __kmpc_atomic_float8_div_fp(ident_t *id_ref, int gtid, kmp_real64 *lhs, - _Quad rhs); - -void __kmpc_atomic_float10_add_fp(ident_t *id_ref, int gtid, long double *lhs, - _Quad rhs); -void __kmpc_atomic_float10_sub_fp(ident_t *id_ref, int gtid, long double *lhs, - _Quad rhs); -void __kmpc_atomic_float10_mul_fp(ident_t 
*id_ref, int gtid, long double *lhs, - _Quad rhs); -void __kmpc_atomic_float10_div_fp(ident_t *id_ref, int gtid, long double *lhs, - _Quad rhs); - -// Reverse operations -void __kmpc_atomic_fixed1_sub_rev_fp(ident_t *id_ref, int gtid, char *lhs, - _Quad rhs); -void __kmpc_atomic_fixed1u_sub_rev_fp(ident_t *id_ref, int gtid, - unsigned char *lhs, _Quad rhs); -void __kmpc_atomic_fixed1_div_rev_fp(ident_t *id_ref, int gtid, char *lhs, - _Quad rhs); -void __kmpc_atomic_fixed1u_div_rev_fp(ident_t *id_ref, int gtid, - unsigned char *lhs, _Quad rhs); -void __kmpc_atomic_fixed2_sub_rev_fp(ident_t *id_ref, int gtid, short *lhs, - _Quad rhs); -void __kmpc_atomic_fixed2u_sub_rev_fp(ident_t *id_ref, int gtid, - unsigned short *lhs, _Quad rhs); -void __kmpc_atomic_fixed2_div_rev_fp(ident_t *id_ref, int gtid, short *lhs, - _Quad rhs); -void __kmpc_atomic_fixed2u_div_rev_fp(ident_t *id_ref, int gtid, - unsigned short *lhs, _Quad rhs); -void __kmpc_atomic_fixed4_sub_rev_fp(ident_t *id_ref, int gtid, kmp_int32 *lhs, - _Quad rhs); -void __kmpc_atomic_fixed4u_sub_rev_fp(ident_t *id_ref, int gtid, - kmp_uint32 *lhs, _Quad rhs); -void __kmpc_atomic_fixed4_div_rev_fp(ident_t *id_ref, int gtid, kmp_int32 *lhs, - _Quad rhs); -void __kmpc_atomic_fixed4u_div_rev_fp(ident_t *id_ref, int gtid, - kmp_uint32 *lhs, _Quad rhs); -void __kmpc_atomic_fixed8_sub_rev_fp(ident_t *id_ref, int gtid, kmp_int64 *lhs, - _Quad rhs); -void __kmpc_atomic_fixed8u_sub_rev_fp(ident_t *id_ref, int gtid, - kmp_uint64 *lhs, _Quad rhs); -void __kmpc_atomic_fixed8_div_rev_fp(ident_t *id_ref, int gtid, kmp_int64 *lhs, - _Quad rhs); -void __kmpc_atomic_fixed8u_div_rev_fp(ident_t *id_ref, int gtid, - kmp_uint64 *lhs, _Quad rhs); -void __kmpc_atomic_float4_sub_rev_fp(ident_t *id_ref, int gtid, float *lhs, - _Quad rhs); -void __kmpc_atomic_float4_div_rev_fp(ident_t *id_ref, int gtid, float *lhs, - _Quad rhs); -void __kmpc_atomic_float8_sub_rev_fp(ident_t *id_ref, int gtid, double *lhs, - _Quad rhs); -void __kmpc_atomic_float8_div_rev_fp(ident_t *id_ref, int gtid, double *lhs, - _Quad rhs); -void __kmpc_atomic_float10_sub_rev_fp(ident_t *id_ref, int gtid, - long double *lhs, _Quad rhs); -void __kmpc_atomic_float10_div_rev_fp(ident_t *id_ref, int gtid, - long double *lhs, _Quad rhs); - -#endif // KMP_HAVE_QUAD - -// RHS=cmplx8 -void __kmpc_atomic_cmplx4_add_cmplx8(ident_t *id_ref, int gtid, - kmp_cmplx32 *lhs, kmp_cmplx64 rhs); -void __kmpc_atomic_cmplx4_sub_cmplx8(ident_t *id_ref, int gtid, - kmp_cmplx32 *lhs, kmp_cmplx64 rhs); -void __kmpc_atomic_cmplx4_mul_cmplx8(ident_t *id_ref, int gtid, - kmp_cmplx32 *lhs, kmp_cmplx64 rhs); -void __kmpc_atomic_cmplx4_div_cmplx8(ident_t *id_ref, int gtid, - kmp_cmplx32 *lhs, kmp_cmplx64 rhs); - -// generic atomic routines -void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs, - void (*f)(void *, void *, void *)); -void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs, - void (*f)(void *, void *, void *)); -void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs, - void (*f)(void *, void *, void *)); -void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs, - void (*f)(void *, void *, void *)); -void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs, - void (*f)(void *, void *, void *)); -void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs, - void (*f)(void *, void *, void *)); -void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs, - void (*f)(void *, void *, void *)); -void __kmpc_atomic_32(ident_t 
*id_ref, int gtid, void *lhs, void *rhs, - void (*f)(void *, void *, void *)); - -// READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64 -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - -// Below routines for atomic READ are listed -char __kmpc_atomic_fixed1_rd(ident_t *id_ref, int gtid, char *loc); -short __kmpc_atomic_fixed2_rd(ident_t *id_ref, int gtid, short *loc); -kmp_int32 __kmpc_atomic_fixed4_rd(ident_t *id_ref, int gtid, kmp_int32 *loc); -kmp_int64 __kmpc_atomic_fixed8_rd(ident_t *id_ref, int gtid, kmp_int64 *loc); -kmp_real32 __kmpc_atomic_float4_rd(ident_t *id_ref, int gtid, kmp_real32 *loc); -kmp_real64 __kmpc_atomic_float8_rd(ident_t *id_ref, int gtid, kmp_real64 *loc); -long double __kmpc_atomic_float10_rd(ident_t *id_ref, int gtid, - long double *loc); -#if KMP_HAVE_QUAD -QUAD_LEGACY __kmpc_atomic_float16_rd(ident_t *id_ref, int gtid, - QUAD_LEGACY *loc); -#endif -// Fix for CQ220361: cmplx4 READ will return void on Windows* OS; read value -// will be returned through an additional parameter -#if (KMP_OS_WINDOWS) -void __kmpc_atomic_cmplx4_rd(kmp_cmplx32 *out, ident_t *id_ref, int gtid, - kmp_cmplx32 *loc); -#else -kmp_cmplx32 __kmpc_atomic_cmplx4_rd(ident_t *id_ref, int gtid, - kmp_cmplx32 *loc); -#endif -kmp_cmplx64 __kmpc_atomic_cmplx8_rd(ident_t *id_ref, int gtid, - kmp_cmplx64 *loc); -kmp_cmplx80 __kmpc_atomic_cmplx10_rd(ident_t *id_ref, int gtid, - kmp_cmplx80 *loc); -#if KMP_HAVE_QUAD -CPLX128_LEG __kmpc_atomic_cmplx16_rd(ident_t *id_ref, int gtid, - CPLX128_LEG *loc); -#if (KMP_ARCH_X86) -// Routines with 16-byte arguments aligned to 16-byte boundary -Quad_a16_t __kmpc_atomic_float16_a16_rd(ident_t *id_ref, int gtid, - Quad_a16_t *loc); -kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_a16_rd(ident_t *id_ref, int gtid, - kmp_cmplx128_a16_t *loc); -#endif -#endif - -// Below routines for atomic WRITE are listed -void __kmpc_atomic_fixed1_wr(ident_t *id_ref, int gtid, char *lhs, char rhs); -void __kmpc_atomic_fixed2_wr(ident_t *id_ref, int gtid, short *lhs, short rhs); -void __kmpc_atomic_fixed4_wr(ident_t *id_ref, int gtid, kmp_int32 *lhs, - kmp_int32 rhs); -void __kmpc_atomic_fixed8_wr(ident_t *id_ref, int gtid, kmp_int64 *lhs, - kmp_int64 rhs); -void __kmpc_atomic_float4_wr(ident_t *id_ref, int gtid, kmp_real32 *lhs, - kmp_real32 rhs); -void __kmpc_atomic_float8_wr(ident_t *id_ref, int gtid, kmp_real64 *lhs, - kmp_real64 rhs); -void __kmpc_atomic_float10_wr(ident_t *id_ref, int gtid, long double *lhs, - long double rhs); -#if KMP_HAVE_QUAD -void __kmpc_atomic_float16_wr(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs, - QUAD_LEGACY rhs); -#endif -void __kmpc_atomic_cmplx4_wr(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs, - kmp_cmplx32 rhs); -void __kmpc_atomic_cmplx8_wr(ident_t *id_ref, int gtid, kmp_cmplx64 *lhs, - kmp_cmplx64 rhs); -void __kmpc_atomic_cmplx10_wr(ident_t *id_ref, int gtid, kmp_cmplx80 *lhs, - kmp_cmplx80 rhs); -#if KMP_HAVE_QUAD -void __kmpc_atomic_cmplx16_wr(ident_t *id_ref, int gtid, CPLX128_LEG *lhs, - CPLX128_LEG rhs); -#if (KMP_ARCH_X86) -// Routines with 16-byte arguments aligned to 16-byte boundary -void __kmpc_atomic_float16_a16_wr(ident_t *id_ref, int gtid, Quad_a16_t *lhs, - Quad_a16_t rhs); -void __kmpc_atomic_cmplx16_a16_wr(ident_t *id_ref, int gtid, - kmp_cmplx128_a16_t *lhs, - kmp_cmplx128_a16_t rhs); -#endif -#endif - -// Below routines for atomic CAPTURE are listed - -// 1-byte -char __kmpc_atomic_fixed1_add_cpt(ident_t *id_ref, int gtid, char *lhs, - char rhs, int flag); -char __kmpc_atomic_fixed1_andb_cpt(ident_t *id_ref, int 
gtid, char *lhs, - char rhs, int flag); -char __kmpc_atomic_fixed1_div_cpt(ident_t *id_ref, int gtid, char *lhs, - char rhs, int flag); -unsigned char __kmpc_atomic_fixed1u_div_cpt(ident_t *id_ref, int gtid, - unsigned char *lhs, - unsigned char rhs, int flag); -char __kmpc_atomic_fixed1_mul_cpt(ident_t *id_ref, int gtid, char *lhs, - char rhs, int flag); -char __kmpc_atomic_fixed1_orb_cpt(ident_t *id_ref, int gtid, char *lhs, - char rhs, int flag); -char __kmpc_atomic_fixed1_shl_cpt(ident_t *id_ref, int gtid, char *lhs, - char rhs, int flag); -char __kmpc_atomic_fixed1_shr_cpt(ident_t *id_ref, int gtid, char *lhs, - char rhs, int flag); -unsigned char __kmpc_atomic_fixed1u_shr_cpt(ident_t *id_ref, int gtid, - unsigned char *lhs, - unsigned char rhs, int flag); -char __kmpc_atomic_fixed1_sub_cpt(ident_t *id_ref, int gtid, char *lhs, - char rhs, int flag); -char __kmpc_atomic_fixed1_xor_cpt(ident_t *id_ref, int gtid, char *lhs, - char rhs, int flag); -// 2-byte -short __kmpc_atomic_fixed2_add_cpt(ident_t *id_ref, int gtid, short *lhs, - short rhs, int flag); -short __kmpc_atomic_fixed2_andb_cpt(ident_t *id_ref, int gtid, short *lhs, - short rhs, int flag); -short __kmpc_atomic_fixed2_div_cpt(ident_t *id_ref, int gtid, short *lhs, - short rhs, int flag); -unsigned short __kmpc_atomic_fixed2u_div_cpt(ident_t *id_ref, int gtid, - unsigned short *lhs, - unsigned short rhs, int flag); -short __kmpc_atomic_fixed2_mul_cpt(ident_t *id_ref, int gtid, short *lhs, - short rhs, int flag); -short __kmpc_atomic_fixed2_orb_cpt(ident_t *id_ref, int gtid, short *lhs, - short rhs, int flag); -short __kmpc_atomic_fixed2_shl_cpt(ident_t *id_ref, int gtid, short *lhs, - short rhs, int flag); -short __kmpc_atomic_fixed2_shr_cpt(ident_t *id_ref, int gtid, short *lhs, - short rhs, int flag); -unsigned short __kmpc_atomic_fixed2u_shr_cpt(ident_t *id_ref, int gtid, - unsigned short *lhs, - unsigned short rhs, int flag); -short __kmpc_atomic_fixed2_sub_cpt(ident_t *id_ref, int gtid, short *lhs, - short rhs, int flag); -short __kmpc_atomic_fixed2_xor_cpt(ident_t *id_ref, int gtid, short *lhs, - short rhs, int flag); -// 4-byte add / sub fixed -kmp_int32 __kmpc_atomic_fixed4_add_cpt(ident_t *id_ref, int gtid, - kmp_int32 *lhs, kmp_int32 rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_sub_cpt(ident_t *id_ref, int gtid, - kmp_int32 *lhs, kmp_int32 rhs, int flag); -// 4-byte add / sub float -kmp_real32 __kmpc_atomic_float4_add_cpt(ident_t *id_ref, int gtid, - kmp_real32 *lhs, kmp_real32 rhs, - int flag); -kmp_real32 __kmpc_atomic_float4_sub_cpt(ident_t *id_ref, int gtid, - kmp_real32 *lhs, kmp_real32 rhs, - int flag); -// 8-byte add / sub fixed -kmp_int64 __kmpc_atomic_fixed8_add_cpt(ident_t *id_ref, int gtid, - kmp_int64 *lhs, kmp_int64 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_sub_cpt(ident_t *id_ref, int gtid, - kmp_int64 *lhs, kmp_int64 rhs, int flag); -// 8-byte add / sub float -kmp_real64 __kmpc_atomic_float8_add_cpt(ident_t *id_ref, int gtid, - kmp_real64 *lhs, kmp_real64 rhs, - int flag); -kmp_real64 __kmpc_atomic_float8_sub_cpt(ident_t *id_ref, int gtid, - kmp_real64 *lhs, kmp_real64 rhs, - int flag); -// 4-byte fixed -kmp_int32 __kmpc_atomic_fixed4_andb_cpt(ident_t *id_ref, int gtid, - kmp_int32 *lhs, kmp_int32 rhs, - int flag); -kmp_int32 __kmpc_atomic_fixed4_div_cpt(ident_t *id_ref, int gtid, - kmp_int32 *lhs, kmp_int32 rhs, int flag); -kmp_uint32 __kmpc_atomic_fixed4u_div_cpt(ident_t *id_ref, int gtid, - kmp_uint32 *lhs, kmp_uint32 rhs, - int flag); -kmp_int32 __kmpc_atomic_fixed4_mul_cpt(ident_t 
*id_ref, int gtid, - kmp_int32 *lhs, kmp_int32 rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_orb_cpt(ident_t *id_ref, int gtid, - kmp_int32 *lhs, kmp_int32 rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_shl_cpt(ident_t *id_ref, int gtid, - kmp_int32 *lhs, kmp_int32 rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_shr_cpt(ident_t *id_ref, int gtid, - kmp_int32 *lhs, kmp_int32 rhs, int flag); -kmp_uint32 __kmpc_atomic_fixed4u_shr_cpt(ident_t *id_ref, int gtid, - kmp_uint32 *lhs, kmp_uint32 rhs, - int flag); -kmp_int32 __kmpc_atomic_fixed4_xor_cpt(ident_t *id_ref, int gtid, - kmp_int32 *lhs, kmp_int32 rhs, int flag); -// 8-byte fixed -kmp_int64 __kmpc_atomic_fixed8_andb_cpt(ident_t *id_ref, int gtid, - kmp_int64 *lhs, kmp_int64 rhs, - int flag); -kmp_int64 __kmpc_atomic_fixed8_div_cpt(ident_t *id_ref, int gtid, - kmp_int64 *lhs, kmp_int64 rhs, int flag); -kmp_uint64 __kmpc_atomic_fixed8u_div_cpt(ident_t *id_ref, int gtid, - kmp_uint64 *lhs, kmp_uint64 rhs, - int flag); -kmp_int64 __kmpc_atomic_fixed8_mul_cpt(ident_t *id_ref, int gtid, - kmp_int64 *lhs, kmp_int64 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_orb_cpt(ident_t *id_ref, int gtid, - kmp_int64 *lhs, kmp_int64 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_shl_cpt(ident_t *id_ref, int gtid, - kmp_int64 *lhs, kmp_int64 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_shr_cpt(ident_t *id_ref, int gtid, - kmp_int64 *lhs, kmp_int64 rhs, int flag); -kmp_uint64 __kmpc_atomic_fixed8u_shr_cpt(ident_t *id_ref, int gtid, - kmp_uint64 *lhs, kmp_uint64 rhs, - int flag); -kmp_int64 __kmpc_atomic_fixed8_xor_cpt(ident_t *id_ref, int gtid, - kmp_int64 *lhs, kmp_int64 rhs, int flag); -// 4-byte float -kmp_real32 __kmpc_atomic_float4_div_cpt(ident_t *id_ref, int gtid, - kmp_real32 *lhs, kmp_real32 rhs, - int flag); -kmp_real32 __kmpc_atomic_float4_mul_cpt(ident_t *id_ref, int gtid, - kmp_real32 *lhs, kmp_real32 rhs, - int flag); -// 8-byte float -kmp_real64 __kmpc_atomic_float8_div_cpt(ident_t *id_ref, int gtid, - kmp_real64 *lhs, kmp_real64 rhs, - int flag); -kmp_real64 __kmpc_atomic_float8_mul_cpt(ident_t *id_ref, int gtid, - kmp_real64 *lhs, kmp_real64 rhs, - int flag); -// 1-, 2-, 4-, 8-byte logical (&&, ||) -char __kmpc_atomic_fixed1_andl_cpt(ident_t *id_ref, int gtid, char *lhs, - char rhs, int flag); -char __kmpc_atomic_fixed1_orl_cpt(ident_t *id_ref, int gtid, char *lhs, - char rhs, int flag); -short __kmpc_atomic_fixed2_andl_cpt(ident_t *id_ref, int gtid, short *lhs, - short rhs, int flag); -short __kmpc_atomic_fixed2_orl_cpt(ident_t *id_ref, int gtid, short *lhs, - short rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_andl_cpt(ident_t *id_ref, int gtid, - kmp_int32 *lhs, kmp_int32 rhs, - int flag); -kmp_int32 __kmpc_atomic_fixed4_orl_cpt(ident_t *id_ref, int gtid, - kmp_int32 *lhs, kmp_int32 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_andl_cpt(ident_t *id_ref, int gtid, - kmp_int64 *lhs, kmp_int64 rhs, - int flag); -kmp_int64 __kmpc_atomic_fixed8_orl_cpt(ident_t *id_ref, int gtid, - kmp_int64 *lhs, kmp_int64 rhs, int flag); -// MIN / MAX -char __kmpc_atomic_fixed1_max_cpt(ident_t *id_ref, int gtid, char *lhs, - char rhs, int flag); -char __kmpc_atomic_fixed1_min_cpt(ident_t *id_ref, int gtid, char *lhs, - char rhs, int flag); -short __kmpc_atomic_fixed2_max_cpt(ident_t *id_ref, int gtid, short *lhs, - short rhs, int flag); -short __kmpc_atomic_fixed2_min_cpt(ident_t *id_ref, int gtid, short *lhs, - short rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_max_cpt(ident_t *id_ref, int gtid, - kmp_int32 *lhs, kmp_int32 rhs, int flag); 
-kmp_int32 __kmpc_atomic_fixed4_min_cpt(ident_t *id_ref, int gtid, - kmp_int32 *lhs, kmp_int32 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_max_cpt(ident_t *id_ref, int gtid, - kmp_int64 *lhs, kmp_int64 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_min_cpt(ident_t *id_ref, int gtid, - kmp_int64 *lhs, kmp_int64 rhs, int flag); -kmp_real32 __kmpc_atomic_float4_max_cpt(ident_t *id_ref, int gtid, - kmp_real32 *lhs, kmp_real32 rhs, - int flag); -kmp_real32 __kmpc_atomic_float4_min_cpt(ident_t *id_ref, int gtid, - kmp_real32 *lhs, kmp_real32 rhs, - int flag); -kmp_real64 __kmpc_atomic_float8_max_cpt(ident_t *id_ref, int gtid, - kmp_real64 *lhs, kmp_real64 rhs, - int flag); -kmp_real64 __kmpc_atomic_float8_min_cpt(ident_t *id_ref, int gtid, - kmp_real64 *lhs, kmp_real64 rhs, - int flag); -#if KMP_HAVE_QUAD -QUAD_LEGACY __kmpc_atomic_float16_max_cpt(ident_t *id_ref, int gtid, - QUAD_LEGACY *lhs, QUAD_LEGACY rhs, - int flag); -QUAD_LEGACY __kmpc_atomic_float16_min_cpt(ident_t *id_ref, int gtid, - QUAD_LEGACY *lhs, QUAD_LEGACY rhs, - int flag); -#endif -// .NEQV. (same as xor) -char __kmpc_atomic_fixed1_neqv_cpt(ident_t *id_ref, int gtid, char *lhs, - char rhs, int flag); -short __kmpc_atomic_fixed2_neqv_cpt(ident_t *id_ref, int gtid, short *lhs, - short rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_neqv_cpt(ident_t *id_ref, int gtid, - kmp_int32 *lhs, kmp_int32 rhs, - int flag); -kmp_int64 __kmpc_atomic_fixed8_neqv_cpt(ident_t *id_ref, int gtid, - kmp_int64 *lhs, kmp_int64 rhs, - int flag); -// .EQV. (same as ~xor) -char __kmpc_atomic_fixed1_eqv_cpt(ident_t *id_ref, int gtid, char *lhs, - char rhs, int flag); -short __kmpc_atomic_fixed2_eqv_cpt(ident_t *id_ref, int gtid, short *lhs, - short rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_eqv_cpt(ident_t *id_ref, int gtid, - kmp_int32 *lhs, kmp_int32 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_eqv_cpt(ident_t *id_ref, int gtid, - kmp_int64 *lhs, kmp_int64 rhs, int flag); -// long double type -long double __kmpc_atomic_float10_add_cpt(ident_t *id_ref, int gtid, - long double *lhs, long double rhs, - int flag); -long double __kmpc_atomic_float10_sub_cpt(ident_t *id_ref, int gtid, - long double *lhs, long double rhs, - int flag); -long double __kmpc_atomic_float10_mul_cpt(ident_t *id_ref, int gtid, - long double *lhs, long double rhs, - int flag); -long double __kmpc_atomic_float10_div_cpt(ident_t *id_ref, int gtid, - long double *lhs, long double rhs, - int flag); -#if KMP_HAVE_QUAD -// _Quad type -QUAD_LEGACY __kmpc_atomic_float16_add_cpt(ident_t *id_ref, int gtid, - QUAD_LEGACY *lhs, QUAD_LEGACY rhs, - int flag); -QUAD_LEGACY __kmpc_atomic_float16_sub_cpt(ident_t *id_ref, int gtid, - QUAD_LEGACY *lhs, QUAD_LEGACY rhs, - int flag); -QUAD_LEGACY __kmpc_atomic_float16_mul_cpt(ident_t *id_ref, int gtid, - QUAD_LEGACY *lhs, QUAD_LEGACY rhs, - int flag); -QUAD_LEGACY __kmpc_atomic_float16_div_cpt(ident_t *id_ref, int gtid, - QUAD_LEGACY *lhs, QUAD_LEGACY rhs, - int flag); -#endif -// routines for complex types -// Workaround for cmplx4 routines - return void; captured value is returned via -// the argument -void __kmpc_atomic_cmplx4_add_cpt(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs, - kmp_cmplx32 rhs, kmp_cmplx32 *out, int flag); -void __kmpc_atomic_cmplx4_sub_cpt(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs, - kmp_cmplx32 rhs, kmp_cmplx32 *out, int flag); -void __kmpc_atomic_cmplx4_mul_cpt(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs, - kmp_cmplx32 rhs, kmp_cmplx32 *out, int flag); -void __kmpc_atomic_cmplx4_div_cpt(ident_t *id_ref, int gtid, 
kmp_cmplx32 *lhs, - kmp_cmplx32 rhs, kmp_cmplx32 *out, int flag); - -kmp_cmplx64 __kmpc_atomic_cmplx8_add_cpt(ident_t *id_ref, int gtid, - kmp_cmplx64 *lhs, kmp_cmplx64 rhs, - int flag); -kmp_cmplx64 __kmpc_atomic_cmplx8_sub_cpt(ident_t *id_ref, int gtid, - kmp_cmplx64 *lhs, kmp_cmplx64 rhs, - int flag); -kmp_cmplx64 __kmpc_atomic_cmplx8_mul_cpt(ident_t *id_ref, int gtid, - kmp_cmplx64 *lhs, kmp_cmplx64 rhs, - int flag); -kmp_cmplx64 __kmpc_atomic_cmplx8_div_cpt(ident_t *id_ref, int gtid, - kmp_cmplx64 *lhs, kmp_cmplx64 rhs, - int flag); -kmp_cmplx80 __kmpc_atomic_cmplx10_add_cpt(ident_t *id_ref, int gtid, - kmp_cmplx80 *lhs, kmp_cmplx80 rhs, - int flag); -kmp_cmplx80 __kmpc_atomic_cmplx10_sub_cpt(ident_t *id_ref, int gtid, - kmp_cmplx80 *lhs, kmp_cmplx80 rhs, - int flag); -kmp_cmplx80 __kmpc_atomic_cmplx10_mul_cpt(ident_t *id_ref, int gtid, - kmp_cmplx80 *lhs, kmp_cmplx80 rhs, - int flag); -kmp_cmplx80 __kmpc_atomic_cmplx10_div_cpt(ident_t *id_ref, int gtid, - kmp_cmplx80 *lhs, kmp_cmplx80 rhs, - int flag); -#if KMP_HAVE_QUAD -CPLX128_LEG __kmpc_atomic_cmplx16_add_cpt(ident_t *id_ref, int gtid, - CPLX128_LEG *lhs, CPLX128_LEG rhs, - int flag); -CPLX128_LEG __kmpc_atomic_cmplx16_sub_cpt(ident_t *id_ref, int gtid, - CPLX128_LEG *lhs, CPLX128_LEG rhs, - int flag); -CPLX128_LEG __kmpc_atomic_cmplx16_mul_cpt(ident_t *id_ref, int gtid, - CPLX128_LEG *lhs, CPLX128_LEG rhs, - int flag); -CPLX128_LEG __kmpc_atomic_cmplx16_div_cpt(ident_t *id_ref, int gtid, - CPLX128_LEG *lhs, CPLX128_LEG rhs, - int flag); -#if (KMP_ARCH_X86) -// Routines with 16-byte arguments aligned to 16-byte boundary -Quad_a16_t __kmpc_atomic_float16_add_a16_cpt(ident_t *id_ref, int gtid, - Quad_a16_t *lhs, Quad_a16_t rhs, - int flag); -Quad_a16_t __kmpc_atomic_float16_sub_a16_cpt(ident_t *id_ref, int gtid, - Quad_a16_t *lhs, Quad_a16_t rhs, - int flag); -Quad_a16_t __kmpc_atomic_float16_mul_a16_cpt(ident_t *id_ref, int gtid, - Quad_a16_t *lhs, Quad_a16_t rhs, - int flag); -Quad_a16_t __kmpc_atomic_float16_div_a16_cpt(ident_t *id_ref, int gtid, - Quad_a16_t *lhs, Quad_a16_t rhs, - int flag); -Quad_a16_t __kmpc_atomic_float16_max_a16_cpt(ident_t *id_ref, int gtid, - Quad_a16_t *lhs, Quad_a16_t rhs, - int flag); -Quad_a16_t __kmpc_atomic_float16_min_a16_cpt(ident_t *id_ref, int gtid, - Quad_a16_t *lhs, Quad_a16_t rhs, - int flag); -kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_add_a16_cpt(ident_t *id_ref, int gtid, - kmp_cmplx128_a16_t *lhs, - kmp_cmplx128_a16_t rhs, - int flag); -kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_sub_a16_cpt(ident_t *id_ref, int gtid, - kmp_cmplx128_a16_t *lhs, - kmp_cmplx128_a16_t rhs, - int flag); -kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_mul_a16_cpt(ident_t *id_ref, int gtid, - kmp_cmplx128_a16_t *lhs, - kmp_cmplx128_a16_t rhs, - int flag); -kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_div_a16_cpt(ident_t *id_ref, int gtid, - kmp_cmplx128_a16_t *lhs, - kmp_cmplx128_a16_t rhs, - int flag); -#endif -#endif - -void __kmpc_atomic_start(void); -void __kmpc_atomic_end(void); - -#if OMP_40_ENABLED - -// OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr -// binop x; v = x; } for non-commutative operations. 
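// To make the reverse-capture semantics above concrete, here is a minimal
// lowering sketch (illustrative only, not taken from the original sources).
// The names id_ref, gtid, x and expr are assumed to be in scope, and the flag
// convention (0 = return the value captured before the update, nonzero =
// return the updated value) is one reading of the kmp_atomic implementation:
//
//   // #pragma omp atomic capture
//   // { v = x; x = expr / x; }   // division is non-commutative
//   double v = __kmpc_atomic_float8_div_cpt_rev(id_ref, gtid, &x, expr, 0);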
- -char __kmpc_atomic_fixed1_sub_cpt_rev(ident_t *id_ref, int gtid, char *lhs, - char rhs, int flag); -char __kmpc_atomic_fixed1_div_cpt_rev(ident_t *id_ref, int gtid, char *lhs, - char rhs, int flag); -unsigned char __kmpc_atomic_fixed1u_div_cpt_rev(ident_t *id_ref, int gtid, - unsigned char *lhs, - unsigned char rhs, int flag); -char __kmpc_atomic_fixed1_shl_cpt_rev(ident_t *id_ref, int gtid, char *lhs, - char rhs, int flag); -char __kmpc_atomic_fixed1_shr_cpt_rev(ident_t *id_ref, int gtid, char *lhs, - char rhs, int flag); -unsigned char __kmpc_atomic_fixed1u_shr_cpt_rev(ident_t *id_ref, int gtid, - unsigned char *lhs, - unsigned char rhs, int flag); -short __kmpc_atomic_fixed2_sub_cpt_rev(ident_t *id_ref, int gtid, short *lhs, - short rhs, int flag); -short __kmpc_atomic_fixed2_div_cpt_rev(ident_t *id_ref, int gtid, short *lhs, - short rhs, int flag); -unsigned short __kmpc_atomic_fixed2u_div_cpt_rev(ident_t *id_ref, int gtid, - unsigned short *lhs, - unsigned short rhs, int flag); -short __kmpc_atomic_fixed2_shl_cpt_rev(ident_t *id_ref, int gtid, short *lhs, - short rhs, int flag); -short __kmpc_atomic_fixed2_shr_cpt_rev(ident_t *id_ref, int gtid, short *lhs, - short rhs, int flag); -unsigned short __kmpc_atomic_fixed2u_shr_cpt_rev(ident_t *id_ref, int gtid, - unsigned short *lhs, - unsigned short rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_sub_cpt_rev(ident_t *id_ref, int gtid, - kmp_int32 *lhs, kmp_int32 rhs, - int flag); -kmp_int32 __kmpc_atomic_fixed4_div_cpt_rev(ident_t *id_ref, int gtid, - kmp_int32 *lhs, kmp_int32 rhs, - int flag); -kmp_uint32 __kmpc_atomic_fixed4u_div_cpt_rev(ident_t *id_ref, int gtid, - kmp_uint32 *lhs, kmp_uint32 rhs, - int flag); -kmp_int32 __kmpc_atomic_fixed4_shl_cpt_rev(ident_t *id_ref, int gtid, - kmp_int32 *lhs, kmp_int32 rhs, - int flag); -kmp_int32 __kmpc_atomic_fixed4_shr_cpt_rev(ident_t *id_ref, int gtid, - kmp_int32 *lhs, kmp_int32 rhs, - int flag); -kmp_uint32 __kmpc_atomic_fixed4u_shr_cpt_rev(ident_t *id_ref, int gtid, - kmp_uint32 *lhs, kmp_uint32 rhs, - int flag); -kmp_int64 __kmpc_atomic_fixed8_sub_cpt_rev(ident_t *id_ref, int gtid, - kmp_int64 *lhs, kmp_int64 rhs, - int flag); -kmp_int64 __kmpc_atomic_fixed8_div_cpt_rev(ident_t *id_ref, int gtid, - kmp_int64 *lhs, kmp_int64 rhs, - int flag); -kmp_uint64 __kmpc_atomic_fixed8u_div_cpt_rev(ident_t *id_ref, int gtid, - kmp_uint64 *lhs, kmp_uint64 rhs, - int flag); -kmp_int64 __kmpc_atomic_fixed8_shl_cpt_rev(ident_t *id_ref, int gtid, - kmp_int64 *lhs, kmp_int64 rhs, - int flag); -kmp_int64 __kmpc_atomic_fixed8_shr_cpt_rev(ident_t *id_ref, int gtid, - kmp_int64 *lhs, kmp_int64 rhs, - int flag); -kmp_uint64 __kmpc_atomic_fixed8u_shr_cpt_rev(ident_t *id_ref, int gtid, - kmp_uint64 *lhs, kmp_uint64 rhs, - int flag); -float __kmpc_atomic_float4_sub_cpt_rev(ident_t *id_ref, int gtid, float *lhs, - float rhs, int flag); -float __kmpc_atomic_float4_div_cpt_rev(ident_t *id_ref, int gtid, float *lhs, - float rhs, int flag); -double __kmpc_atomic_float8_sub_cpt_rev(ident_t *id_ref, int gtid, double *lhs, - double rhs, int flag); -double __kmpc_atomic_float8_div_cpt_rev(ident_t *id_ref, int gtid, double *lhs, - double rhs, int flag); -long double __kmpc_atomic_float10_sub_cpt_rev(ident_t *id_ref, int gtid, - long double *lhs, long double rhs, - int flag); -long double __kmpc_atomic_float10_div_cpt_rev(ident_t *id_ref, int gtid, - long double *lhs, long double rhs, - int flag); -#if KMP_HAVE_QUAD -QUAD_LEGACY __kmpc_atomic_float16_sub_cpt_rev(ident_t *id_ref, int gtid, - QUAD_LEGACY *lhs, QUAD_LEGACY 
rhs, - int flag); -QUAD_LEGACY __kmpc_atomic_float16_div_cpt_rev(ident_t *id_ref, int gtid, - QUAD_LEGACY *lhs, QUAD_LEGACY rhs, - int flag); -#endif -// Workaround for cmplx4 routines - return void; captured value is returned via -// the argument -void __kmpc_atomic_cmplx4_sub_cpt_rev(ident_t *id_ref, int gtid, - kmp_cmplx32 *lhs, kmp_cmplx32 rhs, - kmp_cmplx32 *out, int flag); -void __kmpc_atomic_cmplx4_div_cpt_rev(ident_t *id_ref, int gtid, - kmp_cmplx32 *lhs, kmp_cmplx32 rhs, - kmp_cmplx32 *out, int flag); -kmp_cmplx64 __kmpc_atomic_cmplx8_sub_cpt_rev(ident_t *id_ref, int gtid, - kmp_cmplx64 *lhs, kmp_cmplx64 rhs, - int flag); -kmp_cmplx64 __kmpc_atomic_cmplx8_div_cpt_rev(ident_t *id_ref, int gtid, - kmp_cmplx64 *lhs, kmp_cmplx64 rhs, - int flag); -kmp_cmplx80 __kmpc_atomic_cmplx10_sub_cpt_rev(ident_t *id_ref, int gtid, - kmp_cmplx80 *lhs, kmp_cmplx80 rhs, - int flag); -kmp_cmplx80 __kmpc_atomic_cmplx10_div_cpt_rev(ident_t *id_ref, int gtid, - kmp_cmplx80 *lhs, kmp_cmplx80 rhs, - int flag); -#if KMP_HAVE_QUAD -CPLX128_LEG __kmpc_atomic_cmplx16_sub_cpt_rev(ident_t *id_ref, int gtid, - CPLX128_LEG *lhs, CPLX128_LEG rhs, - int flag); -CPLX128_LEG __kmpc_atomic_cmplx16_div_cpt_rev(ident_t *id_ref, int gtid, - CPLX128_LEG *lhs, CPLX128_LEG rhs, - int flag); -#if (KMP_ARCH_X86) -Quad_a16_t __kmpc_atomic_float16_sub_a16_cpt_rev(ident_t *id_ref, int gtid, - Quad_a16_t *lhs, - Quad_a16_t rhs, int flag); -Quad_a16_t __kmpc_atomic_float16_div_a16_cpt_rev(ident_t *id_ref, int gtid, - Quad_a16_t *lhs, - Quad_a16_t rhs, int flag); -kmp_cmplx128_a16_t -__kmpc_atomic_cmplx16_sub_a16_cpt_rev(ident_t *id_ref, int gtid, - kmp_cmplx128_a16_t *lhs, - kmp_cmplx128_a16_t rhs, int flag); -kmp_cmplx128_a16_t -__kmpc_atomic_cmplx16_div_a16_cpt_rev(ident_t *id_ref, int gtid, - kmp_cmplx128_a16_t *lhs, - kmp_cmplx128_a16_t rhs, int flag); -#endif -#endif - -// OpenMP 4.0 Capture-write (swap): {v = x; x = expr;} -char __kmpc_atomic_fixed1_swp(ident_t *id_ref, int gtid, char *lhs, char rhs); -short __kmpc_atomic_fixed2_swp(ident_t *id_ref, int gtid, short *lhs, - short rhs); -kmp_int32 __kmpc_atomic_fixed4_swp(ident_t *id_ref, int gtid, kmp_int32 *lhs, - kmp_int32 rhs); -kmp_int64 __kmpc_atomic_fixed8_swp(ident_t *id_ref, int gtid, kmp_int64 *lhs, - kmp_int64 rhs); -float __kmpc_atomic_float4_swp(ident_t *id_ref, int gtid, float *lhs, - float rhs); -double __kmpc_atomic_float8_swp(ident_t *id_ref, int gtid, double *lhs, - double rhs); -long double __kmpc_atomic_float10_swp(ident_t *id_ref, int gtid, - long double *lhs, long double rhs); -#if KMP_HAVE_QUAD -QUAD_LEGACY __kmpc_atomic_float16_swp(ident_t *id_ref, int gtid, - QUAD_LEGACY *lhs, QUAD_LEGACY rhs); -#endif -// !!! 
TODO: check if we need a workaround here -void __kmpc_atomic_cmplx4_swp(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs, - kmp_cmplx32 rhs, kmp_cmplx32 *out); -// kmp_cmplx32 __kmpc_atomic_cmplx4_swp( ident_t *id_ref, int gtid, -// kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); - -kmp_cmplx64 __kmpc_atomic_cmplx8_swp(ident_t *id_ref, int gtid, - kmp_cmplx64 *lhs, kmp_cmplx64 rhs); -kmp_cmplx80 __kmpc_atomic_cmplx10_swp(ident_t *id_ref, int gtid, - kmp_cmplx80 *lhs, kmp_cmplx80 rhs); -#if KMP_HAVE_QUAD -CPLX128_LEG __kmpc_atomic_cmplx16_swp(ident_t *id_ref, int gtid, - CPLX128_LEG *lhs, CPLX128_LEG rhs); -#if (KMP_ARCH_X86) -Quad_a16_t __kmpc_atomic_float16_a16_swp(ident_t *id_ref, int gtid, - Quad_a16_t *lhs, Quad_a16_t rhs); -kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_a16_swp(ident_t *id_ref, int gtid, - kmp_cmplx128_a16_t *lhs, - kmp_cmplx128_a16_t rhs); -#endif -#endif - -// Capture routines for mixed types (RHS=float16) -#if KMP_HAVE_QUAD - -char __kmpc_atomic_fixed1_add_cpt_fp(ident_t *id_ref, int gtid, char *lhs, - _Quad rhs, int flag); -char __kmpc_atomic_fixed1_sub_cpt_fp(ident_t *id_ref, int gtid, char *lhs, - _Quad rhs, int flag); -char __kmpc_atomic_fixed1_mul_cpt_fp(ident_t *id_ref, int gtid, char *lhs, - _Quad rhs, int flag); -char __kmpc_atomic_fixed1_div_cpt_fp(ident_t *id_ref, int gtid, char *lhs, - _Quad rhs, int flag); -unsigned char __kmpc_atomic_fixed1u_add_cpt_fp(ident_t *id_ref, int gtid, - unsigned char *lhs, _Quad rhs, - int flag); -unsigned char __kmpc_atomic_fixed1u_sub_cpt_fp(ident_t *id_ref, int gtid, - unsigned char *lhs, _Quad rhs, - int flag); -unsigned char __kmpc_atomic_fixed1u_mul_cpt_fp(ident_t *id_ref, int gtid, - unsigned char *lhs, _Quad rhs, - int flag); -unsigned char __kmpc_atomic_fixed1u_div_cpt_fp(ident_t *id_ref, int gtid, - unsigned char *lhs, _Quad rhs, - int flag); - -short __kmpc_atomic_fixed2_add_cpt_fp(ident_t *id_ref, int gtid, short *lhs, - _Quad rhs, int flag); -short __kmpc_atomic_fixed2_sub_cpt_fp(ident_t *id_ref, int gtid, short *lhs, - _Quad rhs, int flag); -short __kmpc_atomic_fixed2_mul_cpt_fp(ident_t *id_ref, int gtid, short *lhs, - _Quad rhs, int flag); -short __kmpc_atomic_fixed2_div_cpt_fp(ident_t *id_ref, int gtid, short *lhs, - _Quad rhs, int flag); -unsigned short __kmpc_atomic_fixed2u_add_cpt_fp(ident_t *id_ref, int gtid, - unsigned short *lhs, _Quad rhs, - int flag); -unsigned short __kmpc_atomic_fixed2u_sub_cpt_fp(ident_t *id_ref, int gtid, - unsigned short *lhs, _Quad rhs, - int flag); -unsigned short __kmpc_atomic_fixed2u_mul_cpt_fp(ident_t *id_ref, int gtid, - unsigned short *lhs, _Quad rhs, - int flag); -unsigned short __kmpc_atomic_fixed2u_div_cpt_fp(ident_t *id_ref, int gtid, - unsigned short *lhs, _Quad rhs, - int flag); - -kmp_int32 __kmpc_atomic_fixed4_add_cpt_fp(ident_t *id_ref, int gtid, - kmp_int32 *lhs, _Quad rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_sub_cpt_fp(ident_t *id_ref, int gtid, - kmp_int32 *lhs, _Quad rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_mul_cpt_fp(ident_t *id_ref, int gtid, - kmp_int32 *lhs, _Quad rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_div_cpt_fp(ident_t *id_ref, int gtid, - kmp_int32 *lhs, _Quad rhs, int flag); -kmp_uint32 __kmpc_atomic_fixed4u_add_cpt_fp(ident_t *id_ref, int gtid, - kmp_uint32 *lhs, _Quad rhs, - int flag); -kmp_uint32 __kmpc_atomic_fixed4u_sub_cpt_fp(ident_t *id_ref, int gtid, - kmp_uint32 *lhs, _Quad rhs, - int flag); -kmp_uint32 __kmpc_atomic_fixed4u_mul_cpt_fp(ident_t *id_ref, int gtid, - kmp_uint32 *lhs, _Quad rhs, - int flag); -kmp_uint32 
__kmpc_atomic_fixed4u_div_cpt_fp(ident_t *id_ref, int gtid, - kmp_uint32 *lhs, _Quad rhs, - int flag); - -kmp_int64 __kmpc_atomic_fixed8_add_cpt_fp(ident_t *id_ref, int gtid, - kmp_int64 *lhs, _Quad rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_sub_cpt_fp(ident_t *id_ref, int gtid, - kmp_int64 *lhs, _Quad rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_mul_cpt_fp(ident_t *id_ref, int gtid, - kmp_int64 *lhs, _Quad rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_div_cpt_fp(ident_t *id_ref, int gtid, - kmp_int64 *lhs, _Quad rhs, int flag); -kmp_uint64 __kmpc_atomic_fixed8u_add_cpt_fp(ident_t *id_ref, int gtid, - kmp_uint64 *lhs, _Quad rhs, - int flag); -kmp_uint64 __kmpc_atomic_fixed8u_sub_cpt_fp(ident_t *id_ref, int gtid, - kmp_uint64 *lhs, _Quad rhs, - int flag); -kmp_uint64 __kmpc_atomic_fixed8u_mul_cpt_fp(ident_t *id_ref, int gtid, - kmp_uint64 *lhs, _Quad rhs, - int flag); -kmp_uint64 __kmpc_atomic_fixed8u_div_cpt_fp(ident_t *id_ref, int gtid, - kmp_uint64 *lhs, _Quad rhs, - int flag); - -float __kmpc_atomic_float4_add_cpt_fp(ident_t *id_ref, int gtid, - kmp_real32 *lhs, _Quad rhs, int flag); -float __kmpc_atomic_float4_sub_cpt_fp(ident_t *id_ref, int gtid, - kmp_real32 *lhs, _Quad rhs, int flag); -float __kmpc_atomic_float4_mul_cpt_fp(ident_t *id_ref, int gtid, - kmp_real32 *lhs, _Quad rhs, int flag); -float __kmpc_atomic_float4_div_cpt_fp(ident_t *id_ref, int gtid, - kmp_real32 *lhs, _Quad rhs, int flag); - -double __kmpc_atomic_float8_add_cpt_fp(ident_t *id_ref, int gtid, - kmp_real64 *lhs, _Quad rhs, int flag); -double __kmpc_atomic_float8_sub_cpt_fp(ident_t *id_ref, int gtid, - kmp_real64 *lhs, _Quad rhs, int flag); -double __kmpc_atomic_float8_mul_cpt_fp(ident_t *id_ref, int gtid, - kmp_real64 *lhs, _Quad rhs, int flag); -double __kmpc_atomic_float8_div_cpt_fp(ident_t *id_ref, int gtid, - kmp_real64 *lhs, _Quad rhs, int flag); - -long double __kmpc_atomic_float10_add_cpt_fp(ident_t *id_ref, int gtid, - long double *lhs, _Quad rhs, - int flag); -long double __kmpc_atomic_float10_sub_cpt_fp(ident_t *id_ref, int gtid, - long double *lhs, _Quad rhs, - int flag); -long double __kmpc_atomic_float10_mul_cpt_fp(ident_t *id_ref, int gtid, - long double *lhs, _Quad rhs, - int flag); -long double __kmpc_atomic_float10_div_cpt_fp(ident_t *id_ref, int gtid, - long double *lhs, _Quad rhs, - int flag); - -char __kmpc_atomic_fixed1_sub_cpt_rev_fp(ident_t *id_ref, int gtid, char *lhs, - _Quad rhs, int flag); -unsigned char __kmpc_atomic_fixed1u_sub_cpt_rev_fp(ident_t *id_ref, int gtid, - unsigned char *lhs, - _Quad rhs, int flag); -char __kmpc_atomic_fixed1_div_cpt_rev_fp(ident_t *id_ref, int gtid, char *lhs, - _Quad rhs, int flag); -unsigned char __kmpc_atomic_fixed1u_div_cpt_rev_fp(ident_t *id_ref, int gtid, - unsigned char *lhs, - _Quad rhs, int flag); -short __kmpc_atomic_fixed2_sub_cpt_rev_fp(ident_t *id_ref, int gtid, short *lhs, - _Quad rhs, int flag); -unsigned short __kmpc_atomic_fixed2u_sub_cpt_rev_fp(ident_t *id_ref, int gtid, - unsigned short *lhs, - _Quad rhs, int flag); -short __kmpc_atomic_fixed2_div_cpt_rev_fp(ident_t *id_ref, int gtid, short *lhs, - _Quad rhs, int flag); -unsigned short __kmpc_atomic_fixed2u_div_cpt_rev_fp(ident_t *id_ref, int gtid, - unsigned short *lhs, - _Quad rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_sub_cpt_rev_fp(ident_t *id_ref, int gtid, - kmp_int32 *lhs, _Quad rhs, - int flag); -kmp_uint32 __kmpc_atomic_fixed4u_sub_cpt_rev_fp(ident_t *id_ref, int gtid, - kmp_uint32 *lhs, _Quad rhs, - int flag); -kmp_int32 __kmpc_atomic_fixed4_div_cpt_rev_fp(ident_t 
*id_ref, int gtid, - kmp_int32 *lhs, _Quad rhs, - int flag); -kmp_uint32 __kmpc_atomic_fixed4u_div_cpt_rev_fp(ident_t *id_ref, int gtid, - kmp_uint32 *lhs, _Quad rhs, - int flag); -kmp_int64 __kmpc_atomic_fixed8_sub_cpt_rev_fp(ident_t *id_ref, int gtid, - kmp_int64 *lhs, _Quad rhs, - int flag); -kmp_uint64 __kmpc_atomic_fixed8u_sub_cpt_rev_fp(ident_t *id_ref, int gtid, - kmp_uint64 *lhs, _Quad rhs, - int flag); -kmp_int64 __kmpc_atomic_fixed8_div_cpt_rev_fp(ident_t *id_ref, int gtid, - kmp_int64 *lhs, _Quad rhs, - int flag); -kmp_uint64 __kmpc_atomic_fixed8u_div_cpt_rev_fp(ident_t *id_ref, int gtid, - kmp_uint64 *lhs, _Quad rhs, - int flag); -float __kmpc_atomic_float4_sub_cpt_rev_fp(ident_t *id_ref, int gtid, float *lhs, - _Quad rhs, int flag); -float __kmpc_atomic_float4_div_cpt_rev_fp(ident_t *id_ref, int gtid, float *lhs, - _Quad rhs, int flag); -double __kmpc_atomic_float8_sub_cpt_rev_fp(ident_t *id_ref, int gtid, - double *lhs, _Quad rhs, int flag); -double __kmpc_atomic_float8_div_cpt_rev_fp(ident_t *id_ref, int gtid, - double *lhs, _Quad rhs, int flag); -long double __kmpc_atomic_float10_sub_cpt_rev_fp(ident_t *id_ref, int gtid, - long double *lhs, _Quad rhs, - int flag); -long double __kmpc_atomic_float10_div_cpt_rev_fp(ident_t *id_ref, int gtid, - long double *lhs, _Quad rhs, - int flag); - -#endif // KMP_HAVE_QUAD - -// End of OpenMP 4.0 capture - -#endif // OMP_40_ENABLED - -#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 - -/* ------------------------------------------------------------------------ */ - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif /* KMP_ATOMIC_H */ - -// end of file Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_atomic.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/exports_so.txt =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/exports_so.txt (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/exports_so.txt (nonexistent) @@ -1,126 +0,0 @@ -# exports_so.txt # - -# -#//===----------------------------------------------------------------------===// -#// -#// The LLVM Compiler Infrastructure -#// -#// This file is dual licensed under the MIT and the University of Illinois Open -#// Source Licenses. See LICENSE.txt for details. -#// -#//===----------------------------------------------------------------------===// -# - -# This is the version script for the OMP RTL shared library (libomp*.so) - -VERSION { - - global: # Exported symbols. - - # - # "Normal" symbols. - # - omp_*; # Standard OpenMP functions. - OMP_*; # Standard OpenMP symbols. - - # - # OMPT API - # - ompt_start_tool; # OMPT start interface - - # icc drops the weak attribute at the linking step without the following line: - Annotate*; # TSAN annotation - - ompc_*; # omp.h renames some standard functions to ompc_*. - kmp_*; # Intel extensions. - kmpc_*; # Intel extensions. - __kmpc_*; # Functions called by compiler-generated code. - GOMP_*; # GNU C compatibility functions. - - _You_must_link_with_*; # Mutual detection/MS compatibility symbols. - - - # - # Debugger support. 
- # -#if USE_DEBUGGER - __kmp_debugging; - __kmp_omp_debug_struct_info; -#endif /* USE_DEBUGGER */ - - # - # Internal functions exported for testing purposes. - # - __kmp_get_reduce_method; - ___kmp_allocate; - ___kmp_free; - __kmp_thread_pool; - __kmp_thread_pool_nth; - - __kmp_reset_stats; - -#if USE_ITT_BUILD - # - # ITT support. - # - # The following entry points are added so that the backtraces from - # the tools contain meaningful names for all the functions that might - # appear in a backtrace of a thread which is blocked in the RTL. - __kmp_acquire_drdpa_lock; - __kmp_acquire_nested_drdpa_lock; - __kmp_acquire_nested_queuing_lock; - __kmp_acquire_nested_tas_lock; - __kmp_acquire_nested_ticket_lock; - __kmp_acquire_queuing_lock; - __kmp_acquire_tas_lock; - __kmp_acquire_ticket_lock; - __kmp_fork_call; - __kmp_invoke_microtask; -#if KMP_USE_MONITOR - __kmp_launch_monitor; - __kmp_reap_monitor; -#endif - __kmp_launch_worker; - __kmp_reap_worker; - __kmp_release_64; - __kmp_wait_64; - __kmp_wait_yield_4; - - # ittnotify symbols to be used by debugger - __kmp_itt_fini_ittlib; - __kmp_itt_init_ittlib; -#endif /* USE_ITT_BUILD */ - - local: # Non-exported symbols. - - *; # All other symbols are not exported. - -}; # VERSION - -# sets up GCC OMP_ version dependency chain -OMP_1.0 { -}; -OMP_2.0 { -} OMP_1.0; -OMP_3.0 { -} OMP_2.0; -OMP_3.1 { -} OMP_3.0; -OMP_4.0 { -} OMP_3.1; -OMP_4.5 { -} OMP_4.0; - -# sets up GCC GOMP_ version dependency chain -GOMP_1.0 { -}; -GOMP_2.0 { -} GOMP_1.0; -GOMP_3.0 { -} GOMP_2.0; -GOMP_4.0 { -} GOMP_3.0; -GOMP_4.5 { -} GOMP_4.0; - -# end of file # Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/exports_so.txt ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_debug.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_debug.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_debug.cpp (nonexistent) @@ -1,132 +0,0 @@ -/* - * kmp_debug.cpp -- debug utilities for the Guide library - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#include "kmp.h" -#include "kmp_debug.h" /* really necessary? */ -#include "kmp_i18n.h" -#include "kmp_io.h" - -#ifdef KMP_DEBUG -void __kmp_debug_printf_stdout(char const *format, ...) { - va_list ap; - va_start(ap, format); - - __kmp_vprintf(kmp_out, format, ap); - - va_end(ap); -} -#endif - -void __kmp_debug_printf(char const *format, ...) { - va_list ap; - va_start(ap, format); - - __kmp_vprintf(kmp_err, format, ap); - - va_end(ap); } - -#ifdef KMP_USE_ASSERT -int __kmp_debug_assert(char const *msg, char const *file, int line) { - - if (file == NULL) { - file = KMP_I18N_STR(UnknownFile); - } else { - // Remove directories from the path, leaving only the file name. The file - // name is enough; there is no need to bother developers and customers with - // full paths. 
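// (Illustrative example with a hypothetical path: an assertion failing in
// "/usr/src/contrib/openmp/runtime/src/kmp_runtime.cpp" would be reported
// against just "kmp_runtime.cpp".)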
- char const *slash = strrchr(file, '/'); - if (slash != NULL) { - file = slash + 1; - } - } - -#ifdef KMP_DEBUG - __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock); - __kmp_debug_printf("Assertion failure at %s(%d): %s.\n", file, line, msg); - __kmp_release_bootstrap_lock(&__kmp_stdio_lock); -#ifdef USE_ASSERT_BREAK -#if KMP_OS_WINDOWS - DebugBreak(); -#endif -#endif // USE_ASSERT_BREAK -#ifdef USE_ASSERT_STALL - /* __kmp_infinite_loop(); */ - for (;;) - ; -#endif // USE_ASSERT_STALL -#ifdef USE_ASSERT_SEG - { - int volatile *ZERO = (int *)0; - ++(*ZERO); - } -#endif // USE_ASSERT_SEG -#endif - - __kmp_fatal(KMP_MSG(AssertionFailure, file, line), KMP_HNT(SubmitBugReport), - __kmp_msg_null); - - return 0; - -} // __kmp_debug_assert - -#endif // KMP_USE_ASSERT - -/* Dump debugging buffer to stderr */ -void __kmp_dump_debug_buffer(void) { - if (__kmp_debug_buffer != NULL) { - int i; - int dc = __kmp_debug_count; - char *db = &__kmp_debug_buffer[(dc % __kmp_debug_buf_lines) * - __kmp_debug_buf_chars]; - char *db_end = - &__kmp_debug_buffer[__kmp_debug_buf_lines * __kmp_debug_buf_chars]; - char *db2; - - __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock); - __kmp_printf_no_lock("\nStart dump of debugging buffer (entry=%d):\n", - dc % __kmp_debug_buf_lines); - - for (i = 0; i < __kmp_debug_buf_lines; i++) { - - if (*db != '\0') { - /* Fix up where no carriage return before string termination char */ - for (db2 = db + 1; db2 < db + __kmp_debug_buf_chars - 1; db2++) { - if (*db2 == '\0') { - if (*(db2 - 1) != '\n') { - *db2 = '\n'; - *(db2 + 1) = '\0'; - } - break; - } - } - /* Handle case at end by shortening the printed message by one char if - * necessary */ - if (db2 == db + __kmp_debug_buf_chars - 1 && *db2 == '\0' && - *(db2 - 1) != '\n') { - *(db2 - 1) = '\n'; - } - - __kmp_printf_no_lock("%4d: %.*s", i, __kmp_debug_buf_chars, db); - *db = '\0'; /* only let it print once! */ - } - - db += __kmp_debug_buf_chars; - if (db >= db_end) - db = __kmp_debug_buffer; - } - - __kmp_printf_no_lock("End dump of debugging buffer (entry=%d).\n\n", - (dc + i - 1) % __kmp_debug_buf_lines); - __kmp_release_bootstrap_lock(&__kmp_stdio_lock); - } -} Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_debug.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_settings.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_settings.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_settings.h (nonexistent) @@ -1,69 +0,0 @@ -/* - * kmp_settings.h -- Initialize environment variables - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - -#ifndef KMP_SETTINGS_H -#define KMP_SETTINGS_H - -void __kmp_reset_global_vars(void); -void __kmp_env_initialize(char const *); -void __kmp_env_print(); -#if OMP_40_ENABLED -void __kmp_env_print_2(); -#endif // OMP_40_ENABLED - -int __kmp_initial_threads_capacity(int req_nproc); -void __kmp_init_dflt_team_nth(); -int __kmp_convert_to_milliseconds(char const *); -int __kmp_default_tp_capacity(int, int, int); - -#if KMP_MIC -#define KMP_STR_BUF_PRINT_NAME \ - __kmp_str_buf_print(buffer, " %s %s", KMP_I18N_STR(Device), name) -#define KMP_STR_BUF_PRINT_NAME_EX(x) \ - __kmp_str_buf_print(buffer, " %s %s='", KMP_I18N_STR(Device), x) -#define KMP_STR_BUF_PRINT_BOOL_EX(n, v, t, f) \ - __kmp_str_buf_print(buffer, " %s %s='%s'\n", KMP_I18N_STR(Device), n, \ - (v) ? t : f) -#define KMP_STR_BUF_PRINT_BOOL \ - KMP_STR_BUF_PRINT_BOOL_EX(name, value, "TRUE", "FALSE") -#define KMP_STR_BUF_PRINT_INT \ - __kmp_str_buf_print(buffer, " %s %s='%d'\n", KMP_I18N_STR(Device), name, \ - value) -#define KMP_STR_BUF_PRINT_UINT64 \ - __kmp_str_buf_print(buffer, " %s %s='%" KMP_UINT64_SPEC "'\n", \ - KMP_I18N_STR(Device), name, value); -#define KMP_STR_BUF_PRINT_STR \ - __kmp_str_buf_print(buffer, " %s %s='%s'\n", KMP_I18N_STR(Device), name, \ - value) -#else -#define KMP_STR_BUF_PRINT_NAME \ - __kmp_str_buf_print(buffer, " %s %s", KMP_I18N_STR(Host), name) -#define KMP_STR_BUF_PRINT_NAME_EX(x) \ - __kmp_str_buf_print(buffer, " %s %s='", KMP_I18N_STR(Host), x) -#define KMP_STR_BUF_PRINT_BOOL_EX(n, v, t, f) \ - __kmp_str_buf_print(buffer, " %s %s='%s'\n", KMP_I18N_STR(Host), n, \ - (v) ? t : f) -#define KMP_STR_BUF_PRINT_BOOL \ - KMP_STR_BUF_PRINT_BOOL_EX(name, value, "TRUE", "FALSE") -#define KMP_STR_BUF_PRINT_INT \ - __kmp_str_buf_print(buffer, " %s %s='%d'\n", KMP_I18N_STR(Host), name, value) -#define KMP_STR_BUF_PRINT_UINT64 \ - __kmp_str_buf_print(buffer, " %s %s='%" KMP_UINT64_SPEC "'\n", \ - KMP_I18N_STR(Host), name, value); -#define KMP_STR_BUF_PRINT_STR \ - __kmp_str_buf_print(buffer, " %s %s='%s'\n", KMP_I18N_STR(Host), name, value) -#endif - -#endif // KMP_SETTINGS_H - -// end of file // Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_settings.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_taskdeps.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_taskdeps.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_taskdeps.h (nonexistent) @@ -1,150 +0,0 @@ -/* - * kmp_taskdeps.h - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - - -#ifndef KMP_TASKDEPS_H -#define KMP_TASKDEPS_H - -#include "kmp.h" - -#if OMP_40_ENABLED - -#define KMP_ACQUIRE_DEPNODE(gtid, n) __kmp_acquire_lock(&(n)->dn.lock, (gtid)) -#define KMP_RELEASE_DEPNODE(gtid, n) __kmp_release_lock(&(n)->dn.lock, (gtid)) - -static inline void __kmp_node_deref(kmp_info_t *thread, kmp_depnode_t *node) { - if (!node) - return; - - kmp_int32 n = KMP_ATOMIC_DEC(&node->dn.nrefs) - 1; - if (n == 0) { - KMP_ASSERT(node->dn.nrefs == 0); -#if USE_FAST_MEMORY - __kmp_fast_free(thread, node); -#else - __kmp_thread_free(thread, node); -#endif - } -} - -static inline void __kmp_depnode_list_free(kmp_info_t *thread, - kmp_depnode_list *list) { - kmp_depnode_list *next; - - for (; list; list = next) { - next = list->next; - - __kmp_node_deref(thread, list->node); -#if USE_FAST_MEMORY - __kmp_fast_free(thread, list); -#else - __kmp_thread_free(thread, list); -#endif - } -} - -static inline void __kmp_dephash_free_entries(kmp_info_t *thread, - kmp_dephash_t *h) { - for (size_t i = 0; i < h->size; i++) { - if (h->buckets[i]) { - kmp_dephash_entry_t *next; - for (kmp_dephash_entry_t *entry = h->buckets[i]; entry; entry = next) { - next = entry->next_in_bucket; - __kmp_depnode_list_free(thread, entry->last_ins); - __kmp_depnode_list_free(thread, entry->last_mtxs); - __kmp_node_deref(thread, entry->last_out); - if (entry->mtx_lock) { - __kmp_destroy_lock(entry->mtx_lock); - __kmp_free(entry->mtx_lock); - } -#if USE_FAST_MEMORY - __kmp_fast_free(thread, entry); -#else - __kmp_thread_free(thread, entry); -#endif - } - h->buckets[i] = 0; - } - } -} - -static inline void __kmp_dephash_free(kmp_info_t *thread, kmp_dephash_t *h) { - __kmp_dephash_free_entries(thread, h); -#if USE_FAST_MEMORY - __kmp_fast_free(thread, h); -#else - __kmp_thread_free(thread, h); -#endif -} - -static inline void __kmp_release_deps(kmp_int32 gtid, kmp_taskdata_t *task) { - kmp_info_t *thread = __kmp_threads[gtid]; - kmp_depnode_t *node = task->td_depnode; - - if (task->td_dephash) { - KA_TRACE( - 40, ("__kmp_release_deps: T#%d freeing dependencies hash of task %p.\n", - gtid, task)); - __kmp_dephash_free(thread, task->td_dephash); - task->td_dephash = NULL; - } - - if (!node) - return; - - KA_TRACE(20, ("__kmp_release_deps: T#%d notifying successors of task %p.\n", - gtid, task)); - - KMP_ACQUIRE_DEPNODE(gtid, node); - node->dn.task = - NULL; // mark this task as finished, so no new dependencies are generated - KMP_RELEASE_DEPNODE(gtid, node); - - kmp_depnode_list_t *next; - for (kmp_depnode_list_t *p = node->dn.successors; p; p = next) { - kmp_depnode_t *successor = p->node; - kmp_int32 npredecessors = KMP_ATOMIC_DEC(&successor->dn.npredecessors) - 1; - - // successor task can be NULL for wait_depends or because deps are still - // being processed - if (npredecessors == 0) { - KMP_MB(); - if (successor->dn.task) { - KA_TRACE(20, ("__kmp_release_deps: T#%d successor %p of %p scheduled " - "for execution.\n", - gtid, successor->dn.task, task)); - __kmp_omp_task(gtid, successor->dn.task, false); - } - } - - next = p->next; - __kmp_node_deref(thread, p->node); -#if USE_FAST_MEMORY - __kmp_fast_free(thread, p); -#else - __kmp_thread_free(thread, p); -#endif - } - - __kmp_node_deref(thread, node); - - KA_TRACE( - 20, - ("__kmp_release_deps: T#%d all successors of %p notified of completion\n", - gtid, task)); -} - -#endif // OMP_40_ENABLED - -#endif // KMP_TASKDEPS_H Property changes on: 
vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_taskdeps.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_io.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_io.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_io.cpp (nonexistent) @@ -1,230 +0,0 @@ -/* - * kmp_io.cpp -- RTL IO - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#include <stdarg.h> -#include <stddef.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#ifndef __ABSOFT_WIN -#include <sys/types.h> -#endif - -#include "kmp.h" // KMP_GTID_DNE, __kmp_debug_buf, etc -#include "kmp_io.h" -#include "kmp_lock.h" -#include "kmp_os.h" -#include "kmp_str.h" - -#if KMP_OS_WINDOWS -#if KMP_MSVC_COMPAT -#pragma warning(push) -#pragma warning(disable : 271 310) -#endif -#include <windows.h> -#if KMP_MSVC_COMPAT -#pragma warning(pop) -#endif -#endif - -/* ------------------------------------------------------------------------ */ - -kmp_bootstrap_lock_t __kmp_stdio_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( - __kmp_stdio_lock); /* Control stdio functions */ -kmp_bootstrap_lock_t __kmp_console_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( - __kmp_console_lock); /* Control console initialization */ - -#if KMP_OS_WINDOWS - -static HANDLE __kmp_stdout = NULL; -static HANDLE __kmp_stderr = NULL; -static int __kmp_console_exists = FALSE; -static kmp_str_buf_t __kmp_console_buf; - -static int is_console(void) { - char buffer[128]; - DWORD rc = 0; - DWORD err = 0; - // Try to get console title. - SetLastError(0); - // GetConsoleTitle does not reset last error in case of success or short - // buffer, so we need to clear it explicitly. - rc = GetConsoleTitle(buffer, sizeof(buffer)); - if (rc == 0) { - // rc == 0 means getting console title failed. Let us find out why. - err = GetLastError(); - // err == 0 means buffer too short (we suppose console exists). - // In Windows applications we usually have err == 6 (invalid handle). - } - return rc > 0 || err == 0; -} - -void __kmp_close_console(void) { - /* wait until user presses return before closing window */ - /* TODO only close if a window was opened */ - if (__kmp_console_exists) { - __kmp_stdout = NULL; - __kmp_stderr = NULL; - __kmp_str_buf_free(&__kmp_console_buf); - __kmp_console_exists = FALSE; - } -} - -/* For Windows, call this before stdout, stderr, or stdin are used. - It opens a console window and starts processing */ -static void __kmp_redirect_output(void) { - __kmp_acquire_bootstrap_lock(&__kmp_console_lock); - - if (!__kmp_console_exists) { - HANDLE ho; - HANDLE he; - - __kmp_str_buf_init(&__kmp_console_buf); - - AllocConsole(); - // We do not check the result of AllocConsole because - // 1. the call is harmless - // 2. it is not clear how to communicate failure - // 3. 
we will detect failure later when we get handle(s) - - ho = GetStdHandle(STD_OUTPUT_HANDLE); - if (ho == INVALID_HANDLE_VALUE || ho == NULL) { - - DWORD err = GetLastError(); - // TODO: output error somehow (maybe message box) - __kmp_stdout = NULL; - - } else { - - __kmp_stdout = ho; // temporary code, need new global for ho - } - he = GetStdHandle(STD_ERROR_HANDLE); - if (he == INVALID_HANDLE_VALUE || he == NULL) { - - DWORD err = GetLastError(); - // TODO: output error somehow (maybe message box) - __kmp_stderr = NULL; - - } else { - - __kmp_stderr = he; // temporary code, need new global - } - __kmp_console_exists = TRUE; - } - __kmp_release_bootstrap_lock(&__kmp_console_lock); -} - -#else -#define __kmp_stderr (stderr) -#define __kmp_stdout (stdout) -#endif /* KMP_OS_WINDOWS */ - -void __kmp_vprintf(enum kmp_io out_stream, char const *format, va_list ap) { -#if KMP_OS_WINDOWS - if (!__kmp_console_exists) { - __kmp_redirect_output(); - } - if (!__kmp_stderr && out_stream == kmp_err) { - return; - } - if (!__kmp_stdout && out_stream == kmp_out) { - return; - } -#endif /* KMP_OS_WINDOWS */ - auto stream = ((out_stream == kmp_out) ? __kmp_stdout : __kmp_stderr); - - if (__kmp_debug_buf && __kmp_debug_buffer != NULL) { - - int dc = __kmp_debug_count++ % __kmp_debug_buf_lines; - char *db = &__kmp_debug_buffer[dc * __kmp_debug_buf_chars]; - int chars = 0; - -#ifdef KMP_DEBUG_PIDS - chars = KMP_SNPRINTF(db, __kmp_debug_buf_chars, "pid=%d: ", - (kmp_int32)getpid()); -#endif - chars += KMP_VSNPRINTF(db, __kmp_debug_buf_chars, format, ap); - - if (chars + 1 > __kmp_debug_buf_chars) { - if (chars + 1 > __kmp_debug_buf_warn_chars) { -#if KMP_OS_WINDOWS - DWORD count; - __kmp_str_buf_print(&__kmp_console_buf, "OMP warning: Debugging buffer " - "overflow; increase " - "KMP_DEBUG_BUF_CHARS to %d\n", - chars + 1); - WriteFile(stream, __kmp_console_buf.str, __kmp_console_buf.used, &count, - NULL); - __kmp_str_buf_clear(&__kmp_console_buf); -#else - fprintf(stream, "OMP warning: Debugging buffer overflow; " - "increase KMP_DEBUG_BUF_CHARS to %d\n", - chars + 1); - fflush(stream); -#endif - __kmp_debug_buf_warn_chars = chars + 1; - } - /* terminate string if overflow occurred */ - db[__kmp_debug_buf_chars - 2] = '\n'; - db[__kmp_debug_buf_chars - 1] = '\0'; - } - } else { -#if KMP_OS_WINDOWS - DWORD count; -#ifdef KMP_DEBUG_PIDS - __kmp_str_buf_print(&__kmp_console_buf, "pid=%d: ", (kmp_int32)getpid()); -#endif - __kmp_str_buf_vprint(&__kmp_console_buf, format, ap); - WriteFile(stream, __kmp_console_buf.str, __kmp_console_buf.used, &count, - NULL); - __kmp_str_buf_clear(&__kmp_console_buf); -#else -#ifdef KMP_DEBUG_PIDS - fprintf(stream, "pid=%d: ", (kmp_int32)getpid()); -#endif - vfprintf(stream, format, ap); - fflush(stream); -#endif - } -} - -void __kmp_printf(char const *format, ...) { - va_list ap; - va_start(ap, format); - - __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock); - __kmp_vprintf(kmp_err, format, ap); - __kmp_release_bootstrap_lock(&__kmp_stdio_lock); - - va_end(ap); -} - -void __kmp_printf_no_lock(char const *format, ...) { - va_list ap; - va_start(ap, format); - - __kmp_vprintf(kmp_err, format, ap); - - va_end(ap); -} - -void __kmp_fprintf(enum kmp_io stream, char const *format, ...) 
{ - va_list ap; - va_start(ap, format); - - __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock); - __kmp_vprintf(stream, format, ap); - __kmp_release_bootstrap_lock(&__kmp_stdio_lock); - - va_end(ap); -} Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_io.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_affinity.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_affinity.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_affinity.cpp (nonexistent) @@ -1,5379 +0,0 @@ -/* - * kmp_affinity.cpp -- affinity management - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#include "kmp.h" -#include "kmp_affinity.h" -#include "kmp_i18n.h" -#include "kmp_io.h" -#include "kmp_str.h" -#include "kmp_wrapper_getpid.h" -#if KMP_USE_HIER_SCHED -#include "kmp_dispatch_hier.h" -#endif - -// Store the real or imagined machine hierarchy here -static hierarchy_info machine_hierarchy; - -void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); } - -void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) { - kmp_uint32 depth; - // The test below is true if affinity is available, but set to "none". Need to - // init on first use of hierarchical barrier. 
- if (TCR_1(machine_hierarchy.uninitialized)) - machine_hierarchy.init(NULL, nproc); - - // Adjust the hierarchy in case num threads exceeds original - if (nproc > machine_hierarchy.base_num_threads) - machine_hierarchy.resize(nproc); - - depth = machine_hierarchy.depth; - KMP_DEBUG_ASSERT(depth > 0); - - thr_bar->depth = depth; - thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0] - 1; - thr_bar->skip_per_level = machine_hierarchy.skipPerLevel; -} - -#if KMP_AFFINITY_SUPPORTED - -bool KMPAffinity::picked_api = false; - -void *KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); } -void *KMPAffinity::Mask::operator new[](size_t n) { return __kmp_allocate(n); } -void KMPAffinity::Mask::operator delete(void *p) { __kmp_free(p); } -void KMPAffinity::Mask::operator delete[](void *p) { __kmp_free(p); } -void *KMPAffinity::operator new(size_t n) { return __kmp_allocate(n); } -void KMPAffinity::operator delete(void *p) { __kmp_free(p); } - -void KMPAffinity::pick_api() { - KMPAffinity *affinity_dispatch; - if (picked_api) - return; -#if KMP_USE_HWLOC - // Only use Hwloc if affinity isn't explicitly disabled and - // user requests Hwloc topology method - if (__kmp_affinity_top_method == affinity_top_method_hwloc && - __kmp_affinity_type != affinity_disabled) { - affinity_dispatch = new KMPHwlocAffinity(); - } else -#endif - { - affinity_dispatch = new KMPNativeAffinity(); - } - __kmp_affinity_dispatch = affinity_dispatch; - picked_api = true; -} - -void KMPAffinity::destroy_api() { - if (__kmp_affinity_dispatch != NULL) { - delete __kmp_affinity_dispatch; - __kmp_affinity_dispatch = NULL; - picked_api = false; - } -} - -#define KMP_ADVANCE_SCAN(scan) \ - while (*scan != '\0') { \ - scan++; \ - } - -// Print the affinity mask to the character array in a pretty format. -// The format is a comma separated list of non-negative integers or integer -// ranges: e.g., 1,2,3-5,7,9-15 -// The format can also be the string "{}" if no bits are set in mask -char *__kmp_affinity_print_mask(char *buf, int buf_len, - kmp_affin_mask_t *mask) { - int start = 0, finish = 0, previous = 0; - bool first_range; - KMP_ASSERT(buf); - KMP_ASSERT(buf_len >= 40); - KMP_ASSERT(mask); - char *scan = buf; - char *end = buf + buf_len - 1; - - // Check for empty set. 
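// (Example of the format, with hypothetical bit values: a mask with bits
// {0,1,2,4} set prints as "0-2,4"; an empty mask, checked next, prints as
// "{}".)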
- if (mask->begin() == mask->end()) { - KMP_SNPRINTF(scan, end - scan + 1, "{}"); - KMP_ADVANCE_SCAN(scan); - KMP_ASSERT(scan <= end); - return buf; - } - - first_range = true; - start = mask->begin(); - while (1) { - // Find next range - // [start, previous] is inclusive range of contiguous bits in mask - for (finish = mask->next(start), previous = start; - finish == previous + 1 && finish != mask->end(); - finish = mask->next(finish)) { - previous = finish; - } - - // The first range does not need a comma printed before it, but the rest - // of the ranges do need a comma beforehand - if (!first_range) { - KMP_SNPRINTF(scan, end - scan + 1, "%s", ","); - KMP_ADVANCE_SCAN(scan); - } else { - first_range = false; - } - // Range with three or more contiguous bits in the affinity mask - if (previous - start > 1) { - KMP_SNPRINTF(scan, end - scan + 1, "%d-%d", static_cast<int>(start), - static_cast<int>(previous)); - } else { - // Range with one or two contiguous bits in the affinity mask - KMP_SNPRINTF(scan, end - scan + 1, "%d", static_cast<int>(start)); - KMP_ADVANCE_SCAN(scan); - if (previous - start > 0) { - KMP_SNPRINTF(scan, end - scan + 1, ",%d", static_cast<int>(previous)); - } - } - KMP_ADVANCE_SCAN(scan); - // Start over with new start point - start = finish; - if (start == mask->end()) - break; - // Check for overflow - if (end - scan < 2) - break; - } - - // Check for overflow - KMP_ASSERT(scan <= end); - return buf; -} -#undef KMP_ADVANCE_SCAN - -// Print the affinity mask to the string buffer object in a pretty format -// The format is a comma separated list of non-negative integers or integer -// ranges: e.g., 1,2,3-5,7,9-15 -// The format can also be the string "{}" if no bits are set in mask -kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf, - kmp_affin_mask_t *mask) { - int start = 0, finish = 0, previous = 0; - bool first_range; - KMP_ASSERT(buf); - KMP_ASSERT(mask); - - __kmp_str_buf_clear(buf); - - // Check for empty set. 
- if (mask->begin() == mask->end()) { - __kmp_str_buf_print(buf, "%s", "{}"); - return buf; - } - - first_range = true; - start = mask->begin(); - while (1) { - // Find next range - // [start, previous] is inclusive range of contiguous bits in mask - for (finish = mask->next(start), previous = start; - finish == previous + 1 && finish != mask->end(); - finish = mask->next(finish)) { - previous = finish; - } - - // The first range does not need a comma printed before it, but the rest - // of the ranges do need a comma beforehand - if (!first_range) { - __kmp_str_buf_print(buf, "%s", ","); - } else { - first_range = false; - } - // Range with three or more contiguous bits in the affinity mask - if (previous - start > 1) { - __kmp_str_buf_print(buf, "%d-%d", static_cast<int>(start), - static_cast<int>(previous)); - } else { - // Range with one or two contiguous bits in the affinity mask - __kmp_str_buf_print(buf, "%d", static_cast<int>(start)); - if (previous - start > 0) { - __kmp_str_buf_print(buf, ",%d", static_cast<int>(previous)); - } - } - // Start over with new start point - start = finish; - if (start == mask->end()) - break; - } - return buf; -} - -void __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) { - KMP_CPU_ZERO(mask); - -#if KMP_GROUP_AFFINITY - - if (__kmp_num_proc_groups > 1) { - int group; - KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL); - for (group = 0; group < __kmp_num_proc_groups; group++) { - int i; - int num = __kmp_GetActiveProcessorCount(group); - for (i = 0; i < num; i++) { - KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask); - } - } - } else - -#endif /* KMP_GROUP_AFFINITY */ - - { - int proc; - for (proc = 0; proc < __kmp_xproc; proc++) { - KMP_CPU_SET(proc, mask); - } - } -} - -// When sorting by labels, __kmp_affinity_assign_child_nums() must first be -// called to renumber the labels from [0..n] and place them into the child_num -// vector of the address object. This is done in case the labels used for -// the children at one node of the hierarchy differ from those used for -// another node at the same level. Example: suppose the machine has 2 nodes -// with 2 packages each. The first node contains packages 601 and 602, and the -// second node contains packages 603 and 604. If we try to sort the table -// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604 -// because we are paying attention to the labels themselves, not the ordinal -// child numbers. By using the child numbers in the sort, the result is -// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604. 
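// Restated compactly (one reading of the example above): after the
// renumbering, the scatter sort works on the per-level ordinals rather than
// the raw 60x labels, so the packages come out interleaved across the two
// nodes as 601, 603, 602, 604 instead of the label order 601, 602, 603, 604.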
-static void __kmp_affinity_assign_child_nums(AddrUnsPair *address2os, - int numAddrs) { - KMP_DEBUG_ASSERT(numAddrs > 0); - int depth = address2os->first.depth; - unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned)); - unsigned *lastLabel = (unsigned *)__kmp_allocate(depth * sizeof(unsigned)); - int labCt; - for (labCt = 0; labCt < depth; labCt++) { - address2os[0].first.childNums[labCt] = counts[labCt] = 0; - lastLabel[labCt] = address2os[0].first.labels[labCt]; - } - int i; - for (i = 1; i < numAddrs; i++) { - for (labCt = 0; labCt < depth; labCt++) { - if (address2os[i].first.labels[labCt] != lastLabel[labCt]) { - int labCt2; - for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) { - counts[labCt2] = 0; - lastLabel[labCt2] = address2os[i].first.labels[labCt2]; - } - counts[labCt]++; - lastLabel[labCt] = address2os[i].first.labels[labCt]; - break; - } - } - for (labCt = 0; labCt < depth; labCt++) { - address2os[i].first.childNums[labCt] = counts[labCt]; - } - for (; labCt < (int)Address::maxDepth; labCt++) { - address2os[i].first.childNums[labCt] = 0; - } - } - __kmp_free(lastLabel); - __kmp_free(counts); -} - -// All of the __kmp_affinity_create_*_map() routines should set -// __kmp_affinity_masks to a vector of affinity mask objects of length -// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and return -// the number of levels in the machine topology tree (zero if -// __kmp_affinity_type == affinity_none). -// -// All of the __kmp_affinity_create_*_map() routines should set -// *__kmp_affin_fullMask to the affinity mask for the initialization thread. -// They need to save and restore the mask, and it could be needed later, so -// saving it is just an optimization to avoid calling kmp_get_system_affinity() -// again. -kmp_affin_mask_t *__kmp_affin_fullMask = NULL; - -static int nCoresPerPkg, nPackages; -static int __kmp_nThreadsPerCore; -#ifndef KMP_DFLT_NTH_CORES -static int __kmp_ncores; -#endif -static int *__kmp_pu_os_idx = NULL; - -// __kmp_affinity_uniform_topology() doesn't work when called from -// places which support arbitrarily many levels in the machine topology -// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map() -// __kmp_affinity_create_x2apicid_map(). -inline static bool __kmp_affinity_uniform_topology() { - return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages); -} - -// Print out the detailed machine topology map, i.e. the physical locations -// of each OS proc. 
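// For orientation, on a hypothetical machine with 1 package, 2 cores and
// 2 hardware threads per core, the map printed below comes out roughly as
// follows (the exact wording is supplied by the message catalog):
//   KMP_AFFINITY: OS proc 0 maps to package 0 core 0 thread 0
//   KMP_AFFINITY: OS proc 1 maps to package 0 core 0 thread 1
//   KMP_AFFINITY: OS proc 2 maps to package 0 core 1 thread 0
//   KMP_AFFINITY: OS proc 3 maps to package 0 core 1 thread 1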
-static void __kmp_affinity_print_topology(AddrUnsPair *address2os, int len, - int depth, int pkgLevel, - int coreLevel, int threadLevel) { - int proc; - - KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY"); - for (proc = 0; proc < len; proc++) { - int level; - kmp_str_buf_t buf; - __kmp_str_buf_init(&buf); - for (level = 0; level < depth; level++) { - if (level == threadLevel) { - __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread)); - } else if (level == coreLevel) { - __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core)); - } else if (level == pkgLevel) { - __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package)); - } else if (level > pkgLevel) { - __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node), - level - pkgLevel - 1); - } else { - __kmp_str_buf_print(&buf, "L%d ", level); - } - __kmp_str_buf_print(&buf, "%d ", address2os[proc].first.labels[level]); - } - KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second, - buf.str); - __kmp_str_buf_free(&buf); - } -} - -#if KMP_USE_HWLOC - -static void __kmp_affinity_print_hwloc_tp(AddrUnsPair *addrP, int len, - int depth, int *levels) { - int proc; - kmp_str_buf_t buf; - __kmp_str_buf_init(&buf); - KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY"); - for (proc = 0; proc < len; proc++) { - __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Package), - addrP[proc].first.labels[0]); - if (depth > 1) { - int level = 1; // iterate over levels - int label = 1; // iterate over labels - if (__kmp_numa_detected) - // node level follows package - if (levels[level++] > 0) - __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Node), - addrP[proc].first.labels[label++]); - if (__kmp_tile_depth > 0) - // tile level follows node if any, or package - if (levels[level++] > 0) - __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Tile), - addrP[proc].first.labels[label++]); - if (levels[level++] > 0) - // core level follows - __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Core), - addrP[proc].first.labels[label++]); - if (levels[level++] > 0) - // thread level is the latest - __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Thread), - addrP[proc].first.labels[label++]); - KMP_DEBUG_ASSERT(label == depth); - } - KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", addrP[proc].second, buf.str); - __kmp_str_buf_clear(&buf); - } - __kmp_str_buf_free(&buf); -} - -static int nNodePerPkg, nTilePerPkg, nTilePerNode, nCorePerNode, nCorePerTile; - -// This function removes the topology levels that are radix 1 and don't offer -// further information about the topology. The most common example is when you -// have one thread context per core, we don't want the extra thread context -// level if it offers no unique labels. So they are removed. 
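-// E.g. (illustrative): with labels {package, core, thread} and exactly one
-// thread context per core, every address carries thread label 0, so the
-// thread level adds no information and is stripped, leaving {package, core}.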
-// return value: the new depth of address2os -static int __kmp_affinity_remove_radix_one_levels(AddrUnsPair *addrP, int nTh, - int depth, int *levels) { - int level; - int i; - int radix1_detected; - int new_depth = depth; - for (level = depth - 1; level > 0; --level) { - // Detect if this level is radix 1 - radix1_detected = 1; - for (i = 1; i < nTh; ++i) { - if (addrP[0].first.labels[level] != addrP[i].first.labels[level]) { - // There are differing label values for this level so it stays - radix1_detected = 0; - break; - } - } - if (!radix1_detected) - continue; - // Radix 1 was detected - --new_depth; - levels[level] = -1; // mark level as not present in address2os array - if (level == new_depth) { - // "turn off" deepest level, just decrement the depth that removes - // the level from address2os array - for (i = 0; i < nTh; ++i) { - addrP[i].first.depth--; - } - } else { - // For other levels, we move labels over and also reduce the depth - int j; - for (j = level; j < new_depth; ++j) { - for (i = 0; i < nTh; ++i) { - addrP[i].first.labels[j] = addrP[i].first.labels[j + 1]; - addrP[i].first.depth--; - } - levels[j + 1] -= 1; - } - } - } - return new_depth; -} - -// Returns the number of objects of type 'type' below 'obj' within the topology -// tree structure. e.g., if obj is a HWLOC_OBJ_PACKAGE object, and type is -// HWLOC_OBJ_PU, then this will return the number of PU's under the SOCKET -// object. -static int __kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj, - hwloc_obj_type_t type) { - int retval = 0; - hwloc_obj_t first; - for (first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type, - obj->logical_index, type, 0); - first != NULL && - hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, obj->type, first) == - obj; - first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type, - first)) { - ++retval; - } - return retval; -} - -static int __kmp_hwloc_count_children_by_depth(hwloc_topology_t t, - hwloc_obj_t o, unsigned depth, - hwloc_obj_t *f) { - if (o->depth == depth) { - if (*f == NULL) - *f = o; // output first descendant found - return 1; - } - int sum = 0; - for (unsigned i = 0; i < o->arity; i++) - sum += __kmp_hwloc_count_children_by_depth(t, o->children[i], depth, f); - return sum; // will be 0 if no one found (as PU arity is 0) -} - -static int __kmp_hwloc_count_children_by_type(hwloc_topology_t t, hwloc_obj_t o, - hwloc_obj_type_t type, - hwloc_obj_t *f) { - if (!hwloc_compare_types(o->type, type)) { - if (*f == NULL) - *f = o; // output first descendant found - return 1; - } - int sum = 0; - for (unsigned i = 0; i < o->arity; i++) - sum += __kmp_hwloc_count_children_by_type(t, o->children[i], type, f); - return sum; // will be 0 if no one found (as PU arity is 0) -} - -static int __kmp_hwloc_process_obj_core_pu(AddrUnsPair *addrPair, - int &nActiveThreads, - int &num_active_cores, - hwloc_obj_t obj, int depth, - int *labels) { - hwloc_obj_t core = NULL; - hwloc_topology_t &tp = __kmp_hwloc_topology; - int NC = __kmp_hwloc_count_children_by_type(tp, obj, HWLOC_OBJ_CORE, &core); - for (int core_id = 0; core_id < NC; ++core_id, core = core->next_cousin) { - hwloc_obj_t pu = NULL; - KMP_DEBUG_ASSERT(core != NULL); - int num_active_threads = 0; - int NT = __kmp_hwloc_count_children_by_type(tp, core, HWLOC_OBJ_PU, &pu); - // int NT = core->arity; pu = core->first_child; // faster? 
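-    // Illustrative note: each Address emitted below gets labels
-    // {package[, node][, tile], core_id, pu_id}, so with depth == 1 (no
-    // NUMA, no tiles) a PU ends up labeled {package, core, pu}.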
-    for (int pu_id = 0; pu_id < NT; ++pu_id, pu = pu->next_cousin) {
-      KMP_DEBUG_ASSERT(pu != NULL);
-      if (!KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask))
-        continue; // skip inactive (inaccessible) unit
-      Address addr(depth + 2);
-      KA_TRACE(20, ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",
-                    obj->os_index, obj->logical_index, core->os_index,
-                    core->logical_index, pu->os_index, pu->logical_index));
-      for (int i = 0; i < depth; ++i)
-        addr.labels[i] = labels[i]; // package, etc.
-      addr.labels[depth] = core_id; // core
-      addr.labels[depth + 1] = pu_id; // pu
-      addrPair[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
-      __kmp_pu_os_idx[nActiveThreads] = pu->os_index;
-      nActiveThreads++;
-      ++num_active_threads; // count active threads per core
-    }
-    if (num_active_threads) { // were there any active threads on the core?
-      ++__kmp_ncores; // count total active cores
-      ++num_active_cores; // count active cores per socket
-      if (num_active_threads > __kmp_nThreadsPerCore)
-        __kmp_nThreadsPerCore = num_active_threads; // calc maximum
-    }
-  }
-  return 0;
-}
-
-// Check if NUMA node detected below the package,
-// and if tile object is detected and return its depth
-static int __kmp_hwloc_check_numa() {
-  hwloc_topology_t &tp = __kmp_hwloc_topology;
-  hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to)
-  int depth;
-
-  // Get some PU
-  hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, 0);
-  if (hT == NULL) // something has gone wrong
-    return 1;
-
-  // check NUMA node below PACKAGE
-  hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
-  hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
-  KMP_DEBUG_ASSERT(hS != NULL);
-  if (hN != NULL && hN->depth > hS->depth) {
-    __kmp_numa_detected = TRUE; // socket includes node(s)
-    if (__kmp_affinity_gran == affinity_gran_node) {
-      __kmp_affinity_gran = affinity_gran_numa;
-    }
-  }
-
-  // check tile, get object by depth because of multiple caches possible
-  depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
-  hL = hwloc_get_ancestor_obj_by_depth(tp, depth, hT);
-  hC = NULL; // not used, but reset it here just in case
-  if (hL != NULL &&
-      __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC) > 1)
-    __kmp_tile_depth = depth; // tile consists of multiple cores
-  return 0;
-}
-
-static int __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
-                                           kmp_i18n_id_t *const msg_id) {
-  hwloc_topology_t &tp = __kmp_hwloc_topology; // shortcut of a long name
-  *address2os = NULL;
-  *msg_id = kmp_i18n_null;
-
-  // Save the affinity mask for the current thread.
-  kmp_affin_mask_t *oldMask;
-  KMP_CPU_ALLOC(oldMask);
-  __kmp_get_system_affinity(oldMask, TRUE);
-  __kmp_hwloc_check_numa();
-
-  if (!KMP_AFFINITY_CAPABLE()) {
-    // Hack to try and infer the machine topology using only the data
-    // available from cpuid on the current thread, and __kmp_xproc.
-    KMP_ASSERT(__kmp_affinity_type == affinity_none);
-
-    nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(
-        hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0), HWLOC_OBJ_CORE);
-    __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(
-        hwloc_get_obj_by_type(tp, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU);
-    __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
-    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
-    if (__kmp_affinity_verbose) {
-      KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
-      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
-      if (__kmp_affinity_uniform_topology()) {
-        KMP_INFORM(Uniform, "KMP_AFFINITY");
-      } else {
-        KMP_INFORM(NonUniform, "KMP_AFFINITY");
-      }
-      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
-                 __kmp_nThreadsPerCore, __kmp_ncores);
-    }
-    KMP_CPU_FREE(oldMask);
-    return 0;
-  }
-
-  int depth = 3;
-  int levels[5] = {0, 1, 2, 3, 4}; // package, [node,] [tile,] core, thread
-  int labels[3] = {0}; // package [,node] [,tile] - head of labels array
-  if (__kmp_numa_detected)
-    ++depth;
-  if (__kmp_tile_depth)
-    ++depth;
-
-  // Allocate the data structure to be returned.
-  AddrUnsPair *retval =
-      (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
-  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
-  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
-
-  // When affinity is off, this routine will still be called to set
-  // __kmp_ncores, as well as __kmp_nThreadsPerCore,
-  // nCoresPerPkg, & nPackages. Make sure all these vars are set
-  // correctly, and return if affinity is not enabled.
-
-  hwloc_obj_t socket, node, tile;
-  int nActiveThreads = 0;
-  int socket_id = 0;
-  // re-calculate globals to count only accessible resources
-  __kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0;
-  nNodePerPkg = nTilePerPkg = nTilePerNode = nCorePerNode = nCorePerTile = 0;
-  for (socket = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0);
-       socket != NULL;
-       socket = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, socket),
-      socket_id++) {
-    labels[0] = socket_id;
-    if (__kmp_numa_detected) {
-      int NN;
-      int n_active_nodes = 0;
-      node = NULL;
-      NN = __kmp_hwloc_count_children_by_type(tp, socket, HWLOC_OBJ_NUMANODE,
-                                              &node);
-      for (int node_id = 0; node_id < NN; ++node_id, node = node->next_cousin) {
-        labels[1] = node_id;
-        if (__kmp_tile_depth) {
-          // NUMA + tiles
-          int NT;
-          int n_active_tiles = 0;
-          tile = NULL;
-          NT = __kmp_hwloc_count_children_by_depth(tp, node, __kmp_tile_depth,
-                                                   &tile);
-          for (int tl_id = 0; tl_id < NT; ++tl_id, tile = tile->next_cousin) {
-            labels[2] = tl_id;
-            int n_active_cores = 0;
-            __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,
-                                            n_active_cores, tile, 3, labels);
-            if (n_active_cores) { // were there any active cores on the socket?
-              ++n_active_tiles; // count active tiles per node
-              if (n_active_cores > nCorePerTile)
-                nCorePerTile = n_active_cores; // calc maximum
-            }
-          }
-          if (n_active_tiles) { // were there any active tiles on the socket?
-            ++n_active_nodes; // count active nodes per package
-            if (n_active_tiles > nTilePerNode)
-              nTilePerNode = n_active_tiles; // calc maximum
-          }
-        } else {
-          // NUMA, no tiles
-          int n_active_cores = 0;
-          __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,
-                                          n_active_cores, node, 2, labels);
-          if (n_active_cores) { // were there any active cores on the socket?
- ++n_active_nodes; // count active nodes per package - if (n_active_cores > nCorePerNode) - nCorePerNode = n_active_cores; // calc maximum - } - } - } - if (n_active_nodes) { // were there any active nodes on the socket? - ++nPackages; // count total active packages - if (n_active_nodes > nNodePerPkg) - nNodePerPkg = n_active_nodes; // calc maximum - } - } else { - if (__kmp_tile_depth) { - // no NUMA, tiles - int NT; - int n_active_tiles = 0; - tile = NULL; - NT = __kmp_hwloc_count_children_by_depth(tp, socket, __kmp_tile_depth, - &tile); - for (int tl_id = 0; tl_id < NT; ++tl_id, tile = tile->next_cousin) { - labels[1] = tl_id; - int n_active_cores = 0; - __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads, - n_active_cores, tile, 2, labels); - if (n_active_cores) { // were there any active cores on the socket? - ++n_active_tiles; // count active tiles per package - if (n_active_cores > nCorePerTile) - nCorePerTile = n_active_cores; // calc maximum - } - } - if (n_active_tiles) { // were there any active tiles on the socket? - ++nPackages; // count total active packages - if (n_active_tiles > nTilePerPkg) - nTilePerPkg = n_active_tiles; // calc maximum - } - } else { - // no NUMA, no tiles - int n_active_cores = 0; - __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads, n_active_cores, - socket, 1, labels); - if (n_active_cores) { // were there any active cores on the socket? - ++nPackages; // count total active packages - if (n_active_cores > nCoresPerPkg) - nCoresPerPkg = n_active_cores; // calc maximum - } - } - } - } - - // If there's only one thread context to bind to, return now. - KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc); - KMP_ASSERT(nActiveThreads > 0); - if (nActiveThreads == 1) { - __kmp_ncores = nPackages = 1; - __kmp_nThreadsPerCore = nCoresPerPkg = 1; - if (__kmp_affinity_verbose) { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask); - - KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY"); - if (__kmp_affinity_respect_mask) { - KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); - } else { - KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); - } - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - KMP_INFORM(Uniform, "KMP_AFFINITY"); - KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, - __kmp_nThreadsPerCore, __kmp_ncores); - } - - if (__kmp_affinity_type == affinity_none) { - __kmp_free(retval); - KMP_CPU_FREE(oldMask); - return 0; - } - - // Form an Address object which only includes the package level. - Address addr(1); - addr.labels[0] = retval[0].first.labels[0]; - retval[0].first = addr; - - if (__kmp_affinity_gran_levels < 0) { - __kmp_affinity_gran_levels = 0; - } - - if (__kmp_affinity_verbose) { - __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1); - } - - *address2os = retval; - KMP_CPU_FREE(oldMask); - return 1; - } - - // Sort the table by physical Id. 
-  qsort(retval, nActiveThreads, sizeof(*retval),
-        __kmp_affinity_cmp_Address_labels);
-
-  // Check to see if the machine topology is uniform
-  int nPUs = nPackages * __kmp_nThreadsPerCore;
-  if (__kmp_numa_detected) {
-    if (__kmp_tile_depth) { // NUMA + tiles
-      nPUs *= (nNodePerPkg * nTilePerNode * nCorePerTile);
-    } else { // NUMA, no tiles
-      nPUs *= (nNodePerPkg * nCorePerNode);
-    }
-  } else {
-    if (__kmp_tile_depth) { // no NUMA, tiles
-      nPUs *= (nTilePerPkg * nCorePerTile);
-    } else { // no NUMA, no tiles
-      nPUs *= nCoresPerPkg;
-    }
-  }
-  unsigned uniform = (nPUs == nActiveThreads);
-
-  // Print the machine topology summary.
-  if (__kmp_affinity_verbose) {
-    char mask[KMP_AFFIN_MASK_PRINT_LEN];
-    __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
-    if (__kmp_affinity_respect_mask) {
-      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
-    } else {
-      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
-    }
-    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
-    if (uniform) {
-      KMP_INFORM(Uniform, "KMP_AFFINITY");
-    } else {
-      KMP_INFORM(NonUniform, "KMP_AFFINITY");
-    }
-    if (__kmp_numa_detected) {
-      if (__kmp_tile_depth) { // NUMA + tiles
-        KMP_INFORM(TopologyExtraNoTi, "KMP_AFFINITY", nPackages, nNodePerPkg,
-                   nTilePerNode, nCorePerTile, __kmp_nThreadsPerCore,
-                   __kmp_ncores);
-      } else { // NUMA, no tiles
-        KMP_INFORM(TopologyExtraNode, "KMP_AFFINITY", nPackages, nNodePerPkg,
-                   nCorePerNode, __kmp_nThreadsPerCore, __kmp_ncores);
-      }
-    } else {
-      if (__kmp_tile_depth) { // no NUMA, tiles
-        KMP_INFORM(TopologyExtraTile, "KMP_AFFINITY", nPackages, nTilePerPkg,
-                   nCorePerTile, __kmp_nThreadsPerCore, __kmp_ncores);
-      } else { // no NUMA, no tiles
-        kmp_str_buf_t buf;
-        __kmp_str_buf_init(&buf);
-        __kmp_str_buf_print(&buf, "%d", nPackages);
-        KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
-                   __kmp_nThreadsPerCore, __kmp_ncores);
-        __kmp_str_buf_free(&buf);
-      }
-    }
-  }
-
-  if (__kmp_affinity_type == affinity_none) {
-    __kmp_free(retval);
-    KMP_CPU_FREE(oldMask);
-    return 0;
-  }
-
-  int depth_full = depth; // number of levels before compressing
-  // Find any levels with radix 1, and remove them from the map
-  // (except for the package level).
-  depth = __kmp_affinity_remove_radix_one_levels(retval, nActiveThreads, depth,
-                                                 levels);
-  KMP_DEBUG_ASSERT(__kmp_affinity_gran != affinity_gran_default);
-  if (__kmp_affinity_gran_levels < 0) {
-    // Set the granularity level based on what levels are modeled
-    // in the machine topology map.
-    __kmp_affinity_gran_levels = 0; // lowest level (e.g. fine)
-    if (__kmp_affinity_gran > affinity_gran_thread) {
-      for (int i = 1; i <= depth_full; ++i) {
-        if (__kmp_affinity_gran <= i) // only count deeper levels
-          break;
-        if (levels[depth_full - i] > 0)
-          __kmp_affinity_gran_levels++;
-      }
-    }
-    if (__kmp_affinity_gran > affinity_gran_package)
-      __kmp_affinity_gran_levels++; // e.g. granularity = group
-  }
-
-  if (__kmp_affinity_verbose)
-    __kmp_affinity_print_hwloc_tp(retval, nActiveThreads, depth, levels);
-
-  KMP_CPU_FREE(oldMask);
-  *address2os = retval;
-  return depth;
-}
-#endif // KMP_USE_HWLOC
-
-// If we don't know how to retrieve the machine's processor topology, or
-// encounter an error in doing so, this routine is called to form a "flat"
-// mapping of os thread id's <-> processor id's.
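-// E.g. (illustrative): with 4 available OS procs, the flat map is a depth-1
-// table whose Address labels are {0}, {1}, {2}, {3}, OS proc i mapping to
-// label i.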
-static int __kmp_affinity_create_flat_map(AddrUnsPair **address2os,
-                                          kmp_i18n_id_t *const msg_id) {
-  *address2os = NULL;
-  *msg_id = kmp_i18n_null;
-
-  // Even if __kmp_affinity_type == affinity_none, this routine might still
-  // be called to set __kmp_ncores, as well as
-  // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
-  if (!KMP_AFFINITY_CAPABLE()) {
-    KMP_ASSERT(__kmp_affinity_type == affinity_none);
-    __kmp_ncores = nPackages = __kmp_xproc;
-    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
-    if (__kmp_affinity_verbose) {
-      KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
-      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
-      KMP_INFORM(Uniform, "KMP_AFFINITY");
-      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
-                 __kmp_nThreadsPerCore, __kmp_ncores);
-    }
-    return 0;
-  }
-
-  // When affinity is off, this routine will still be called to set
-  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
-  // Make sure all these vars are set correctly, and return now if affinity is
-  // not enabled.
-  __kmp_ncores = nPackages = __kmp_avail_proc;
-  __kmp_nThreadsPerCore = nCoresPerPkg = 1;
-  if (__kmp_affinity_verbose) {
-    char buf[KMP_AFFIN_MASK_PRINT_LEN];
-    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
-                              __kmp_affin_fullMask);
-
-    KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
-    if (__kmp_affinity_respect_mask) {
-      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
-    } else {
-      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
-    }
-    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
-    KMP_INFORM(Uniform, "KMP_AFFINITY");
-    KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
-               __kmp_nThreadsPerCore, __kmp_ncores);
-  }
-  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
-  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
-  if (__kmp_affinity_type == affinity_none) {
-    int avail_ct = 0;
-    int i;
-    KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
-      if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask))
-        continue;
-      __kmp_pu_os_idx[avail_ct++] = i; // suppose indices are flat
-    }
-    return 0;
-  }
-
-  // Construct the data structure to be returned.
-  *address2os =
-      (AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
-  int avail_ct = 0;
-  int i;
-  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
-    // Skip this proc if it is not included in the machine model.
-    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
-      continue;
-    }
-    __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
-    Address addr(1);
-    addr.labels[0] = i;
-    (*address2os)[avail_ct++] = AddrUnsPair(addr, i);
-  }
-  if (__kmp_affinity_verbose) {
-    KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
-  }
-
-  if (__kmp_affinity_gran_levels < 0) {
-    // Only the package level is modeled in the machine topology map,
-    // so the #levels of granularity is either 0 or 1.
-    if (__kmp_affinity_gran > affinity_gran_package) {
-      __kmp_affinity_gran_levels = 1;
-    } else {
-      __kmp_affinity_gran_levels = 0;
-    }
-  }
-  return 1;
-}
-
-#if KMP_GROUP_AFFINITY
-
-// If multiple Windows* OS processor groups exist, we can create a 2-level
-// topology map with the groups at level 0 and the individual procs at level 1.
-// This facilitates letting the threads float among all procs in a group,
-// if granularity=group (the default when there are multiple groups).
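-// E.g. (illustrative): with a 64-bit DWORD_PTR each group spans
-// CHAR_BIT * sizeof(DWORD_PTR) == 64 procs, so OS proc 70 is assigned
-// labels {1, 6}: group 1, bit 6 within that group.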
-static int __kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
-                                                kmp_i18n_id_t *const msg_id) {
-  *address2os = NULL;
-  *msg_id = kmp_i18n_null;
-
-  // If we aren't affinity capable, then return now.
-  // The flat mapping will be used.
-  if (!KMP_AFFINITY_CAPABLE()) {
-    // FIXME set *msg_id
-    return -1;
-  }
-
-  // Construct the data structure to be returned.
-  *address2os =
-      (AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
-  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
-  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
-  int avail_ct = 0;
-  int i;
-  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
-    // Skip this proc if it is not included in the machine model.
-    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
-      continue;
-    }
-    __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
-    Address addr(2);
-    addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
-    addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
-    (*address2os)[avail_ct++] = AddrUnsPair(addr, i);
-
-    if (__kmp_affinity_verbose) {
-      KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
-                 addr.labels[1]);
-    }
-  }
-
-  if (__kmp_affinity_gran_levels < 0) {
-    if (__kmp_affinity_gran == affinity_gran_group) {
-      __kmp_affinity_gran_levels = 1;
-    } else if ((__kmp_affinity_gran == affinity_gran_fine) ||
-               (__kmp_affinity_gran == affinity_gran_thread)) {
-      __kmp_affinity_gran_levels = 0;
-    } else {
-      const char *gran_str = NULL;
-      if (__kmp_affinity_gran == affinity_gran_core) {
-        gran_str = "core";
-      } else if (__kmp_affinity_gran == affinity_gran_package) {
-        gran_str = "package";
-      } else if (__kmp_affinity_gran == affinity_gran_node) {
-        gran_str = "node";
-      } else {
-        KMP_ASSERT(0);
-      }
-
-      // Warning: can't use affinity granularity \"gran\" with group topology
-      // method, using "thread"
-      __kmp_affinity_gran_levels = 0;
-    }
-  }
-  return 2;
-}
-
-#endif /* KMP_GROUP_AFFINITY */
-
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-
-static int __kmp_cpuid_mask_width(int count) {
-  int r = 0;
-
-  while ((1 << r) < count)
-    ++r;
-  return r;
-}
-
-class apicThreadInfo {
-public:
-  unsigned osId; // param to __kmp_affinity_bind_thread
-  unsigned apicId; // from cpuid after binding
-  unsigned maxCoresPerPkg; // ""
-  unsigned maxThreadsPerPkg; // ""
-  unsigned pkgId; // inferred from above values
-  unsigned coreId; // ""
-  unsigned threadId; // ""
-};
-
-static int __kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a,
-                                                     const void *b) {
-  const apicThreadInfo *aa = (const apicThreadInfo *)a;
-  const apicThreadInfo *bb = (const apicThreadInfo *)b;
-  if (aa->pkgId < bb->pkgId)
-    return -1;
-  if (aa->pkgId > bb->pkgId)
-    return 1;
-  if (aa->coreId < bb->coreId)
-    return -1;
-  if (aa->coreId > bb->coreId)
-    return 1;
-  if (aa->threadId < bb->threadId)
-    return -1;
-  if (aa->threadId > bb->threadId)
-    return 1;
-  return 0;
-}
-
-// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
-// an algorithm which cycles through the available os threads, setting
-// the current thread's affinity mask to that thread, and then retrieves
-// the Apic Id for each thread context using the cpuid instruction.
-static int __kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
-                                            kmp_i18n_id_t *const msg_id) {
-  kmp_cpuid buf;
-  *address2os = NULL;
-  *msg_id = kmp_i18n_null;
-
-  // Check if cpuid leaf 4 is supported.
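-  // (Recall that cpuid(0) reports the highest supported standard leaf in
-  // eax, so the eax >= 4 check below is what guarantees that leaf 4 exists.)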
-  __kmp_x86_cpuid(0, 0, &buf);
-  if (buf.eax < 4) {
-    *msg_id = kmp_i18n_str_NoLeaf4Support;
-    return -1;
-  }
-
-  // The algorithm used starts by setting the affinity to each available thread
-  // and retrieving info from the cpuid instruction, so if we are not capable
-  // of calling __kmp_get_system_affinity() and __kmp_set_system_affinity(),
-  // then we need to do something else - use the defaults that we calculated
-  // from issuing cpuid without binding to each proc.
-  if (!KMP_AFFINITY_CAPABLE()) {
-    // Hack to try and infer the machine topology using only the data
-    // available from cpuid on the current thread, and __kmp_xproc.
-    KMP_ASSERT(__kmp_affinity_type == affinity_none);
-
-    // Get an upper bound on the number of threads per package using cpuid(1).
-    // On some OS/chip combinations where HT is supported by the chip but is
-    // disabled, this value will be 2 on a single core chip. Usually, it will
-    // be 2 if HT is enabled and 1 if HT is disabled.
-    __kmp_x86_cpuid(1, 0, &buf);
-    int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
-    if (maxThreadsPerPkg == 0) {
-      maxThreadsPerPkg = 1;
-    }
-
-    // The num cores per pkg comes from cpuid(4). 1 must be added to the
-    // encoded value.
-    //
-    // The author of cpu_count.cpp treated this as only an upper bound on the
-    // number of cores, but I haven't seen any cases where it was greater than
-    // the actual number of cores, so we will treat it as exact in this block
-    // of code.
-    //
-    // First, we need to check if cpuid(4) is supported on this chip. To see if
-    // cpuid(n) is supported, issue cpuid(0) and check if eax has the value n
-    // or greater.
-    __kmp_x86_cpuid(0, 0, &buf);
-    if (buf.eax >= 4) {
-      __kmp_x86_cpuid(4, 0, &buf);
-      nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
-    } else {
-      nCoresPerPkg = 1;
-    }
-
-    // There is no way to reliably tell if HT is enabled without issuing the
-    // cpuid instruction from every thread and correlating the cpuid info, so
-    // if the machine is not affinity capable, we assume that HT is off. We
-    // have seen quite a few machines where maxThreadsPerPkg is 2, yet the
-    // machine does not support HT.
-    //
-    // - Older OSes are usually found on machines with older chips, which do
-    //   not support HT.
-    // - The performance penalty for mistakenly identifying a machine as HT
-    //   when it isn't (which results in blocktime being incorrectly set to 0)
-    //   is greater than the penalty for mistakenly identifying a machine as
-    //   being 1 thread/core when it is really HT enabled (which results in
-    //   blocktime being incorrectly set to a positive value).
-    __kmp_ncores = __kmp_xproc;
-    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
-    __kmp_nThreadsPerCore = 1;
-    if (__kmp_affinity_verbose) {
-      KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
-      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
-      if (__kmp_affinity_uniform_topology()) {
-        KMP_INFORM(Uniform, "KMP_AFFINITY");
-      } else {
-        KMP_INFORM(NonUniform, "KMP_AFFINITY");
-      }
-      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
-                 __kmp_nThreadsPerCore, __kmp_ncores);
-    }
-    return 0;
-  }
-
-  // From here on, we can assume that it is safe to call
-  // __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if
-  // __kmp_affinity_type = affinity_none.
-
-  // Save the affinity mask for the current thread.
-  kmp_affin_mask_t *oldMask;
-  KMP_CPU_ALLOC(oldMask);
-  KMP_ASSERT(oldMask != NULL);
-  __kmp_get_system_affinity(oldMask, TRUE);
-
-  // Run through each of the available contexts, binding the current thread
-  // to it, and obtaining the pertinent information using the cpuid instr.
-  //
-  // The relevant information is:
-  // - Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
-  //   has a unique Apic Id, which is of the form pkg# : core# : thread#.
-  // - Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The value
-  //   of this field determines the width of the core# + thread# fields in the
-  //   Apic Id. It is also an upper bound on the number of threads per
-  //   package, but it has been verified that situations happen where it is
-  //   not exact. In particular, on certain OS/chip combinations where Intel(R)
-  //   Hyper-Threading Technology is supported by the chip but has been
-  //   disabled, the value of this field will be 2 (for a single core chip).
-  //   On other OS/chip combinations supporting Intel(R) Hyper-Threading
-  //   Technology, the value of this field will be 1 when Intel(R)
-  //   Hyper-Threading Technology is disabled and 2 when it is enabled.
-  // - Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The value
-  //   of this field (+1) determines the width of the core# field in the Apic
-  //   Id. The comments in "cpucount.cpp" say that this value is an upper
-  //   bound, but the IA-32 architecture manual says that it is exactly the
-  //   number of cores per package, and I haven't seen any case where it
-  //   wasn't.
-  //
-  // From this information, deduce the package Id, core Id, and thread Id,
-  // and set the corresponding fields in the apicThreadInfo struct.
-  unsigned i;
-  apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
-      __kmp_avail_proc * sizeof(apicThreadInfo));
-  unsigned nApics = 0;
-  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
-    // Skip this proc if it is not included in the machine model.
-    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
-      continue;
-    }
-    KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);
-
-    __kmp_affinity_dispatch->bind_thread(i);
-    threadInfo[nApics].osId = i;
-
-    // The apic id and max threads per pkg come from cpuid(1).
-    __kmp_x86_cpuid(1, 0, &buf);
-    if (((buf.edx >> 9) & 1) == 0) {
-      __kmp_set_system_affinity(oldMask, TRUE);
-      __kmp_free(threadInfo);
-      KMP_CPU_FREE(oldMask);
-      *msg_id = kmp_i18n_str_ApicNotPresent;
-      return -1;
-    }
-    threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
-    threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
-    if (threadInfo[nApics].maxThreadsPerPkg == 0) {
-      threadInfo[nApics].maxThreadsPerPkg = 1;
-    }
-
-    // Max cores per pkg comes from cpuid(4). 1 must be added to the encoded
-    // value.
-    //
-    // First, we need to check if cpuid(4) is supported on this chip. To see if
-    // cpuid(n) is supported, issue cpuid(0) and check if eax has the value n
-    // or greater.
-    __kmp_x86_cpuid(0, 0, &buf);
-    if (buf.eax >= 4) {
-      __kmp_x86_cpuid(4, 0, &buf);
-      threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
-    } else {
-      threadInfo[nApics].maxCoresPerPkg = 1;
-    }
-
-    // Infer the pkgId / coreId / threadId using only the info obtained locally.
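-    // E.g. (illustrative): maxThreadsPerPkg == 8 gives widthCT == 3, and
-    // maxCoresPerPkg == 4 gives widthC == 2, hence widthT == 1. An apicId of
-    // 45 (0b101101) then splits into pkgId = 45 >> 3 = 5,
-    // coreId = (45 >> 1) & 3 = 2, and threadId = 45 & 1 = 1.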
- int widthCT = __kmp_cpuid_mask_width(threadInfo[nApics].maxThreadsPerPkg); - threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT; - - int widthC = __kmp_cpuid_mask_width(threadInfo[nApics].maxCoresPerPkg); - int widthT = widthCT - widthC; - if (widthT < 0) { - // I've never seen this one happen, but I suppose it could, if the cpuid - // instruction on a chip was really screwed up. Make sure to restore the - // affinity mask before the tail call. - __kmp_set_system_affinity(oldMask, TRUE); - __kmp_free(threadInfo); - KMP_CPU_FREE(oldMask); - *msg_id = kmp_i18n_str_InvalidCpuidInfo; - return -1; - } - - int maskC = (1 << widthC) - 1; - threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT) & maskC; - - int maskT = (1 << widthT) - 1; - threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT; - - nApics++; - } - - // We've collected all the info we need. - // Restore the old affinity mask for this thread. - __kmp_set_system_affinity(oldMask, TRUE); - - // If there's only one thread context to bind to, form an Address object - // with depth 1 and return immediately (or, if affinity is off, set - // address2os to NULL and return). - // - // If it is configured to omit the package level when there is only a single - // package, the logic at the end of this routine won't work if there is only - // a single thread - it would try to form an Address object with depth 0. - KMP_ASSERT(nApics > 0); - if (nApics == 1) { - __kmp_ncores = nPackages = 1; - __kmp_nThreadsPerCore = nCoresPerPkg = 1; - if (__kmp_affinity_verbose) { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask); - - KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY"); - if (__kmp_affinity_respect_mask) { - KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); - } else { - KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); - } - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - KMP_INFORM(Uniform, "KMP_AFFINITY"); - KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, - __kmp_nThreadsPerCore, __kmp_ncores); - } - - if (__kmp_affinity_type == affinity_none) { - __kmp_free(threadInfo); - KMP_CPU_FREE(oldMask); - return 0; - } - - *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair)); - Address addr(1); - addr.labels[0] = threadInfo[0].pkgId; - (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId); - - if (__kmp_affinity_gran_levels < 0) { - __kmp_affinity_gran_levels = 0; - } - - if (__kmp_affinity_verbose) { - __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1); - } - - __kmp_free(threadInfo); - KMP_CPU_FREE(oldMask); - return 1; - } - - // Sort the threadInfo table by physical Id. - qsort(threadInfo, nApics, sizeof(*threadInfo), - __kmp_affinity_cmp_apicThreadInfo_phys_id); - - // The table is now sorted by pkgId / coreId / threadId, but we really don't - // know the radix of any of the fields. pkgId's may be sparsely assigned among - // the chips on a system. Although coreId's are usually assigned - // [0 .. coresPerPkg-1] and threadId's are usually assigned - // [0..threadsPerCore-1], we don't want to make any such assumptions. - // - // For that matter, we don't know what coresPerPkg and threadsPerCore (or the - // total # packages) are at this point - we want to determine that now. We - // only have an upper bound on the first two figures. 
-  //
-  // We also perform a consistency check at this point: the values returned by
-  // the cpuid instruction for any thread bound to a given package had better
-  // return the same info for maxThreadsPerPkg and maxCoresPerPkg.
-  nPackages = 1;
-  nCoresPerPkg = 1;
-  __kmp_nThreadsPerCore = 1;
-  unsigned nCores = 1;
-
-  unsigned pkgCt = 1; // to determine radii
-  unsigned lastPkgId = threadInfo[0].pkgId;
-  unsigned coreCt = 1;
-  unsigned lastCoreId = threadInfo[0].coreId;
-  unsigned threadCt = 1;
-  unsigned lastThreadId = threadInfo[0].threadId;
-
-  // intra-pkg consistency checks
-  unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
-  unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;
-
-  for (i = 1; i < nApics; i++) {
-    if (threadInfo[i].pkgId != lastPkgId) {
-      nCores++;
-      pkgCt++;
-      lastPkgId = threadInfo[i].pkgId;
-      if ((int)coreCt > nCoresPerPkg)
-        nCoresPerPkg = coreCt;
-      coreCt = 1;
-      lastCoreId = threadInfo[i].coreId;
-      if ((int)threadCt > __kmp_nThreadsPerCore)
-        __kmp_nThreadsPerCore = threadCt;
-      threadCt = 1;
-      lastThreadId = threadInfo[i].threadId;
-
-      // This is a different package, so go on to the next iteration without
-      // doing any consistency checks. Reset the consistency check vars,
-      // though.
-      prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
-      prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
-      continue;
-    }
-
-    if (threadInfo[i].coreId != lastCoreId) {
-      nCores++;
-      coreCt++;
-      lastCoreId = threadInfo[i].coreId;
-      if ((int)threadCt > __kmp_nThreadsPerCore)
-        __kmp_nThreadsPerCore = threadCt;
-      threadCt = 1;
-      lastThreadId = threadInfo[i].threadId;
-    } else if (threadInfo[i].threadId != lastThreadId) {
-      threadCt++;
-      lastThreadId = threadInfo[i].threadId;
-    } else {
-      __kmp_free(threadInfo);
-      KMP_CPU_FREE(oldMask);
-      *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
-      return -1;
-    }
-
-    // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
-    // fields agree between all the threads bound to a given package.
-    if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg) ||
-        (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
-      __kmp_free(threadInfo);
-      KMP_CPU_FREE(oldMask);
-      *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
-      return -1;
-    }
-  }
-  nPackages = pkgCt;
-  if ((int)coreCt > nCoresPerPkg)
-    nCoresPerPkg = coreCt;
-  if ((int)threadCt > __kmp_nThreadsPerCore)
-    __kmp_nThreadsPerCore = threadCt;
-
-  // When affinity is off, this routine will still be called to set
-  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
-  // Make sure all these vars are set correctly, and return now if affinity is
-  // not enabled.
- __kmp_ncores = nCores; - if (__kmp_affinity_verbose) { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask); - - KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY"); - if (__kmp_affinity_respect_mask) { - KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); - } else { - KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); - } - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - if (__kmp_affinity_uniform_topology()) { - KMP_INFORM(Uniform, "KMP_AFFINITY"); - } else { - KMP_INFORM(NonUniform, "KMP_AFFINITY"); - } - KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, - __kmp_nThreadsPerCore, __kmp_ncores); - } - KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL); - KMP_DEBUG_ASSERT(nApics == (unsigned)__kmp_avail_proc); - __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc); - for (i = 0; i < nApics; ++i) { - __kmp_pu_os_idx[i] = threadInfo[i].osId; - } - if (__kmp_affinity_type == affinity_none) { - __kmp_free(threadInfo); - KMP_CPU_FREE(oldMask); - return 0; - } - - // Now that we've determined the number of packages, the number of cores per - // package, and the number of threads per core, we can construct the data - // structure that is to be returned. - int pkgLevel = 0; - int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1; - int threadLevel = - (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1); - unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0); - - KMP_ASSERT(depth > 0); - *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics); - - for (i = 0; i < nApics; ++i) { - Address addr(depth); - unsigned os = threadInfo[i].osId; - int d = 0; - - if (pkgLevel >= 0) { - addr.labels[d++] = threadInfo[i].pkgId; - } - if (coreLevel >= 0) { - addr.labels[d++] = threadInfo[i].coreId; - } - if (threadLevel >= 0) { - addr.labels[d++] = threadInfo[i].threadId; - } - (*address2os)[i] = AddrUnsPair(addr, os); - } - - if (__kmp_affinity_gran_levels < 0) { - // Set the granularity level based on what levels are modeled in the machine - // topology map. - __kmp_affinity_gran_levels = 0; - if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) { - __kmp_affinity_gran_levels++; - } - if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) { - __kmp_affinity_gran_levels++; - } - if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) { - __kmp_affinity_gran_levels++; - } - } - - if (__kmp_affinity_verbose) { - __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel, - coreLevel, threadLevel); - } - - __kmp_free(threadInfo); - KMP_CPU_FREE(oldMask); - return depth; -} - -// Intel(R) microarchitecture code name Nehalem, Dunnington and later -// architectures support a newer interface for specifying the x2APIC Ids, -// based on cpuid leaf 11. -static int __kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os, - kmp_i18n_id_t *const msg_id) { - kmp_cpuid buf; - *address2os = NULL; - *msg_id = kmp_i18n_null; - - // Check to see if cpuid leaf 11 is supported. - __kmp_x86_cpuid(0, 0, &buf); - if (buf.eax < 11) { - *msg_id = kmp_i18n_str_NoLeaf11Support; - return -1; - } - __kmp_x86_cpuid(11, 0, &buf); - if (buf.ebx == 0) { - *msg_id = kmp_i18n_str_NoLeaf11Support; - return -1; - } - - // Find the number of levels in the machine topology. While we're at it, get - // the default values for __kmp_nThreadsPerCore & nCoresPerPkg. 
We will try to
-  // get more accurate values later by explicitly counting them, but get
-  // reasonable defaults now, in case we return early.
-  int level;
-  int threadLevel = -1;
-  int coreLevel = -1;
-  int pkgLevel = -1;
-  __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
-
-  for (level = 0;; level++) {
-    if (level > 31) {
-      // FIXME: Hack for DPD200163180
-      //
-      // If level is big then something went wrong -> exiting
-      //
-      // There could actually be 32 valid levels in the machine topology, but
-      // so far, the only machine we have seen which does not exit this loop
-      // before iteration 32 has fubar x2APIC settings.
-      //
-      // For now, just reject this case based upon loop trip count.
-      *msg_id = kmp_i18n_str_InvalidCpuidInfo;
-      return -1;
-    }
-    __kmp_x86_cpuid(11, level, &buf);
-    if (buf.ebx == 0) {
-      if (pkgLevel < 0) {
-        // Will infer nPackages from __kmp_xproc
-        pkgLevel = level;
-        level++;
-      }
-      break;
-    }
-    int kind = (buf.ecx >> 8) & 0xff;
-    if (kind == 1) {
-      // SMT level
-      threadLevel = level;
-      coreLevel = -1;
-      pkgLevel = -1;
-      __kmp_nThreadsPerCore = buf.ebx & 0xffff;
-      if (__kmp_nThreadsPerCore == 0) {
-        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
-        return -1;
-      }
-    } else if (kind == 2) {
-      // core level
-      coreLevel = level;
-      pkgLevel = -1;
-      nCoresPerPkg = buf.ebx & 0xffff;
-      if (nCoresPerPkg == 0) {
-        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
-        return -1;
-      }
-    } else {
-      if (level <= 0) {
-        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
-        return -1;
-      }
-      if (pkgLevel >= 0) {
-        continue;
-      }
-      pkgLevel = level;
-      nPackages = buf.ebx & 0xffff;
-      if (nPackages == 0) {
-        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
-        return -1;
-      }
-    }
-  }
-  int depth = level;
-
-  // In the above loop, "level" was counted from the finest level (usually
-  // thread) to the coarsest. The caller expects that we will place the labels
-  // in (*address2os)[].first.labels[] in the inverse order, so we need to
-  // invert the vars saying which level means what.
-  if (threadLevel >= 0) {
-    threadLevel = depth - threadLevel - 1;
-  }
-  if (coreLevel >= 0) {
-    coreLevel = depth - coreLevel - 1;
-  }
-  KMP_DEBUG_ASSERT(pkgLevel >= 0);
-  pkgLevel = depth - pkgLevel - 1;
-
-  // The algorithm used starts by setting the affinity to each available thread
-  // and retrieving info from the cpuid instruction, so if we are not capable
-  // of calling __kmp_get_system_affinity() and __kmp_set_system_affinity(),
-  // then we need to do something else - use the defaults that we calculated
-  // from issuing cpuid without binding to each proc.
-  if (!KMP_AFFINITY_CAPABLE()) {
-    // Hack to try and infer the machine topology using only the data
-    // available from cpuid on the current thread, and __kmp_xproc.
-    KMP_ASSERT(__kmp_affinity_type == affinity_none);
-
-    __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
-    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
-    if (__kmp_affinity_verbose) {
-      KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
-      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
-      if (__kmp_affinity_uniform_topology()) {
-        KMP_INFORM(Uniform, "KMP_AFFINITY");
-      } else {
-        KMP_INFORM(NonUniform, "KMP_AFFINITY");
-      }
-      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
-                 __kmp_nThreadsPerCore, __kmp_ncores);
-    }
-    return 0;
-  }
-
-  // From here on, we can assume that it is safe to call
-  // __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if
-  // __kmp_affinity_type = affinity_none.
-
-  // Save the affinity mask for the current thread.
- kmp_affin_mask_t *oldMask; - KMP_CPU_ALLOC(oldMask); - __kmp_get_system_affinity(oldMask, TRUE); - - // Allocate the data structure to be returned. - AddrUnsPair *retval = - (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc); - - // Run through each of the available contexts, binding the current thread - // to it, and obtaining the pertinent information using the cpuid instr. - unsigned int proc; - int nApics = 0; - KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask) { - // Skip this proc if it is not included in the machine model. - if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) { - continue; - } - KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc); - - __kmp_affinity_dispatch->bind_thread(proc); - - // Extract labels for each level in the machine topology map from Apic ID. - Address addr(depth); - int prev_shift = 0; - - for (level = 0; level < depth; level++) { - __kmp_x86_cpuid(11, level, &buf); - unsigned apicId = buf.edx; - if (buf.ebx == 0) { - if (level != depth - 1) { - KMP_CPU_FREE(oldMask); - *msg_id = kmp_i18n_str_InconsistentCpuidInfo; - return -1; - } - addr.labels[depth - level - 1] = apicId >> prev_shift; - level++; - break; - } - int shift = buf.eax & 0x1f; - int mask = (1 << shift) - 1; - addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift; - prev_shift = shift; - } - if (level != depth) { - KMP_CPU_FREE(oldMask); - *msg_id = kmp_i18n_str_InconsistentCpuidInfo; - return -1; - } - - retval[nApics] = AddrUnsPair(addr, proc); - nApics++; - } - - // We've collected all the info we need. - // Restore the old affinity mask for this thread. - __kmp_set_system_affinity(oldMask, TRUE); - - // If there's only one thread context to bind to, return now. - KMP_ASSERT(nApics > 0); - if (nApics == 1) { - __kmp_ncores = nPackages = 1; - __kmp_nThreadsPerCore = nCoresPerPkg = 1; - if (__kmp_affinity_verbose) { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask); - - KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY"); - if (__kmp_affinity_respect_mask) { - KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); - } else { - KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); - } - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - KMP_INFORM(Uniform, "KMP_AFFINITY"); - KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, - __kmp_nThreadsPerCore, __kmp_ncores); - } - - if (__kmp_affinity_type == affinity_none) { - __kmp_free(retval); - KMP_CPU_FREE(oldMask); - return 0; - } - - // Form an Address object which only includes the package level. - Address addr(1); - addr.labels[0] = retval[0].first.labels[pkgLevel]; - retval[0].first = addr; - - if (__kmp_affinity_gran_levels < 0) { - __kmp_affinity_gran_levels = 0; - } - - if (__kmp_affinity_verbose) { - __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1); - } - - *address2os = retval; - KMP_CPU_FREE(oldMask); - return 1; - } - - // Sort the table by physical Id. - qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels); - - // Find the radix at each of the levels. 
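-  // E.g. (illustrative): for a uniform 2-package x 2-core x 2-thread machine,
-  // the loop below ends with totals == {2, 4, 8} and maxCt == {2, 2, 2}, so
-  // nPackages == 2, nCoresPerPkg == 2, __kmp_nThreadsPerCore == 2, and the
-  // uniformity product 2 * 2 * 2 matches totals[depth - 1] == 8.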
-  unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
-  unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
-  unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
-  unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
-  for (level = 0; level < depth; level++) {
-    totals[level] = 1;
-    maxCt[level] = 1;
-    counts[level] = 1;
-    last[level] = retval[0].first.labels[level];
-  }
-
-  // From here on, the iteration variable "level" runs from the finest level to
-  // the coarsest, i.e. we iterate forward through
-  // (*address2os)[].first.labels[] - in the previous loops, we iterated
-  // backwards.
-  for (proc = 1; (int)proc < nApics; proc++) {
-    int level;
-    for (level = 0; level < depth; level++) {
-      if (retval[proc].first.labels[level] != last[level]) {
-        int j;
-        for (j = level + 1; j < depth; j++) {
-          totals[j]++;
-          counts[j] = 1;
-          // The line below causes incorrect topology information to be
-          // printed in case the max value for some level (maxCt[level]) is
-          // encountered before a smaller value while going through the array.
-          // For example, suppose pkg0 has 4 cores and pkg1 has 2 cores. Then
-          // maxCt[1] == 2, whereas it must be 4.
-          // TODO!!! Check if it can be commented safely
-          // maxCt[j] = 1;
-          last[j] = retval[proc].first.labels[j];
-        }
-        totals[level]++;
-        counts[level]++;
-        if (counts[level] > maxCt[level]) {
-          maxCt[level] = counts[level];
-        }
-        last[level] = retval[proc].first.labels[level];
-        break;
-      } else if (level == depth - 1) {
-        __kmp_free(last);
-        __kmp_free(maxCt);
-        __kmp_free(counts);
-        __kmp_free(totals);
-        __kmp_free(retval);
-        KMP_CPU_FREE(oldMask);
-        *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
-        return -1;
-      }
-    }
-  }
-
-  // When affinity is off, this routine will still be called to set
-  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
-  // Make sure all these vars are set correctly, and return if affinity is not
-  // enabled.
-  if (threadLevel >= 0) {
-    __kmp_nThreadsPerCore = maxCt[threadLevel];
-  } else {
-    __kmp_nThreadsPerCore = 1;
-  }
-  nPackages = totals[pkgLevel];
-
-  if (coreLevel >= 0) {
-    __kmp_ncores = totals[coreLevel];
-    nCoresPerPkg = maxCt[coreLevel];
-  } else {
-    __kmp_ncores = nPackages;
-    nCoresPerPkg = 1;
-  }
-
-  // Check to see if the machine topology is uniform
-  unsigned prod = maxCt[0];
-  for (level = 1; level < depth; level++) {
-    prod *= maxCt[level];
-  }
-  bool uniform = (prod == totals[level - 1]);
-
-  // Print the machine topology summary.
-  if (__kmp_affinity_verbose) {
-    char mask[KMP_AFFIN_MASK_PRINT_LEN];
-    __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
-
-    KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
-    if (__kmp_affinity_respect_mask) {
-      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
-    } else {
-      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
-    }
-    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
-    if (uniform) {
-      KMP_INFORM(Uniform, "KMP_AFFINITY");
-    } else {
-      KMP_INFORM(NonUniform, "KMP_AFFINITY");
-    }
-
-    kmp_str_buf_t buf;
-    __kmp_str_buf_init(&buf);
-
-    __kmp_str_buf_print(&buf, "%d", totals[0]);
-    for (level = 1; level <= pkgLevel; level++) {
-      __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
-    }
-    KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
-               __kmp_nThreadsPerCore, __kmp_ncores);
-
-    __kmp_str_buf_free(&buf);
-  }
-  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
-  KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
-  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
-  for (proc = 0; (int)proc < nApics; ++proc) {
-    __kmp_pu_os_idx[proc] = retval[proc].second;
-  }
-  if (__kmp_affinity_type == affinity_none) {
-    __kmp_free(last);
-    __kmp_free(maxCt);
-    __kmp_free(counts);
-    __kmp_free(totals);
-    __kmp_free(retval);
-    KMP_CPU_FREE(oldMask);
-    return 0;
-  }
-
-  // Find any levels with radix 1, and remove them from the map
-  // (except for the package level).
-  int new_depth = 0;
-  for (level = 0; level < depth; level++) {
-    if ((maxCt[level] == 1) && (level != pkgLevel)) {
-      continue;
-    }
-    new_depth++;
-  }
-
-  // If we are removing any levels, allocate a new vector to return,
-  // and copy the relevant information to it.
-  if (new_depth != depth) {
-    AddrUnsPair *new_retval =
-        (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
-    for (proc = 0; (int)proc < nApics; proc++) {
-      Address addr(new_depth);
-      new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
-    }
-    int new_level = 0;
-    int newPkgLevel = -1;
-    int newCoreLevel = -1;
-    int newThreadLevel = -1;
-    for (level = 0; level < depth; level++) {
-      if ((maxCt[level] == 1) && (level != pkgLevel)) {
-        // Remove this level. Never remove the package level
-        continue;
-      }
-      if (level == pkgLevel) {
-        newPkgLevel = new_level;
-      }
-      if (level == coreLevel) {
-        newCoreLevel = new_level;
-      }
-      if (level == threadLevel) {
-        newThreadLevel = new_level;
-      }
-      for (proc = 0; (int)proc < nApics; proc++) {
-        new_retval[proc].first.labels[new_level] =
-            retval[proc].first.labels[level];
-      }
-      new_level++;
-    }
-
-    __kmp_free(retval);
-    retval = new_retval;
-    depth = new_depth;
-    pkgLevel = newPkgLevel;
-    coreLevel = newCoreLevel;
-    threadLevel = newThreadLevel;
-  }
-
-  if (__kmp_affinity_gran_levels < 0) {
-    // Set the granularity level based on what levels are modeled
-    // in the machine topology map.
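-    // E.g. (illustrative): with thread, core, and package levels all present
-    // and granularity=core, only the thread level lies below the requested
-    // granularity, so __kmp_affinity_gran_levels ends up as 1.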
-    __kmp_affinity_gran_levels = 0;
-    if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
-      __kmp_affinity_gran_levels++;
-    }
-    if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
-      __kmp_affinity_gran_levels++;
-    }
-    if (__kmp_affinity_gran > affinity_gran_package) {
-      __kmp_affinity_gran_levels++;
-    }
-  }
-
-  if (__kmp_affinity_verbose) {
-    __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel, coreLevel,
-                                  threadLevel);
-  }
-
-  __kmp_free(last);
-  __kmp_free(maxCt);
-  __kmp_free(counts);
-  __kmp_free(totals);
-  KMP_CPU_FREE(oldMask);
-  *address2os = retval;
-  return depth;
-}
-
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
-#define osIdIndex 0
-#define threadIdIndex 1
-#define coreIdIndex 2
-#define pkgIdIndex 3
-#define nodeIdIndex 4
-
-typedef unsigned *ProcCpuInfo;
-static unsigned maxIndex = pkgIdIndex;
-
-static int __kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a,
-                                                  const void *b) {
-  unsigned i;
-  const unsigned *aa = *(unsigned *const *)a;
-  const unsigned *bb = *(unsigned *const *)b;
-  for (i = maxIndex;; i--) {
-    if (aa[i] < bb[i])
-      return -1;
-    if (aa[i] > bb[i])
-      return 1;
-    if (i == osIdIndex)
-      break;
-  }
-  return 0;
-}
-
-#if KMP_USE_HIER_SCHED
-// Set the array sizes for the hierarchy layers
-static void __kmp_dispatch_set_hierarchy_values() {
-  // Set the maximum number of L1's to number of cores
-  // Set the maximum number of L2's to either number of cores / 2 for
-  // the Intel(R) Xeon Phi(TM) coprocessor formerly codenamed Knights Landing,
-  // or the number of cores for Intel(R) Xeon(R) processors
-  // Set the maximum number of NUMA nodes and L3's to number of packages
-  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1] =
-      nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
-  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L1 + 1] = __kmp_ncores;
-#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
-  if (__kmp_mic_type >= mic3)
-    __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores / 2;
-  else
-#endif // KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
-    __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores;
-  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L3 + 1] = nPackages;
-  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_NUMA + 1] = nPackages;
-  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_LOOP + 1] = 1;
-  // Set the number of threads per unit
-  // Number of hardware threads per L1/L2/L3/NUMA/LOOP
-  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_THREAD + 1] = 1;
-  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L1 + 1] =
-      __kmp_nThreadsPerCore;
-#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
-  if (__kmp_mic_type >= mic3)
-    __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
-        2 * __kmp_nThreadsPerCore;
-  else
-#endif // KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
-    __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
-        __kmp_nThreadsPerCore;
-  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L3 + 1] =
-      nCoresPerPkg * __kmp_nThreadsPerCore;
-  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_NUMA + 1] =
-      nCoresPerPkg * __kmp_nThreadsPerCore;
-  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_LOOP + 1] =
-      nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
-}
-
-// Return the index into the hierarchy for this tid and layer type (L1, L2,
-// etc.), i.e., this thread's L1 or this thread's L2, etc.
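-// E.g. (illustrative): with 2 threads per core and 4 cores,
-// __kmp_hier_threads_per[LAYER_L1 + 1] == 2 and
-// __kmp_hier_max_units[LAYER_L1 + 1] == 4, so tid 5 maps to
-// L1 (core) index (5 / 2) % 4 == 2.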
-int __kmp_dispatch_get_index(int tid, kmp_hier_layer_e type) {
- int index = type + 1;
- int num_hw_threads = __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1];
- KMP_DEBUG_ASSERT(type != kmp_hier_layer_e::LAYER_LAST);
- if (type == kmp_hier_layer_e::LAYER_THREAD)
- return tid;
- else if (type == kmp_hier_layer_e::LAYER_LOOP)
- return 0;
- KMP_DEBUG_ASSERT(__kmp_hier_max_units[index] != 0);
- if (tid >= num_hw_threads)
- tid = tid % num_hw_threads;
- return (tid / __kmp_hier_threads_per[index]) % __kmp_hier_max_units[index];
-}
-
-// Return the number of t1's per t2
-int __kmp_dispatch_get_t1_per_t2(kmp_hier_layer_e t1, kmp_hier_layer_e t2) {
- int i1 = t1 + 1;
- int i2 = t2 + 1;
- KMP_DEBUG_ASSERT(i1 <= i2);
- KMP_DEBUG_ASSERT(t1 != kmp_hier_layer_e::LAYER_LAST);
- KMP_DEBUG_ASSERT(t2 != kmp_hier_layer_e::LAYER_LAST);
- KMP_DEBUG_ASSERT(__kmp_hier_threads_per[i1] != 0);
- // (nthreads/t2) / (nthreads/t1) = t1 / t2
- return __kmp_hier_threads_per[i2] / __kmp_hier_threads_per[i1];
-}
-#endif // KMP_USE_HIER_SCHED
-
-// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
-// affinity map.
-static int __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os,
- int *line,
- kmp_i18n_id_t *const msg_id,
- FILE *f) {
- *address2os = NULL;
- *msg_id = kmp_i18n_null;
-
- // Scan the file, and count the number of "processor" (osId) fields,
- // and find the highest value of <n> for a node_<n> field.
- char buf[256];
- unsigned num_records = 0;
- while (!feof(f)) {
- buf[sizeof(buf) - 1] = 1;
- if (!fgets(buf, sizeof(buf), f)) {
- // Read errors presumably because of EOF
- break;
- }
-
- char s1[] = "processor";
- if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
- num_records++;
- continue;
- }
-
- // FIXME - this will match "node_<n> <garbage>"
- unsigned level;
- if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
- if (nodeIdIndex + level >= maxIndex) {
- maxIndex = nodeIdIndex + level;
- }
- continue;
- }
- }
-
- // Check for empty file / no valid processor records, or too many. The number
- // of records can't exceed the number of valid bits in the affinity mask.
- if (num_records == 0) {
- *line = 0;
- *msg_id = kmp_i18n_str_NoProcRecords;
- return -1;
- }
- if (num_records > (unsigned)__kmp_xproc) {
- *line = 0;
- *msg_id = kmp_i18n_str_TooManyProcRecords;
- return -1;
- }
-
- // Set the file pointer back to the beginning, so that we can scan the file
- // again, this time performing a full parse of the data. Allocate a vector of
- // ProcCpuInfo objects, where we will place the data. Adding an extra element
- // at the end allows us to remove a lot of extra checks for termination
- // conditions.
- if (fseek(f, 0, SEEK_SET) != 0) {
- *line = 0;
- *msg_id = kmp_i18n_str_CantRewindCpuinfo;
- return -1;
- }
-
- // Allocate the array of records to store the proc info in. The dummy
- // element at the end makes the logic in filling them out easier to code.
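A standalone sketch of the two-pass structure used by this parser: count the
"processor" records, rewind, then parse for real; the extra row matches the
dummy element mentioned above. Assumes a file in /proc/cpuinfo format:

    #include <cstdio>
    #include <cstring>

    int main() {
      std::FILE *f = std::fopen("/proc/cpuinfo", "r");
      if (!f)
        return 1;
      char buf[256];
      unsigned num_records = 0;
      while (std::fgets(buf, sizeof(buf), f))
        if (std::strncmp(buf, "processor", sizeof("processor") - 1) == 0)
          ++num_records;
      std::rewind(f);
      // A full parser would allocate num_records + 1 rows here and fill
      // them field by field on the second pass.
      std::printf("%u processor records\n", num_records);
      std::fclose(f);
      return 0;
    }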
- unsigned **threadInfo =
- (unsigned **)__kmp_allocate((num_records + 1) * sizeof(unsigned *));
- unsigned i;
- for (i = 0; i <= num_records; i++) {
- threadInfo[i] =
- (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
- }
-
-#define CLEANUP_THREAD_INFO \
- for (i = 0; i <= num_records; i++) { \
- __kmp_free(threadInfo[i]); \
- } \
- __kmp_free(threadInfo);
-
- // A value of UINT_MAX means that we didn't find the field
- unsigned __index;
-
-#define INIT_PROC_INFO(p) \
- for (__index = 0; __index <= maxIndex; __index++) { \
- (p)[__index] = UINT_MAX; \
- }
-
- for (i = 0; i <= num_records; i++) {
- INIT_PROC_INFO(threadInfo[i]);
- }
-
- unsigned num_avail = 0;
- *line = 0;
- while (!feof(f)) {
- // Create an inner scoping level, so that all the goto targets at the end of
- // the loop appear in an outer scoping level. This avoids warnings about
- // jumping past an initialization to a target in the same block.
- {
- buf[sizeof(buf) - 1] = 1;
- bool long_line = false;
- if (!fgets(buf, sizeof(buf), f)) {
- // Read errors presumably because of EOF
- // If there is valid data in threadInfo[num_avail], then fake
- // a blank line to ensure that the last address gets parsed.
- bool valid = false;
- for (i = 0; i <= maxIndex; i++) {
- if (threadInfo[num_avail][i] != UINT_MAX) {
- valid = true;
- }
- }
- if (!valid) {
- break;
- }
- buf[0] = 0;
- } else if (!buf[sizeof(buf) - 1]) {
- // The line is longer than the buffer. Set a flag and don't
- // emit an error if we were going to ignore the line, anyway.
- long_line = true;
-
-#define CHECK_LINE \
- if (long_line) { \
- CLEANUP_THREAD_INFO; \
- *msg_id = kmp_i18n_str_LongLineCpuinfo; \
- return -1; \
- }
- }
- (*line)++;
-
- char s1[] = "processor";
- if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
- CHECK_LINE;
- char *p = strchr(buf + sizeof(s1) - 1, ':');
- unsigned val;
- if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
- goto no_val;
- if (threadInfo[num_avail][osIdIndex] != UINT_MAX)
-#if KMP_ARCH_AARCH64
- // Handle the old AArch64 /proc/cpuinfo layout differently: it contains
- // all of the 'processor' entries in a single 'Processor' section, so
- // the normal search for duplicates in that section would always fail.
- num_avail++; -#else - goto dup_field; -#endif - threadInfo[num_avail][osIdIndex] = val; -#if KMP_OS_LINUX && !(KMP_ARCH_X86 || KMP_ARCH_X86_64) - char path[256]; - KMP_SNPRINTF( - path, sizeof(path), - "/sys/devices/system/cpu/cpu%u/topology/physical_package_id", - threadInfo[num_avail][osIdIndex]); - __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]); - - KMP_SNPRINTF(path, sizeof(path), - "/sys/devices/system/cpu/cpu%u/topology/core_id", - threadInfo[num_avail][osIdIndex]); - __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]); - continue; -#else - } - char s2[] = "physical id"; - if (strncmp(buf, s2, sizeof(s2) - 1) == 0) { - CHECK_LINE; - char *p = strchr(buf + sizeof(s2) - 1, ':'); - unsigned val; - if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) - goto no_val; - if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) - goto dup_field; - threadInfo[num_avail][pkgIdIndex] = val; - continue; - } - char s3[] = "core id"; - if (strncmp(buf, s3, sizeof(s3) - 1) == 0) { - CHECK_LINE; - char *p = strchr(buf + sizeof(s3) - 1, ':'); - unsigned val; - if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) - goto no_val; - if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) - goto dup_field; - threadInfo[num_avail][coreIdIndex] = val; - continue; -#endif // KMP_OS_LINUX && USE_SYSFS_INFO - } - char s4[] = "thread id"; - if (strncmp(buf, s4, sizeof(s4) - 1) == 0) { - CHECK_LINE; - char *p = strchr(buf + sizeof(s4) - 1, ':'); - unsigned val; - if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) - goto no_val; - if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) - goto dup_field; - threadInfo[num_avail][threadIdIndex] = val; - continue; - } - unsigned level; - if (KMP_SSCANF(buf, "node_%u id", &level) == 1) { - CHECK_LINE; - char *p = strchr(buf + sizeof(s4) - 1, ':'); - unsigned val; - if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) - goto no_val; - KMP_ASSERT(nodeIdIndex + level <= maxIndex); - if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) - goto dup_field; - threadInfo[num_avail][nodeIdIndex + level] = val; - continue; - } - - // We didn't recognize the leading token on the line. There are lots of - // leading tokens that we don't recognize - if the line isn't empty, go on - // to the next line. - if ((*buf != 0) && (*buf != '\n')) { - // If the line is longer than the buffer, read characters - // until we find a newline. - if (long_line) { - int ch; - while (((ch = fgetc(f)) != EOF) && (ch != '\n')) - ; - } - continue; - } - - // A newline has signalled the end of the processor record. - // Check that there aren't too many procs specified. - if ((int)num_avail == __kmp_xproc) { - CLEANUP_THREAD_INFO; - *msg_id = kmp_i18n_str_TooManyEntries; - return -1; - } - - // Check for missing fields. The osId field must be there, and we - // currently require that the physical id field is specified, also. - if (threadInfo[num_avail][osIdIndex] == UINT_MAX) { - CLEANUP_THREAD_INFO; - *msg_id = kmp_i18n_str_MissingProcField; - return -1; - } - if (threadInfo[0][pkgIdIndex] == UINT_MAX) { - CLEANUP_THREAD_INFO; - *msg_id = kmp_i18n_str_MissingPhysicalIDField; - return -1; - } - - // Skip this proc if it is not included in the machine model. - if (!KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], - __kmp_affin_fullMask)) { - INIT_PROC_INFO(threadInfo[num_avail]); - continue; - } - - // We have a successful parse of this proc's info. - // Increment the counter, and prepare for the next proc. 
- num_avail++; - KMP_ASSERT(num_avail <= num_records); - INIT_PROC_INFO(threadInfo[num_avail]); - } - continue; - - no_val: - CLEANUP_THREAD_INFO; - *msg_id = kmp_i18n_str_MissingValCpuinfo; - return -1; - - dup_field: - CLEANUP_THREAD_INFO; - *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo; - return -1; - } - *line = 0; - -#if KMP_MIC && REDUCE_TEAM_SIZE - unsigned teamSize = 0; -#endif // KMP_MIC && REDUCE_TEAM_SIZE - - // check for num_records == __kmp_xproc ??? - - // If there's only one thread context to bind to, form an Address object with - // depth 1 and return immediately (or, if affinity is off, set address2os to - // NULL and return). - // - // If it is configured to omit the package level when there is only a single - // package, the logic at the end of this routine won't work if there is only a - // single thread - it would try to form an Address object with depth 0. - KMP_ASSERT(num_avail > 0); - KMP_ASSERT(num_avail <= num_records); - if (num_avail == 1) { - __kmp_ncores = 1; - __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1; - if (__kmp_affinity_verbose) { - if (!KMP_AFFINITY_CAPABLE()) { - KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY"); - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - KMP_INFORM(Uniform, "KMP_AFFINITY"); - } else { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, - __kmp_affin_fullMask); - KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY"); - if (__kmp_affinity_respect_mask) { - KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); - } else { - KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); - } - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - KMP_INFORM(Uniform, "KMP_AFFINITY"); - } - int index; - kmp_str_buf_t buf; - __kmp_str_buf_init(&buf); - __kmp_str_buf_print(&buf, "1"); - for (index = maxIndex - 1; index > pkgIdIndex; index--) { - __kmp_str_buf_print(&buf, " x 1"); - } - KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1); - __kmp_str_buf_free(&buf); - } - - if (__kmp_affinity_type == affinity_none) { - CLEANUP_THREAD_INFO; - return 0; - } - - *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair)); - Address addr(1); - addr.labels[0] = threadInfo[0][pkgIdIndex]; - (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]); - - if (__kmp_affinity_gran_levels < 0) { - __kmp_affinity_gran_levels = 0; - } - - if (__kmp_affinity_verbose) { - __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1); - } - - CLEANUP_THREAD_INFO; - return 1; - } - - // Sort the threadInfo table by physical Id. - qsort(threadInfo, num_avail, sizeof(*threadInfo), - __kmp_affinity_cmp_ProcCpuInfo_phys_id); - - // The table is now sorted by pkgId / coreId / threadId, but we really don't - // know the radix of any of the fields. pkgId's may be sparsely assigned among - // the chips on a system. Although coreId's are usually assigned - // [0 .. coresPerPkg-1] and threadId's are usually assigned - // [0..threadsPerCore-1], we don't want to make any such assumptions. - // - // For that matter, we don't know what coresPerPkg and threadsPerCore (or the - // total # packages) are at this point - we want to determine that now. We - // only have an upper bound on the first two figures. 
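A standalone sketch of the counting pass that follows: walk rows sorted by
(pkg, core, thread) and derive package/core totals and the per-package
maximum without assuming dense or zero-based ids. Field names are
illustrative:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct Row { unsigned pkg, core, thread; };

    int main() {
      // already sorted by (pkg, core, thread), ids sparse on purpose
      std::vector<Row> rows = {{0, 3, 0}, {0, 3, 1}, {0, 7, 0}, {2, 1, 0}};
      unsigned packages = 1, cores = 1, core_run = 1, max_cores_per_pkg = 1;
      for (size_t i = 1; i < rows.size(); ++i) {
        if (rows[i].pkg != rows[i - 1].pkg) {
          ++packages; // new package: close out the current core run
          max_cores_per_pkg = std::max(max_cores_per_pkg, core_run);
          core_run = 1;
          ++cores;
        } else if (rows[i].core != rows[i - 1].core) {
          ++cores; // new core within the same package
          ++core_run;
        }
      }
      max_cores_per_pkg = std::max(max_cores_per_pkg, core_run);
      std::printf("%u packages, %u cores, max %u cores/pkg\n", packages,
                  cores, max_cores_per_pkg);
    }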
- unsigned *counts =
- (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
- unsigned *maxCt =
- (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
- unsigned *totals =
- (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
- unsigned *lastId =
- (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
-
- bool assign_thread_ids = false;
- unsigned threadIdCt;
- unsigned index;
-
-restart_radix_check:
- threadIdCt = 0;
-
- // Initialize the counter arrays with data from threadInfo[0].
- if (assign_thread_ids) {
- if (threadInfo[0][threadIdIndex] == UINT_MAX) {
- threadInfo[0][threadIdIndex] = threadIdCt++;
- } else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
- threadIdCt = threadInfo[0][threadIdIndex] + 1;
- }
- }
- for (index = 0; index <= maxIndex; index++) {
- counts[index] = 1;
- maxCt[index] = 1;
- totals[index] = 1;
- lastId[index] = threadInfo[0][index];
- }
-
- // Run through the rest of the OS procs.
- for (i = 1; i < num_avail; i++) {
- // Find the most significant index whose id differs from the id for the
- // previous OS proc.
- for (index = maxIndex; index >= threadIdIndex; index--) {
- if (assign_thread_ids && (index == threadIdIndex)) {
- // Auto-assign the thread id field if it wasn't specified.
- if (threadInfo[i][threadIdIndex] == UINT_MAX) {
- threadInfo[i][threadIdIndex] = threadIdCt++;
- }
- // Apparently the thread id field was specified for some entries and not
- // others. Start the thread id counter off at the next higher thread id.
- else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
- threadIdCt = threadInfo[i][threadIdIndex] + 1;
- }
- }
- if (threadInfo[i][index] != lastId[index]) {
- // Run through all indices which are less significant, and reset the
- // counts to 1. At all levels up to and including index, we need to
- // increment the totals and record the last id.
- unsigned index2;
- for (index2 = threadIdIndex; index2 < index; index2++) {
- totals[index2]++;
- if (counts[index2] > maxCt[index2]) {
- maxCt[index2] = counts[index2];
- }
- counts[index2] = 1;
- lastId[index2] = threadInfo[i][index2];
- }
- counts[index]++;
- totals[index]++;
- lastId[index] = threadInfo[i][index];
-
- if (assign_thread_ids && (index > threadIdIndex)) {
-
-#if KMP_MIC && REDUCE_TEAM_SIZE
- // The default team size is the total #threads in the machine
- // minus 1 thread for every core that has 3 or more threads.
- teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
-#endif // KMP_MIC && REDUCE_TEAM_SIZE
-
- // Restart the thread counter, as we are on a new core.
- threadIdCt = 0;
-
- // Auto-assign the thread id field if it wasn't specified.
- if (threadInfo[i][threadIdIndex] == UINT_MAX) {
- threadInfo[i][threadIdIndex] = threadIdCt++;
- }
-
- // Apparently the thread id field was specified for some entries and
- // not others. Start the thread id counter off at the next higher
- // thread id.
- else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
- threadIdCt = threadInfo[i][threadIdIndex] + 1;
- }
- }
- break;
- }
- }
- if (index < threadIdIndex) {
- // If thread ids were specified, it is an error if they are not unique.
- // Also, check that we haven't already restarted the loop (to be safe -
- // shouldn't need to).
- if ((threadInfo[i][threadIdIndex] != UINT_MAX) || assign_thread_ids) {
- __kmp_free(lastId);
- __kmp_free(totals);
- __kmp_free(maxCt);
- __kmp_free(counts);
- CLEANUP_THREAD_INFO;
- *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
- return -1;
- }
-
- // If the thread ids were not specified and we see entries that
- // are duplicates, start the loop over and assign the thread ids manually.
- assign_thread_ids = true;
- goto restart_radix_check;
- }
- }
-
-#if KMP_MIC && REDUCE_TEAM_SIZE
- // The default team size is the total #threads in the machine
- // minus 1 thread for every core that has 3 or more threads.
- teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
-#endif // KMP_MIC && REDUCE_TEAM_SIZE
-
- for (index = threadIdIndex; index <= maxIndex; index++) {
- if (counts[index] > maxCt[index]) {
- maxCt[index] = counts[index];
- }
- }
-
- __kmp_nThreadsPerCore = maxCt[threadIdIndex];
- nCoresPerPkg = maxCt[coreIdIndex];
- nPackages = totals[pkgIdIndex];
-
- // Check to see if the machine topology is uniform
- unsigned prod = totals[maxIndex];
- for (index = threadIdIndex; index < maxIndex; index++) {
- prod *= maxCt[index];
- }
- bool uniform = (prod == totals[threadIdIndex]);
-
- // When affinity is off, this routine will still be called to set
- // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
- // Make sure all these vars are set correctly, and return now if affinity is
- // not enabled.
- __kmp_ncores = totals[coreIdIndex];
-
- if (__kmp_affinity_verbose) {
- if (!KMP_AFFINITY_CAPABLE()) {
- KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- if (uniform) {
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- } else {
- KMP_INFORM(NonUniform, "KMP_AFFINITY");
- }
- } else {
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
- __kmp_affin_fullMask);
- KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
- if (__kmp_affinity_respect_mask) {
- KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
- } else {
- KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
- }
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- if (uniform) {
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- } else {
- KMP_INFORM(NonUniform, "KMP_AFFINITY");
- }
- }
- kmp_str_buf_t buf;
- __kmp_str_buf_init(&buf);
-
- __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
- for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
- __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
- }
- KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
- maxCt[threadIdIndex], __kmp_ncores);
-
- __kmp_str_buf_free(&buf);
- }
-
-#if KMP_MIC && REDUCE_TEAM_SIZE
- // Set the default team size.
- if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) { - __kmp_dflt_team_nth = teamSize; - KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting " - "__kmp_dflt_team_nth = %d\n", - __kmp_dflt_team_nth)); - } -#endif // KMP_MIC && REDUCE_TEAM_SIZE - - KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL); - KMP_DEBUG_ASSERT(num_avail == (unsigned)__kmp_avail_proc); - __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc); - for (i = 0; i < num_avail; ++i) { // fill the os indices - __kmp_pu_os_idx[i] = threadInfo[i][osIdIndex]; - } - - if (__kmp_affinity_type == affinity_none) { - __kmp_free(lastId); - __kmp_free(totals); - __kmp_free(maxCt); - __kmp_free(counts); - CLEANUP_THREAD_INFO; - return 0; - } - - // Count the number of levels which have more nodes at that level than at the - // parent's level (with there being an implicit root node of the top level). - // This is equivalent to saying that there is at least one node at this level - // which has a sibling. These levels are in the map, and the package level is - // always in the map. - bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool)); - for (index = threadIdIndex; index < maxIndex; index++) { - KMP_ASSERT(totals[index] >= totals[index + 1]); - inMap[index] = (totals[index] > totals[index + 1]); - } - inMap[maxIndex] = (totals[maxIndex] > 1); - inMap[pkgIdIndex] = true; - - int depth = 0; - for (index = threadIdIndex; index <= maxIndex; index++) { - if (inMap[index]) { - depth++; - } - } - KMP_ASSERT(depth > 0); - - // Construct the data structure that is to be returned. - *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * num_avail); - int pkgLevel = -1; - int coreLevel = -1; - int threadLevel = -1; - - for (i = 0; i < num_avail; ++i) { - Address addr(depth); - unsigned os = threadInfo[i][osIdIndex]; - int src_index; - int dst_index = 0; - - for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) { - if (!inMap[src_index]) { - continue; - } - addr.labels[dst_index] = threadInfo[i][src_index]; - if (src_index == pkgIdIndex) { - pkgLevel = dst_index; - } else if (src_index == coreIdIndex) { - coreLevel = dst_index; - } else if (src_index == threadIdIndex) { - threadLevel = dst_index; - } - dst_index++; - } - (*address2os)[i] = AddrUnsPair(addr, os); - } - - if (__kmp_affinity_gran_levels < 0) { - // Set the granularity level based on what levels are modeled - // in the machine topology map. - unsigned src_index; - __kmp_affinity_gran_levels = 0; - for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) { - if (!inMap[src_index]) { - continue; - } - switch (src_index) { - case threadIdIndex: - if (__kmp_affinity_gran > affinity_gran_thread) { - __kmp_affinity_gran_levels++; - } - - break; - case coreIdIndex: - if (__kmp_affinity_gran > affinity_gran_core) { - __kmp_affinity_gran_levels++; - } - break; - - case pkgIdIndex: - if (__kmp_affinity_gran > affinity_gran_package) { - __kmp_affinity_gran_levels++; - } - break; - } - } - } - - if (__kmp_affinity_verbose) { - __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel, - coreLevel, threadLevel); - } - - __kmp_free(inMap); - __kmp_free(lastId); - __kmp_free(totals); - __kmp_free(maxCt); - __kmp_free(counts); - CLEANUP_THREAD_INFO; - return depth; -} - -// Create and return a table of affinity masks, indexed by OS thread ID. -// This routine handles OR'ing together all the affinity masks of threads -// that are sufficiently close, if granularity > fine. 
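A minimal stand-in (not the runtime's Address class) for the closeness test
the routine below relies on: two label vectors are close when they agree on
all but the last gran_levels components, and close threads get OR'd into a
shared mask:

    #include <cstdio>
    #include <vector>

    static bool is_close(const std::vector<int> &a, const std::vector<int> &b,
                         int gran_levels) {
      int depth = (int)a.size(); // both vectors assumed the same depth
      for (int i = 0; i < depth - gran_levels; ++i)
        if (a[i] != b[i])
          return false;
      return true;
    }

    int main() {
      std::vector<int> t0 = {0, 2, 0}, t1 = {0, 2, 1}, t2 = {0, 3, 0};
      // granularity "core": ignore the trailing thread label
      std::printf("%d %d\n", is_close(t0, t1, 1), is_close(t0, t2, 1)); // 1 0
    }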
-static kmp_affin_mask_t *__kmp_create_masks(unsigned *maxIndex, - unsigned *numUnique, - AddrUnsPair *address2os, - unsigned numAddrs) { - // First form a table of affinity masks in order of OS thread id. - unsigned depth; - unsigned maxOsId; - unsigned i; - - KMP_ASSERT(numAddrs > 0); - depth = address2os[0].first.depth; - - maxOsId = 0; - for (i = numAddrs - 1;; --i) { - unsigned osId = address2os[i].second; - if (osId > maxOsId) { - maxOsId = osId; - } - if (i == 0) - break; - } - kmp_affin_mask_t *osId2Mask; - KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId + 1)); - - // Sort the address2os table according to physical order. Doing so will put - // all threads on the same core/package/node in consecutive locations. - qsort(address2os, numAddrs, sizeof(*address2os), - __kmp_affinity_cmp_Address_labels); - - KMP_ASSERT(__kmp_affinity_gran_levels >= 0); - if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) { - KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels); - } - if (__kmp_affinity_gran_levels >= (int)depth) { - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffThreadsMayMigrate); - } - } - - // Run through the table, forming the masks for all threads on each core. - // Threads on the same core will have identical "Address" objects, not - // considering the last level, which must be the thread id. All threads on a - // core will appear consecutively. - unsigned unique = 0; - unsigned j = 0; // index of 1st thread on core - unsigned leader = 0; - Address *leaderAddr = &(address2os[0].first); - kmp_affin_mask_t *sum; - KMP_CPU_ALLOC_ON_STACK(sum); - KMP_CPU_ZERO(sum); - KMP_CPU_SET(address2os[0].second, sum); - for (i = 1; i < numAddrs; i++) { - // If this thread is sufficiently close to the leader (within the - // granularity setting), then set the bit for this os thread in the - // affinity mask for this group, and go on to the next thread. - if (leaderAddr->isClose(address2os[i].first, __kmp_affinity_gran_levels)) { - KMP_CPU_SET(address2os[i].second, sum); - continue; - } - - // For every thread in this group, copy the mask to the thread's entry in - // the osId2Mask table. Mark the first address as a leader. - for (; j < i; j++) { - unsigned osId = address2os[j].second; - KMP_DEBUG_ASSERT(osId <= maxOsId); - kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId); - KMP_CPU_COPY(mask, sum); - address2os[j].first.leader = (j == leader); - } - unique++; - - // Start a new mask. - leader = i; - leaderAddr = &(address2os[i].first); - KMP_CPU_ZERO(sum); - KMP_CPU_SET(address2os[i].second, sum); - } - - // For every thread in last group, copy the mask to the thread's - // entry in the osId2Mask table. - for (; j < i; j++) { - unsigned osId = address2os[j].second; - KMP_DEBUG_ASSERT(osId <= maxOsId); - kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId); - KMP_CPU_COPY(mask, sum); - address2os[j].first.leader = (j == leader); - } - unique++; - KMP_CPU_FREE_FROM_STACK(sum); - - *maxIndex = maxOsId; - *numUnique = unique; - return osId2Mask; -} - -// Stuff for the affinity proclist parsers. It's easier to declare these vars -// as file-static than to try and pass them through the calling sequence of -// the recursive-descent OMP_PLACES parser. 
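A standalone sketch of how one start-end:stride term of such a proclist
expands into OS proc ids, mirroring the range loop in the parser below;
validity checks against the machine's mask are elided:

    #include <cstdio>
    #include <vector>

    static std::vector<int> expand_range(int start, int end, int stride) {
      std::vector<int> ids;
      if (stride > 0)
        for (int p = start; p <= end; p += stride)
          ids.push_back(p); // ascending range
      else if (stride < 0)
        for (int p = start; p >= end; p += stride)
          ids.push_back(p); // descending range
      return ids;
    }

    int main() {
      for (int id : expand_range(0, 12, 4)) // "0-12:4" -> 0 4 8 12
        std::printf("%d ", id);
      std::printf("\n");
    }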
-static kmp_affin_mask_t *newMasks; -static int numNewMasks; -static int nextNewMask; - -#define ADD_MASK(_mask) \ - { \ - if (nextNewMask >= numNewMasks) { \ - int i; \ - numNewMasks *= 2; \ - kmp_affin_mask_t *temp; \ - KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks); \ - for (i = 0; i < numNewMasks / 2; i++) { \ - kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i); \ - kmp_affin_mask_t *dest = KMP_CPU_INDEX(temp, i); \ - KMP_CPU_COPY(dest, src); \ - } \ - KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks / 2); \ - newMasks = temp; \ - } \ - KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \ - nextNewMask++; \ - } - -#define ADD_MASK_OSID(_osId, _osId2Mask, _maxOsId) \ - { \ - if (((_osId) > _maxOsId) || \ - (!KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \ - if (__kmp_affinity_verbose || \ - (__kmp_affinity_warnings && \ - (__kmp_affinity_type != affinity_none))) { \ - KMP_WARNING(AffIgnoreInvalidProcID, _osId); \ - } \ - } else { \ - ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \ - } \ - } - -// Re-parse the proclist (for the explicit affinity type), and form the list -// of affinity newMasks indexed by gtid. -static void __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks, - unsigned int *out_numMasks, - const char *proclist, - kmp_affin_mask_t *osId2Mask, - int maxOsId) { - int i; - const char *scan = proclist; - const char *next = proclist; - - // We use malloc() for the temporary mask vector, so that we can use - // realloc() to extend it. - numNewMasks = 2; - KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks); - nextNewMask = 0; - kmp_affin_mask_t *sumMask; - KMP_CPU_ALLOC(sumMask); - int setSize = 0; - - for (;;) { - int start, end, stride; - - SKIP_WS(scan); - next = scan; - if (*next == '\0') { - break; - } - - if (*next == '{') { - int num; - setSize = 0; - next++; // skip '{' - SKIP_WS(next); - scan = next; - - // Read the first integer in the set. - KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad proclist"); - SKIP_DIGITS(next); - num = __kmp_str_to_int(scan, *next); - KMP_ASSERT2(num >= 0, "bad explicit proc list"); - - // Copy the mask for that osId to the sum (union) mask. - if ((num > maxOsId) || - (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) { - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && - (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffIgnoreInvalidProcID, num); - } - KMP_CPU_ZERO(sumMask); - } else { - KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num)); - setSize = 1; - } - - for (;;) { - // Check for end of set. - SKIP_WS(next); - if (*next == '}') { - next++; // skip '}' - break; - } - - // Skip optional comma. - if (*next == ',') { - next++; - } - SKIP_WS(next); - - // Read the next integer in the set. - scan = next; - KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list"); - - SKIP_DIGITS(next); - num = __kmp_str_to_int(scan, *next); - KMP_ASSERT2(num >= 0, "bad explicit proc list"); - - // Add the mask for that osId to the sum mask. - if ((num > maxOsId) || - (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) { - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && - (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffIgnoreInvalidProcID, num); - } - } else { - KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num)); - setSize++; - } - } - if (setSize > 0) { - ADD_MASK(sumMask); - } - - SKIP_WS(next); - if (*next == ',') { - next++; - } - scan = next; - continue; - } - - // Read the first integer. 
- KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
- SKIP_DIGITS(next);
- start = __kmp_str_to_int(scan, *next);
- KMP_ASSERT2(start >= 0, "bad explicit proc list");
- SKIP_WS(next);
-
- // If this isn't a range, then add a mask to the list and go on.
- if (*next != '-') {
- ADD_MASK_OSID(start, osId2Mask, maxOsId);
-
- // Skip optional comma.
- if (*next == ',') {
- next++;
- }
- scan = next;
- continue;
- }
-
- // This is a range. Skip over the '-' and read in the 2nd int.
- next++; // skip '-'
- SKIP_WS(next);
- scan = next;
- KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
- SKIP_DIGITS(next);
- end = __kmp_str_to_int(scan, *next);
- KMP_ASSERT2(end >= 0, "bad explicit proc list");
-
- // Check for a stride parameter
- stride = 1;
- SKIP_WS(next);
- if (*next == ':') {
- // A stride is specified. Skip over the ':' and read the 3rd int.
- int sign = +1;
- next++; // skip ':'
- SKIP_WS(next);
- scan = next;
- if (*next == '-') {
- sign = -1;
- next++;
- SKIP_WS(next);
- scan = next;
- }
- KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
- SKIP_DIGITS(next);
- stride = __kmp_str_to_int(scan, *next);
- KMP_ASSERT2(stride >= 0, "bad explicit proc list");
- stride *= sign;
- }
-
- // Do some range checks.
- KMP_ASSERT2(stride != 0, "bad explicit proc list");
- if (stride > 0) {
- KMP_ASSERT2(start <= end, "bad explicit proc list");
- } else {
- KMP_ASSERT2(start >= end, "bad explicit proc list");
- }
- KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
-
- // Add the mask for each OS proc # to the list.
- if (stride > 0) {
- do {
- ADD_MASK_OSID(start, osId2Mask, maxOsId);
- start += stride;
- } while (start <= end);
- } else {
- do {
- ADD_MASK_OSID(start, osId2Mask, maxOsId);
- start += stride;
- } while (start >= end);
- }
-
- // Skip optional comma.
- SKIP_WS(next);
- if (*next == ',') {
- next++;
- }
- scan = next;
- }
-
- *out_numMasks = nextNewMask;
- if (nextNewMask == 0) {
- *out_masks = NULL;
- KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
- return;
- }
- KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
- for (i = 0; i < nextNewMask; i++) {
- kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
- kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
- KMP_CPU_COPY(dest, src);
- }
- KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
- KMP_CPU_FREE(sumMask);
-}
-
-#if OMP_40_ENABLED
-
-/*-----------------------------------------------------------------------------
-Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
-places. Again, here is the grammar:
-
-place_list := place
-place_list := place , place_list
-place := num
-place := place : num
-place := place : num : signed
-place := { subplacelist }
-place := !
place // (lowest priority) -subplace_list := subplace -subplace_list := subplace , subplace_list -subplace := num -subplace := num : num -subplace := num : num : signed -signed := num -signed := + signed -signed := - signed ------------------------------------------------------------------------------*/ - -static void __kmp_process_subplace_list(const char **scan, - kmp_affin_mask_t *osId2Mask, - int maxOsId, kmp_affin_mask_t *tempMask, - int *setSize) { - const char *next; - - for (;;) { - int start, count, stride, i; - - // Read in the starting proc id - SKIP_WS(*scan); - KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list"); - next = *scan; - SKIP_DIGITS(next); - start = __kmp_str_to_int(*scan, *next); - KMP_ASSERT(start >= 0); - *scan = next; - - // valid follow sets are ',' ':' and '}' - SKIP_WS(*scan); - if (**scan == '}' || **scan == ',') { - if ((start > maxOsId) || - (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) { - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && - (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffIgnoreInvalidProcID, start); - } - } else { - KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start)); - (*setSize)++; - } - if (**scan == '}') { - break; - } - (*scan)++; // skip ',' - continue; - } - KMP_ASSERT2(**scan == ':', "bad explicit places list"); - (*scan)++; // skip ':' - - // Read count parameter - SKIP_WS(*scan); - KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list"); - next = *scan; - SKIP_DIGITS(next); - count = __kmp_str_to_int(*scan, *next); - KMP_ASSERT(count >= 0); - *scan = next; - - // valid follow sets are ',' ':' and '}' - SKIP_WS(*scan); - if (**scan == '}' || **scan == ',') { - for (i = 0; i < count; i++) { - if ((start > maxOsId) || - (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) { - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && - (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffIgnoreInvalidProcID, start); - } - break; // don't proliferate warnings for large count - } else { - KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start)); - start++; - (*setSize)++; - } - } - if (**scan == '}') { - break; - } - (*scan)++; // skip ',' - continue; - } - KMP_ASSERT2(**scan == ':', "bad explicit places list"); - (*scan)++; // skip ':' - - // Read stride parameter - int sign = +1; - for (;;) { - SKIP_WS(*scan); - if (**scan == '+') { - (*scan)++; // skip '+' - continue; - } - if (**scan == '-') { - sign *= -1; - (*scan)++; // skip '-' - continue; - } - break; - } - SKIP_WS(*scan); - KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list"); - next = *scan; - SKIP_DIGITS(next); - stride = __kmp_str_to_int(*scan, *next); - KMP_ASSERT(stride >= 0); - *scan = next; - stride *= sign; - - // valid follow sets are ',' and '}' - SKIP_WS(*scan); - if (**scan == '}' || **scan == ',') { - for (i = 0; i < count; i++) { - if ((start > maxOsId) || - (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) { - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && - (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffIgnoreInvalidProcID, start); - } - break; // don't proliferate warnings for large count - } else { - KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start)); - start += stride; - (*setSize)++; - } - } - if (**scan == '}') { - break; - } - (*scan)++; // skip ',' - continue; - } - - KMP_ASSERT2(0, "bad explicit places list"); - } -} - -static void __kmp_process_place(const char **scan, kmp_affin_mask_t 
*osId2Mask, - int maxOsId, kmp_affin_mask_t *tempMask, - int *setSize) { - const char *next; - - // valid follow sets are '{' '!' and num - SKIP_WS(*scan); - if (**scan == '{') { - (*scan)++; // skip '{' - __kmp_process_subplace_list(scan, osId2Mask, maxOsId, tempMask, setSize); - KMP_ASSERT2(**scan == '}', "bad explicit places list"); - (*scan)++; // skip '}' - } else if (**scan == '!') { - (*scan)++; // skip '!' - __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize); - KMP_CPU_COMPLEMENT(maxOsId, tempMask); - } else if ((**scan >= '0') && (**scan <= '9')) { - next = *scan; - SKIP_DIGITS(next); - int num = __kmp_str_to_int(*scan, *next); - KMP_ASSERT(num >= 0); - if ((num > maxOsId) || - (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) { - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffIgnoreInvalidProcID, num); - } - } else { - KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num)); - (*setSize)++; - } - *scan = next; // skip num - } else { - KMP_ASSERT2(0, "bad explicit places list"); - } -} - -// static void -void __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks, - unsigned int *out_numMasks, - const char *placelist, - kmp_affin_mask_t *osId2Mask, - int maxOsId) { - int i, j, count, stride, sign; - const char *scan = placelist; - const char *next = placelist; - - numNewMasks = 2; - KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks); - nextNewMask = 0; - - // tempMask is modified based on the previous or initial - // place to form the current place - // previousMask contains the previous place - kmp_affin_mask_t *tempMask; - kmp_affin_mask_t *previousMask; - KMP_CPU_ALLOC(tempMask); - KMP_CPU_ZERO(tempMask); - KMP_CPU_ALLOC(previousMask); - KMP_CPU_ZERO(previousMask); - int setSize = 0; - - for (;;) { - __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize); - - // valid follow sets are ',' ':' and EOL - SKIP_WS(scan); - if (*scan == '\0' || *scan == ',') { - if (setSize > 0) { - ADD_MASK(tempMask); - } - KMP_CPU_ZERO(tempMask); - setSize = 0; - if (*scan == '\0') { - break; - } - scan++; // skip ',' - continue; - } - - KMP_ASSERT2(*scan == ':', "bad explicit places list"); - scan++; // skip ':' - - // Read count parameter - SKIP_WS(scan); - KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list"); - next = scan; - SKIP_DIGITS(next); - count = __kmp_str_to_int(scan, *next); - KMP_ASSERT(count >= 0); - scan = next; - - // valid follow sets are ',' ':' and EOL - SKIP_WS(scan); - if (*scan == '\0' || *scan == ',') { - stride = +1; - } else { - KMP_ASSERT2(*scan == ':', "bad explicit places list"); - scan++; // skip ':' - - // Read stride parameter - sign = +1; - for (;;) { - SKIP_WS(scan); - if (*scan == '+') { - scan++; // skip '+' - continue; - } - if (*scan == '-') { - sign *= -1; - scan++; // skip '-' - continue; - } - break; - } - SKIP_WS(scan); - KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list"); - next = scan; - SKIP_DIGITS(next); - stride = __kmp_str_to_int(scan, *next); - KMP_DEBUG_ASSERT(stride >= 0); - scan = next; - stride *= sign; - } - - // Add places determined by initial_place : count : stride - for (i = 0; i < count; i++) { - if (setSize == 0) { - break; - } - // Add the current place, then build the next place (tempMask) from that - KMP_CPU_COPY(previousMask, tempMask); - ADD_MASK(previousMask); - KMP_CPU_ZERO(tempMask); - setSize = 0; - KMP_CPU_SET_ITERATE(j, previousMask) { - if (!KMP_CPU_ISSET(j, previousMask)) { - 
continue; - } - if ((j + stride > maxOsId) || (j + stride < 0) || - (!KMP_CPU_ISSET(j, __kmp_affin_fullMask)) || - (!KMP_CPU_ISSET(j + stride, - KMP_CPU_INDEX(osId2Mask, j + stride)))) { - if ((__kmp_affinity_verbose || - (__kmp_affinity_warnings && - (__kmp_affinity_type != affinity_none))) && - i < count - 1) { - KMP_WARNING(AffIgnoreInvalidProcID, j + stride); - } - continue; - } - KMP_CPU_SET(j + stride, tempMask); - setSize++; - } - } - KMP_CPU_ZERO(tempMask); - setSize = 0; - - // valid follow sets are ',' and EOL - SKIP_WS(scan); - if (*scan == '\0') { - break; - } - if (*scan == ',') { - scan++; // skip ',' - continue; - } - - KMP_ASSERT2(0, "bad explicit places list"); - } - - *out_numMasks = nextNewMask; - if (nextNewMask == 0) { - *out_masks = NULL; - KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks); - return; - } - KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask); - KMP_CPU_FREE(tempMask); - KMP_CPU_FREE(previousMask); - for (i = 0; i < nextNewMask; i++) { - kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i); - kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i); - KMP_CPU_COPY(dest, src); - } - KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks); -} - -#endif /* OMP_40_ENABLED */ - -#undef ADD_MASK -#undef ADD_MASK_OSID - -#if KMP_USE_HWLOC -static int __kmp_hwloc_skip_PUs_obj(hwloc_topology_t t, hwloc_obj_t o) { - // skip PUs descendants of the object o - int skipped = 0; - hwloc_obj_t hT = NULL; - int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT); - for (int i = 0; i < N; ++i) { - KMP_DEBUG_ASSERT(hT); - unsigned idx = hT->os_index; - if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) { - KMP_CPU_CLR(idx, __kmp_affin_fullMask); - KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx)); - ++skipped; - } - hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT); - } - return skipped; // count number of skipped units -} - -static int __kmp_hwloc_obj_has_PUs(hwloc_topology_t t, hwloc_obj_t o) { - // check if obj has PUs present in fullMask - hwloc_obj_t hT = NULL; - int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT); - for (int i = 0; i < N; ++i) { - KMP_DEBUG_ASSERT(hT); - unsigned idx = hT->os_index; - if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) - return 1; // found PU - hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT); - } - return 0; // no PUs found -} -#endif // KMP_USE_HWLOC - -static void __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) { - AddrUnsPair *newAddr; - if (__kmp_hws_requested == 0) - goto _exit; // no topology limiting actions requested, exit -#if KMP_USE_HWLOC - if (__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) { - // Number of subobjects calculated dynamically, this works fine for - // any non-uniform topology. - // L2 cache objects are determined by depth, other objects - by type. 
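A standalone sketch of the hwloc traversal pattern the code below applies
level by level: iterate packages, then count the PUs beneath each one. Only
stock hwloc calls are used; build with -lhwloc, error handling is minimal:

    #include <hwloc.h>
    #include <cstdio>

    int main() {
      hwloc_topology_t topo;
      if (hwloc_topology_init(&topo) != 0 || hwloc_topology_load(topo) != 0)
        return 1;
      hwloc_obj_t pkg = NULL;
      while ((pkg = hwloc_get_next_obj_by_type(topo, HWLOC_OBJ_PACKAGE, pkg))) {
        int npus = 0;
        hwloc_obj_t pu = NULL;
        while ((pu = hwloc_get_next_obj_by_type(topo, HWLOC_OBJ_PU, pu)))
          if (hwloc_get_ancestor_obj_by_type(topo, HWLOC_OBJ_PACKAGE, pu) == pkg)
            ++npus; // PU belongs to this package
        std::printf("package %u: %d PUs\n", pkg->os_index, npus);
      }
      hwloc_topology_destroy(topo);
      return 0;
    }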
- hwloc_topology_t tp = __kmp_hwloc_topology;
- int nS = 0, nN = 0, nL = 0, nC = 0,
- nT = 0; // logical index including skipped
- int nCr = 0, nTr = 0; // number of requested units
- int nPkg = 0, nCo = 0, n_new = 0, n_old = 0, nCpP = 0, nTpC = 0; // counters
- hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to)
- int L2depth, idx;
-
- // check support of extensions ----------------------------------
- int numa_support = 0, tile_support = 0;
- if (__kmp_pu_os_idx)
- hT = hwloc_get_pu_obj_by_os_index(tp,
- __kmp_pu_os_idx[__kmp_avail_proc - 1]);
- else
- hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, __kmp_avail_proc - 1);
- if (hT == NULL) { // something's gone wrong
- KMP_WARNING(AffHWSubsetUnsupported);
- goto _exit;
- }
- // check NUMA node
- hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
- hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
- if (hN != NULL && hN->depth > hS->depth) {
- numa_support = 1; // 1 in case socket includes node(s)
- } else if (__kmp_hws_node.num > 0) {
- // don't support sockets inside NUMA node (no such HW found for testing)
- KMP_WARNING(AffHWSubsetUnsupported);
- goto _exit;
- }
- // check L2 cache, get object by depth because of multiple caches
- L2depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
- hL = hwloc_get_ancestor_obj_by_depth(tp, L2depth, hT);
- if (hL != NULL &&
- __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC) > 1) {
- tile_support = 1; // no sense to count L2 if it includes single core
- } else if (__kmp_hws_tile.num > 0) {
- if (__kmp_hws_core.num == 0) {
- __kmp_hws_core = __kmp_hws_tile; // replace L2 with core
- __kmp_hws_tile.num = 0;
- } else {
- // L2 and core are both requested, but represent same object
- KMP_WARNING(AffHWSubsetInvalid);
- goto _exit;
- }
- }
- // end of check of extensions -----------------------------------
-
- // fill in unset items, validate settings -----------------------
- if (__kmp_hws_socket.num == 0)
- __kmp_hws_socket.num = nPackages; // use all available sockets
- if (__kmp_hws_socket.offset >= nPackages) {
- KMP_WARNING(AffHWSubsetManySockets);
- goto _exit;
- }
- if (numa_support) {
- hN = NULL;
- int NN = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE,
- &hN); // num nodes in socket
- if (__kmp_hws_node.num == 0)
- __kmp_hws_node.num = NN; // use all available nodes
- if (__kmp_hws_node.offset >= NN) {
- KMP_WARNING(AffHWSubsetManyNodes);
- goto _exit;
- }
- if (tile_support) {
- // get num tiles in node
- int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
- if (__kmp_hws_tile.num == 0) {
- __kmp_hws_tile.num = NL + 1;
- } // use all available tiles, some node may have more tiles, thus +1
- if (__kmp_hws_tile.offset >= NL) {
- KMP_WARNING(AffHWSubsetManyTiles);
- goto _exit;
- }
- int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
- &hC); // num cores in tile
- if (__kmp_hws_core.num == 0)
- __kmp_hws_core.num = NC; // use all available cores
- if (__kmp_hws_core.offset >= NC) {
- KMP_WARNING(AffHWSubsetManyCores);
- goto _exit;
- }
- } else { // tile_support
- int NC = __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE,
- &hC); // num cores in node
- if (__kmp_hws_core.num == 0)
- __kmp_hws_core.num = NC; // use all available cores
- if (__kmp_hws_core.offset >= NC) {
- KMP_WARNING(AffHWSubsetManyCores);
- goto _exit;
- }
- } // tile_support
- } else { // numa_support
- if (tile_support) {
- // get num tiles in socket
- int NL = __kmp_hwloc_count_children_by_depth(tp, hS,
L2depth, &hL);
- if (__kmp_hws_tile.num == 0)
- __kmp_hws_tile.num = NL; // use all available tiles
- if (__kmp_hws_tile.offset >= NL) {
- KMP_WARNING(AffHWSubsetManyTiles);
- goto _exit;
- }
- int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
- &hC); // num cores in tile
- if (__kmp_hws_core.num == 0)
- __kmp_hws_core.num = NC; // use all available cores
- if (__kmp_hws_core.offset >= NC) {
- KMP_WARNING(AffHWSubsetManyCores);
- goto _exit;
- }
- } else { // tile_support
- int NC = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE,
- &hC); // num cores in socket
- if (__kmp_hws_core.num == 0)
- __kmp_hws_core.num = NC; // use all available cores
- if (__kmp_hws_core.offset >= NC) {
- KMP_WARNING(AffHWSubsetManyCores);
- goto _exit;
- }
- } // tile_support
- }
- if (__kmp_hws_proc.num == 0)
- __kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all available procs
- if (__kmp_hws_proc.offset >= __kmp_nThreadsPerCore) {
- KMP_WARNING(AffHWSubsetManyProcs);
- goto _exit;
- }
- // end of validation --------------------------------------------
-
- if (pAddr) // pAddr is NULL in case of affinity_none
- newAddr = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) *
- __kmp_avail_proc); // max size
- // main loop to form HW subset ----------------------------------
- hS = NULL;
- int NP = hwloc_get_nbobjs_by_type(tp, HWLOC_OBJ_PACKAGE);
- for (int s = 0; s < NP; ++s) {
- // Check Socket -----------------------------------------------
- hS = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hS);
- if (!__kmp_hwloc_obj_has_PUs(tp, hS))
- continue; // skip socket if all PUs are out of fullMask
- ++nS; // only count objects that have PUs in affinity mask
- if (nS <= __kmp_hws_socket.offset ||
- nS > __kmp_hws_socket.num + __kmp_hws_socket.offset) {
- n_old += __kmp_hwloc_skip_PUs_obj(tp, hS); // skip socket
- continue; // move to next socket
- }
- nCr = 0; // count number of cores per socket
- // socket requested, go down the topology tree
- // check 4 cases: (+NUMA+Tile), (+NUMA-Tile), (-NUMA+Tile), (-NUMA-Tile)
- if (numa_support) {
- nN = 0;
- hN = NULL;
- // num nodes in current socket
- int NN =
- __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE, &hN);
- for (int n = 0; n < NN; ++n) {
- // Check NUMA Node ----------------------------------------
- if (!__kmp_hwloc_obj_has_PUs(tp, hN)) {
- hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
- continue; // skip node if all PUs are out of fullMask
- }
- ++nN;
- if (nN <= __kmp_hws_node.offset ||
- nN > __kmp_hws_node.num + __kmp_hws_node.offset) {
- // skip node as not requested
- n_old += __kmp_hwloc_skip_PUs_obj(tp, hN); // skip node
- hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
- continue; // move to next node
- }
- // node requested, go down the topology tree
- if (tile_support) {
- nL = 0;
- hL = NULL;
- int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
- for (int l = 0; l < NL; ++l) {
- // Check L2 (tile) ------------------------------------
- if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
- hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
- continue; // skip tile if all PUs are out of fullMask
- }
- ++nL;
- if (nL <= __kmp_hws_tile.offset ||
- nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
- // skip tile as not requested
- n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile
- hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
- continue; // move to next tile
- }
- // tile requested, go down the topology tree
- nC = 0;
- hC = NULL;
- // num cores in current
tile - int NC = __kmp_hwloc_count_children_by_type(tp, hL, - HWLOC_OBJ_CORE, &hC); - for (int c = 0; c < NC; ++c) { - // Check Core --------------------------------------- - if (!__kmp_hwloc_obj_has_PUs(tp, hC)) { - hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); - continue; // skip core if all PUs are out of fullMask - } - ++nC; - if (nC <= __kmp_hws_core.offset || - nC > __kmp_hws_core.num + __kmp_hws_core.offset) { - // skip node as not requested - n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core - hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); - continue; // move to next node - } - // core requested, go down to PUs - nT = 0; - nTr = 0; - hT = NULL; - // num procs in current core - int NT = __kmp_hwloc_count_children_by_type(tp, hC, - HWLOC_OBJ_PU, &hT); - for (int t = 0; t < NT; ++t) { - // Check PU --------------------------------------- - idx = hT->os_index; - if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) { - hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); - continue; // skip PU if not in fullMask - } - ++nT; - if (nT <= __kmp_hws_proc.offset || - nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) { - // skip PU - KMP_CPU_CLR(idx, __kmp_affin_fullMask); - ++n_old; - KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx)); - hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); - continue; // move to next node - } - ++nTr; - if (pAddr) // collect requested thread's data - newAddr[n_new] = (*pAddr)[n_old]; - ++n_new; - ++n_old; - hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); - } // threads loop - if (nTr > 0) { - ++nCr; // num cores per socket - ++nCo; // total num cores - if (nTr > nTpC) - nTpC = nTr; // calc max threads per core - } - hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); - } // cores loop - hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL); - } // tiles loop - } else { // tile_support - // no tiles, check cores - nC = 0; - hC = NULL; - // num cores in current node - int NC = - __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE, &hC); - for (int c = 0; c < NC; ++c) { - // Check Core --------------------------------------- - if (!__kmp_hwloc_obj_has_PUs(tp, hC)) { - hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); - continue; // skip core if all PUs are out of fullMask - } - ++nC; - if (nC <= __kmp_hws_core.offset || - nC > __kmp_hws_core.num + __kmp_hws_core.offset) { - // skip node as not requested - n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core - hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); - continue; // move to next node - } - // core requested, go down to PUs - nT = 0; - nTr = 0; - hT = NULL; - int NT = - __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT); - for (int t = 0; t < NT; ++t) { - // Check PU --------------------------------------- - idx = hT->os_index; - if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) { - hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); - continue; // skip PU if not in fullMask - } - ++nT; - if (nT <= __kmp_hws_proc.offset || - nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) { - // skip PU - KMP_CPU_CLR(idx, __kmp_affin_fullMask); - ++n_old; - KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx)); - hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); - continue; // move to next node - } - ++nTr; - if (pAddr) // collect requested thread's data - newAddr[n_new] = (*pAddr)[n_old]; - ++n_new; - ++n_old; - hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); - } // threads loop - if (nTr > 0) { - ++nCr; // num cores per socket - 
++nCo; // total num cores - if (nTr > nTpC) - nTpC = nTr; // calc max threads per core - } - hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); - } // cores loop - } // tiles support - hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN); - } // nodes loop - } else { // numa_support - // no NUMA support - if (tile_support) { - nL = 0; - hL = NULL; - // num tiles in current socket - int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL); - for (int l = 0; l < NL; ++l) { - // Check L2 (tile) ------------------------------------ - if (!__kmp_hwloc_obj_has_PUs(tp, hL)) { - hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL); - continue; // skip tile if all PUs are out of fullMask - } - ++nL; - if (nL <= __kmp_hws_tile.offset || - nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) { - // skip tile as not requested - n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile - hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL); - continue; // move to next tile - } - // tile requested, go down the topology tree - nC = 0; - hC = NULL; - // num cores per tile - int NC = - __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC); - for (int c = 0; c < NC; ++c) { - // Check Core --------------------------------------- - if (!__kmp_hwloc_obj_has_PUs(tp, hC)) { - hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); - continue; // skip core if all PUs are out of fullMask - } - ++nC; - if (nC <= __kmp_hws_core.offset || - nC > __kmp_hws_core.num + __kmp_hws_core.offset) { - // skip node as not requested - n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core - hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); - continue; // move to next node - } - // core requested, go down to PUs - nT = 0; - nTr = 0; - hT = NULL; - // num procs per core - int NT = - __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT); - for (int t = 0; t < NT; ++t) { - // Check PU --------------------------------------- - idx = hT->os_index; - if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) { - hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); - continue; // skip PU if not in fullMask - } - ++nT; - if (nT <= __kmp_hws_proc.offset || - nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) { - // skip PU - KMP_CPU_CLR(idx, __kmp_affin_fullMask); - ++n_old; - KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx)); - hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); - continue; // move to next node - } - ++nTr; - if (pAddr) // collect requested thread's data - newAddr[n_new] = (*pAddr)[n_old]; - ++n_new; - ++n_old; - hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); - } // threads loop - if (nTr > 0) { - ++nCr; // num cores per socket - ++nCo; // total num cores - if (nTr > nTpC) - nTpC = nTr; // calc max threads per core - } - hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); - } // cores loop - hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL); - } // tiles loop - } else { // tile_support - // no tiles, check cores - nC = 0; - hC = NULL; - // num cores in socket - int NC = - __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE, &hC); - for (int c = 0; c < NC; ++c) { - // Check Core ------------------------------------------- - if (!__kmp_hwloc_obj_has_PUs(tp, hC)) { - hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); - continue; // skip core if all PUs are out of fullMask - } - ++nC; - if (nC <= __kmp_hws_core.offset || - nC > __kmp_hws_core.num + __kmp_hws_core.offset) { - // skip node as not requested - n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core - 
hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); - continue; // move to next node - } - // core requested, go down to PUs - nT = 0; - nTr = 0; - hT = NULL; - // num procs per core - int NT = - __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT); - for (int t = 0; t < NT; ++t) { - // Check PU --------------------------------------- - idx = hT->os_index; - if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) { - hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); - continue; // skip PU if not in fullMask - } - ++nT; - if (nT <= __kmp_hws_proc.offset || - nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) { - // skip PU - KMP_CPU_CLR(idx, __kmp_affin_fullMask); - ++n_old; - KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx)); - hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); - continue; // move to next node - } - ++nTr; - if (pAddr) // collect requested thread's data - newAddr[n_new] = (*pAddr)[n_old]; - ++n_new; - ++n_old; - hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); - } // threads loop - if (nTr > 0) { - ++nCr; // num cores per socket - ++nCo; // total num cores - if (nTr > nTpC) - nTpC = nTr; // calc max threads per core - } - hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); - } // cores loop - } // tiles support - } // numa_support - if (nCr > 0) { // found cores? - ++nPkg; // num sockets - if (nCr > nCpP) - nCpP = nCr; // calc max cores per socket - } - } // sockets loop - - // check the subset is valid - KMP_DEBUG_ASSERT(n_old == __kmp_avail_proc); - KMP_DEBUG_ASSERT(nPkg > 0); - KMP_DEBUG_ASSERT(nCpP > 0); - KMP_DEBUG_ASSERT(nTpC > 0); - KMP_DEBUG_ASSERT(nCo > 0); - KMP_DEBUG_ASSERT(nPkg <= nPackages); - KMP_DEBUG_ASSERT(nCpP <= nCoresPerPkg); - KMP_DEBUG_ASSERT(nTpC <= __kmp_nThreadsPerCore); - KMP_DEBUG_ASSERT(nCo <= __kmp_ncores); - - nPackages = nPkg; // correct num sockets - nCoresPerPkg = nCpP; // correct num cores per socket - __kmp_nThreadsPerCore = nTpC; // correct num threads per core - __kmp_avail_proc = n_new; // correct num procs - __kmp_ncores = nCo; // correct num cores - // hwloc topology method end - } else -#endif // KMP_USE_HWLOC - { - int n_old = 0, n_new = 0, proc_num = 0; - if (__kmp_hws_node.num > 0 || __kmp_hws_tile.num > 0) { - KMP_WARNING(AffHWSubsetNoHWLOC); - goto _exit; - } - if (__kmp_hws_socket.num == 0) - __kmp_hws_socket.num = nPackages; // use all available sockets - if (__kmp_hws_core.num == 0) - __kmp_hws_core.num = nCoresPerPkg; // use all available cores - if (__kmp_hws_proc.num == 0 || __kmp_hws_proc.num > __kmp_nThreadsPerCore) - __kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all HW contexts - if (!__kmp_affinity_uniform_topology()) { - KMP_WARNING(AffHWSubsetNonUniform); - goto _exit; // don't support non-uniform topology - } - if (depth > 3) { - KMP_WARNING(AffHWSubsetNonThreeLevel); - goto _exit; // don't support not-3-level topology - } - if (__kmp_hws_socket.offset + __kmp_hws_socket.num > nPackages) { - KMP_WARNING(AffHWSubsetManySockets); - goto _exit; - } - if (__kmp_hws_core.offset + __kmp_hws_core.num > nCoresPerPkg) { - KMP_WARNING(AffHWSubsetManyCores); - goto _exit; - } - // Form the requested subset - if (pAddr) // pAddr is NULL in case of affinity_none - newAddr = (AddrUnsPair *)__kmp_allocate( - sizeof(AddrUnsPair) * __kmp_hws_socket.num * __kmp_hws_core.num * - __kmp_hws_proc.num); - for (int i = 0; i < nPackages; ++i) { - if (i < __kmp_hws_socket.offset || - i >= __kmp_hws_socket.offset + __kmp_hws_socket.num) { - // skip not-requested socket - n_old += nCoresPerPkg * 
__kmp_nThreadsPerCore; - if (__kmp_pu_os_idx != NULL) { - // walk through skipped socket - for (int j = 0; j < nCoresPerPkg; ++j) { - for (int k = 0; k < __kmp_nThreadsPerCore; ++k) { - KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask); - ++proc_num; - } - } - } - } else { - // walk through requested socket - for (int j = 0; j < nCoresPerPkg; ++j) { - if (j < __kmp_hws_core.offset || - j >= __kmp_hws_core.offset + - __kmp_hws_core.num) { // skip not-requested core - n_old += __kmp_nThreadsPerCore; - if (__kmp_pu_os_idx != NULL) { - for (int k = 0; k < __kmp_nThreadsPerCore; ++k) { - KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask); - ++proc_num; - } - } - } else { - // walk through requested core - for (int k = 0; k < __kmp_nThreadsPerCore; ++k) { - if (k < __kmp_hws_proc.num) { - if (pAddr) // collect requested thread's data - newAddr[n_new] = (*pAddr)[n_old]; - n_new++; - } else { - if (__kmp_pu_os_idx != NULL) - KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask); - } - n_old++; - ++proc_num; - } - } - } - } - } - KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore); - KMP_DEBUG_ASSERT(n_new == - __kmp_hws_socket.num * __kmp_hws_core.num * - __kmp_hws_proc.num); - nPackages = __kmp_hws_socket.num; // correct nPackages - nCoresPerPkg = __kmp_hws_core.num; // correct nCoresPerPkg - __kmp_nThreadsPerCore = __kmp_hws_proc.num; // correct __kmp_nThreadsPerCore - __kmp_avail_proc = n_new; // correct avail_proc - __kmp_ncores = nPackages * __kmp_hws_core.num; // correct ncores - } // non-hwloc topology method - if (pAddr) { - __kmp_free(*pAddr); - *pAddr = newAddr; // replace old topology with new one - } - if (__kmp_affinity_verbose) { - char m[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(m, KMP_AFFIN_MASK_PRINT_LEN, - __kmp_affin_fullMask); - if (__kmp_affinity_respect_mask) { - KMP_INFORM(InitOSProcSetRespect, "KMP_HW_SUBSET", m); - } else { - KMP_INFORM(InitOSProcSetNotRespect, "KMP_HW_SUBSET", m); - } - KMP_INFORM(AvailableOSProc, "KMP_HW_SUBSET", __kmp_avail_proc); - kmp_str_buf_t buf; - __kmp_str_buf_init(&buf); - __kmp_str_buf_print(&buf, "%d", nPackages); - KMP_INFORM(TopologyExtra, "KMP_HW_SUBSET", buf.str, nCoresPerPkg, - __kmp_nThreadsPerCore, __kmp_ncores); - __kmp_str_buf_free(&buf); - } -_exit: - if (__kmp_pu_os_idx != NULL) { - __kmp_free(__kmp_pu_os_idx); - __kmp_pu_os_idx = NULL; - } -} - -// This function figures out the deepest level at which there is at least one -// cluster/core with more than one processing unit bound to it. -static int __kmp_affinity_find_core_level(const AddrUnsPair *address2os, - int nprocs, int bottom_level) { - int core_level = 0; - - for (int i = 0; i < nprocs; i++) { - for (int j = bottom_level; j > 0; j--) { - if (address2os[i].first.labels[j] > 0) { - if (core_level < (j - 1)) { - core_level = j - 1; - } - } - } - } - return core_level; -} - -// This function counts number of clusters/cores at given level. -static int __kmp_affinity_compute_ncores(const AddrUnsPair *address2os, - int nprocs, int bottom_level, - int core_level) { - int ncores = 0; - int i, j; - - j = bottom_level; - for (i = 0; i < nprocs; i++) { - for (j = bottom_level; j > core_level; j--) { - if ((i + 1) < nprocs) { - if (address2os[i + 1].first.labels[j] > 0) { - break; - } - } - } - if (j == core_level) { - ncores++; - } - } - if (j > core_level) { - // In case of ( nprocs < __kmp_avail_proc ) we may end too deep and miss one - // core. May occur when called from __kmp_affinity_find_core(). 
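// Editor's note (clarification, not vendor text): __kmp_affinity_find_core()
// calls this function with nprocs == proc + 1, so the truncated scan can stop
// inside a core whose boundary is never observed; that core is counted here.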
- ncores++; - } - return ncores; -} - -// This function finds to which cluster/core given processing unit is bound. -static int __kmp_affinity_find_core(const AddrUnsPair *address2os, int proc, - int bottom_level, int core_level) { - return __kmp_affinity_compute_ncores(address2os, proc + 1, bottom_level, - core_level) - - 1; -} - -// This function finds maximal number of processing units bound to a -// cluster/core at given level. -static int __kmp_affinity_max_proc_per_core(const AddrUnsPair *address2os, - int nprocs, int bottom_level, - int core_level) { - int maxprocpercore = 0; - - if (core_level < bottom_level) { - for (int i = 0; i < nprocs; i++) { - int percore = address2os[i].first.labels[core_level + 1] + 1; - - if (percore > maxprocpercore) { - maxprocpercore = percore; - } - } - } else { - maxprocpercore = 1; - } - return maxprocpercore; -} - -static AddrUnsPair *address2os = NULL; -static int *procarr = NULL; -static int __kmp_aff_depth = 0; - -#if KMP_USE_HIER_SCHED -#define KMP_EXIT_AFF_NONE \ - KMP_ASSERT(__kmp_affinity_type == affinity_none); \ - KMP_ASSERT(address2os == NULL); \ - __kmp_apply_thread_places(NULL, 0); \ - __kmp_create_affinity_none_places(); \ - __kmp_dispatch_set_hierarchy_values(); \ - return; -#else -#define KMP_EXIT_AFF_NONE \ - KMP_ASSERT(__kmp_affinity_type == affinity_none); \ - KMP_ASSERT(address2os == NULL); \ - __kmp_apply_thread_places(NULL, 0); \ - __kmp_create_affinity_none_places(); \ - return; -#endif - -// Create a one element mask array (set of places) which only contains the -// initial process's affinity mask -static void __kmp_create_affinity_none_places() { - KMP_ASSERT(__kmp_affin_fullMask != NULL); - KMP_ASSERT(__kmp_affinity_type == affinity_none); - __kmp_affinity_num_masks = 1; - KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks); - kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, 0); - KMP_CPU_COPY(dest, __kmp_affin_fullMask); -} - -static int __kmp_affinity_cmp_Address_child_num(const void *a, const void *b) { - const Address *aa = &(((const AddrUnsPair *)a)->first); - const Address *bb = &(((const AddrUnsPair *)b)->first); - unsigned depth = aa->depth; - unsigned i; - KMP_DEBUG_ASSERT(depth == bb->depth); - KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth); - KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0); - for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) { - int j = depth - i - 1; - if (aa->childNums[j] < bb->childNums[j]) - return -1; - if (aa->childNums[j] > bb->childNums[j]) - return 1; - } - for (; i < depth; i++) { - int j = i - __kmp_affinity_compact; - if (aa->childNums[j] < bb->childNums[j]) - return -1; - if (aa->childNums[j] > bb->childNums[j]) - return 1; - } - return 0; -} - -static void __kmp_aux_affinity_initialize(void) { - if (__kmp_affinity_masks != NULL) { - KMP_ASSERT(__kmp_affin_fullMask != NULL); - return; - } - - // Create the "full" mask - this defines all of the processors that we - // consider to be in the machine model. If respect is set, then it is the - // initialization thread's affinity mask. Otherwise, it is all processors that - // we know about on the machine. - if (__kmp_affin_fullMask == NULL) { - KMP_CPU_ALLOC(__kmp_affin_fullMask); - } - if (KMP_AFFINITY_CAPABLE()) { - if (__kmp_affinity_respect_mask) { - __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE); - - // Count the number of available processors. 
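// Editor's note (clarification, not vendor text): the loop below counts the
// bits set in the full mask; if that count ever exceeds __kmp_xproc, the mask
// is inconsistent with what the OS reported, and affinity is disabled.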
-    unsigned i;
-    __kmp_avail_proc = 0;
-    KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
-      if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
-        continue;
-      }
-      __kmp_avail_proc++;
-    }
-    if (__kmp_avail_proc > __kmp_xproc) {
-      if (__kmp_affinity_verbose ||
-          (__kmp_affinity_warnings &&
-           (__kmp_affinity_type != affinity_none))) {
-        KMP_WARNING(ErrorInitializeAffinity);
-      }
-      __kmp_affinity_type = affinity_none;
-      KMP_AFFINITY_DISABLE();
-      return;
-    }
-    } else {
-    __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
-    __kmp_avail_proc = __kmp_xproc;
-    }
-  }
-
-  if (__kmp_affinity_gran == affinity_gran_tile &&
-      // check if user's request is valid
-      __kmp_affinity_dispatch->get_api_type() == KMPAffinity::NATIVE_OS) {
-    KMP_WARNING(AffTilesNoHWLOC, "KMP_AFFINITY");
-    __kmp_affinity_gran = affinity_gran_package;
-  }
-
-  int depth = -1;
-  kmp_i18n_id_t msg_id = kmp_i18n_null;
-
-  // For backward compatibility, setting KMP_CPUINFO_FILE =>
-  // KMP_TOPOLOGY_METHOD=cpuinfo
-  if ((__kmp_cpuinfo_file != NULL) &&
-      (__kmp_affinity_top_method == affinity_top_method_all)) {
-    __kmp_affinity_top_method = affinity_top_method_cpuinfo;
-  }
-
-  if (__kmp_affinity_top_method == affinity_top_method_all) {
-    // In the default code path, errors are not fatal - we just try using
-    // another method. We only emit a warning message if affinity is on, or the
-    // verbose flag is set, and the nowarnings flag was not set.
-    const char *file_name = NULL;
-    int line = 0;
-#if KMP_USE_HWLOC
-    if (depth < 0 &&
-        __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
-      if (__kmp_affinity_verbose) {
-        KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
-      }
-      if (!__kmp_hwloc_error) {
-        depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
-        if (depth == 0) {
-          KMP_EXIT_AFF_NONE;
-        } else if (depth < 0 && __kmp_affinity_verbose) {
-          KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
-        }
-      } else if (__kmp_affinity_verbose) {
-        KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
-      }
-    }
-#endif
-
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-
-    if (depth < 0) {
-      if (__kmp_affinity_verbose) {
-        KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
-      }
-
-      file_name = NULL;
-      depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
-      if (depth == 0) {
-        KMP_EXIT_AFF_NONE;
-      }
-
-      if (depth < 0) {
-        if (__kmp_affinity_verbose) {
-          if (msg_id != kmp_i18n_null) {
-            KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY",
-                       __kmp_i18n_catgets(msg_id),
-                       KMP_I18N_STR(DecodingLegacyAPIC));
-          } else {
-            KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
-                       KMP_I18N_STR(DecodingLegacyAPIC));
-          }
-        }
-
-        file_name = NULL;
-        depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
-        if (depth == 0) {
-          KMP_EXIT_AFF_NONE;
-        }
-      }
-    }
-
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
-#if KMP_OS_LINUX
-
-    if (depth < 0) {
-      if (__kmp_affinity_verbose) {
-        if (msg_id != kmp_i18n_null) {
-          KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY",
-                     __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
-        } else {
-          KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
-        }
-      }
-
-      FILE *f = fopen("/proc/cpuinfo", "r");
-      if (f == NULL) {
-        msg_id = kmp_i18n_str_CantOpenCpuinfo;
-      } else {
-        file_name = "/proc/cpuinfo";
-        depth =
-            __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
-        fclose(f);
-        if (depth == 0) {
-          KMP_EXIT_AFF_NONE;
-        }
-      }
-    }
-
-#endif /* KMP_OS_LINUX */
-
-#if KMP_GROUP_AFFINITY
-
-    if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
-      if (__kmp_affinity_verbose) {
-        KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
-      }
-
-      depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
-      KMP_ASSERT(depth != 0);
-    }
-
-#endif /* KMP_GROUP_AFFINITY */
-
-    if (depth < 0) {
-      if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
-        if (file_name == NULL) {
-          KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
-        } else if (line == 0) {
-          KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
-        } else {
-          KMP_INFORM(UsingFlatOSFileLine, file_name, line,
-                     __kmp_i18n_catgets(msg_id));
-        }
-      }
-      // FIXME - print msg if msg_id = kmp_i18n_null ???
-
-      file_name = "";
-      depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
-      if (depth == 0) {
-        KMP_EXIT_AFF_NONE;
-      }
-      KMP_ASSERT(depth > 0);
-      KMP_ASSERT(address2os != NULL);
-    }
-  }
-
-#if KMP_USE_HWLOC
-  else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
-    KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC);
-    if (__kmp_affinity_verbose) {
-      KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
-    }
-    depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
-    if (depth == 0) {
-      KMP_EXIT_AFF_NONE;
-    }
-  }
-#endif // KMP_USE_HWLOC
-
-// If the user has specified that a particular topology discovery method is to
-// be used, then we abort if that method fails. The exception is group
-// affinity, which might have been implicitly set.
-
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-
-  else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
-    if (__kmp_affinity_verbose) {
-      KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
-    }
-
-    depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
-    if (depth == 0) {
-      KMP_EXIT_AFF_NONE;
-    }
-    if (depth < 0) {
-      KMP_ASSERT(msg_id != kmp_i18n_null);
-      KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
-    }
-  } else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
-    if (__kmp_affinity_verbose) {
-      KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
-    }
-
-    depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
-    if (depth == 0) {
-      KMP_EXIT_AFF_NONE;
-    }
-    if (depth < 0) {
-      KMP_ASSERT(msg_id != kmp_i18n_null);
-      KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
-    }
-  }
-
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
-  else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
-    const char *filename;
-    if (__kmp_cpuinfo_file != NULL) {
-      filename = __kmp_cpuinfo_file;
-    } else {
-      filename = "/proc/cpuinfo";
-    }
-
-    if (__kmp_affinity_verbose) {
-      KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
-    }
-
-    FILE *f = fopen(filename, "r");
-    if (f == NULL) {
-      int code = errno;
-      if (__kmp_cpuinfo_file != NULL) {
-        __kmp_fatal(KMP_MSG(CantOpenFileForReading, filename), KMP_ERR(code),
-                    KMP_HNT(NameComesFrom_CPUINFO_FILE), __kmp_msg_null);
-      } else {
-        __kmp_fatal(KMP_MSG(CantOpenFileForReading, filename), KMP_ERR(code),
-                    __kmp_msg_null);
-      }
-    }
-    int line = 0;
-    depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
-    fclose(f);
-    if (depth < 0) {
-      KMP_ASSERT(msg_id != kmp_i18n_null);
-      if (line > 0) {
-        KMP_FATAL(FileLineMsgExiting, filename, line,
-                  __kmp_i18n_catgets(msg_id));
-      } else {
-        KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
-      }
-    }
-    if (__kmp_affinity_type == affinity_none) {
-      KMP_ASSERT(depth == 0);
-      KMP_EXIT_AFF_NONE;
-    }
-  }
-
-#if KMP_GROUP_AFFINITY
-
-  else if (__kmp_affinity_top_method == affinity_top_method_group) {
-    if (__kmp_affinity_verbose) {
-      KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
-    }
-
-    depth =
__kmp_affinity_create_proc_group_map(&address2os, &msg_id); - KMP_ASSERT(depth != 0); - if (depth < 0) { - KMP_ASSERT(msg_id != kmp_i18n_null); - KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id)); - } - } - -#endif /* KMP_GROUP_AFFINITY */ - - else if (__kmp_affinity_top_method == affinity_top_method_flat) { - if (__kmp_affinity_verbose) { - KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY"); - } - - depth = __kmp_affinity_create_flat_map(&address2os, &msg_id); - if (depth == 0) { - KMP_EXIT_AFF_NONE; - } - // should not fail - KMP_ASSERT(depth > 0); - KMP_ASSERT(address2os != NULL); - } - -#if KMP_USE_HIER_SCHED - __kmp_dispatch_set_hierarchy_values(); -#endif - - if (address2os == NULL) { - if (KMP_AFFINITY_CAPABLE() && - (__kmp_affinity_verbose || - (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none)))) { - KMP_WARNING(ErrorInitializeAffinity); - } - __kmp_affinity_type = affinity_none; - __kmp_create_affinity_none_places(); - KMP_AFFINITY_DISABLE(); - return; - } - - if (__kmp_affinity_gran == affinity_gran_tile -#if KMP_USE_HWLOC - && __kmp_tile_depth == 0 -#endif - ) { - // tiles requested but not detected, warn user on this - KMP_WARNING(AffTilesNoTiles, "KMP_AFFINITY"); - } - - __kmp_apply_thread_places(&address2os, depth); - - // Create the table of masks, indexed by thread Id. - unsigned maxIndex; - unsigned numUnique; - kmp_affin_mask_t *osId2Mask = - __kmp_create_masks(&maxIndex, &numUnique, address2os, __kmp_avail_proc); - if (__kmp_affinity_gran_levels == 0) { - KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc); - } - - // Set the childNums vector in all Address objects. This must be done before - // we can sort using __kmp_affinity_cmp_Address_child_num(), which takes into - // account the setting of __kmp_affinity_compact. - __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc); - - switch (__kmp_affinity_type) { - - case affinity_explicit: - KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL); -#if OMP_40_ENABLED - if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) -#endif - { - __kmp_affinity_process_proclist( - &__kmp_affinity_masks, &__kmp_affinity_num_masks, - __kmp_affinity_proclist, osId2Mask, maxIndex); - } -#if OMP_40_ENABLED - else { - __kmp_affinity_process_placelist( - &__kmp_affinity_masks, &__kmp_affinity_num_masks, - __kmp_affinity_proclist, osId2Mask, maxIndex); - } -#endif - if (__kmp_affinity_num_masks == 0) { - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffNoValidProcID); - } - __kmp_affinity_type = affinity_none; - __kmp_create_affinity_none_places(); - return; - } - break; - - // The other affinity types rely on sorting the Addresses according to some - // permutation of the machine topology tree. Set __kmp_affinity_compact and - // __kmp_affinity_offset appropriately, then jump to a common code fragment - // to do the sort and create the array of affinity masks. 
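// Editor's sketch (not part of the vendor source): how the comparator
// __kmp_affinity_cmp_Address_child_num permutes the sort key. For depth == 3
// labels ordered {package, core, thread} and __kmp_affinity_compact == 1, the
// effective key becomes (thread, package, core): the innermost 'compact'
// levels are promoted to the most significant positions, scattering
// consecutive thread ids across packages. E.g. {package 0, core 1, thread 1}
// (key 1,0,1) then sorts before {package 0, core 0, thread 2} (key 2,0,0).
// The helper name and fixed depth below are illustrative only.
static void example_compact_key(const int labels[3], int compact, int key[3]) {
  const int depth = 3;
  int k = 0;
  for (int i = 0; i < compact; ++i) // deepest 'compact' levels first
    key[k++] = labels[depth - i - 1];
  for (int i = compact; i < depth; ++i) // then the rest, outermost first
    key[k++] = labels[i - compact];
}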
- - case affinity_logical: - __kmp_affinity_compact = 0; - if (__kmp_affinity_offset) { - __kmp_affinity_offset = - __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc; - } - goto sortAddresses; - - case affinity_physical: - if (__kmp_nThreadsPerCore > 1) { - __kmp_affinity_compact = 1; - if (__kmp_affinity_compact >= depth) { - __kmp_affinity_compact = 0; - } - } else { - __kmp_affinity_compact = 0; - } - if (__kmp_affinity_offset) { - __kmp_affinity_offset = - __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc; - } - goto sortAddresses; - - case affinity_scatter: - if (__kmp_affinity_compact >= depth) { - __kmp_affinity_compact = 0; - } else { - __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact; - } - goto sortAddresses; - - case affinity_compact: - if (__kmp_affinity_compact >= depth) { - __kmp_affinity_compact = depth - 1; - } - goto sortAddresses; - - case affinity_balanced: - if (depth <= 1) { - if (__kmp_affinity_verbose || __kmp_affinity_warnings) { - KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY"); - } - __kmp_affinity_type = affinity_none; - __kmp_create_affinity_none_places(); - return; - } else if (!__kmp_affinity_uniform_topology()) { - // Save the depth for further usage - __kmp_aff_depth = depth; - - int core_level = __kmp_affinity_find_core_level( - address2os, __kmp_avail_proc, depth - 1); - int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc, - depth - 1, core_level); - int maxprocpercore = __kmp_affinity_max_proc_per_core( - address2os, __kmp_avail_proc, depth - 1, core_level); - - int nproc = ncores * maxprocpercore; - if ((nproc < 2) || (nproc < __kmp_avail_proc)) { - if (__kmp_affinity_verbose || __kmp_affinity_warnings) { - KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY"); - } - __kmp_affinity_type = affinity_none; - return; - } - - procarr = (int *)__kmp_allocate(sizeof(int) * nproc); - for (int i = 0; i < nproc; i++) { - procarr[i] = -1; - } - - int lastcore = -1; - int inlastcore = 0; - for (int i = 0; i < __kmp_avail_proc; i++) { - int proc = address2os[i].second; - int core = - __kmp_affinity_find_core(address2os, i, depth - 1, core_level); - - if (core == lastcore) { - inlastcore++; - } else { - inlastcore = 0; - } - lastcore = core; - - procarr[core * maxprocpercore + inlastcore] = proc; - } - } - if (__kmp_affinity_compact >= depth) { - __kmp_affinity_compact = depth - 1; - } - - sortAddresses: - // Allocate the gtid->affinity mask table. - if (__kmp_affinity_dups) { - __kmp_affinity_num_masks = __kmp_avail_proc; - } else { - __kmp_affinity_num_masks = numUnique; - } - -#if OMP_40_ENABLED - if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) && - (__kmp_affinity_num_places > 0) && - ((unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks)) { - __kmp_affinity_num_masks = __kmp_affinity_num_places; - } -#endif - - KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks); - - // Sort the address2os table according to the current setting of - // __kmp_affinity_compact, then fill out __kmp_affinity_masks. 
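// Editor's note (clarification, not vendor text): when duplicates are
// filtered out below (__kmp_affinity_dups is FALSE), only entries whose
// 'leader' flag was set by __kmp_create_masks() above contribute a mask.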
-    qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
-          __kmp_affinity_cmp_Address_child_num);
-    {
-      int i;
-      unsigned j;
-      for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
-        if ((!__kmp_affinity_dups) && (!address2os[i].first.leader)) {
-          continue;
-        }
-        unsigned osId = address2os[i].second;
-        kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
-        kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, j);
-        KMP_ASSERT(KMP_CPU_ISSET(osId, src));
-        KMP_CPU_COPY(dest, src);
-        if (++j >= __kmp_affinity_num_masks) {
-          break;
-        }
-      }
-      KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
-    }
-    break;
-
-  default:
-    KMP_ASSERT2(0, "Unexpected affinity setting");
-  }
-
-  KMP_CPU_FREE_ARRAY(osId2Mask, maxIndex + 1);
-  machine_hierarchy.init(address2os, __kmp_avail_proc);
-}
-#undef KMP_EXIT_AFF_NONE
-
-void __kmp_affinity_initialize(void) {
-  // Much of the code above was written assuming that if a machine was not
-  // affinity capable, then __kmp_affinity_type == affinity_none. We now
-  // explicitly represent this as __kmp_affinity_type == affinity_disabled.
-  // There are too many checks for __kmp_affinity_type == affinity_none
-  // in this code. Instead of trying to change them all, check if
-  // __kmp_affinity_type == affinity_disabled, and if so, slam it with
-  // affinity_none, call the real initialization routine, then restore
-  // __kmp_affinity_type to affinity_disabled.
-  int disabled = (__kmp_affinity_type == affinity_disabled);
-  if (!KMP_AFFINITY_CAPABLE()) {
-    KMP_ASSERT(disabled);
-  }
-  if (disabled) {
-    __kmp_affinity_type = affinity_none;
-  }
-  __kmp_aux_affinity_initialize();
-  if (disabled) {
-    __kmp_affinity_type = affinity_disabled;
-  }
-}
-
-void __kmp_affinity_uninitialize(void) {
-  if (__kmp_affinity_masks != NULL) {
-    KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
-    __kmp_affinity_masks = NULL;
-  }
-  if (__kmp_affin_fullMask != NULL) {
-    KMP_CPU_FREE(__kmp_affin_fullMask);
-    __kmp_affin_fullMask = NULL;
-  }
-  __kmp_affinity_num_masks = 0;
-  __kmp_affinity_type = affinity_default;
-#if OMP_40_ENABLED
-  __kmp_affinity_num_places = 0;
-#endif
-  if (__kmp_affinity_proclist != NULL) {
-    __kmp_free(__kmp_affinity_proclist);
-    __kmp_affinity_proclist = NULL;
-  }
-  if (address2os != NULL) {
-    __kmp_free(address2os);
-    address2os = NULL;
-  }
-  if (procarr != NULL) {
-    __kmp_free(procarr);
-    procarr = NULL;
-  }
-#if KMP_USE_HWLOC
-  if (__kmp_hwloc_topology != NULL) {
-    hwloc_topology_destroy(__kmp_hwloc_topology);
-    __kmp_hwloc_topology = NULL;
-  }
-#endif
-  KMPAffinity::destroy_api();
-}
-
-void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
-  if (!KMP_AFFINITY_CAPABLE()) {
-    return;
-  }
-
-  kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
-  if (th->th.th_affin_mask == NULL) {
-    KMP_CPU_ALLOC(th->th.th_affin_mask);
-  } else {
-    KMP_CPU_ZERO(th->th.th_affin_mask);
-  }
-
-  // Copy the thread mask to the kmp_info_t structure. If
-  // __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one that
-  // has all of the OS proc ids set, or if __kmp_affinity_respect_mask is set,
-  // then the full mask is the same as the mask of the initialization thread.
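// Editor's note (worked example, not vendor text): in the non-full-mask case
// below a thread's place is chosen round-robin as
//   i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
// e.g. with 4 masks and offset 1, gtids 0..3 land on places 1, 2, 3, 0.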
- kmp_affin_mask_t *mask; - int i; - -#if OMP_40_ENABLED - if (KMP_AFFINITY_NON_PROC_BIND) -#endif - { - if ((__kmp_affinity_type == affinity_none) || - (__kmp_affinity_type == affinity_balanced)) { -#if KMP_GROUP_AFFINITY - if (__kmp_num_proc_groups > 1) { - return; - } -#endif - KMP_ASSERT(__kmp_affin_fullMask != NULL); - i = 0; - mask = __kmp_affin_fullMask; - } else { - KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0); - i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks; - mask = KMP_CPU_INDEX(__kmp_affinity_masks, i); - } - } -#if OMP_40_ENABLED - else { - if ((!isa_root) || - (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) { -#if KMP_GROUP_AFFINITY - if (__kmp_num_proc_groups > 1) { - return; - } -#endif - KMP_ASSERT(__kmp_affin_fullMask != NULL); - i = KMP_PLACE_ALL; - mask = __kmp_affin_fullMask; - } else { - // int i = some hash function or just a counter that doesn't - // always start at 0. Use gtid for now. - KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0); - i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks; - mask = KMP_CPU_INDEX(__kmp_affinity_masks, i); - } - } -#endif - -#if OMP_40_ENABLED - th->th.th_current_place = i; - if (isa_root) { - th->th.th_new_place = i; - th->th.th_first_place = 0; - th->th.th_last_place = __kmp_affinity_num_masks - 1; - } else if (KMP_AFFINITY_NON_PROC_BIND) { - // When using a Non-OMP_PROC_BIND affinity method, - // set all threads' place-partition-var to the entire place list - th->th.th_first_place = 0; - th->th.th_last_place = __kmp_affinity_num_masks - 1; - } - - if (i == KMP_PLACE_ALL) { - KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n", - gtid)); - } else { - KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n", - gtid, i)); - } -#else - if (i == -1) { - KA_TRACE( - 100, - ("__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n", - gtid)); - } else { - KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n", - gtid, i)); - } -#endif /* OMP_40_ENABLED */ - - KMP_CPU_COPY(th->th.th_affin_mask, mask); - - if (__kmp_affinity_verbose - /* to avoid duplicate printing (will be correctly printed on barrier) */ - && (__kmp_affinity_type == affinity_none || - (i != KMP_PLACE_ALL && __kmp_affinity_type != affinity_balanced))) { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, - th->th.th_affin_mask); - KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), - __kmp_gettid(), gtid, buf); - } - -#if KMP_OS_WINDOWS - // On Windows* OS, the process affinity mask might have changed. If the user - // didn't request affinity and this call fails, just continue silently. - // See CQ171393. - if (__kmp_affinity_type == affinity_none) { - __kmp_set_system_affinity(th->th.th_affin_mask, FALSE); - } else -#endif - __kmp_set_system_affinity(th->th.th_affin_mask, TRUE); -} - -#if OMP_40_ENABLED - -void __kmp_affinity_set_place(int gtid) { - if (!KMP_AFFINITY_CAPABLE()) { - return; - } - - kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]); - - KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current " - "place = %d)\n", - gtid, th->th.th_new_place, th->th.th_current_place)); - - // Check that the new place is within this thread's partition. 
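// Editor's note (clarification, not vendor text): a place partition may wrap
// around the end of the place list; th_first_place > th_last_place encodes
// the wrapped case, which is why the range check below has two branches.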
-  KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
-  KMP_ASSERT(th->th.th_new_place >= 0);
-  KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
-  if (th->th.th_first_place <= th->th.th_last_place) {
-    KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) &&
-               (th->th.th_new_place <= th->th.th_last_place));
-  } else {
-    KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) ||
-               (th->th.th_new_place >= th->th.th_last_place));
-  }
-
-  // Copy the thread mask to the kmp_info_t structure,
-  // and set this thread's affinity.
-  kmp_affin_mask_t *mask =
-      KMP_CPU_INDEX(__kmp_affinity_masks, th->th.th_new_place);
-  KMP_CPU_COPY(th->th.th_affin_mask, mask);
-  th->th.th_current_place = th->th.th_new_place;
-
-  if (__kmp_affinity_verbose) {
-    char buf[KMP_AFFIN_MASK_PRINT_LEN];
-    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
-                              th->th.th_affin_mask);
-    KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
-               __kmp_gettid(), gtid, buf);
-  }
-  __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
-}
-
-#endif /* OMP_40_ENABLED */
-
-int __kmp_aux_set_affinity(void **mask) {
-  int gtid;
-  kmp_info_t *th;
-  int retval;
-
-  if (!KMP_AFFINITY_CAPABLE()) {
-    return -1;
-  }
-
-  gtid = __kmp_entry_gtid();
-  KA_TRACE(1000, ; {
-    char buf[KMP_AFFIN_MASK_PRINT_LEN];
-    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
-                              (kmp_affin_mask_t *)(*mask));
-    __kmp_debug_printf(
-        "kmp_set_affinity: setting affinity mask for thread %d = %s\n", gtid,
-        buf);
-  });
-
-  if (__kmp_env_consistency_check) {
-    if ((mask == NULL) || (*mask == NULL)) {
-      KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
-    } else {
-      unsigned proc;
-      int num_procs = 0;
-
-      KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t *)(*mask))) {
-        if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
-          KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
-        }
-        if (!KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
-          continue;
-        }
-        num_procs++;
-      }
-      if (num_procs == 0) {
-        KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
-      }
-
-#if KMP_GROUP_AFFINITY
-      if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
-        KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
-      }
-#endif /* KMP_GROUP_AFFINITY */
-    }
-  }
-
-  th = __kmp_threads[gtid];
-  KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
-  retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
-  if (retval == 0) {
-    KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
-  }
-
-#if OMP_40_ENABLED
-  th->th.th_current_place = KMP_PLACE_UNDEFINED;
-  th->th.th_new_place = KMP_PLACE_UNDEFINED;
-  th->th.th_first_place = 0;
-  th->th.th_last_place = __kmp_affinity_num_masks - 1;
-
-  // Turn off 4.0 affinity for the current thread at this parallel level.
- th->th.th_current_task->td_icvs.proc_bind = proc_bind_false; -#endif - - return retval; -} - -int __kmp_aux_get_affinity(void **mask) { - int gtid; - int retval; - kmp_info_t *th; - - if (!KMP_AFFINITY_CAPABLE()) { - return -1; - } - - gtid = __kmp_entry_gtid(); - th = __kmp_threads[gtid]; - KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL); - - KA_TRACE(1000, ; { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, - th->th.th_affin_mask); - __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", - gtid, buf); - }); - - if (__kmp_env_consistency_check) { - if ((mask == NULL) || (*mask == NULL)) { - KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity"); - } - } - -#if !KMP_OS_WINDOWS - - retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE); - KA_TRACE(1000, ; { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, - (kmp_affin_mask_t *)(*mask)); - __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", - gtid, buf); - }); - return retval; - -#else - - KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask); - return 0; - -#endif /* KMP_OS_WINDOWS */ -} - -int __kmp_aux_get_affinity_max_proc() { - if (!KMP_AFFINITY_CAPABLE()) { - return 0; - } -#if KMP_GROUP_AFFINITY - if (__kmp_num_proc_groups > 1) { - return (int)(__kmp_num_proc_groups * sizeof(DWORD_PTR) * CHAR_BIT); - } -#endif - return __kmp_xproc; -} - -int __kmp_aux_set_affinity_mask_proc(int proc, void **mask) { - if (!KMP_AFFINITY_CAPABLE()) { - return -1; - } - - KA_TRACE(1000, ; { - int gtid = __kmp_entry_gtid(); - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, - (kmp_affin_mask_t *)(*mask)); - __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in " - "affinity mask for thread %d = %s\n", - proc, gtid, buf); - }); - - if (__kmp_env_consistency_check) { - if ((mask == NULL) || (*mask == NULL)) { - KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc"); - } - } - - if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) { - return -1; - } - if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) { - return -2; - } - - KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask)); - return 0; -} - -int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask) { - if (!KMP_AFFINITY_CAPABLE()) { - return -1; - } - - KA_TRACE(1000, ; { - int gtid = __kmp_entry_gtid(); - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, - (kmp_affin_mask_t *)(*mask)); - __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in " - "affinity mask for thread %d = %s\n", - proc, gtid, buf); - }); - - if (__kmp_env_consistency_check) { - if ((mask == NULL) || (*mask == NULL)) { - KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc"); - } - } - - if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) { - return -1; - } - if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) { - return -2; - } - - KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask)); - return 0; -} - -int __kmp_aux_get_affinity_mask_proc(int proc, void **mask) { - if (!KMP_AFFINITY_CAPABLE()) { - return -1; - } - - KA_TRACE(1000, ; { - int gtid = __kmp_entry_gtid(); - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, - (kmp_affin_mask_t *)(*mask)); - __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in " - "affinity mask for thread %d = %s\n", - proc, gtid, buf); - }); - - if 
(__kmp_env_consistency_check) { - if ((mask == NULL) || (*mask == NULL)) { - KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc"); - } - } - - if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) { - return -1; - } - if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) { - return 0; - } - - return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask)); -} - -// Dynamic affinity settings - Affinity balanced -void __kmp_balanced_affinity(kmp_info_t *th, int nthreads) { - KMP_DEBUG_ASSERT(th); - bool fine_gran = true; - int tid = th->th.th_info.ds.ds_tid; - - switch (__kmp_affinity_gran) { - case affinity_gran_fine: - case affinity_gran_thread: - break; - case affinity_gran_core: - if (__kmp_nThreadsPerCore > 1) { - fine_gran = false; - } - break; - case affinity_gran_package: - if (nCoresPerPkg > 1) { - fine_gran = false; - } - break; - default: - fine_gran = false; - } - - if (__kmp_affinity_uniform_topology()) { - int coreID; - int threadID; - // Number of hyper threads per core in HT machine - int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores; - // Number of cores - int ncores = __kmp_ncores; - if ((nPackages > 1) && (__kmp_nth_per_core <= 1)) { - __kmp_nth_per_core = __kmp_avail_proc / nPackages; - ncores = nPackages; - } - // How many threads will be bound to each core - int chunk = nthreads / ncores; - // How many cores will have an additional thread bound to it - "big cores" - int big_cores = nthreads % ncores; - // Number of threads on the big cores - int big_nth = (chunk + 1) * big_cores; - if (tid < big_nth) { - coreID = tid / (chunk + 1); - threadID = (tid % (chunk + 1)) % __kmp_nth_per_core; - } else { // tid >= big_nth - coreID = (tid - big_cores) / chunk; - threadID = ((tid - big_cores) % chunk) % __kmp_nth_per_core; - } - - KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(), - "Illegal set affinity operation when not capable"); - - kmp_affin_mask_t *mask = th->th.th_affin_mask; - KMP_CPU_ZERO(mask); - - if (fine_gran) { - int osID = address2os[coreID * __kmp_nth_per_core + threadID].second; - KMP_CPU_SET(osID, mask); - } else { - for (int i = 0; i < __kmp_nth_per_core; i++) { - int osID; - osID = address2os[coreID * __kmp_nth_per_core + i].second; - KMP_CPU_SET(osID, mask); - } - } - if (__kmp_affinity_verbose) { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask); - KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), - __kmp_gettid(), tid, buf); - } - __kmp_set_system_affinity(mask, TRUE); - } else { // Non-uniform topology - - kmp_affin_mask_t *mask = th->th.th_affin_mask; - KMP_CPU_ZERO(mask); - - int core_level = __kmp_affinity_find_core_level( - address2os, __kmp_avail_proc, __kmp_aff_depth - 1); - int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc, - __kmp_aff_depth - 1, core_level); - int nth_per_core = __kmp_affinity_max_proc_per_core( - address2os, __kmp_avail_proc, __kmp_aff_depth - 1, core_level); - - // For performance gain consider the special case nthreads == - // __kmp_avail_proc - if (nthreads == __kmp_avail_proc) { - if (fine_gran) { - int osID = address2os[tid].second; - KMP_CPU_SET(osID, mask); - } else { - int core = __kmp_affinity_find_core(address2os, tid, - __kmp_aff_depth - 1, core_level); - for (int i = 0; i < __kmp_avail_proc; i++) { - int osID = address2os[i].second; - if (__kmp_affinity_find_core(address2os, i, __kmp_aff_depth - 1, - core_level) == core) { - KMP_CPU_SET(osID, mask); - } - } - } - } else if (nthreads <= ncores) { - - int core = 0; - for (int i = 
0; i < ncores; i++) { - // Check if this core from procarr[] is in the mask - int in_mask = 0; - for (int j = 0; j < nth_per_core; j++) { - if (procarr[i * nth_per_core + j] != -1) { - in_mask = 1; - break; - } - } - if (in_mask) { - if (tid == core) { - for (int j = 0; j < nth_per_core; j++) { - int osID = procarr[i * nth_per_core + j]; - if (osID != -1) { - KMP_CPU_SET(osID, mask); - // For fine granularity it is enough to set the first available - // osID for this core - if (fine_gran) { - break; - } - } - } - break; - } else { - core++; - } - } - } - } else { // nthreads > ncores - // Array to save the number of processors at each core - int *nproc_at_core = (int *)KMP_ALLOCA(sizeof(int) * ncores); - // Array to save the number of cores with "x" available processors; - int *ncores_with_x_procs = - (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1)); - // Array to save the number of cores with # procs from x to nth_per_core - int *ncores_with_x_to_max_procs = - (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1)); - - for (int i = 0; i <= nth_per_core; i++) { - ncores_with_x_procs[i] = 0; - ncores_with_x_to_max_procs[i] = 0; - } - - for (int i = 0; i < ncores; i++) { - int cnt = 0; - for (int j = 0; j < nth_per_core; j++) { - if (procarr[i * nth_per_core + j] != -1) { - cnt++; - } - } - nproc_at_core[i] = cnt; - ncores_with_x_procs[cnt]++; - } - - for (int i = 0; i <= nth_per_core; i++) { - for (int j = i; j <= nth_per_core; j++) { - ncores_with_x_to_max_procs[i] += ncores_with_x_procs[j]; - } - } - - // Max number of processors - int nproc = nth_per_core * ncores; - // An array to keep number of threads per each context - int *newarr = (int *)__kmp_allocate(sizeof(int) * nproc); - for (int i = 0; i < nproc; i++) { - newarr[i] = 0; - } - - int nth = nthreads; - int flag = 0; - while (nth > 0) { - for (int j = 1; j <= nth_per_core; j++) { - int cnt = ncores_with_x_to_max_procs[j]; - for (int i = 0; i < ncores; i++) { - // Skip the core with 0 processors - if (nproc_at_core[i] == 0) { - continue; - } - for (int k = 0; k < nth_per_core; k++) { - if (procarr[i * nth_per_core + k] != -1) { - if (newarr[i * nth_per_core + k] == 0) { - newarr[i * nth_per_core + k] = 1; - cnt--; - nth--; - break; - } else { - if (flag != 0) { - newarr[i * nth_per_core + k]++; - cnt--; - nth--; - break; - } - } - } - } - if (cnt == 0 || nth == 0) { - break; - } - } - if (nth == 0) { - break; - } - } - flag = 1; - } - int sum = 0; - for (int i = 0; i < nproc; i++) { - sum += newarr[i]; - if (sum > tid) { - if (fine_gran) { - int osID = procarr[i]; - KMP_CPU_SET(osID, mask); - } else { - int coreID = i / nth_per_core; - for (int ii = 0; ii < nth_per_core; ii++) { - int osID = procarr[coreID * nth_per_core + ii]; - if (osID != -1) { - KMP_CPU_SET(osID, mask); - } - } - } - break; - } - } - __kmp_free(newarr); - } - - if (__kmp_affinity_verbose) { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask); - KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), - __kmp_gettid(), tid, buf); - } - __kmp_set_system_affinity(mask, TRUE); - } -} - -#if KMP_OS_LINUX -// We don't need this entry for Windows because -// there is GetProcessAffinityMask() api -// -// The intended usage is indicated by these steps: -// 1) The user gets the current affinity mask -// 2) Then sets the affinity by calling this function -// 3) Error check the return value -// 4) Use non-OpenMP parallelization -// 5) Reset the affinity to what was stored in step 1) -#ifdef __cplusplus -extern 
"C" -#endif - int - kmp_set_thread_affinity_mask_initial() -// the function returns 0 on success, -// -1 if we cannot bind thread -// >0 (errno) if an error happened during binding -{ - int gtid = __kmp_get_gtid(); - if (gtid < 0) { - // Do not touch non-omp threads - KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: " - "non-omp thread, returning\n")); - return -1; - } - if (!KMP_AFFINITY_CAPABLE() || !__kmp_init_middle) { - KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: " - "affinity not initialized, returning\n")); - return -1; - } - KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: " - "set full mask for thread %d\n", - gtid)); - KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL); - return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE); -} -#endif - -#endif // KMP_AFFINITY_SUPPORTED Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_affinity.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_ftn_os.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_ftn_os.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_ftn_os.h (nonexistent) @@ -1,668 +0,0 @@ -/* - * kmp_ftn_os.h -- KPTS Fortran defines header file. - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#ifndef KMP_FTN_OS_H -#define KMP_FTN_OS_H - -// KMP_FNT_ENTRIES may be one of: KMP_FTN_PLAIN, KMP_FTN_UPPER, KMP_FTN_APPEND, -// KMP_FTN_UAPPEND. 
- -/* -------------------------- External definitions ------------------------ */ - -#if KMP_FTN_ENTRIES == KMP_FTN_PLAIN - -#define FTN_SET_STACKSIZE kmp_set_stacksize -#define FTN_SET_STACKSIZE_S kmp_set_stacksize_s -#define FTN_GET_STACKSIZE kmp_get_stacksize -#define FTN_GET_STACKSIZE_S kmp_get_stacksize_s -#define FTN_SET_BLOCKTIME kmp_set_blocktime -#define FTN_GET_BLOCKTIME kmp_get_blocktime -#define FTN_SET_LIBRARY_SERIAL kmp_set_library_serial -#define FTN_SET_LIBRARY_TURNAROUND kmp_set_library_turnaround -#define FTN_SET_LIBRARY_THROUGHPUT kmp_set_library_throughput -#define FTN_SET_LIBRARY kmp_set_library -#define FTN_GET_LIBRARY kmp_get_library -#define FTN_SET_DEFAULTS kmp_set_defaults -#define FTN_SET_DISP_NUM_BUFFERS kmp_set_disp_num_buffers -#define FTN_SET_AFFINITY kmp_set_affinity -#define FTN_GET_AFFINITY kmp_get_affinity -#define FTN_GET_AFFINITY_MAX_PROC kmp_get_affinity_max_proc -#define FTN_CREATE_AFFINITY_MASK kmp_create_affinity_mask -#define FTN_DESTROY_AFFINITY_MASK kmp_destroy_affinity_mask -#define FTN_SET_AFFINITY_MASK_PROC kmp_set_affinity_mask_proc -#define FTN_UNSET_AFFINITY_MASK_PROC kmp_unset_affinity_mask_proc -#define FTN_GET_AFFINITY_MASK_PROC kmp_get_affinity_mask_proc - -#define FTN_MALLOC kmp_malloc -#define FTN_ALIGNED_MALLOC kmp_aligned_malloc -#define FTN_CALLOC kmp_calloc -#define FTN_REALLOC kmp_realloc -#define FTN_KFREE kmp_free - -#define FTN_GET_NUM_KNOWN_THREADS kmp_get_num_known_threads - -#define FTN_SET_NUM_THREADS omp_set_num_threads -#define FTN_GET_NUM_THREADS omp_get_num_threads -#define FTN_GET_MAX_THREADS omp_get_max_threads -#define FTN_GET_THREAD_NUM omp_get_thread_num -#define FTN_GET_NUM_PROCS omp_get_num_procs -#define FTN_SET_DYNAMIC omp_set_dynamic -#define FTN_GET_DYNAMIC omp_get_dynamic -#define FTN_SET_NESTED omp_set_nested -#define FTN_GET_NESTED omp_get_nested -#define FTN_IN_PARALLEL omp_in_parallel -#define FTN_GET_THREAD_LIMIT omp_get_thread_limit -#define FTN_SET_SCHEDULE omp_set_schedule -#define FTN_GET_SCHEDULE omp_get_schedule -#define FTN_SET_MAX_ACTIVE_LEVELS omp_set_max_active_levels -#define FTN_GET_MAX_ACTIVE_LEVELS omp_get_max_active_levels -#define FTN_GET_ACTIVE_LEVEL omp_get_active_level -#define FTN_GET_LEVEL omp_get_level -#define FTN_GET_ANCESTOR_THREAD_NUM omp_get_ancestor_thread_num -#define FTN_GET_TEAM_SIZE omp_get_team_size -#define FTN_IN_FINAL omp_in_final -// #define FTN_SET_PROC_BIND omp_set_proc_bind -#define FTN_GET_PROC_BIND omp_get_proc_bind -// #define FTN_CURR_PROC_BIND omp_curr_proc_bind -#if OMP_40_ENABLED -#define FTN_GET_NUM_TEAMS omp_get_num_teams -#define FTN_GET_TEAM_NUM omp_get_team_num -#endif -#define FTN_INIT_LOCK omp_init_lock -#if KMP_USE_DYNAMIC_LOCK -#define FTN_INIT_LOCK_WITH_HINT omp_init_lock_with_hint -#define FTN_INIT_NEST_LOCK_WITH_HINT omp_init_nest_lock_with_hint -#endif -#define FTN_DESTROY_LOCK omp_destroy_lock -#define FTN_SET_LOCK omp_set_lock -#define FTN_UNSET_LOCK omp_unset_lock -#define FTN_TEST_LOCK omp_test_lock -#define FTN_INIT_NEST_LOCK omp_init_nest_lock -#define FTN_DESTROY_NEST_LOCK omp_destroy_nest_lock -#define FTN_SET_NEST_LOCK omp_set_nest_lock -#define FTN_UNSET_NEST_LOCK omp_unset_nest_lock -#define FTN_TEST_NEST_LOCK omp_test_nest_lock - -#define FTN_SET_WARNINGS_ON kmp_set_warnings_on -#define FTN_SET_WARNINGS_OFF kmp_set_warnings_off - -#define FTN_GET_WTIME omp_get_wtime -#define FTN_GET_WTICK omp_get_wtick - -#if OMP_40_ENABLED -#define FTN_GET_NUM_DEVICES omp_get_num_devices -#define FTN_GET_DEFAULT_DEVICE omp_get_default_device 
-#define FTN_SET_DEFAULT_DEVICE omp_set_default_device -#define FTN_IS_INITIAL_DEVICE omp_is_initial_device -#endif - -#if OMP_40_ENABLED -#define FTN_GET_CANCELLATION omp_get_cancellation -#define FTN_GET_CANCELLATION_STATUS kmp_get_cancellation_status -#endif - -#if OMP_45_ENABLED -#define FTN_GET_MAX_TASK_PRIORITY omp_get_max_task_priority -#define FTN_GET_NUM_PLACES omp_get_num_places -#define FTN_GET_PLACE_NUM_PROCS omp_get_place_num_procs -#define FTN_GET_PLACE_PROC_IDS omp_get_place_proc_ids -#define FTN_GET_PLACE_NUM omp_get_place_num -#define FTN_GET_PARTITION_NUM_PLACES omp_get_partition_num_places -#define FTN_GET_PARTITION_PLACE_NUMS omp_get_partition_place_nums -#define FTN_GET_INITIAL_DEVICE omp_get_initial_device -#ifdef KMP_STUB -#define FTN_TARGET_ALLOC omp_target_alloc -#define FTN_TARGET_FREE omp_target_free -#define FTN_TARGET_IS_PRESENT omp_target_is_present -#define FTN_TARGET_MEMCPY omp_target_memcpy -#define FTN_TARGET_MEMCPY_RECT omp_target_memcpy_rect -#define FTN_TARGET_ASSOCIATE_PTR omp_target_associate_ptr -#define FTN_TARGET_DISASSOCIATE_PTR omp_target_disassociate_ptr -#endif -#endif - -#if OMP_50_ENABLED -#define FTN_CONTROL_TOOL omp_control_tool -#define FTN_SET_DEFAULT_ALLOCATOR omp_set_default_allocator -#define FTN_GET_DEFAULT_ALLOCATOR omp_get_default_allocator -#define FTN_ALLOC omp_alloc -#define FTN_FREE omp_free -#define FTN_GET_DEVICE_NUM omp_get_device_num -#define FTN_SET_AFFINITY_FORMAT omp_set_affinity_format -#define FTN_GET_AFFINITY_FORMAT omp_get_affinity_format -#define FTN_DISPLAY_AFFINITY omp_display_affinity -#define FTN_CAPTURE_AFFINITY omp_capture_affinity -#endif - -#endif /* KMP_FTN_PLAIN */ - -/* ------------------------------------------------------------------------ */ - -#if KMP_FTN_ENTRIES == KMP_FTN_APPEND - -#define FTN_SET_STACKSIZE kmp_set_stacksize_ -#define FTN_SET_STACKSIZE_S kmp_set_stacksize_s_ -#define FTN_GET_STACKSIZE kmp_get_stacksize_ -#define FTN_GET_STACKSIZE_S kmp_get_stacksize_s_ -#define FTN_SET_BLOCKTIME kmp_set_blocktime_ -#define FTN_GET_BLOCKTIME kmp_get_blocktime_ -#define FTN_SET_LIBRARY_SERIAL kmp_set_library_serial_ -#define FTN_SET_LIBRARY_TURNAROUND kmp_set_library_turnaround_ -#define FTN_SET_LIBRARY_THROUGHPUT kmp_set_library_throughput_ -#define FTN_SET_LIBRARY kmp_set_library_ -#define FTN_GET_LIBRARY kmp_get_library_ -#define FTN_SET_DEFAULTS kmp_set_defaults_ -#define FTN_SET_DISP_NUM_BUFFERS kmp_set_disp_num_buffers_ -#define FTN_SET_AFFINITY kmp_set_affinity_ -#define FTN_GET_AFFINITY kmp_get_affinity_ -#define FTN_GET_AFFINITY_MAX_PROC kmp_get_affinity_max_proc_ -#define FTN_CREATE_AFFINITY_MASK kmp_create_affinity_mask_ -#define FTN_DESTROY_AFFINITY_MASK kmp_destroy_affinity_mask_ -#define FTN_SET_AFFINITY_MASK_PROC kmp_set_affinity_mask_proc_ -#define FTN_UNSET_AFFINITY_MASK_PROC kmp_unset_affinity_mask_proc_ -#define FTN_GET_AFFINITY_MASK_PROC kmp_get_affinity_mask_proc_ - -#define FTN_MALLOC kmp_malloc_ -#define FTN_ALIGNED_MALLOC kmp_aligned_malloc_ -#define FTN_CALLOC kmp_calloc_ -#define FTN_REALLOC kmp_realloc_ -#define FTN_KFREE kmp_free_ - -#define FTN_GET_NUM_KNOWN_THREADS kmp_get_num_known_threads_ - -#define FTN_SET_NUM_THREADS omp_set_num_threads_ -#define FTN_GET_NUM_THREADS omp_get_num_threads_ -#define FTN_GET_MAX_THREADS omp_get_max_threads_ -#define FTN_GET_THREAD_NUM omp_get_thread_num_ -#define FTN_GET_NUM_PROCS omp_get_num_procs_ -#define FTN_SET_DYNAMIC omp_set_dynamic_ -#define FTN_GET_DYNAMIC omp_get_dynamic_ -#define FTN_SET_NESTED omp_set_nested_ -#define 
FTN_GET_NESTED omp_get_nested_ -#define FTN_IN_PARALLEL omp_in_parallel_ -#define FTN_GET_THREAD_LIMIT omp_get_thread_limit_ -#define FTN_SET_SCHEDULE omp_set_schedule_ -#define FTN_GET_SCHEDULE omp_get_schedule_ -#define FTN_SET_MAX_ACTIVE_LEVELS omp_set_max_active_levels_ -#define FTN_GET_MAX_ACTIVE_LEVELS omp_get_max_active_levels_ -#define FTN_GET_ACTIVE_LEVEL omp_get_active_level_ -#define FTN_GET_LEVEL omp_get_level_ -#define FTN_GET_ANCESTOR_THREAD_NUM omp_get_ancestor_thread_num_ -#define FTN_GET_TEAM_SIZE omp_get_team_size_ -#define FTN_IN_FINAL omp_in_final_ -// #define FTN_SET_PROC_BIND omp_set_proc_bind_ -#define FTN_GET_PROC_BIND omp_get_proc_bind_ -// #define FTN_CURR_PROC_BIND omp_curr_proc_bind_ -#if OMP_40_ENABLED -#define FTN_GET_NUM_TEAMS omp_get_num_teams_ -#define FTN_GET_TEAM_NUM omp_get_team_num_ -#endif -#define FTN_INIT_LOCK omp_init_lock_ -#if KMP_USE_DYNAMIC_LOCK -#define FTN_INIT_LOCK_WITH_HINT omp_init_lock_with_hint_ -#define FTN_INIT_NEST_LOCK_WITH_HINT omp_init_nest_lock_with_hint_ -#endif -#define FTN_DESTROY_LOCK omp_destroy_lock_ -#define FTN_SET_LOCK omp_set_lock_ -#define FTN_UNSET_LOCK omp_unset_lock_ -#define FTN_TEST_LOCK omp_test_lock_ -#define FTN_INIT_NEST_LOCK omp_init_nest_lock_ -#define FTN_DESTROY_NEST_LOCK omp_destroy_nest_lock_ -#define FTN_SET_NEST_LOCK omp_set_nest_lock_ -#define FTN_UNSET_NEST_LOCK omp_unset_nest_lock_ -#define FTN_TEST_NEST_LOCK omp_test_nest_lock_ - -#define FTN_SET_WARNINGS_ON kmp_set_warnings_on_ -#define FTN_SET_WARNINGS_OFF kmp_set_warnings_off_ - -#define FTN_GET_WTIME omp_get_wtime_ -#define FTN_GET_WTICK omp_get_wtick_ - -#if OMP_40_ENABLED -#define FTN_GET_NUM_DEVICES omp_get_num_devices_ -#define FTN_GET_DEFAULT_DEVICE omp_get_default_device_ -#define FTN_SET_DEFAULT_DEVICE omp_set_default_device_ -#define FTN_IS_INITIAL_DEVICE omp_is_initial_device_ -#endif - -#if OMP_40_ENABLED -#define FTN_GET_CANCELLATION omp_get_cancellation_ -#define FTN_GET_CANCELLATION_STATUS kmp_get_cancellation_status_ -#endif - -#if OMP_45_ENABLED -#define FTN_GET_MAX_TASK_PRIORITY omp_get_max_task_priority_ -#define FTN_GET_NUM_PLACES omp_get_num_places_ -#define FTN_GET_PLACE_NUM_PROCS omp_get_place_num_procs_ -#define FTN_GET_PLACE_PROC_IDS omp_get_place_proc_ids_ -#define FTN_GET_PLACE_NUM omp_get_place_num_ -#define FTN_GET_PARTITION_NUM_PLACES omp_get_partition_num_places_ -#define FTN_GET_PARTITION_PLACE_NUMS omp_get_partition_place_nums_ -#define FTN_GET_INITIAL_DEVICE omp_get_initial_device_ -#ifdef KMP_STUB -#define FTN_TARGET_ALLOC omp_target_alloc_ -#define FTN_TARGET_FREE omp_target_free_ -#define FTN_TARGET_IS_PRESENT omp_target_is_present_ -#define FTN_TARGET_MEMCPY omp_target_memcpy_ -#define FTN_TARGET_MEMCPY_RECT omp_target_memcpy_rect_ -#define FTN_TARGET_ASSOCIATE_PTR omp_target_associate_ptr_ -#define FTN_TARGET_DISASSOCIATE_PTR omp_target_disassociate_ptr_ -#endif -#endif - -#if OMP_50_ENABLED -#define FTN_CONTROL_TOOL omp_control_tool_ -#define FTN_SET_DEFAULT_ALLOCATOR omp_set_default_allocator_ -#define FTN_GET_DEFAULT_ALLOCATOR omp_get_default_allocator_ -#define FTN_ALLOC omp_alloc_ -#define FTN_FREE omp_free_ -#define FTN_GET_DEVICE_NUM omp_get_device_num_ -#define FTN_SET_AFFINITY_FORMAT omp_set_affinity_format_ -#define FTN_GET_AFFINITY_FORMAT omp_get_affinity_format_ -#define FTN_DISPLAY_AFFINITY omp_display_affinity_ -#define FTN_CAPTURE_AFFINITY omp_capture_affinity_ -#endif - -#endif /* KMP_FTN_APPEND */ - -/* ------------------------------------------------------------------------ */ - -#if 
KMP_FTN_ENTRIES == KMP_FTN_UPPER - -#define FTN_SET_STACKSIZE KMP_SET_STACKSIZE -#define FTN_SET_STACKSIZE_S KMP_SET_STACKSIZE_S -#define FTN_GET_STACKSIZE KMP_GET_STACKSIZE -#define FTN_GET_STACKSIZE_S KMP_GET_STACKSIZE_S -#define FTN_SET_BLOCKTIME KMP_SET_BLOCKTIME -#define FTN_GET_BLOCKTIME KMP_GET_BLOCKTIME -#define FTN_SET_LIBRARY_SERIAL KMP_SET_LIBRARY_SERIAL -#define FTN_SET_LIBRARY_TURNAROUND KMP_SET_LIBRARY_TURNAROUND -#define FTN_SET_LIBRARY_THROUGHPUT KMP_SET_LIBRARY_THROUGHPUT -#define FTN_SET_LIBRARY KMP_SET_LIBRARY -#define FTN_GET_LIBRARY KMP_GET_LIBRARY -#define FTN_SET_DEFAULTS KMP_SET_DEFAULTS -#define FTN_SET_DISP_NUM_BUFFERS KMP_SET_DISP_NUM_BUFFERS -#define FTN_SET_AFFINITY KMP_SET_AFFINITY -#define FTN_GET_AFFINITY KMP_GET_AFFINITY -#define FTN_GET_AFFINITY_MAX_PROC KMP_GET_AFFINITY_MAX_PROC -#define FTN_CREATE_AFFINITY_MASK KMP_CREATE_AFFINITY_MASK -#define FTN_DESTROY_AFFINITY_MASK KMP_DESTROY_AFFINITY_MASK -#define FTN_SET_AFFINITY_MASK_PROC KMP_SET_AFFINITY_MASK_PROC -#define FTN_UNSET_AFFINITY_MASK_PROC KMP_UNSET_AFFINITY_MASK_PROC -#define FTN_GET_AFFINITY_MASK_PROC KMP_GET_AFFINITY_MASK_PROC - -#define FTN_MALLOC KMP_MALLOC -#define FTN_ALIGNED_MALLOC KMP_ALIGNED_MALLOC -#define FTN_CALLOC KMP_CALLOC -#define FTN_REALLOC KMP_REALLOC -#define FTN_KFREE KMP_FREE - -#define FTN_GET_NUM_KNOWN_THREADS KMP_GET_NUM_KNOWN_THREADS - -#define FTN_SET_NUM_THREADS OMP_SET_NUM_THREADS -#define FTN_GET_NUM_THREADS OMP_GET_NUM_THREADS -#define FTN_GET_MAX_THREADS OMP_GET_MAX_THREADS -#define FTN_GET_THREAD_NUM OMP_GET_THREAD_NUM -#define FTN_GET_NUM_PROCS OMP_GET_NUM_PROCS -#define FTN_SET_DYNAMIC OMP_SET_DYNAMIC -#define FTN_GET_DYNAMIC OMP_GET_DYNAMIC -#define FTN_SET_NESTED OMP_SET_NESTED -#define FTN_GET_NESTED OMP_GET_NESTED -#define FTN_IN_PARALLEL OMP_IN_PARALLEL -#define FTN_GET_THREAD_LIMIT OMP_GET_THREAD_LIMIT -#define FTN_SET_SCHEDULE OMP_SET_SCHEDULE -#define FTN_GET_SCHEDULE OMP_GET_SCHEDULE -#define FTN_SET_MAX_ACTIVE_LEVELS OMP_SET_MAX_ACTIVE_LEVELS -#define FTN_GET_MAX_ACTIVE_LEVELS OMP_GET_MAX_ACTIVE_LEVELS -#define FTN_GET_ACTIVE_LEVEL OMP_GET_ACTIVE_LEVEL -#define FTN_GET_LEVEL OMP_GET_LEVEL -#define FTN_GET_ANCESTOR_THREAD_NUM OMP_GET_ANCESTOR_THREAD_NUM -#define FTN_GET_TEAM_SIZE OMP_GET_TEAM_SIZE -#define FTN_IN_FINAL OMP_IN_FINAL -// #define FTN_SET_PROC_BIND OMP_SET_PROC_BIND -#define FTN_GET_PROC_BIND OMP_GET_PROC_BIND -// #define FTN_CURR_PROC_BIND OMP_CURR_PROC_BIND -#if OMP_40_ENABLED -#define FTN_GET_NUM_TEAMS OMP_GET_NUM_TEAMS -#define FTN_GET_TEAM_NUM OMP_GET_TEAM_NUM -#endif -#define FTN_INIT_LOCK OMP_INIT_LOCK -#if KMP_USE_DYNAMIC_LOCK -#define FTN_INIT_LOCK_WITH_HINT OMP_INIT_LOCK_WITH_HINT -#define FTN_INIT_NEST_LOCK_WITH_HINT OMP_INIT_NEST_LOCK_WITH_HINT -#endif -#define FTN_DESTROY_LOCK OMP_DESTROY_LOCK -#define FTN_SET_LOCK OMP_SET_LOCK -#define FTN_UNSET_LOCK OMP_UNSET_LOCK -#define FTN_TEST_LOCK OMP_TEST_LOCK -#define FTN_INIT_NEST_LOCK OMP_INIT_NEST_LOCK -#define FTN_DESTROY_NEST_LOCK OMP_DESTROY_NEST_LOCK -#define FTN_SET_NEST_LOCK OMP_SET_NEST_LOCK -#define FTN_UNSET_NEST_LOCK OMP_UNSET_NEST_LOCK -#define FTN_TEST_NEST_LOCK OMP_TEST_NEST_LOCK - -#define FTN_SET_WARNINGS_ON KMP_SET_WARNINGS_ON -#define FTN_SET_WARNINGS_OFF KMP_SET_WARNINGS_OFF - -#define FTN_GET_WTIME OMP_GET_WTIME -#define FTN_GET_WTICK OMP_GET_WTICK - -#if OMP_40_ENABLED -#define FTN_GET_NUM_DEVICES OMP_GET_NUM_DEVICES -#define FTN_GET_DEFAULT_DEVICE OMP_GET_DEFAULT_DEVICE -#define FTN_SET_DEFAULT_DEVICE OMP_SET_DEFAULT_DEVICE -#define FTN_IS_INITIAL_DEVICE 
OMP_IS_INITIAL_DEVICE -#endif - -#if OMP_40_ENABLED -#define FTN_GET_CANCELLATION OMP_GET_CANCELLATION -#define FTN_GET_CANCELLATION_STATUS KMP_GET_CANCELLATION_STATUS -#endif - -#if OMP_45_ENABLED -#define FTN_GET_MAX_TASK_PRIORITY OMP_GET_MAX_TASK_PRIORITY -#define FTN_GET_NUM_PLACES OMP_GET_NUM_PLACES -#define FTN_GET_PLACE_NUM_PROCS OMP_GET_PLACE_NUM_PROCS -#define FTN_GET_PLACE_PROC_IDS OMP_GET_PLACE_PROC_IDS -#define FTN_GET_PLACE_NUM OMP_GET_PLACE_NUM -#define FTN_GET_PARTITION_NUM_PLACES OMP_GET_PARTITION_NUM_PLACES -#define FTN_GET_PARTITION_PLACE_NUMS OMP_GET_PARTITION_PLACE_NUMS -#define FTN_GET_INITIAL_DEVICE OMP_GET_INITIAL_DEVICE -#ifdef KMP_STUB -#define FTN_TARGET_ALLOC OMP_TARGET_ALLOC -#define FTN_TARGET_FREE OMP_TARGET_FREE -#define FTN_TARGET_IS_PRESENT OMP_TARGET_IS_PRESENT -#define FTN_TARGET_MEMCPY OMP_TARGET_MEMCPY -#define FTN_TARGET_MEMCPY_RECT OMP_TARGET_MEMCPY_RECT -#define FTN_TARGET_ASSOCIATE_PTR OMP_TARGET_ASSOCIATE_PTR -#define FTN_TARGET_DISASSOCIATE_PTR OMP_TARGET_DISASSOCIATE_PTR -#endif -#endif - -#if OMP_50_ENABLED -#define FTN_CONTROL_TOOL OMP_CONTROL_TOOL -#define FTN_SET_DEFAULT_ALLOCATOR OMP_SET_DEFAULT_ALLOCATOR -#define FTN_GET_DEFAULT_ALLOCATOR OMP_GET_DEFAULT_ALLOCATOR -#define FTN_ALLOC OMP_ALLOC -#define FTN_FREE OMP_FREE -#define FTN_GET_DEVICE_NUM OMP_GET_DEVICE_NUM -#define FTN_SET_AFFINITY_FORMAT OMP_SET_AFFINITY_FORMAT -#define FTN_GET_AFFINITY_FORMAT OMP_GET_AFFINITY_FORMAT -#define FTN_DISPLAY_AFFINITY OMP_DISPLAY_AFFINITY -#define FTN_CAPTURE_AFFINITY OMP_CAPTURE_AFFINITY -#endif - -#endif /* KMP_FTN_UPPER */ - -/* ------------------------------------------------------------------------ */ - -#if KMP_FTN_ENTRIES == KMP_FTN_UAPPEND - -#define FTN_SET_STACKSIZE KMP_SET_STACKSIZE_ -#define FTN_SET_STACKSIZE_S KMP_SET_STACKSIZE_S_ -#define FTN_GET_STACKSIZE KMP_GET_STACKSIZE_ -#define FTN_GET_STACKSIZE_S KMP_GET_STACKSIZE_S_ -#define FTN_SET_BLOCKTIME KMP_SET_BLOCKTIME_ -#define FTN_GET_BLOCKTIME KMP_GET_BLOCKTIME_ -#define FTN_SET_LIBRARY_SERIAL KMP_SET_LIBRARY_SERIAL_ -#define FTN_SET_LIBRARY_TURNAROUND KMP_SET_LIBRARY_TURNAROUND_ -#define FTN_SET_LIBRARY_THROUGHPUT KMP_SET_LIBRARY_THROUGHPUT_ -#define FTN_SET_LIBRARY KMP_SET_LIBRARY_ -#define FTN_GET_LIBRARY KMP_GET_LIBRARY_ -#define FTN_SET_DEFAULTS KMP_SET_DEFAULTS_ -#define FTN_SET_DISP_NUM_BUFFERS KMP_SET_DISP_NUM_BUFFERS_ -#define FTN_SET_AFFINITY KMP_SET_AFFINITY_ -#define FTN_GET_AFFINITY KMP_GET_AFFINITY_ -#define FTN_GET_AFFINITY_MAX_PROC KMP_GET_AFFINITY_MAX_PROC_ -#define FTN_CREATE_AFFINITY_MASK KMP_CREATE_AFFINITY_MASK_ -#define FTN_DESTROY_AFFINITY_MASK KMP_DESTROY_AFFINITY_MASK_ -#define FTN_SET_AFFINITY_MASK_PROC KMP_SET_AFFINITY_MASK_PROC_ -#define FTN_UNSET_AFFINITY_MASK_PROC KMP_UNSET_AFFINITY_MASK_PROC_ -#define FTN_GET_AFFINITY_MASK_PROC KMP_GET_AFFINITY_MASK_PROC_ - -#define FTN_MALLOC KMP_MALLOC_ -#define FTN_ALIGNED_MALLOC KMP_ALIGNED_MALLOC_ -#define FTN_CALLOC KMP_CALLOC_ -#define FTN_REALLOC KMP_REALLOC_ -#define FTN_KFREE KMP_FREE_ - -#define FTN_GET_NUM_KNOWN_THREADS KMP_GET_NUM_KNOWN_THREADS_ - -#define FTN_SET_NUM_THREADS OMP_SET_NUM_THREADS_ -#define FTN_GET_NUM_THREADS OMP_GET_NUM_THREADS_ -#define FTN_GET_MAX_THREADS OMP_GET_MAX_THREADS_ -#define FTN_GET_THREAD_NUM OMP_GET_THREAD_NUM_ -#define FTN_GET_NUM_PROCS OMP_GET_NUM_PROCS_ -#define FTN_SET_DYNAMIC OMP_SET_DYNAMIC_ -#define FTN_GET_DYNAMIC OMP_GET_DYNAMIC_ -#define FTN_SET_NESTED OMP_SET_NESTED_ -#define FTN_GET_NESTED OMP_GET_NESTED_ -#define FTN_IN_PARALLEL OMP_IN_PARALLEL_ -#define 
FTN_GET_THREAD_LIMIT OMP_GET_THREAD_LIMIT_ -#define FTN_SET_SCHEDULE OMP_SET_SCHEDULE_ -#define FTN_GET_SCHEDULE OMP_GET_SCHEDULE_ -#define FTN_SET_MAX_ACTIVE_LEVELS OMP_SET_MAX_ACTIVE_LEVELS_ -#define FTN_GET_MAX_ACTIVE_LEVELS OMP_GET_MAX_ACTIVE_LEVELS_ -#define FTN_GET_ACTIVE_LEVEL OMP_GET_ACTIVE_LEVEL_ -#define FTN_GET_LEVEL OMP_GET_LEVEL_ -#define FTN_GET_ANCESTOR_THREAD_NUM OMP_GET_ANCESTOR_THREAD_NUM_ -#define FTN_GET_TEAM_SIZE OMP_GET_TEAM_SIZE_ -#define FTN_IN_FINAL OMP_IN_FINAL_ -// #define FTN_SET_PROC_BIND OMP_SET_PROC_BIND_ -#define FTN_GET_PROC_BIND OMP_GET_PROC_BIND_ -// #define FTN_CURR_PROC_BIND OMP_CURR_PROC_BIND_ -#if OMP_40_ENABLED -#define FTN_GET_NUM_TEAMS OMP_GET_NUM_TEAMS_ -#define FTN_GET_TEAM_NUM OMP_GET_TEAM_NUM_ -#endif -#define FTN_INIT_LOCK OMP_INIT_LOCK_ -#if KMP_USE_DYNAMIC_LOCK -#define FTN_INIT_LOCK_WITH_HINT OMP_INIT_LOCK_WITH_HINT_ -#define FTN_INIT_NEST_LOCK_WITH_HINT OMP_INIT_NEST_LOCK_WITH_HINT_ -#endif -#define FTN_DESTROY_LOCK OMP_DESTROY_LOCK_ -#define FTN_SET_LOCK OMP_SET_LOCK_ -#define FTN_UNSET_LOCK OMP_UNSET_LOCK_ -#define FTN_TEST_LOCK OMP_TEST_LOCK_ -#define FTN_INIT_NEST_LOCK OMP_INIT_NEST_LOCK_ -#define FTN_DESTROY_NEST_LOCK OMP_DESTROY_NEST_LOCK_ -#define FTN_SET_NEST_LOCK OMP_SET_NEST_LOCK_ -#define FTN_UNSET_NEST_LOCK OMP_UNSET_NEST_LOCK_ -#define FTN_TEST_NEST_LOCK OMP_TEST_NEST_LOCK_ - -#define FTN_SET_WARNINGS_ON KMP_SET_WARNINGS_ON_ -#define FTN_SET_WARNINGS_OFF KMP_SET_WARNINGS_OFF_ - -#define FTN_GET_WTIME OMP_GET_WTIME_ -#define FTN_GET_WTICK OMP_GET_WTICK_ - -#if OMP_40_ENABLED -#define FTN_GET_NUM_DEVICES OMP_GET_NUM_DEVICES_ -#define FTN_GET_DEFAULT_DEVICE OMP_GET_DEFAULT_DEVICE_ -#define FTN_SET_DEFAULT_DEVICE OMP_SET_DEFAULT_DEVICE_ -#define FTN_IS_INITIAL_DEVICE OMP_IS_INITIAL_DEVICE_ -#endif - -#if OMP_40_ENABLED -#define FTN_GET_CANCELLATION OMP_GET_CANCELLATION_ -#define FTN_GET_CANCELLATION_STATUS KMP_GET_CANCELLATION_STATUS_ -#endif - -#if OMP_45_ENABLED -#define FTN_GET_MAX_TASK_PRIORITY OMP_GET_MAX_TASK_PRIORITY_ -#define FTN_GET_NUM_PLACES OMP_GET_NUM_PLACES_ -#define FTN_GET_PLACE_NUM_PROCS OMP_GET_PLACE_NUM_PROCS_ -#define FTN_GET_PLACE_PROC_IDS OMP_GET_PLACE_PROC_IDS_ -#define FTN_GET_PLACE_NUM OMP_GET_PLACE_NUM_ -#define FTN_GET_PARTITION_NUM_PLACES OMP_GET_PARTITION_NUM_PLACES_ -#define FTN_GET_PARTITION_PLACE_NUMS OMP_GET_PARTITION_PLACE_NUMS_ -#define FTN_GET_INITIAL_DEVICE OMP_GET_INITIAL_DEVICE_ -#ifdef KMP_STUB -#define FTN_TARGET_ALLOC OMP_TARGET_ALLOC_ -#define FTN_TARGET_FREE OMP_TARGET_FREE_ -#define FTN_TARGET_IS_PRESENT OMP_TARGET_IS_PRESENT_ -#define FTN_TARGET_MEMCPY OMP_TARGET_MEMCPY_ -#define FTN_TARGET_MEMCPY_RECT OMP_TARGET_MEMCPY_RECT_ -#define FTN_TARGET_ASSOCIATE_PTR OMP_TARGET_ASSOCIATE_PTR_ -#define FTN_TARGET_DISASSOCIATE_PTR OMP_TARGET_DISASSOCIATE_PTR_ -#endif -#endif - -#if OMP_50_ENABLED -#define FTN_CONTROL_TOOL OMP_CONTROL_TOOL_ -#define FTN_SET_DEFAULT_ALLOCATOR OMP_SET_DEFAULT_ALLOCATOR_ -#define FTN_GET_DEFAULT_ALLOCATOR OMP_GET_DEFAULT_ALLOCATOR_ -#define FTN_ALLOC OMP_ALLOC_ -#define FTN_FREE OMP_FREE_ -#define FTN_GET_DEVICE_NUM OMP_GET_DEVICE_NUM_ -#define FTN_SET_AFFINITY_FORMAT OMP_SET_AFFINITY_FORMAT_ -#define FTN_GET_AFFINITY_FORMAT OMP_GET_AFFINITY_FORMAT_ -#define FTN_DISPLAY_AFFINITY OMP_DISPLAY_AFFINITY_ -#define FTN_CAPTURE_AFFINITY OMP_CAPTURE_AFFINITY_ -#endif - -#endif /* KMP_FTN_UAPPEND */ - -/* -------------------------- GOMP API NAMES ------------------------ */ -// All GOMP_1.0 symbols -#define KMP_API_NAME_GOMP_ATOMIC_END GOMP_atomic_end -#define 
KMP_API_NAME_GOMP_ATOMIC_START GOMP_atomic_start -#define KMP_API_NAME_GOMP_BARRIER GOMP_barrier -#define KMP_API_NAME_GOMP_CRITICAL_END GOMP_critical_end -#define KMP_API_NAME_GOMP_CRITICAL_NAME_END GOMP_critical_name_end -#define KMP_API_NAME_GOMP_CRITICAL_NAME_START GOMP_critical_name_start -#define KMP_API_NAME_GOMP_CRITICAL_START GOMP_critical_start -#define KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT GOMP_loop_dynamic_next -#define KMP_API_NAME_GOMP_LOOP_DYNAMIC_START GOMP_loop_dynamic_start -#define KMP_API_NAME_GOMP_LOOP_END GOMP_loop_end -#define KMP_API_NAME_GOMP_LOOP_END_NOWAIT GOMP_loop_end_nowait -#define KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT GOMP_loop_guided_next -#define KMP_API_NAME_GOMP_LOOP_GUIDED_START GOMP_loop_guided_start -#define KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT \ - GOMP_loop_ordered_dynamic_next -#define KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START \ - GOMP_loop_ordered_dynamic_start -#define KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT GOMP_loop_ordered_guided_next -#define KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START \ - GOMP_loop_ordered_guided_start -#define KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT \ - GOMP_loop_ordered_runtime_next -#define KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START \ - GOMP_loop_ordered_runtime_start -#define KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT GOMP_loop_ordered_static_next -#define KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START \ - GOMP_loop_ordered_static_start -#define KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT GOMP_loop_runtime_next -#define KMP_API_NAME_GOMP_LOOP_RUNTIME_START GOMP_loop_runtime_start -#define KMP_API_NAME_GOMP_LOOP_STATIC_NEXT GOMP_loop_static_next -#define KMP_API_NAME_GOMP_LOOP_STATIC_START GOMP_loop_static_start -#define KMP_API_NAME_GOMP_ORDERED_END GOMP_ordered_end -#define KMP_API_NAME_GOMP_ORDERED_START GOMP_ordered_start -#define KMP_API_NAME_GOMP_PARALLEL_END GOMP_parallel_end -#define KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START \ - GOMP_parallel_loop_dynamic_start -#define KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START \ - GOMP_parallel_loop_guided_start -#define KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START \ - GOMP_parallel_loop_runtime_start -#define KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START \ - GOMP_parallel_loop_static_start -#define KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START GOMP_parallel_sections_start -#define KMP_API_NAME_GOMP_PARALLEL_START GOMP_parallel_start -#define KMP_API_NAME_GOMP_SECTIONS_END GOMP_sections_end -#define KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT GOMP_sections_end_nowait -#define KMP_API_NAME_GOMP_SECTIONS_NEXT GOMP_sections_next -#define KMP_API_NAME_GOMP_SECTIONS_START GOMP_sections_start -#define KMP_API_NAME_GOMP_SINGLE_COPY_END GOMP_single_copy_end -#define KMP_API_NAME_GOMP_SINGLE_COPY_START GOMP_single_copy_start -#define KMP_API_NAME_GOMP_SINGLE_START GOMP_single_start - -// All GOMP_2.0 symbols -#define KMP_API_NAME_GOMP_TASK GOMP_task -#define KMP_API_NAME_GOMP_TASKWAIT GOMP_taskwait -#define KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT GOMP_loop_ull_dynamic_next -#define KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START GOMP_loop_ull_dynamic_start -#define KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT GOMP_loop_ull_guided_next -#define KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START GOMP_loop_ull_guided_start -#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT \ - GOMP_loop_ull_ordered_dynamic_next -#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START \ - GOMP_loop_ull_ordered_dynamic_start -#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT \ - GOMP_loop_ull_ordered_guided_next -#define 
KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START \ - GOMP_loop_ull_ordered_guided_start -#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT \ - GOMP_loop_ull_ordered_runtime_next -#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START \ - GOMP_loop_ull_ordered_runtime_start -#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT \ - GOMP_loop_ull_ordered_static_next -#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START \ - GOMP_loop_ull_ordered_static_start -#define KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT GOMP_loop_ull_runtime_next -#define KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START GOMP_loop_ull_runtime_start -#define KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT GOMP_loop_ull_static_next -#define KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START GOMP_loop_ull_static_start - -// All GOMP_3.0 symbols -#define KMP_API_NAME_GOMP_TASKYIELD GOMP_taskyield - -// All GOMP_4.0 symbols -// TODO: As of 2013-10-14, none of the GOMP_4.0 functions are implemented in -// libomp -#define KMP_API_NAME_GOMP_BARRIER_CANCEL GOMP_barrier_cancel -#define KMP_API_NAME_GOMP_CANCEL GOMP_cancel -#define KMP_API_NAME_GOMP_CANCELLATION_POINT GOMP_cancellation_point -#define KMP_API_NAME_GOMP_LOOP_END_CANCEL GOMP_loop_end_cancel -#define KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC GOMP_parallel_loop_dynamic -#define KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED GOMP_parallel_loop_guided -#define KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME GOMP_parallel_loop_runtime -#define KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC GOMP_parallel_loop_static -#define KMP_API_NAME_GOMP_PARALLEL_SECTIONS GOMP_parallel_sections -#define KMP_API_NAME_GOMP_PARALLEL GOMP_parallel -#define KMP_API_NAME_GOMP_SECTIONS_END_CANCEL GOMP_sections_end_cancel -#define KMP_API_NAME_GOMP_TASKGROUP_START GOMP_taskgroup_start -#define KMP_API_NAME_GOMP_TASKGROUP_END GOMP_taskgroup_end -/* Target functions should be taken care of by liboffload */ -#define KMP_API_NAME_GOMP_TARGET GOMP_target -#define KMP_API_NAME_GOMP_TARGET_DATA GOMP_target_data -#define KMP_API_NAME_GOMP_TARGET_END_DATA GOMP_target_end_data -#define KMP_API_NAME_GOMP_TARGET_UPDATE GOMP_target_update -#define KMP_API_NAME_GOMP_TEAMS GOMP_teams - -// All GOMP_4.5 symbols -#define KMP_API_NAME_GOMP_TASKLOOP GOMP_taskloop -#define KMP_API_NAME_GOMP_TASKLOOP_ULL GOMP_taskloop_ull -#define KMP_API_NAME_GOMP_DOACROSS_POST GOMP_doacross_post -#define KMP_API_NAME_GOMP_DOACROSS_WAIT GOMP_doacross_wait -#define KMP_API_NAME_GOMP_LOOP_DOACROSS_STATIC_START \ - GOMP_loop_doacross_static_start -#define KMP_API_NAME_GOMP_LOOP_DOACROSS_DYNAMIC_START \ - GOMP_loop_doacross_dynamic_start -#define KMP_API_NAME_GOMP_LOOP_DOACROSS_GUIDED_START \ - GOMP_loop_doacross_guided_start -#define KMP_API_NAME_GOMP_LOOP_DOACROSS_RUNTIME_START \ - GOMP_loop_doacross_runtime_start -#define KMP_API_NAME_GOMP_DOACROSS_ULL_POST GOMP_doacross_ull_post -#define KMP_API_NAME_GOMP_DOACROSS_ULL_WAIT GOMP_doacross_ull_wait -#define KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_STATIC_START \ - GOMP_loop_ull_doacross_static_start -#define KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_DYNAMIC_START \ - GOMP_loop_ull_doacross_dynamic_start -#define KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_GUIDED_START \ - GOMP_loop_ull_doacross_guided_start -#define KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START \ - GOMP_loop_ull_doacross_runtime_start - -#endif /* KMP_FTN_OS_H */ Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_ftn_os.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline 
at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_itt.inl =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_itt.inl (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_itt.inl (nonexistent) @@ -1,1043 +0,0 @@ -#if USE_ITT_BUILD -/* - * kmp_itt.inl -- Inline functions of ITT Notify. - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -// Inline function definitions. This file should be included into kmp_itt.h file -// for production build (to let compliler inline functions) or into kmp_itt.c -// file for debug build (to reduce the number of files to recompile and save -// build time). - -#include "kmp.h" -#include "kmp_str.h" - -#if KMP_ITT_DEBUG -extern kmp_bootstrap_lock_t __kmp_itt_debug_lock; -#define KMP_ITT_DEBUG_LOCK() \ - { __kmp_acquire_bootstrap_lock(&__kmp_itt_debug_lock); } -#define KMP_ITT_DEBUG_PRINT(...) \ - { \ - fprintf(stderr, "#%02d: ", __kmp_get_gtid()); \ - fprintf(stderr, __VA_ARGS__); \ - fflush(stderr); \ - __kmp_release_bootstrap_lock(&__kmp_itt_debug_lock); \ - } -#else -#define KMP_ITT_DEBUG_LOCK() -#define KMP_ITT_DEBUG_PRINT(...) -#endif // KMP_ITT_DEBUG - -// Ensure that the functions are static if they're supposed to be being inlined. -// Otherwise they cannot be used in more than one file, since there will be -// multiple definitions. -#if KMP_DEBUG -#define LINKAGE -#else -#define LINKAGE static inline -#endif - -// ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses -// this API to support user-defined synchronization primitives, but does not use -// ZCA; it would be safe to turn this off until wider support becomes available. -#if USE_ITT_ZCA -#ifdef __INTEL_COMPILER -#if __INTEL_COMPILER >= 1200 -#undef __itt_sync_acquired -#undef __itt_sync_releasing -#define __itt_sync_acquired(addr) \ - __notify_zc_intrinsic((char *)"sync_acquired", addr) -#define __itt_sync_releasing(addr) \ - __notify_intrinsic((char *)"sync_releasing", addr) -#endif -#endif -#endif - -static kmp_bootstrap_lock_t metadata_lock = - KMP_BOOTSTRAP_LOCK_INITIALIZER(metadata_lock); - -/* Parallel region reporting. - * __kmp_itt_region_forking should be called by master thread of a team. - Exact moment of call does not matter, but it should be completed before any - thread of this team calls __kmp_itt_region_starting. - * __kmp_itt_region_starting should be called by each thread of a team just - before entering parallel region body. - * __kmp_itt_region_finished should be called by each thread of a team right - after returning from parallel region body. - * __kmp_itt_region_joined should be called by master thread of a team, after - all threads called __kmp_itt_region_finished. - - Note: Thread waiting at join barrier (after __kmp_itt_region_finished) can - execute some more user code -- such a thread can execute tasks. - - Note: The overhead of logging region_starting and region_finished in each - thread is too large, so these calls are not used. 
*/
-
-LINKAGE void __kmp_itt_region_forking(int gtid, int team_size, int barriers) {
-#if USE_ITT_NOTIFY
-  kmp_team_t *team = __kmp_team_from_gtid(gtid);
-  if (team->t.t_active_level > 1) {
-    // The frame notifications are only supported for the outermost teams.
-    return;
-  }
-  ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident;
-  if (loc) {
-    // Use the reserved_2 field to store the index to the region domain.
-    // Assume that reserved_2 contains zero initially. Since zero is special
-    // value here, store the index into domain array increased by 1.
-    if (loc->reserved_2 == 0) {
-      if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
-        int frm =
-            KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value
-        if (frm >= KMP_MAX_FRAME_DOMAINS) {
-          KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count
-          return; // loc->reserved_2 is still 0
-        }
-        // if (!KMP_COMPARE_AND_STORE_ACQ32( &loc->reserved_2, 0, frm + 1 )) {
-        //   frm = loc->reserved_2 - 1; // get value saved by other thread
-        //   for same loc
-        //} // AC: this block is to replace next unsynchronized line
-
-        // We need to save indexes for both region and barrier frames. We'll use
-        // loc->reserved_2 field but put region index to the low two bytes and
-        // barrier indexes to the high two bytes. It is OK because
-        // KMP_MAX_FRAME_DOMAINS = 512.
-        loc->reserved_2 |= (frm + 1); // save "new" value
-
-        // Transform compiler-generated region location into the format
-        // that the tools more or less standardized on:
-        //   "<func>$omp$parallel@[file:]<line>[:<col>]"
-        char *buff = NULL;
-        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
-        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
-                                team_size, str_loc.file, str_loc.line,
-                                str_loc.col);
-
-        __itt_suppress_push(__itt_suppress_memory_errors);
-        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
-        __itt_suppress_pop();
-
-        __kmp_str_free(&buff);
-        if (barriers) {
-          if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
-            int frm = KMP_TEST_THEN_INC32(
-                &__kmp_barrier_domain_count); // get "old" value
-            if (frm >= KMP_MAX_FRAME_DOMAINS) {
-              KMP_TEST_THEN_DEC32(
-                  &__kmp_barrier_domain_count); // revert the count
-              return; // loc->reserved_2 is still 0
-            }
-            char *buff = NULL;
-            buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
-                                    str_loc.file, str_loc.col);
-            __itt_suppress_push(__itt_suppress_memory_errors);
-            __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff);
-            __itt_suppress_pop();
-            __kmp_str_free(&buff);
-            // Save the barrier frame index to the high two bytes.
-            loc->reserved_2 |= (frm + 1) << 16;
-          }
-        }
-        __kmp_str_loc_free(&str_loc);
-        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
-      }
-    } else { // Region domain exists for this location
-      // Check if team size was changed. Then create new region domain for this
-      // location
-      unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
-      if ((frm < KMP_MAX_FRAME_DOMAINS) &&
-          (__kmp_itt_region_team_size[frm] != team_size)) {
-        char *buff = NULL;
-        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
-        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
-                                team_size, str_loc.file, str_loc.line,
-                                str_loc.col);
-
-        __itt_suppress_push(__itt_suppress_memory_errors);
-        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
-        __itt_suppress_pop();
-
-        __kmp_str_free(&buff);
-        __kmp_str_loc_free(&str_loc);
-        __kmp_itt_region_team_size[frm] = team_size;
-        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
-      } else { // Team size was not changed. Use existing domain.
-        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
-      }
-    }
-    KMP_ITT_DEBUG_LOCK();
-    KMP_ITT_DEBUG_PRINT("[frm beg] gtid=%d, idx=%x, loc:%p\n", gtid,
-                        loc->reserved_2, loc);
-  }
-#endif
-} // __kmp_itt_region_forking
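The reserved_2 packing used above is easiest to see in isolation: the low two bytes hold the region-domain index plus one and the high two bytes hold the barrier-domain index plus one, so zero can keep meaning "nothing stored yet". A minimal standalone sketch of the same scheme (illustrative names, not the runtime's):

  #include <cassert>
  #include <cstdint>

  // Pack a region index into the low 16 bits and a barrier index into the
  // high 16 bits, each biased by 1 so that 0 still means "no index stored".
  static void pack(std::uint32_t &slot, unsigned region_idx,
                   unsigned barrier_idx) {
    slot |= (region_idx + 1);
    slot |= (barrier_idx + 1) << 16;
  }

  int main() {
    std::uint32_t slot = 0; // plays the role of loc->reserved_2
    pack(slot, 7, 3);
    assert(slot == 0x00040008);           // 3+1 in the high half, 7+1 low
    assert((slot & 0x0000FFFF) - 1 == 7); // recover the region index
    assert((slot >> 16) - 1 == 3);        // recover the barrier index
    return 0;
  }

The bias by one is what lets the code above treat reserved_2 == 0 as "never seen this location".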
-
-// -----------------------------------------------------------------------------
-LINKAGE void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin,
-                                    __itt_timestamp end, int imbalance,
-                                    ident_t *loc, int team_size, int region) {
-#if USE_ITT_NOTIFY
-  if (region) {
-    kmp_team_t *team = __kmp_team_from_gtid(gtid);
-    int serialized = (region == 2 ? 1 : 0);
-    if (team->t.t_active_level + serialized > 1) {
-      // The frame notifications are only supported for the outermost teams.
-      return;
-    }
-    // Check region domain has not been created before. Its index is saved in
-    // the low two bytes.
-    if ((loc->reserved_2 & 0x0000FFFF) == 0) {
-      if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
-        int frm =
-            KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value
-        if (frm >= KMP_MAX_FRAME_DOMAINS) {
-          KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count
-          return; // loc->reserved_2 is still 0
-        }
-
-        // We need to save indexes for both region and barrier frames. We'll use
-        // loc->reserved_2 field but put region index to the low two bytes and
-        // barrier indexes to the high two bytes. It is OK because
-        // KMP_MAX_FRAME_DOMAINS = 512.
-        loc->reserved_2 |= (frm + 1); // save "new" value
-
-        // Transform compiler-generated region location into the format
-        // that the tools more or less standardized on:
-        //   "<func>$omp$parallel:team_size@[file:]<line>[:<col>]"
-        char *buff = NULL;
-        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
-        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
-                                team_size, str_loc.file, str_loc.line,
-                                str_loc.col);
-
-        __itt_suppress_push(__itt_suppress_memory_errors);
-        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
-        __itt_suppress_pop();
-
-        __kmp_str_free(&buff);
-        __kmp_str_loc_free(&str_loc);
-        __kmp_itt_region_team_size[frm] = team_size;
-        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
-      }
-    } else { // Region domain exists for this location
-      // Check if team size was changed. Then create new region domain for this
-      // location
-      unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
-      if ((frm < KMP_MAX_FRAME_DOMAINS) &&
-          (__kmp_itt_region_team_size[frm] != team_size)) {
-        char *buff = NULL;
-        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
-        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
-                                team_size, str_loc.file, str_loc.line,
-                                str_loc.col);
-
-        __itt_suppress_push(__itt_suppress_memory_errors);
-        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
-        __itt_suppress_pop();
-
-        __kmp_str_free(&buff);
-        __kmp_str_loc_free(&str_loc);
-        __kmp_itt_region_team_size[frm] = team_size;
-        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
-      } else { // Team size was not changed. Use existing domain.
-        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
-      }
-    }
-    KMP_ITT_DEBUG_LOCK();
-    KMP_ITT_DEBUG_PRINT(
-        "[reg sub] gtid=%d, idx=%x, region:%d, loc:%p, beg:%llu, end:%llu\n",
-        gtid, loc->reserved_2, region, loc, begin, end);
-    return;
-  } else { // called for barrier reporting
-    if (loc) {
-      if ((loc->reserved_2 & 0xFFFF0000) == 0) {
-        if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
-          int frm = KMP_TEST_THEN_INC32(
-              &__kmp_barrier_domain_count); // get "old" value
-          if (frm >= KMP_MAX_FRAME_DOMAINS) {
-            KMP_TEST_THEN_DEC32(
-                &__kmp_barrier_domain_count); // revert the count
-            return; // loc->reserved_2 is still 0
-          }
-          // Save the barrier frame index to the high two bytes.
-          loc->reserved_2 |= (frm + 1) << 16; // save "new" value
-
-          // Transform compiler-generated region location into the format
-          // that the tools more or less standardized on:
-          //   "<func>$omp$frame@[file:]<line>[:<col>]"
-          kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
-          if (imbalance) {
-            char *buff_imb = NULL;
-            buff_imb = __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d",
-                                        str_loc.func, team_size, str_loc.file,
-                                        str_loc.col);
-            __itt_suppress_push(__itt_suppress_memory_errors);
-            __kmp_itt_imbalance_domains[frm] = __itt_domain_create(buff_imb);
-            __itt_suppress_pop();
-            __itt_frame_submit_v3(__kmp_itt_imbalance_domains[frm], NULL, begin,
-                                  end);
-            __kmp_str_free(&buff_imb);
-          } else {
-            char *buff = NULL;
-            buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
-                                    str_loc.file, str_loc.col);
-            __itt_suppress_push(__itt_suppress_memory_errors);
-            __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff);
-            __itt_suppress_pop();
-            __itt_frame_submit_v3(__kmp_itt_barrier_domains[frm], NULL, begin,
-                                  end);
-            __kmp_str_free(&buff);
-          }
-          __kmp_str_loc_free(&str_loc);
-        }
-      } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS
-        if (imbalance) {
-          __itt_frame_submit_v3(
-              __kmp_itt_imbalance_domains[(loc->reserved_2 >> 16) - 1], NULL,
-              begin, end);
-        } else {
-          __itt_frame_submit_v3(
-              __kmp_itt_barrier_domains[(loc->reserved_2 >> 16) - 1], NULL,
-              begin, end);
-        }
-      }
-      KMP_ITT_DEBUG_LOCK();
-      KMP_ITT_DEBUG_PRINT(
-          "[frm sub] gtid=%d, idx=%x, loc:%p, beg:%llu, end:%llu\n", gtid,
-          loc->reserved_2, loc, begin, end);
-    }
-  }
-#endif
-} // __kmp_itt_frame_submit
-
-// -----------------------------------------------------------------------------
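Each of the metadata entry points that follows lazily creates the shared ITT domain with the same check, acquire, recheck sequence. A generic sketch of that double-checked pattern, with std::atomic and std::mutex standing in for the runtime's bootstrap lock and plain-pointer test:

  #include <atomic>
  #include <mutex>

  struct Domain {}; // stand-in for __itt_domain

  static std::atomic<Domain *> metadata_domain_{nullptr};
  static std::mutex metadata_lock_;

  static Domain *get_domain() {
    Domain *d = metadata_domain_.load(std::memory_order_acquire);
    if (d == nullptr) { // cheap unsynchronized first check
      std::lock_guard<std::mutex> g(metadata_lock_);
      d = metadata_domain_.load(std::memory_order_relaxed);
      if (d == nullptr) { // recheck under the lock: only one thread creates
        d = new Domain();
        metadata_domain_.store(d, std::memory_order_release);
      }
    }
    return d;
  }

The runtime's variant tests a plain pointer and relies on its bootstrap lock for ordering; the atomic load/store above makes the same idea well-defined in portable C++.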
-LINKAGE void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin,
-                                          kmp_uint64 end, kmp_uint64 imbalance,
-                                          kmp_uint64 reduction) {
-#if USE_ITT_NOTIFY
-  if (metadata_domain == NULL) {
-    __kmp_acquire_bootstrap_lock(&metadata_lock);
-    if (metadata_domain == NULL) {
-      __itt_suppress_push(__itt_suppress_memory_errors);
-      metadata_domain
= __itt_domain_create("OMP Metadata"); - string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance"); - string_handle_loop = __itt_string_handle_create("omp_metadata_loop"); - string_handle_sngl = __itt_string_handle_create("omp_metadata_single"); - __itt_suppress_pop(); - } - __kmp_release_bootstrap_lock(&metadata_lock); - } - - kmp_uint64 imbalance_data[4]; - imbalance_data[0] = begin; - imbalance_data[1] = end; - imbalance_data[2] = imbalance; - imbalance_data[3] = reduction; - - __itt_metadata_add(metadata_domain, __itt_null, string_handle_imbl, - __itt_metadata_u64, 4, imbalance_data); -#endif -} // __kmp_itt_metadata_imbalance - -// ----------------------------------------------------------------------------- -LINKAGE void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type, - kmp_uint64 iterations, kmp_uint64 chunk) { -#if USE_ITT_NOTIFY - if (metadata_domain == NULL) { - __kmp_acquire_bootstrap_lock(&metadata_lock); - if (metadata_domain == NULL) { - __itt_suppress_push(__itt_suppress_memory_errors); - metadata_domain = __itt_domain_create("OMP Metadata"); - string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance"); - string_handle_loop = __itt_string_handle_create("omp_metadata_loop"); - string_handle_sngl = __itt_string_handle_create("omp_metadata_single"); - __itt_suppress_pop(); - } - __kmp_release_bootstrap_lock(&metadata_lock); - } - - // Parse line and column from psource string: ";file;func;line;col;;" - char *s_line; - char *s_col; - KMP_DEBUG_ASSERT(loc->psource); -#ifdef __cplusplus - s_line = strchr(CCAST(char *, loc->psource), ';'); -#else - s_line = strchr(loc->psource, ';'); -#endif - KMP_DEBUG_ASSERT(s_line); - s_line = strchr(s_line + 1, ';'); // 2-nd semicolon - KMP_DEBUG_ASSERT(s_line); - s_line = strchr(s_line + 1, ';'); // 3-rd semicolon - KMP_DEBUG_ASSERT(s_line); - s_col = strchr(s_line + 1, ';'); // 4-th semicolon - KMP_DEBUG_ASSERT(s_col); - - kmp_uint64 loop_data[5]; - loop_data[0] = atoi(s_line + 1); // read line - loop_data[1] = atoi(s_col + 1); // read column - loop_data[2] = sched_type; - loop_data[3] = iterations; - loop_data[4] = chunk; - - __itt_metadata_add(metadata_domain, __itt_null, string_handle_loop, - __itt_metadata_u64, 5, loop_data); -#endif -} // __kmp_itt_metadata_loop - -// ----------------------------------------------------------------------------- -LINKAGE void __kmp_itt_metadata_single(ident_t *loc) { -#if USE_ITT_NOTIFY - if (metadata_domain == NULL) { - __kmp_acquire_bootstrap_lock(&metadata_lock); - if (metadata_domain == NULL) { - __itt_suppress_push(__itt_suppress_memory_errors); - metadata_domain = __itt_domain_create("OMP Metadata"); - string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance"); - string_handle_loop = __itt_string_handle_create("omp_metadata_loop"); - string_handle_sngl = __itt_string_handle_create("omp_metadata_single"); - __itt_suppress_pop(); - } - __kmp_release_bootstrap_lock(&metadata_lock); - } - - kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1); - kmp_uint64 single_data[2]; - single_data[0] = str_loc.line; - single_data[1] = str_loc.col; - - __kmp_str_loc_free(&str_loc); - - __itt_metadata_add(metadata_domain, __itt_null, string_handle_sngl, - __itt_metadata_u64, 2, single_data); -#endif -} // __kmp_itt_metadata_single - -// ----------------------------------------------------------------------------- -LINKAGE void __kmp_itt_region_starting(int gtid) { -#if USE_ITT_NOTIFY -#endif -} // __kmp_itt_region_starting - -// 
-----------------------------------------------------------------------------
-LINKAGE void __kmp_itt_region_finished(int gtid) {
-#if USE_ITT_NOTIFY
-#endif
-} // __kmp_itt_region_finished
-
-// ----------------------------------------------------------------------------
-LINKAGE void __kmp_itt_region_joined(int gtid) {
-#if USE_ITT_NOTIFY
-  kmp_team_t *team = __kmp_team_from_gtid(gtid);
-  if (team->t.t_active_level > 1) {
-    // The frame notifications are only supported for the outermost teams.
-    return;
-  }
-  ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident;
-  if (loc && loc->reserved_2) {
-    unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
-    if (frm < KMP_MAX_FRAME_DOMAINS) {
-      KMP_ITT_DEBUG_LOCK();
-      __itt_frame_end_v3(__kmp_itt_region_domains[frm], NULL);
-      KMP_ITT_DEBUG_PRINT("[frm end] gtid=%d, idx=%x, loc:%p\n", gtid,
-                          loc->reserved_2, loc);
-    }
-  }
-#endif
-} // __kmp_itt_region_joined
-
-/* Barriers reporting.
-
-   A barrier consists of two phases:
-   1. Gather -- master waits for the arrival of all the worker threads; each
-      worker thread registers arrival and goes further.
-   2. Release -- each worker thread waits until master lets it go; master lets
-      worker threads go.
-
-   Function should be called by each thread:
-   * __kmp_itt_barrier_starting() -- before arriving to the gather phase.
-   * __kmp_itt_barrier_middle() -- between gather and release phases.
-   * __kmp_itt_barrier_finished() -- after release phase.
-
-   Note: Call __kmp_itt_barrier_object() before call to
-   __kmp_itt_barrier_starting() and save result in local variable.
-   __kmp_itt_barrier_object(), being called too late (e. g. after gather phase)
-   would return itt sync object for the next barrier!
-
-   ITT needs an address (void *) to be specified as a sync object. OpenMP RTL
-   does not have barrier object or barrier data structure. Barrier is just a
-   counter in team and thread structures. We could use an address of team
-   structure as a barrier sync object, but ITT wants different objects for
-   different barriers (even within the same team). So let us use team address
-   as barrier sync object for the first barrier, then increase it by one for the
-   next barrier, and so on (but wrap it not to use addresses outside of team
-   structure). */
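The id arithmetic that the comment above describes, reduced to a standalone program; team, team_size and nbar are stand-ins for the team address, sizeof(kmp_team_t) and bs_last_barrier, and the formula is the one used by __kmp_itt_barrier_object() below:

  #include <cstdint>
  #include <cstdio>

  int main() {
    const std::uintptr_t team = 0x1000;  // pretend address of a team struct
    const std::uintptr_t team_size = 96; // stand-in for sizeof(kmp_team_t)
    const std::uintptr_t nbar = 3;       // stand-in for bs_last_barrier
    // Rotate through team_size / nbar slots while keeping the barrier type
    // in the low bits, so ids stay inside the team structure and different
    // barrier types never collide.
    for (std::uintptr_t counter = 0; counter < 4; ++counter)
      for (std::uintptr_t bt = 0; bt < nbar; ++bt) {
        std::uintptr_t id = team + counter % (team_size / nbar) * nbar + bt;
        std::printf("counter=%lu bt=%lu id=%#lx\n", (unsigned long)counter,
                    (unsigned long)bt, (unsigned long)id);
      }
    return 0;
  }

Because the slot index wraps at team_size / nbar, every generated id lies within sizeof(kmp_team_t) bytes of the team address, which is exactly what the "wrap it" remark above requires.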
-
-void *__kmp_itt_barrier_object(int gtid, int bt, int set_name,
-                               int delta // 0 (current barrier) is default
-                               // value; specify -1 to get previous
-                               // barrier.
-                               ) {
-  void *object = NULL;
-#if USE_ITT_NOTIFY
-  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
-  kmp_team_t *team = thr->th.th_team;
-
-  // NOTE: If the function is called from __kmp_fork_barrier, team pointer can
-  // be NULL. This "if" helps to avoid crash. However, this is not complete
-  // solution, and reporting fork/join barriers to ITT should be revisited.
-
-  if (team != NULL) {
-    // Master thread increases b_arrived by KMP_BARRIER_STATE_BUMP each time.
-    // Divide b_arrived by KMP_BARRIER_STATE_BUMP to get plain barrier counter.
-    kmp_uint64 counter =
-        team->t.t_bar[bt].b_arrived / KMP_BARRIER_STATE_BUMP + delta;
-    // Now form the barrier id. Encode barrier type (bt) in barrier id too, so
-    // barriers of different types do not have the same ids.
-    KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= bs_last_barrier);
-    // This condition is a must (we would have a zero divide otherwise).
-    KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= 2 * bs_last_barrier);
-    // A stronger condition: make sure we have room for at least two
-    // different ids (for each barrier type).
-    object = reinterpret_cast<void *>(
-        kmp_uintptr_t(team) +
-        counter % (sizeof(kmp_team_t) / bs_last_barrier) * bs_last_barrier +
-        bt);
-    KMP_ITT_DEBUG_LOCK();
-    KMP_ITT_DEBUG_PRINT("[bar obj] type=%d, counter=%lld, object=%p\n", bt,
-                        counter, object);
-
-    if (set_name) {
-      ident_t const *loc = NULL;
-      char const *src = NULL;
-      char const *type = "OMP Barrier";
-      switch (bt) {
-      case bs_plain_barrier: {
-        // For plain barrier compiler calls __kmpc_barrier() function, which
-        // saves location in thr->th.th_ident.
-        loc = thr->th.th_ident;
-        // Get the barrier type from flags provided by compiler.
-        kmp_int32 expl = 0;
-        kmp_uint32 impl = 0;
-        if (loc != NULL) {
-          src = loc->psource;
-          expl = (loc->flags & KMP_IDENT_BARRIER_EXPL) != 0;
-          impl = (loc->flags & KMP_IDENT_BARRIER_IMPL) != 0;
-        }
-        if (impl) {
-          switch (loc->flags & KMP_IDENT_BARRIER_IMPL_MASK) {
-          case KMP_IDENT_BARRIER_IMPL_FOR: {
-            type = "OMP For Barrier";
-          } break;
-          case KMP_IDENT_BARRIER_IMPL_SECTIONS: {
-            type = "OMP Sections Barrier";
-          } break;
-          case KMP_IDENT_BARRIER_IMPL_SINGLE: {
-            type = "OMP Single Barrier";
-          } break;
-          case KMP_IDENT_BARRIER_IMPL_WORKSHARE: {
-            type = "OMP Workshare Barrier";
-          } break;
-          default: {
-            type = "OMP Implicit Barrier";
-            KMP_DEBUG_ASSERT(0);
-          }
-          }
-        } else if (expl) {
-          type = "OMP Explicit Barrier";
-        }
-      } break;
-      case bs_forkjoin_barrier: {
-        // In case of the fork/join barrier, thr->th.th_ident holds the
-        // location of the last passed construct, and the join barrier is not
-        // such a construct, so it cannot be used here. Use th_ident of the
-        // master thread instead -- __kmp_join_call() called by the master
-        // thread saves the location.
-        //
-        // AC: cannot read from master because __kmp_join_call may not have
-        // been called yet, so we read the location from the team. This is the
-        // same location. And the team is valid at the entry to the join
-        // barrier where this happens.
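Together with the three phase hooks defined next, the intended per-thread call sequence from the protocol comment above looks like this; the wrapper name and the waits are placeholders, not code that exists in the runtime:

  // Sketch of one thread reporting one barrier instance.
  void report_one_barrier(int gtid, int bt) {
    // Must be obtained before the gather phase starts (see the note above).
    void *obj = __kmp_itt_barrier_object(gtid, bt, /*set_name=*/1, /*delta=*/0);
    __kmp_itt_barrier_starting(gtid, obj); // entering the gather phase
    // ... gather: wait until every thread of the team has arrived ...
    __kmp_itt_barrier_middle(gtid, obj); // between gather and release
    // ... release: wait until the master lets this thread go ...
    __kmp_itt_barrier_finished(gtid, obj); // after the release phase
  }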
-        loc = team->t.t_ident;
-        if (loc != NULL) {
-          src = loc->psource;
-        }
-        type = "OMP Join Barrier";
-      } break;
-      }
-      KMP_ITT_DEBUG_LOCK();
-      __itt_sync_create(object, type, src, __itt_attr_barrier);
-      KMP_ITT_DEBUG_PRINT(
-          "[bar sta] scre( %p, \"%s\", \"%s\", __itt_attr_barrier )\n", object,
-          type, src);
-    }
-  }
-#endif
-  return object;
-} // __kmp_itt_barrier_object
-
-// -----------------------------------------------------------------------------
-void __kmp_itt_barrier_starting(int gtid, void *object) {
-#if USE_ITT_NOTIFY
-  if (!KMP_MASTER_GTID(gtid)) {
-    KMP_ITT_DEBUG_LOCK();
-    __itt_sync_releasing(object);
-    KMP_ITT_DEBUG_PRINT("[bar sta] srel( %p )\n", object);
-  }
-  KMP_ITT_DEBUG_LOCK();
-  __itt_sync_prepare(object);
-  KMP_ITT_DEBUG_PRINT("[bar sta] spre( %p )\n", object);
-#endif
-} // __kmp_itt_barrier_starting
-
-// -----------------------------------------------------------------------------
-void __kmp_itt_barrier_middle(int gtid, void *object) {
-#if USE_ITT_NOTIFY
-  if (KMP_MASTER_GTID(gtid)) {
-    KMP_ITT_DEBUG_LOCK();
-    __itt_sync_acquired(object);
-    KMP_ITT_DEBUG_PRINT("[bar mid] sacq( %p )\n", object);
-    KMP_ITT_DEBUG_LOCK();
-    __itt_sync_releasing(object);
-    KMP_ITT_DEBUG_PRINT("[bar mid] srel( %p )\n", object);
-  } else {
-  }
-#endif
-} // __kmp_itt_barrier_middle
-
-// -----------------------------------------------------------------------------
-void __kmp_itt_barrier_finished(int gtid, void *object) {
-#if USE_ITT_NOTIFY
-  if (KMP_MASTER_GTID(gtid)) {
-  } else {
-    KMP_ITT_DEBUG_LOCK();
-    __itt_sync_acquired(object);
-    KMP_ITT_DEBUG_PRINT("[bar end] sacq( %p )\n", object);
-  }
-#endif
-} // __kmp_itt_barrier_finished
-
-/* Taskwait reporting.
-   ITT needs an address (void *) to be specified as a sync object. OpenMP RTL
-   does not have taskwait structure, so we need to construct something. */
-
-void *__kmp_itt_taskwait_object(int gtid) {
-  void *object = NULL;
-#if USE_ITT_NOTIFY
-  if (__itt_sync_create_ptr) {
-    kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
-    kmp_taskdata_t *taskdata = thread->th.th_current_task;
-    object = reinterpret_cast<void *>(kmp_uintptr_t(taskdata) +
-                                      taskdata->td_taskwait_counter %
-                                          sizeof(kmp_taskdata_t));
-  }
-#endif
-  return object;
-} // __kmp_itt_taskwait_object
-
-void __kmp_itt_taskwait_starting(int gtid, void *object) {
-#if USE_ITT_NOTIFY
-  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
-  kmp_taskdata_t *taskdata = thread->th.th_current_task;
-  ident_t const *loc = taskdata->td_taskwait_ident;
-  char const *src = (loc == NULL ? NULL : loc->psource);
-  KMP_ITT_DEBUG_LOCK();
-  __itt_sync_create(object, "OMP Taskwait", src, 0);
-  KMP_ITT_DEBUG_PRINT("[twa sta] scre( %p, \"OMP Taskwait\", \"%s\", 0 )\n",
-                      object, src);
-  KMP_ITT_DEBUG_LOCK();
-  __itt_sync_prepare(object);
-  KMP_ITT_DEBUG_PRINT("[twa sta] spre( %p )\n", object);
-#endif
-} // __kmp_itt_taskwait_starting
-
-void __kmp_itt_taskwait_finished(int gtid, void *object) {
-#if USE_ITT_NOTIFY
-  KMP_ITT_DEBUG_LOCK();
-  __itt_sync_acquired(object);
-  KMP_ITT_DEBUG_PRINT("[twa end] sacq( %p )\n", object);
-  KMP_ITT_DEBUG_LOCK();
-  __itt_sync_destroy(object);
-  KMP_ITT_DEBUG_PRINT("[twa end] sdes( %p )\n", object);
-#endif
-} // __kmp_itt_taskwait_finished
-
-/* Task reporting.
-   Only those tasks are reported which are executed by a thread spinning at a
-   barrier (or taskwait). The sync object passed to the function must be the
-   barrier or taskwait the threads are waiting at. */
-
-void __kmp_itt_task_starting(
-    void *object // ITT sync object: barrier or taskwait.
- ) { -#if USE_ITT_NOTIFY - if (object != NULL) { - KMP_ITT_DEBUG_LOCK(); - __itt_sync_cancel(object); - KMP_ITT_DEBUG_PRINT("[tsk sta] scan( %p )\n", object); - } -#endif -} // __kmp_itt_task_starting - -// ----------------------------------------------------------------------------- -void __kmp_itt_task_finished( - void *object // ITT sync object: barrier or taskwait. - ) { -#if USE_ITT_NOTIFY - KMP_ITT_DEBUG_LOCK(); - __itt_sync_prepare(object); - KMP_ITT_DEBUG_PRINT("[tsk end] spre( %p )\n", object); -#endif -} // __kmp_itt_task_finished - -/* Lock reporting. - * __kmp_itt_lock_creating( lock ) should be called *before* the first lock - operation (set/unset). It is not a real event shown to the user but just - setting a name for synchronization object. `lock' is an address of sync - object, the same address should be used in all subsequent calls. - * __kmp_itt_lock_acquiring() should be called before setting the lock. - * __kmp_itt_lock_acquired() should be called after setting the lock. - * __kmp_itt_lock_realeasing() should be called before unsetting the lock. - * __kmp_itt_lock_cancelled() should be called after thread cancelled waiting - for the lock. - * __kmp_itt_lock_destroyed( lock ) should be called after the last lock - operation. After __kmp_itt_lock_destroyed() all the references to the same - address will be considered as another sync object, not related with the - original one. */ - -#if KMP_USE_DYNAMIC_LOCK -// Takes location information directly -__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type, - const ident_t *loc) { -#if USE_ITT_NOTIFY - if (__itt_sync_create_ptr) { - char const *src = (loc == NULL ? NULL : loc->psource); - KMP_ITT_DEBUG_LOCK(); - __itt_sync_create(lock, type, src, 0); - KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type, - src); - } -#endif -} -#else // KMP_USE_DYNAMIC_LOCK -// Internal guts -- common code for locks and critical sections, do not call -// directly. -__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type) { -#if USE_ITT_NOTIFY - if (__itt_sync_create_ptr) { - ident_t const *loc = NULL; - if (__kmp_get_user_lock_location_ != NULL) - loc = __kmp_get_user_lock_location_((lock)); - char const *src = (loc == NULL ? NULL : loc->psource); - KMP_ITT_DEBUG_LOCK(); - __itt_sync_create(lock, type, src, 0); - KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type, - src); - } -#endif -} // ___kmp_itt_lock_init -#endif // KMP_USE_DYNAMIC_LOCK - -// Internal guts -- common code for locks and critical sections, do not call -// directly. 
-__kmp_inline void ___kmp_itt_lock_fini(kmp_user_lock_p lock, char const *type) { -#if USE_ITT_NOTIFY - KMP_ITT_DEBUG_LOCK(); - __itt_sync_destroy(lock); - KMP_ITT_DEBUG_PRINT("[lck dst] sdes( %p )\n", lock); -#endif -} // ___kmp_itt_lock_fini - -// ----------------------------------------------------------------------------- -#if KMP_USE_DYNAMIC_LOCK -void __kmp_itt_lock_creating(kmp_user_lock_p lock, const ident_t *loc) { - ___kmp_itt_lock_init(lock, "OMP Lock", loc); -} -#else -void __kmp_itt_lock_creating(kmp_user_lock_p lock) { - ___kmp_itt_lock_init(lock, "OMP Lock"); -} // __kmp_itt_lock_creating -#endif - -void __kmp_itt_lock_acquiring(kmp_user_lock_p lock) { -#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY - // postpone lock object access - if (__itt_sync_prepare_ptr) { - if (KMP_EXTRACT_D_TAG(lock) == 0) { - kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); - __itt_sync_prepare(ilk->lock); - } else { - __itt_sync_prepare(lock); - } - } -#else - __itt_sync_prepare(lock); -#endif -} // __kmp_itt_lock_acquiring - -void __kmp_itt_lock_acquired(kmp_user_lock_p lock) { -#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY - // postpone lock object access - if (__itt_sync_acquired_ptr) { - if (KMP_EXTRACT_D_TAG(lock) == 0) { - kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); - __itt_sync_acquired(ilk->lock); - } else { - __itt_sync_acquired(lock); - } - } -#else - __itt_sync_acquired(lock); -#endif -} // __kmp_itt_lock_acquired - -void __kmp_itt_lock_releasing(kmp_user_lock_p lock) { -#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY - if (__itt_sync_releasing_ptr) { - if (KMP_EXTRACT_D_TAG(lock) == 0) { - kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); - __itt_sync_releasing(ilk->lock); - } else { - __itt_sync_releasing(lock); - } - } -#else - __itt_sync_releasing(lock); -#endif -} // __kmp_itt_lock_releasing - -void __kmp_itt_lock_cancelled(kmp_user_lock_p lock) { -#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY - if (__itt_sync_cancel_ptr) { - if (KMP_EXTRACT_D_TAG(lock) == 0) { - kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); - __itt_sync_cancel(ilk->lock); - } else { - __itt_sync_cancel(lock); - } - } -#else - __itt_sync_cancel(lock); -#endif -} // __kmp_itt_lock_cancelled - -void __kmp_itt_lock_destroyed(kmp_user_lock_p lock) { - ___kmp_itt_lock_fini(lock, "OMP Lock"); -} // __kmp_itt_lock_destroyed - -/* Critical reporting. - Critical sections are treated exactly as locks (but have different object - type). */ -#if KMP_USE_DYNAMIC_LOCK -void __kmp_itt_critical_creating(kmp_user_lock_p lock, const ident_t *loc) { - ___kmp_itt_lock_init(lock, "OMP Critical", loc); -} -#else -void __kmp_itt_critical_creating(kmp_user_lock_p lock) { - ___kmp_itt_lock_init(lock, "OMP Critical"); -} // __kmp_itt_critical_creating -#endif - -void __kmp_itt_critical_acquiring(kmp_user_lock_p lock) { - __itt_sync_prepare(lock); -} // __kmp_itt_critical_acquiring - -void __kmp_itt_critical_acquired(kmp_user_lock_p lock) { - __itt_sync_acquired(lock); -} // __kmp_itt_critical_acquired - -void __kmp_itt_critical_releasing(kmp_user_lock_p lock) { - __itt_sync_releasing(lock); -} // __kmp_itt_critical_releasing - -void __kmp_itt_critical_destroyed(kmp_user_lock_p lock) { - ___kmp_itt_lock_fini(lock, "OMP Critical"); -} // __kmp_itt_critical_destroyed - -/* Single reporting. */ - -void __kmp_itt_single_start(int gtid) { -#if USE_ITT_NOTIFY - if (__itt_mark_create_ptr || KMP_ITT_DEBUG) { - kmp_info_t *thr = __kmp_thread_from_gtid((gtid)); - ident_t *loc = thr->th.th_ident; - char const *src = (loc == NULL ? 
NULL : loc->psource); - kmp_str_buf_t name; - __kmp_str_buf_init(&name); - __kmp_str_buf_print(&name, "OMP Single-%s", src); - KMP_ITT_DEBUG_LOCK(); - thr->th.th_itt_mark_single = __itt_mark_create(name.str); - KMP_ITT_DEBUG_PRINT("[sin sta] mcre( \"%s\") -> %d\n", name.str, - thr->th.th_itt_mark_single); - __kmp_str_buf_free(&name); - KMP_ITT_DEBUG_LOCK(); - __itt_mark(thr->th.th_itt_mark_single, NULL); - KMP_ITT_DEBUG_PRINT("[sin sta] mark( %d, NULL )\n", - thr->th.th_itt_mark_single); - } -#endif -} // __kmp_itt_single_start - -void __kmp_itt_single_end(int gtid) { -#if USE_ITT_NOTIFY - __itt_mark_type mark = __kmp_thread_from_gtid(gtid)->th.th_itt_mark_single; - KMP_ITT_DEBUG_LOCK(); - __itt_mark_off(mark); - KMP_ITT_DEBUG_PRINT("[sin end] moff( %d )\n", mark); -#endif -} // __kmp_itt_single_end - -/* Ordered reporting. - * __kmp_itt_ordered_init is called by each thread *before* first using sync - object. ITT team would like it to be called once, but it requires extra - synchronization. - * __kmp_itt_ordered_prep is called when thread is going to enter ordered - section (before synchronization). - * __kmp_itt_ordered_start is called just before entering user code (after - synchronization). - * __kmp_itt_ordered_end is called after returning from user code. - - Sync object is th->th.th_dispatch->th_dispatch_sh_current. - Events are not generated in case of serialized team. */ - -void __kmp_itt_ordered_init(int gtid) { -#if USE_ITT_NOTIFY - if (__itt_sync_create_ptr) { - kmp_info_t *thr = __kmp_thread_from_gtid(gtid); - ident_t const *loc = thr->th.th_ident; - char const *src = (loc == NULL ? NULL : loc->psource); - __itt_sync_create(thr->th.th_dispatch->th_dispatch_sh_current, - "OMP Ordered", src, 0); - } -#endif -} // __kmp_itt_ordered_init - -void __kmp_itt_ordered_prep(int gtid) { -#if USE_ITT_NOTIFY - if (__itt_sync_create_ptr) { - kmp_team_t *t = __kmp_team_from_gtid(gtid); - if (!t->t.t_serialized) { - kmp_info_t *th = __kmp_thread_from_gtid(gtid); - __itt_sync_prepare(th->th.th_dispatch->th_dispatch_sh_current); - } - } -#endif -} // __kmp_itt_ordered_prep - -void __kmp_itt_ordered_start(int gtid) { -#if USE_ITT_NOTIFY - if (__itt_sync_create_ptr) { - kmp_team_t *t = __kmp_team_from_gtid(gtid); - if (!t->t.t_serialized) { - kmp_info_t *th = __kmp_thread_from_gtid(gtid); - __itt_sync_acquired(th->th.th_dispatch->th_dispatch_sh_current); - } - } -#endif -} // __kmp_itt_ordered_start - -void __kmp_itt_ordered_end(int gtid) { -#if USE_ITT_NOTIFY - if (__itt_sync_create_ptr) { - kmp_team_t *t = __kmp_team_from_gtid(gtid); - if (!t->t.t_serialized) { - kmp_info_t *th = __kmp_thread_from_gtid(gtid); - __itt_sync_releasing(th->th.th_dispatch->th_dispatch_sh_current); - } - } -#endif -} // __kmp_itt_ordered_end - -/* Threads reporting. */ - -void __kmp_itt_thread_ignore() { - __itt_thr_ignore(); -} // __kmp_itt_thread_ignore - -void __kmp_itt_thread_name(int gtid) { -#if USE_ITT_NOTIFY - if (__itt_thr_name_set_ptr) { - kmp_str_buf_t name; - __kmp_str_buf_init(&name); - if (KMP_MASTER_GTID(gtid)) { - __kmp_str_buf_print(&name, "OMP Master Thread #%d", gtid); - } else { - __kmp_str_buf_print(&name, "OMP Worker Thread #%d", gtid); - } - KMP_ITT_DEBUG_LOCK(); - __itt_thr_name_set(name.str, name.used); - KMP_ITT_DEBUG_PRINT("[thr nam] name( \"%s\")\n", name.str); - __kmp_str_buf_free(&name); - } -#endif -} // __kmp_itt_thread_name - -/* System object reporting. - ITT catches operations with system sync objects (like Windows* OS on IA-32 - architecture API critical sections and events). 
We only need to specify - name ("OMP Scheduler") for the object to let ITT know it is an object used - by OpenMP RTL for internal purposes. */ - -void __kmp_itt_system_object_created(void *object, char const *name) { -#if USE_ITT_NOTIFY - KMP_ITT_DEBUG_LOCK(); - __itt_sync_create(object, "OMP Scheduler", name, 0); - KMP_ITT_DEBUG_PRINT("[sys obj] scre( %p, \"OMP Scheduler\", \"%s\", 0 )\n", - object, name); -#endif -} // __kmp_itt_system_object_created - -/* Stack stitching api. - Master calls "create" and put the stitching id into team structure. - Workers read the stitching id and call "enter" / "leave" api. - Master calls "destroy" at the end of the parallel region. */ - -__itt_caller __kmp_itt_stack_caller_create() { -#if USE_ITT_NOTIFY - if (!__itt_stack_caller_create_ptr) - return NULL; - KMP_ITT_DEBUG_LOCK(); - __itt_caller id = __itt_stack_caller_create(); - KMP_ITT_DEBUG_PRINT("[stk cre] %p\n", id); - return id; -#endif - return NULL; -} - -void __kmp_itt_stack_caller_destroy(__itt_caller id) { -#if USE_ITT_NOTIFY - if (__itt_stack_caller_destroy_ptr) { - KMP_ITT_DEBUG_LOCK(); - __itt_stack_caller_destroy(id); - KMP_ITT_DEBUG_PRINT("[stk des] %p\n", id); - } -#endif -} - -void __kmp_itt_stack_callee_enter(__itt_caller id) { -#if USE_ITT_NOTIFY - if (__itt_stack_callee_enter_ptr) { - KMP_ITT_DEBUG_LOCK(); - __itt_stack_callee_enter(id); - KMP_ITT_DEBUG_PRINT("[stk ent] %p\n", id); - } -#endif -} - -void __kmp_itt_stack_callee_leave(__itt_caller id) { -#if USE_ITT_NOTIFY - if (__itt_stack_callee_leave_ptr) { - KMP_ITT_DEBUG_LOCK(); - __itt_stack_callee_leave(id); - KMP_ITT_DEBUG_PRINT("[stk lea] %p\n", id); - } -#endif -} - -#endif /* USE_ITT_BUILD */ Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/z_Windows_NT_util.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/z_Windows_NT_util.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/z_Windows_NT_util.cpp (nonexistent) @@ -1,1569 +0,0 @@ -/* - * z_Windows_NT_util.cpp -- platform specific routines. - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#include "kmp.h" -#include "kmp_affinity.h" -#include "kmp_i18n.h" -#include "kmp_io.h" -#include "kmp_itt.h" -#include "kmp_wait_release.h" - -/* This code is related to NtQuerySystemInformation() function. This function - is used in the Load balance algorithm for OMP_DYNAMIC=true to find the - number of running threads in the system. 
*/ - -#include // UNICODE_STRING -#include - -enum SYSTEM_INFORMATION_CLASS { - SystemProcessInformation = 5 -}; // SYSTEM_INFORMATION_CLASS - -struct CLIENT_ID { - HANDLE UniqueProcess; - HANDLE UniqueThread; -}; // struct CLIENT_ID - -enum THREAD_STATE { - StateInitialized, - StateReady, - StateRunning, - StateStandby, - StateTerminated, - StateWait, - StateTransition, - StateUnknown -}; // enum THREAD_STATE - -struct VM_COUNTERS { - SIZE_T PeakVirtualSize; - SIZE_T VirtualSize; - ULONG PageFaultCount; - SIZE_T PeakWorkingSetSize; - SIZE_T WorkingSetSize; - SIZE_T QuotaPeakPagedPoolUsage; - SIZE_T QuotaPagedPoolUsage; - SIZE_T QuotaPeakNonPagedPoolUsage; - SIZE_T QuotaNonPagedPoolUsage; - SIZE_T PagefileUsage; - SIZE_T PeakPagefileUsage; - SIZE_T PrivatePageCount; -}; // struct VM_COUNTERS - -struct SYSTEM_THREAD { - LARGE_INTEGER KernelTime; - LARGE_INTEGER UserTime; - LARGE_INTEGER CreateTime; - ULONG WaitTime; - LPVOID StartAddress; - CLIENT_ID ClientId; - DWORD Priority; - LONG BasePriority; - ULONG ContextSwitchCount; - THREAD_STATE State; - ULONG WaitReason; -}; // SYSTEM_THREAD - -KMP_BUILD_ASSERT(offsetof(SYSTEM_THREAD, KernelTime) == 0); -#if KMP_ARCH_X86 -KMP_BUILD_ASSERT(offsetof(SYSTEM_THREAD, StartAddress) == 28); -KMP_BUILD_ASSERT(offsetof(SYSTEM_THREAD, State) == 52); -#else -KMP_BUILD_ASSERT(offsetof(SYSTEM_THREAD, StartAddress) == 32); -KMP_BUILD_ASSERT(offsetof(SYSTEM_THREAD, State) == 68); -#endif - -struct SYSTEM_PROCESS_INFORMATION { - ULONG NextEntryOffset; - ULONG NumberOfThreads; - LARGE_INTEGER Reserved[3]; - LARGE_INTEGER CreateTime; - LARGE_INTEGER UserTime; - LARGE_INTEGER KernelTime; - UNICODE_STRING ImageName; - DWORD BasePriority; - HANDLE ProcessId; - HANDLE ParentProcessId; - ULONG HandleCount; - ULONG Reserved2[2]; - VM_COUNTERS VMCounters; - IO_COUNTERS IOCounters; - SYSTEM_THREAD Threads[1]; -}; // SYSTEM_PROCESS_INFORMATION -typedef SYSTEM_PROCESS_INFORMATION *PSYSTEM_PROCESS_INFORMATION; - -KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, NextEntryOffset) == 0); -KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, CreateTime) == 32); -KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, ImageName) == 56); -#if KMP_ARCH_X86 -KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, ProcessId) == 68); -KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, HandleCount) == 76); -KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, VMCounters) == 88); -KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, IOCounters) == 136); -KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, Threads) == 184); -#else -KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, ProcessId) == 80); -KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, HandleCount) == 96); -KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, VMCounters) == 112); -KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, IOCounters) == 208); -KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, Threads) == 256); -#endif - -typedef NTSTATUS(NTAPI *NtQuerySystemInformation_t)(SYSTEM_INFORMATION_CLASS, - PVOID, ULONG, PULONG); -NtQuerySystemInformation_t NtQuerySystemInformation = NULL; - -HMODULE ntdll = NULL; - -/* End of NtQuerySystemInformation()-related code */ - -static HMODULE kernel32 = NULL; - -#if KMP_HANDLE_SIGNALS -typedef void (*sig_func_t)(int); -static sig_func_t __kmp_sighldrs[NSIG]; -static int __kmp_siginstalled[NSIG]; -#endif - -#if KMP_USE_MONITOR -static HANDLE __kmp_monitor_ev; -#endif -static kmp_int64 __kmp_win32_time; -double __kmp_win32_tick; - -int __kmp_init_runtime 
= FALSE;
-CRITICAL_SECTION __kmp_win32_section;
-
-void __kmp_win32_mutex_init(kmp_win32_mutex_t *mx) {
-  InitializeCriticalSection(&mx->cs);
-#if USE_ITT_BUILD
-  __kmp_itt_system_object_created(&mx->cs, "Critical Section");
-#endif /* USE_ITT_BUILD */
-}
-
-void __kmp_win32_mutex_destroy(kmp_win32_mutex_t *mx) {
-  DeleteCriticalSection(&mx->cs);
-}
-
-void __kmp_win32_mutex_lock(kmp_win32_mutex_t *mx) {
-  EnterCriticalSection(&mx->cs);
-}
-
-void __kmp_win32_mutex_unlock(kmp_win32_mutex_t *mx) {
-  LeaveCriticalSection(&mx->cs);
-}
-
-void __kmp_win32_cond_init(kmp_win32_cond_t *cv) {
-  cv->waiters_count_ = 0;
-  cv->wait_generation_count_ = 0;
-  cv->release_count_ = 0;
-
-  /* Initialize the critical section */
-  __kmp_win32_mutex_init(&cv->waiters_count_lock_);
-
-  /* Create a manual-reset event. */
-  cv->event_ = CreateEvent(NULL, // no security
-                           TRUE, // manual-reset
-                           FALSE, // non-signaled initially
-                           NULL); // unnamed
-#if USE_ITT_BUILD
-  __kmp_itt_system_object_created(cv->event_, "Event");
-#endif /* USE_ITT_BUILD */
-}
-
-void __kmp_win32_cond_destroy(kmp_win32_cond_t *cv) {
-  __kmp_win32_mutex_destroy(&cv->waiters_count_lock_);
-  __kmp_free_handle(cv->event_);
-  memset(cv, '\0', sizeof(*cv));
-}
-
-/* TODO associate cv with a team instead of a thread so as to optimize
-   the case where we wake up a whole team */
-
-void __kmp_win32_cond_wait(kmp_win32_cond_t *cv, kmp_win32_mutex_t *mx,
-                           kmp_info_t *th, int need_decrease_load) {
-  int my_generation;
-  int last_waiter;
-
-  /* Avoid race conditions */
-  __kmp_win32_mutex_lock(&cv->waiters_count_lock_);
-
-  /* Increment count of waiters */
-  cv->waiters_count_++;
-
-  /* Store current generation in our activation record. */
-  my_generation = cv->wait_generation_count_;
-
-  __kmp_win32_mutex_unlock(&cv->waiters_count_lock_);
-  __kmp_win32_mutex_unlock(mx);
-
-  for (;;) {
-    int wait_done;
-
-    /* Wait until the event is signaled */
-    WaitForSingleObject(cv->event_, INFINITE);
-
-    __kmp_win32_mutex_lock(&cv->waiters_count_lock_);
-
-    /* Exit the loop when the <event_> is signaled and there are still
-       waiting threads from this <wait_generation> that haven't been released
-       from this wait yet. */
-    wait_done = (cv->release_count_ > 0) &&
-                (cv->wait_generation_count_ != my_generation);
-
-    __kmp_win32_mutex_unlock(&cv->waiters_count_lock_);
-
-    /* there used to be a semicolon after the if statement, it looked like a
-       bug, so i removed it */
-    if (wait_done)
-      break;
-  }
-
-  __kmp_win32_mutex_lock(mx);
-  __kmp_win32_mutex_lock(&cv->waiters_count_lock_);
-
-  cv->waiters_count_--;
-  cv->release_count_--;
-
-  last_waiter = (cv->release_count_ == 0);
-
-  __kmp_win32_mutex_unlock(&cv->waiters_count_lock_);
-
-  if (last_waiter) {
-    /* We're the last waiter to be notified, so reset the manual event. */
-    ResetEvent(cv->event_);
-  }
-}
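The event-plus-generation-count machinery above is the classic emulation of a condition variable on top of a manual-reset event. For comparison only, Windows Vista and later provide native condition variables that express the same wait loop directly (a sketch of the native API, not what this runtime uses):

  #include <windows.h>

  static CRITICAL_SECTION cs;
  static CONDITION_VARIABLE cond;
  static int ready = 0; // example predicate guarded by cs

  void init() {
    InitializeCriticalSection(&cs);
    InitializeConditionVariable(&cond);
  }

  void waiter() {
    EnterCriticalSection(&cs);
    while (!ready) // native condition variables can also wake spuriously
      SleepConditionVariableCS(&cond, &cs, INFINITE);
    LeaveCriticalSection(&cs);
  }

  void releaser() {
    EnterCriticalSection(&cs);
    ready = 1;
    LeaveCriticalSection(&cs);
    WakeAllConditionVariable(&cond);
  }

The generation counter in the hand-rolled version exists precisely to tell "released by this broadcast" apart from "woke up on a stale event", which the native API handles internally.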
-
-void __kmp_win32_cond_broadcast(kmp_win32_cond_t *cv) {
-  __kmp_win32_mutex_lock(&cv->waiters_count_lock_);
-
-  if (cv->waiters_count_ > 0) {
-    SetEvent(cv->event_);
-    /* Release all the threads in this generation. */
-
-    cv->release_count_ = cv->waiters_count_;
-
-    /* Start a new generation. */
-    cv->wait_generation_count_++;
-  }
-
-  __kmp_win32_mutex_unlock(&cv->waiters_count_lock_);
-}
-
-void __kmp_win32_cond_signal(kmp_win32_cond_t *cv) {
-  __kmp_win32_cond_broadcast(cv);
-}
-
-void __kmp_enable(int new_state) {
-  if (__kmp_init_runtime)
-    LeaveCriticalSection(&__kmp_win32_section);
-}
-
-void __kmp_disable(int *old_state) {
-  *old_state = 0;
-
-  if (__kmp_init_runtime)
-    EnterCriticalSection(&__kmp_win32_section);
-}
-
-void __kmp_suspend_initialize(void) { /* do nothing */
-}
-
-static void __kmp_suspend_initialize_thread(kmp_info_t *th) {
-  if (!TCR_4(th->th.th_suspend_init)) {
-    /* this means we haven't initialized the suspension pthread objects for this
-       thread in this instance of the process */
-    __kmp_win32_cond_init(&th->th.th_suspend_cv);
-    __kmp_win32_mutex_init(&th->th.th_suspend_mx);
-    TCW_4(th->th.th_suspend_init, TRUE);
-  }
-}
-
-void __kmp_suspend_uninitialize_thread(kmp_info_t *th) {
-  if (TCR_4(th->th.th_suspend_init)) {
-    /* this means we have initialized the suspension pthread objects for this
-       thread in this instance of the process */
-    __kmp_win32_cond_destroy(&th->th.th_suspend_cv);
-    __kmp_win32_mutex_destroy(&th->th.th_suspend_mx);
-    TCW_4(th->th.th_suspend_init, FALSE);
-  }
-}
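The suspend/resume pair below coordinates through a sleep bit stored in the flag a thread spins on: the waiter sets the bit and rechecks the wait condition before blocking, and the waker clears the bit before signaling, so a late check never sleeps through its own wake-up. A reduced sketch of that handshake with a plain atomic word (illustrative only; the runtime's flag classes wrap this in set_sleeping()/unset_sleeping()):

  #include <atomic>
  #include <cstdint>

  constexpr std::uint32_t SLEEP_BIT = 0x80000000u;

  // Waiter side: advertise the intent to sleep, then re-check the condition
  // being waited on before actually blocking.
  bool try_go_to_sleep(std::atomic<std::uint32_t> &flag, std::uint32_t done) {
    std::uint32_t old = flag.fetch_or(SLEEP_BIT); // like set_sleeping()
    if ((old & ~SLEEP_BIT) == done) {             // condition already met?
      flag.fetch_and(~SLEEP_BIT);                 // like unset_sleeping()
      return false; // false alarm: do not block
    }
    return true; // safe to block on the condition variable
  }

  // Waker side: clear the sleep bit first; only signal if the waiter had
  // actually set it (otherwise it never blocked).
  bool need_signal(std::atomic<std::uint32_t> &flag) {
    std::uint32_t old = flag.fetch_and(~SLEEP_BIT);
    return (old & SLEEP_BIT) != 0;
  }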
-      if (!deactivated) {
-        th->th.th_active = FALSE;
-        if (th->th.th_active_in_pool) {
-          th->th.th_active_in_pool = FALSE;
-          KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
-          KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
-        }
-        deactivated = TRUE;
-      }
-
-      __kmp_win32_cond_wait(&th->th.th_suspend_cv, &th->th.th_suspend_mx, 0,
-                            0);
-
-#ifdef KMP_DEBUG
-      if (flag->is_sleeping()) {
-        KF_TRACE(100,
-                 ("__kmp_suspend_template: T#%d spurious wakeup\n", th_gtid));
-      }
-#endif /* KMP_DEBUG */
-
-    } // while
-
-    // Mark the thread as active again (if it was previously marked as
-    // inactive).
-    if (deactivated) {
-      th->th.th_active = TRUE;
-      if (TCR_4(th->th.th_in_pool)) {
-        KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
-        th->th.th_active_in_pool = TRUE;
-      }
-    }
-  }
-
-  __kmp_win32_mutex_unlock(&th->th.th_suspend_mx);
-
-  KF_TRACE(30, ("__kmp_suspend_template: T#%d exit\n", th_gtid));
-}
-
-void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag) {
-  __kmp_suspend_template(th_gtid, flag);
-}
-void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag) {
-  __kmp_suspend_template(th_gtid, flag);
-}
-void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
-  __kmp_suspend_template(th_gtid, flag);
-}
-
-/* This routine signals the thread specified by target_gtid to wake up
-   after setting the sleep bit indicated by the flag argument to FALSE. */
-template <class C>
-static inline void __kmp_resume_template(int target_gtid, C *flag) {
-  kmp_info_t *th = __kmp_threads[target_gtid];
-  int status;
-
-#ifdef KMP_DEBUG
-  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
-#endif
-
-  KF_TRACE(30, ("__kmp_resume_template: T#%d wants to wake up T#%d enter\n",
-                gtid, target_gtid));
-
-  __kmp_suspend_initialize_thread(th);
-  __kmp_win32_mutex_lock(&th->th.th_suspend_mx);
-
-  if (!flag) { // coming from __kmp_null_resume_wrapper
-    flag = (C *)th->th.th_sleep_loc;
-  }
-
-  // First, check if the flag is null or its type has changed. If so, someone
-  // else woke it up.
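-  /* The suspend/resume handshake, in outline: the sleeping side publishes
-     its flag in th_sleep_loc and sets the flag's sleep bit before waiting;
-     the waking side, under th_suspend_mx, clears the sleep bit and only then
-     signals th_suspend_cv. A minimal sketch with hypothetical names,
-     assuming one atomic word per flag (an illustration of the protocol, not
-     this file's code):
-
-       // sleeper
-       lock(&mx);
-       old = atomic_fetch_or(&f, SLEEP);      // publish intent to sleep
-       while (!done(old) && (f & SLEEP))
-         cond_wait(&cv, &mx);                 // tolerate spurious wakeups
-       unlock(&mx);
-
-       // waker
-       lock(&mx);
-       old = atomic_fetch_and(&f, ~SLEEP);    // claim the wakeup
-       if (old & SLEEP)
-         cond_signal(&cv);                    // signal only if it was asleep
-       unlock(&mx);
-
-     The checks below guard against the races this leaves open: the flag may
-     be NULL (the thread never slept), or it may have been replaced by a flag
-     of a different type (someone else already woke the thread). */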
- if (!flag || flag->get_type() != flag->get_ptr_type()) { // get_ptr_type - // simply shows what - // flag was cast to - KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already " - "awake: flag's loc(%p)\n", - gtid, target_gtid, NULL)); - __kmp_win32_mutex_unlock(&th->th.th_suspend_mx); - return; - } else { - typename C::flag_t old_spin = flag->unset_sleeping(); - if (!flag->is_sleeping_val(old_spin)) { - KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already " - "awake: flag's loc(%p): %u => %u\n", - gtid, target_gtid, flag->get(), old_spin, *(flag->get()))); - __kmp_win32_mutex_unlock(&th->th.th_suspend_mx); - return; - } - } - TCW_PTR(th->th.th_sleep_loc, NULL); - KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset sleep " - "bit for flag's loc(%p)\n", - gtid, target_gtid, flag->get())); - - __kmp_win32_cond_signal(&th->th.th_suspend_cv); - __kmp_win32_mutex_unlock(&th->th.th_suspend_mx); - - KF_TRACE(30, ("__kmp_resume_template: T#%d exiting after signaling wake up" - " for T#%d\n", - gtid, target_gtid)); -} - -void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag) { - __kmp_resume_template(target_gtid, flag); -} -void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag) { - __kmp_resume_template(target_gtid, flag); -} -void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) { - __kmp_resume_template(target_gtid, flag); -} - -void __kmp_yield(int cond) { - if (cond) - Sleep(0); -} - -void __kmp_gtid_set_specific(int gtid) { - if (__kmp_init_gtid) { - KA_TRACE(50, ("__kmp_gtid_set_specific: T#%d key:%d\n", gtid, - __kmp_gtid_threadprivate_key)); - if (!TlsSetValue(__kmp_gtid_threadprivate_key, (LPVOID)(gtid + 1))) - KMP_FATAL(TLSSetValueFailed); - } else { - KA_TRACE(50, ("__kmp_gtid_set_specific: runtime shutdown, returning\n")); - } -} - -int __kmp_gtid_get_specific() { - int gtid; - if (!__kmp_init_gtid) { - KA_TRACE(50, ("__kmp_gtid_get_specific: runtime shutdown, returning " - "KMP_GTID_SHUTDOWN\n")); - return KMP_GTID_SHUTDOWN; - } - gtid = (int)(kmp_intptr_t)TlsGetValue(__kmp_gtid_threadprivate_key); - if (gtid == 0) { - gtid = KMP_GTID_DNE; - } else { - gtid--; - } - KA_TRACE(50, ("__kmp_gtid_get_specific: key:%d gtid:%d\n", - __kmp_gtid_threadprivate_key, gtid)); - return gtid; -} - -void __kmp_affinity_bind_thread(int proc) { - if (__kmp_num_proc_groups > 1) { - // Form the GROUP_AFFINITY struct directly, rather than filling - // out a bit vector and calling __kmp_set_system_affinity(). 
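-    /* Worked example of the math below, assuming 64-bit DWORD_PTR
-       (CHAR_BIT * sizeof(DWORD_PTR) == 64): for proc 70,
-         ga.Group = 70 / 64 = 1
-         ga.Mask  = 1ULL << (70 % 64) = 1ULL << 6 = 0x40
-       i.e. bit 6 set, selecting the 7th logical processor of the second
-       processor group. */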
- GROUP_AFFINITY ga; - KMP_DEBUG_ASSERT((proc >= 0) && (proc < (__kmp_num_proc_groups * CHAR_BIT * - sizeof(DWORD_PTR)))); - ga.Group = proc / (CHAR_BIT * sizeof(DWORD_PTR)); - ga.Mask = (unsigned long long)1 << (proc % (CHAR_BIT * sizeof(DWORD_PTR))); - ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0; - - KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL); - if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) { - DWORD error = GetLastError(); - if (__kmp_affinity_verbose) { // AC: continue silently if not verbose - kmp_msg_t err_code = KMP_ERR(error); - __kmp_msg(kmp_ms_warning, KMP_MSG(CantSetThreadAffMask), err_code, - __kmp_msg_null); - if (__kmp_generate_warnings == kmp_warnings_off) { - __kmp_str_free(&err_code.str); - } - } - } - } else { - kmp_affin_mask_t *mask; - KMP_CPU_ALLOC_ON_STACK(mask); - KMP_CPU_ZERO(mask); - KMP_CPU_SET(proc, mask); - __kmp_set_system_affinity(mask, TRUE); - KMP_CPU_FREE_FROM_STACK(mask); - } -} - -void __kmp_affinity_determine_capable(const char *env_var) { -// All versions of Windows* OS (since Win '95) support SetThreadAffinityMask(). - -#if KMP_GROUP_AFFINITY - KMP_AFFINITY_ENABLE(__kmp_num_proc_groups * sizeof(DWORD_PTR)); -#else - KMP_AFFINITY_ENABLE(sizeof(DWORD_PTR)); -#endif - - KA_TRACE(10, ("__kmp_affinity_determine_capable: " - "Windows* OS affinity interface functional (mask size = " - "%" KMP_SIZE_T_SPEC ").\n", - __kmp_affin_mask_size)); -} - -double __kmp_read_cpu_time(void) { - FILETIME CreationTime, ExitTime, KernelTime, UserTime; - int status; - double cpu_time; - - cpu_time = 0; - - status = GetProcessTimes(GetCurrentProcess(), &CreationTime, &ExitTime, - &KernelTime, &UserTime); - - if (status) { - double sec = 0; - - sec += KernelTime.dwHighDateTime; - sec += UserTime.dwHighDateTime; - - /* Shift left by 32 bits */ - sec *= (double)(1 << 16) * (double)(1 << 16); - - sec += KernelTime.dwLowDateTime; - sec += UserTime.dwLowDateTime; - - cpu_time += (sec * 100.0) / KMP_NSEC_PER_SEC; - } - - return cpu_time; -} - -int __kmp_read_system_info(struct kmp_sys_info *info) { - info->maxrss = 0; /* the maximum resident set size utilized (in kilobytes) */ - info->minflt = 0; /* the number of page faults serviced without any I/O */ - info->majflt = 0; /* the number of page faults serviced that required I/O */ - info->nswap = 0; // the number of times a process was "swapped" out of memory - info->inblock = 0; // the number of times the file system had to perform input - info->oublock = 0; // number of times the file system had to perform output - info->nvcsw = 0; /* the number of times a context switch was voluntarily */ - info->nivcsw = 0; /* the number of times a context switch was forced */ - - return 1; -} - -void __kmp_runtime_initialize(void) { - SYSTEM_INFO info; - kmp_str_buf_t path; - UINT path_size; - - if (__kmp_init_runtime) { - return; - } - -#if KMP_DYNAMIC_LIB - /* Pin dynamic library for the lifetime of application */ - { - // First, turn off error message boxes - UINT err_mode = SetErrorMode(SEM_FAILCRITICALERRORS); - HMODULE h; - BOOL ret = GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | - GET_MODULE_HANDLE_EX_FLAG_PIN, - (LPCTSTR)&__kmp_serial_initialize, &h); - KMP_DEBUG_ASSERT2(h && ret, "OpenMP RTL cannot find itself loaded"); - SetErrorMode(err_mode); // Restore error mode - KA_TRACE(10, ("__kmp_runtime_initialize: dynamic library pinned\n")); - } -#endif - - InitializeCriticalSection(&__kmp_win32_section); -#if USE_ITT_BUILD - __kmp_itt_system_object_created(&__kmp_win32_section, 
"Critical Section"); -#endif /* USE_ITT_BUILD */ - __kmp_initialize_system_tick(); - -#if (KMP_ARCH_X86 || KMP_ARCH_X86_64) - if (!__kmp_cpuinfo.initialized) { - __kmp_query_cpuid(&__kmp_cpuinfo); - } -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -/* Set up minimum number of threads to switch to TLS gtid */ -#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB - // Windows* OS, static library. - /* New thread may use stack space previously used by another thread, - currently terminated. On Windows* OS, in case of static linking, we do not - know the moment of thread termination, and our structures (__kmp_threads - and __kmp_root arrays) are still keep info about dead threads. This leads - to problem in __kmp_get_global_thread_id() function: it wrongly finds gtid - (by searching through stack addresses of all known threads) for - unregistered foreign tread. - - Setting __kmp_tls_gtid_min to 0 workarounds this problem: - __kmp_get_global_thread_id() does not search through stacks, but get gtid - from TLS immediately. - --ln - */ - __kmp_tls_gtid_min = 0; -#else - __kmp_tls_gtid_min = KMP_TLS_GTID_MIN; -#endif - - /* for the static library */ - if (!__kmp_gtid_threadprivate_key) { - __kmp_gtid_threadprivate_key = TlsAlloc(); - if (__kmp_gtid_threadprivate_key == TLS_OUT_OF_INDEXES) { - KMP_FATAL(TLSOutOfIndexes); - } - } - - // Load ntdll.dll. - /* Simple GetModuleHandle( "ntdll.dl" ) is not suitable due to security issue - (see http://www.microsoft.com/technet/security/advisory/2269637.mspx). We - have to specify full path to the library. */ - __kmp_str_buf_init(&path); - path_size = GetSystemDirectory(path.str, path.size); - KMP_DEBUG_ASSERT(path_size > 0); - if (path_size >= path.size) { - // Buffer is too short. Expand the buffer and try again. - __kmp_str_buf_reserve(&path, path_size); - path_size = GetSystemDirectory(path.str, path.size); - KMP_DEBUG_ASSERT(path_size > 0); - } - if (path_size > 0 && path_size < path.size) { - // Now we have system directory name in the buffer. - // Append backslash and name of dll to form full path, - path.used = path_size; - __kmp_str_buf_print(&path, "\\%s", "ntdll.dll"); - - // Now load ntdll using full path. - ntdll = GetModuleHandle(path.str); - } - - KMP_DEBUG_ASSERT(ntdll != NULL); - if (ntdll != NULL) { - NtQuerySystemInformation = (NtQuerySystemInformation_t)GetProcAddress( - ntdll, "NtQuerySystemInformation"); - } - KMP_DEBUG_ASSERT(NtQuerySystemInformation != NULL); - -#if KMP_GROUP_AFFINITY - // Load kernel32.dll. - // Same caveat - must use full system path name. - if (path_size > 0 && path_size < path.size) { - // Truncate the buffer back to just the system path length, - // discarding "\\ntdll.dll", and replacing it with "kernel32.dll". - path.used = path_size; - __kmp_str_buf_print(&path, "\\%s", "kernel32.dll"); - - // Load kernel32.dll using full path. - kernel32 = GetModuleHandle(path.str); - KA_TRACE(10, ("__kmp_runtime_initialize: kernel32.dll = %s\n", path.str)); - - // Load the function pointers to kernel32.dll routines - // that may or may not exist on this system. 
- if (kernel32 != NULL) { - __kmp_GetActiveProcessorCount = - (kmp_GetActiveProcessorCount_t)GetProcAddress( - kernel32, "GetActiveProcessorCount"); - __kmp_GetActiveProcessorGroupCount = - (kmp_GetActiveProcessorGroupCount_t)GetProcAddress( - kernel32, "GetActiveProcessorGroupCount"); - __kmp_GetThreadGroupAffinity = - (kmp_GetThreadGroupAffinity_t)GetProcAddress( - kernel32, "GetThreadGroupAffinity"); - __kmp_SetThreadGroupAffinity = - (kmp_SetThreadGroupAffinity_t)GetProcAddress( - kernel32, "SetThreadGroupAffinity"); - - KA_TRACE(10, ("__kmp_runtime_initialize: __kmp_GetActiveProcessorCount" - " = %p\n", - __kmp_GetActiveProcessorCount)); - KA_TRACE(10, ("__kmp_runtime_initialize: " - "__kmp_GetActiveProcessorGroupCount = %p\n", - __kmp_GetActiveProcessorGroupCount)); - KA_TRACE(10, ("__kmp_runtime_initialize:__kmp_GetThreadGroupAffinity" - " = %p\n", - __kmp_GetThreadGroupAffinity)); - KA_TRACE(10, ("__kmp_runtime_initialize: __kmp_SetThreadGroupAffinity" - " = %p\n", - __kmp_SetThreadGroupAffinity)); - KA_TRACE(10, ("__kmp_runtime_initialize: sizeof(kmp_affin_mask_t) = %d\n", - sizeof(kmp_affin_mask_t))); - - // See if group affinity is supported on this system. - // If so, calculate the #groups and #procs. - // - // Group affinity was introduced with Windows* 7 OS and - // Windows* Server 2008 R2 OS. - if ((__kmp_GetActiveProcessorCount != NULL) && - (__kmp_GetActiveProcessorGroupCount != NULL) && - (__kmp_GetThreadGroupAffinity != NULL) && - (__kmp_SetThreadGroupAffinity != NULL) && - ((__kmp_num_proc_groups = __kmp_GetActiveProcessorGroupCount()) > - 1)) { - // Calculate the total number of active OS procs. - int i; - - KA_TRACE(10, ("__kmp_runtime_initialize: %d processor groups" - " detected\n", - __kmp_num_proc_groups)); - - __kmp_xproc = 0; - - for (i = 0; i < __kmp_num_proc_groups; i++) { - DWORD size = __kmp_GetActiveProcessorCount(i); - __kmp_xproc += size; - KA_TRACE(10, ("__kmp_runtime_initialize: proc group %d size = %d\n", - i, size)); - } - } else { - KA_TRACE(10, ("__kmp_runtime_initialize: %d processor groups" - " detected\n", - __kmp_num_proc_groups)); - } - } - } - if (__kmp_num_proc_groups <= 1) { - GetSystemInfo(&info); - __kmp_xproc = info.dwNumberOfProcessors; - } -#else - GetSystemInfo(&info); - __kmp_xproc = info.dwNumberOfProcessors; -#endif /* KMP_GROUP_AFFINITY */ - - // If the OS said there were 0 procs, take a guess and use a value of 2. - // This is done for Linux* OS, also. Do we need error / warning? 
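-  /* For example, on a machine with two processor groups of 64 logical
-     processors each, the group-affinity path above sums
-     __kmp_GetActiveProcessorCount(0) == 64 and
-     __kmp_GetActiveProcessorCount(1) == 64, giving __kmp_xproc == 128,
-     a count that GetSystemInfo() alone (which reports only the calling
-     thread's group) would not return. */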
- if (__kmp_xproc <= 0) { - __kmp_xproc = 2; - } - - KA_TRACE(5, - ("__kmp_runtime_initialize: total processors = %d\n", __kmp_xproc)); - - __kmp_str_buf_free(&path); - -#if USE_ITT_BUILD - __kmp_itt_initialize(); -#endif /* USE_ITT_BUILD */ - - __kmp_init_runtime = TRUE; -} // __kmp_runtime_initialize - -void __kmp_runtime_destroy(void) { - if (!__kmp_init_runtime) { - return; - } - -#if USE_ITT_BUILD - __kmp_itt_destroy(); -#endif /* USE_ITT_BUILD */ - - /* we can't DeleteCriticalsection( & __kmp_win32_section ); */ - /* due to the KX_TRACE() commands */ - KA_TRACE(40, ("__kmp_runtime_destroy\n")); - - if (__kmp_gtid_threadprivate_key) { - TlsFree(__kmp_gtid_threadprivate_key); - __kmp_gtid_threadprivate_key = 0; - } - - __kmp_affinity_uninitialize(); - DeleteCriticalSection(&__kmp_win32_section); - - ntdll = NULL; - NtQuerySystemInformation = NULL; - -#if KMP_ARCH_X86_64 - kernel32 = NULL; - __kmp_GetActiveProcessorCount = NULL; - __kmp_GetActiveProcessorGroupCount = NULL; - __kmp_GetThreadGroupAffinity = NULL; - __kmp_SetThreadGroupAffinity = NULL; -#endif // KMP_ARCH_X86_64 - - __kmp_init_runtime = FALSE; -} - -void __kmp_terminate_thread(int gtid) { - kmp_info_t *th = __kmp_threads[gtid]; - - if (!th) - return; - - KA_TRACE(10, ("__kmp_terminate_thread: kill (%d)\n", gtid)); - - if (TerminateThread(th->th.th_info.ds.ds_thread, (DWORD)-1) == FALSE) { - /* It's OK, the thread may have exited already */ - } - __kmp_free_handle(th->th.th_info.ds.ds_thread); -} - -void __kmp_clear_system_time(void) { - BOOL status; - LARGE_INTEGER time; - status = QueryPerformanceCounter(&time); - __kmp_win32_time = (kmp_int64)time.QuadPart; -} - -void __kmp_initialize_system_tick(void) { - { - BOOL status; - LARGE_INTEGER freq; - - status = QueryPerformanceFrequency(&freq); - if (!status) { - DWORD error = GetLastError(); - __kmp_fatal(KMP_MSG(FunctionError, "QueryPerformanceFrequency()"), - KMP_ERR(error), __kmp_msg_null); - - } else { - __kmp_win32_tick = ((double)1.0) / (double)freq.QuadPart; - } - } -} - -/* Calculate the elapsed wall clock time for the user */ - -void __kmp_elapsed(double *t) { - BOOL status; - LARGE_INTEGER now; - status = QueryPerformanceCounter(&now); - *t = ((double)now.QuadPart) * __kmp_win32_tick; -} - -/* Calculate the elapsed wall clock tick for the user */ - -void __kmp_elapsed_tick(double *t) { *t = __kmp_win32_tick; } - -void __kmp_read_system_time(double *delta) { - if (delta != NULL) { - BOOL status; - LARGE_INTEGER now; - - status = QueryPerformanceCounter(&now); - - *delta = ((double)(((kmp_int64)now.QuadPart) - __kmp_win32_time)) * - __kmp_win32_tick; - } -} - -/* Return the current time stamp in nsec */ -kmp_uint64 __kmp_now_nsec() { - LARGE_INTEGER now; - QueryPerformanceCounter(&now); - return 1e9 * __kmp_win32_tick * now.QuadPart; -} - -extern "C" -void *__stdcall __kmp_launch_worker(void *arg) { - volatile void *stack_data; - void *exit_val; - void *padding = 0; - kmp_info_t *this_thr = (kmp_info_t *)arg; - int gtid; - - gtid = this_thr->th.th_info.ds.ds_gtid; - __kmp_gtid_set_specific(gtid); -#ifdef KMP_TDATA_GTID -#error "This define causes problems with LoadLibrary() + declspec(thread) " \ - "on Windows* OS. 
See CQ50564, tests kmp_load_library*.c and this MSDN " \ - "reference: http://support.microsoft.com/kb/118816" -//__kmp_gtid = gtid; -#endif - -#if USE_ITT_BUILD - __kmp_itt_thread_name(gtid); -#endif /* USE_ITT_BUILD */ - - __kmp_affinity_set_init_mask(gtid, FALSE); - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - // Set FP control regs to be a copy of the parallel initialization thread's. - __kmp_clear_x87_fpu_status_word(); - __kmp_load_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word); - __kmp_load_mxcsr(&__kmp_init_mxcsr); -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - - if (__kmp_stkoffset > 0 && gtid > 0) { - padding = KMP_ALLOCA(gtid * __kmp_stkoffset); - } - - KMP_FSYNC_RELEASING(&this_thr->th.th_info.ds.ds_alive); - this_thr->th.th_info.ds.ds_thread_id = GetCurrentThreadId(); - TCW_4(this_thr->th.th_info.ds.ds_alive, TRUE); - - if (TCR_4(__kmp_gtid_mode) < - 2) { // check stack only if it is used to get gtid - TCW_PTR(this_thr->th.th_info.ds.ds_stackbase, &stack_data); - KMP_ASSERT(this_thr->th.th_info.ds.ds_stackgrow == FALSE); - __kmp_check_stack_overlap(this_thr); - } - KMP_MB(); - exit_val = __kmp_launch_thread(this_thr); - KMP_FSYNC_RELEASING(&this_thr->th.th_info.ds.ds_alive); - TCW_4(this_thr->th.th_info.ds.ds_alive, FALSE); - KMP_MB(); - return exit_val; -} - -#if KMP_USE_MONITOR -/* The monitor thread controls all of the threads in the complex */ - -void *__stdcall __kmp_launch_monitor(void *arg) { - DWORD wait_status; - kmp_thread_t monitor; - int status; - int interval; - kmp_info_t *this_thr = (kmp_info_t *)arg; - - KMP_DEBUG_ASSERT(__kmp_init_monitor); - TCW_4(__kmp_init_monitor, 2); // AC: Signal library that monitor has started - // TODO: hide "2" in enum (like {true,false,started}) - this_thr->th.th_info.ds.ds_thread_id = GetCurrentThreadId(); - TCW_4(this_thr->th.th_info.ds.ds_alive, TRUE); - - KMP_MB(); /* Flush all pending memory write invalidates. */ - KA_TRACE(10, ("__kmp_launch_monitor: launched\n")); - - monitor = GetCurrentThread(); - - /* set thread priority */ - status = SetThreadPriority(monitor, THREAD_PRIORITY_HIGHEST); - if (!status) { - DWORD error = GetLastError(); - __kmp_fatal(KMP_MSG(CantSetThreadPriority), KMP_ERR(error), __kmp_msg_null); - } - - /* register us as monitor */ - __kmp_gtid_set_specific(KMP_GTID_MONITOR); -#ifdef KMP_TDATA_GTID -#error "This define causes problems with LoadLibrary() + declspec(thread) " \ - "on Windows* OS. See CQ50564, tests kmp_load_library*.c and this MSDN " \ - "reference: http://support.microsoft.com/kb/118816" -//__kmp_gtid = KMP_GTID_MONITOR; -#endif - -#if USE_ITT_BUILD - __kmp_itt_thread_ignore(); // Instruct Intel(R) Threading Tools to ignore -// monitor thread. -#endif /* USE_ITT_BUILD */ - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - interval = (1000 / __kmp_monitor_wakeups); /* in milliseconds */ - - while (!TCR_4(__kmp_global.g.g_done)) { - /* This thread monitors the state of the system */ - - KA_TRACE(15, ("__kmp_launch_monitor: update\n")); - - wait_status = WaitForSingleObject(__kmp_monitor_ev, interval); - - if (wait_status == WAIT_TIMEOUT) { - TCW_4(__kmp_global.g.g_time.dt.t_value, - TCR_4(__kmp_global.g.g_time.dt.t_value) + 1); - } - - KMP_MB(); /* Flush all pending memory write invalidates. 
*/ - } - - KA_TRACE(10, ("__kmp_launch_monitor: finished\n")); - - status = SetThreadPriority(monitor, THREAD_PRIORITY_NORMAL); - if (!status) { - DWORD error = GetLastError(); - __kmp_fatal(KMP_MSG(CantSetThreadPriority), KMP_ERR(error), __kmp_msg_null); - } - - if (__kmp_global.g.g_abort != 0) { - /* now we need to terminate the worker threads */ - /* the value of t_abort is the signal we caught */ - int gtid; - - KA_TRACE(10, ("__kmp_launch_monitor: terminate sig=%d\n", - (__kmp_global.g.g_abort))); - - /* terminate the OpenMP worker threads */ - /* TODO this is not valid for sibling threads!! - * the uber master might not be 0 anymore.. */ - for (gtid = 1; gtid < __kmp_threads_capacity; ++gtid) - __kmp_terminate_thread(gtid); - - __kmp_cleanup(); - - Sleep(0); - - KA_TRACE(10, - ("__kmp_launch_monitor: raise sig=%d\n", __kmp_global.g.g_abort)); - - if (__kmp_global.g.g_abort > 0) { - raise(__kmp_global.g.g_abort); - } - } - - TCW_4(this_thr->th.th_info.ds.ds_alive, FALSE); - - KMP_MB(); - return arg; -} -#endif - -void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size) { - kmp_thread_t handle; - DWORD idThread; - - KA_TRACE(10, ("__kmp_create_worker: try to create thread (%d)\n", gtid)); - - th->th.th_info.ds.ds_gtid = gtid; - - if (KMP_UBER_GTID(gtid)) { - int stack_data; - - /* TODO: GetCurrentThread() returns a pseudo-handle that is unsuitable for - other threads to use. Is it appropriate to just use GetCurrentThread? - When should we close this handle? When unregistering the root? */ - { - BOOL rc; - rc = DuplicateHandle(GetCurrentProcess(), GetCurrentThread(), - GetCurrentProcess(), &th->th.th_info.ds.ds_thread, 0, - FALSE, DUPLICATE_SAME_ACCESS); - KMP_ASSERT(rc); - KA_TRACE(10, (" __kmp_create_worker: ROOT Handle duplicated, th = %p, " - "handle = %" KMP_UINTPTR_SPEC "\n", - (LPVOID)th, th->th.th_info.ds.ds_thread)); - th->th.th_info.ds.ds_thread_id = GetCurrentThreadId(); - } - if (TCR_4(__kmp_gtid_mode) < 2) { // check stack only if used to get gtid - /* we will dynamically update the stack range if gtid_mode == 1 */ - TCW_PTR(th->th.th_info.ds.ds_stackbase, &stack_data); - TCW_PTR(th->th.th_info.ds.ds_stacksize, 0); - TCW_4(th->th.th_info.ds.ds_stackgrow, TRUE); - __kmp_check_stack_overlap(th); - } - } else { - KMP_MB(); /* Flush all pending memory write invalidates. */ - - /* Set stack size for this thread now. 
*/ - KA_TRACE(10, - ("__kmp_create_worker: stack_size = %" KMP_SIZE_T_SPEC " bytes\n", - stack_size)); - - stack_size += gtid * __kmp_stkoffset; - - TCW_PTR(th->th.th_info.ds.ds_stacksize, stack_size); - TCW_4(th->th.th_info.ds.ds_stackgrow, FALSE); - - KA_TRACE(10, - ("__kmp_create_worker: (before) stack_size = %" KMP_SIZE_T_SPEC - " bytes, &__kmp_launch_worker = %p, th = %p, &idThread = %p\n", - (SIZE_T)stack_size, (LPTHREAD_START_ROUTINE)&__kmp_launch_worker, - (LPVOID)th, &idThread)); - - handle = CreateThread( - NULL, (SIZE_T)stack_size, (LPTHREAD_START_ROUTINE)__kmp_launch_worker, - (LPVOID)th, STACK_SIZE_PARAM_IS_A_RESERVATION, &idThread); - - KA_TRACE(10, - ("__kmp_create_worker: (after) stack_size = %" KMP_SIZE_T_SPEC - " bytes, &__kmp_launch_worker = %p, th = %p, " - "idThread = %u, handle = %" KMP_UINTPTR_SPEC "\n", - (SIZE_T)stack_size, (LPTHREAD_START_ROUTINE)&__kmp_launch_worker, - (LPVOID)th, idThread, handle)); - - if (handle == 0) { - DWORD error = GetLastError(); - __kmp_fatal(KMP_MSG(CantCreateThread), KMP_ERR(error), __kmp_msg_null); - } else { - th->th.th_info.ds.ds_thread = handle; - } - - KMP_MB(); /* Flush all pending memory write invalidates. */ - } - - KA_TRACE(10, ("__kmp_create_worker: done creating thread (%d)\n", gtid)); -} - -int __kmp_still_running(kmp_info_t *th) { - return (WAIT_TIMEOUT == WaitForSingleObject(th->th.th_info.ds.ds_thread, 0)); -} - -#if KMP_USE_MONITOR -void __kmp_create_monitor(kmp_info_t *th) { - kmp_thread_t handle; - DWORD idThread; - int ideal, new_ideal; - - if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) { - // We don't need monitor thread in case of MAX_BLOCKTIME - KA_TRACE(10, ("__kmp_create_monitor: skipping monitor thread because of " - "MAX blocktime\n")); - th->th.th_info.ds.ds_tid = 0; // this makes reap_monitor no-op - th->th.th_info.ds.ds_gtid = 0; - TCW_4(__kmp_init_monitor, 2); // Signal to stop waiting for monitor creation - return; - } - KA_TRACE(10, ("__kmp_create_monitor: try to create monitor\n")); - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - __kmp_monitor_ev = CreateEvent(NULL, TRUE, FALSE, NULL); - if (__kmp_monitor_ev == NULL) { - DWORD error = GetLastError(); - __kmp_fatal(KMP_MSG(CantCreateEvent), KMP_ERR(error), __kmp_msg_null); - } -#if USE_ITT_BUILD - __kmp_itt_system_object_created(__kmp_monitor_ev, "Event"); -#endif /* USE_ITT_BUILD */ - - th->th.th_info.ds.ds_tid = KMP_GTID_MONITOR; - th->th.th_info.ds.ds_gtid = KMP_GTID_MONITOR; - - // FIXME - on Windows* OS, if __kmp_monitor_stksize = 0, figure out how - // to automatically expand stacksize based on CreateThread error code. - if (__kmp_monitor_stksize == 0) { - __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE; - } - if (__kmp_monitor_stksize < __kmp_sys_min_stksize) { - __kmp_monitor_stksize = __kmp_sys_min_stksize; - } - - KA_TRACE(10, ("__kmp_create_monitor: requested stacksize = %d bytes\n", - (int)__kmp_monitor_stksize)); - - TCW_4(__kmp_global.g.g_time.dt.t_value, 0); - - handle = - CreateThread(NULL, (SIZE_T)__kmp_monitor_stksize, - (LPTHREAD_START_ROUTINE)__kmp_launch_monitor, (LPVOID)th, - STACK_SIZE_PARAM_IS_A_RESERVATION, &idThread); - if (handle == 0) { - DWORD error = GetLastError(); - __kmp_fatal(KMP_MSG(CantCreateThread), KMP_ERR(error), __kmp_msg_null); - } else - th->th.th_info.ds.ds_thread = handle; - - KMP_MB(); /* Flush all pending memory write invalidates. 
*/ - - KA_TRACE(10, ("__kmp_create_monitor: monitor created %p\n", - (void *)th->th.th_info.ds.ds_thread)); -} -#endif - -/* Check to see if thread is still alive. - NOTE: The ExitProcess(code) system call causes all threads to Terminate - with a exit_val = code. Because of this we can not rely on exit_val having - any particular value. So this routine may return STILL_ALIVE in exit_val - even after the thread is dead. */ - -int __kmp_is_thread_alive(kmp_info_t *th, DWORD *exit_val) { - DWORD rc; - rc = GetExitCodeThread(th->th.th_info.ds.ds_thread, exit_val); - if (rc == 0) { - DWORD error = GetLastError(); - __kmp_fatal(KMP_MSG(FunctionError, "GetExitCodeThread()"), KMP_ERR(error), - __kmp_msg_null); - } - return (*exit_val == STILL_ACTIVE); -} - -void __kmp_exit_thread(int exit_status) { - ExitThread(exit_status); -} // __kmp_exit_thread - -// This is a common part for both __kmp_reap_worker() and __kmp_reap_monitor(). -static void __kmp_reap_common(kmp_info_t *th) { - DWORD exit_val; - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - KA_TRACE( - 10, ("__kmp_reap_common: try to reap (%d)\n", th->th.th_info.ds.ds_gtid)); - - /* 2006-10-19: - There are two opposite situations: - 1. Windows* OS keep thread alive after it resets ds_alive flag and - exits from thread function. (For example, see C70770/Q394281 "unloading of - dll based on OMP is very slow".) - 2. Windows* OS may kill thread before it resets ds_alive flag. - - Right solution seems to be waiting for *either* thread termination *or* - ds_alive resetting. */ - { - // TODO: This code is very similar to KMP_WAIT_YIELD. Need to generalize - // KMP_WAIT_YIELD to cover this usage also. - void *obj = NULL; - kmp_uint32 spins; -#if USE_ITT_BUILD - KMP_FSYNC_SPIN_INIT(obj, (void *)&th->th.th_info.ds.ds_alive); -#endif /* USE_ITT_BUILD */ - KMP_INIT_YIELD(spins); - do { -#if USE_ITT_BUILD - KMP_FSYNC_SPIN_PREPARE(obj); -#endif /* USE_ITT_BUILD */ - __kmp_is_thread_alive(th, &exit_val); - KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc); - KMP_YIELD_SPIN(spins); - } while (exit_val == STILL_ACTIVE && TCR_4(th->th.th_info.ds.ds_alive)); -#if USE_ITT_BUILD - if (exit_val == STILL_ACTIVE) { - KMP_FSYNC_CANCEL(obj); - } else { - KMP_FSYNC_SPIN_ACQUIRED(obj); - } -#endif /* USE_ITT_BUILD */ - } - - __kmp_free_handle(th->th.th_info.ds.ds_thread); - - /* NOTE: The ExitProcess(code) system call causes all threads to Terminate - with a exit_val = code. Because of this we can not rely on exit_val having - any particular value. */ - if (exit_val == STILL_ACTIVE) { - KA_TRACE(1, ("__kmp_reap_common: thread still active.\n")); - } else if ((void *)exit_val != (void *)th) { - KA_TRACE(1, ("__kmp_reap_common: ExitProcess / TerminateThread used?\n")); - } - - KA_TRACE(10, - ("__kmp_reap_common: done reaping (%d), handle = %" KMP_UINTPTR_SPEC - "\n", - th->th.th_info.ds.ds_gtid, th->th.th_info.ds.ds_thread)); - - th->th.th_info.ds.ds_thread = 0; - th->th.th_info.ds.ds_tid = KMP_GTID_DNE; - th->th.th_info.ds.ds_gtid = KMP_GTID_DNE; - th->th.th_info.ds.ds_thread_id = 0; - - KMP_MB(); /* Flush all pending memory write invalidates. */ -} - -#if KMP_USE_MONITOR -void __kmp_reap_monitor(kmp_info_t *th) { - int status; - - KA_TRACE(10, ("__kmp_reap_monitor: try to reap %p\n", - (void *)th->th.th_info.ds.ds_thread)); - - // If monitor has been created, its tid and gtid should be KMP_GTID_MONITOR. - // If both tid and gtid are 0, it means the monitor did not ever start. - // If both tid and gtid are KMP_GTID_DNE, the monitor has been shut down. 
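-  /* Note on __kmp_reap_common() above: it cannot simply block on the thread
-     handle, because Windows* OS may either keep the thread alive after it
-     clears ds_alive, or kill it before ds_alive is cleared. Hence the
-     dual-condition poll, in outline (an illustration, not this file's
-     code):
-
-       DWORD exit_val;
-       do {
-         GetExitCodeThread(handle, &exit_val); // STILL_ACTIVE while running
-         yield();
-       } while (exit_val == STILL_ACTIVE && TCR_4(ds_alive));
-  */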
- KMP_DEBUG_ASSERT(th->th.th_info.ds.ds_tid == th->th.th_info.ds.ds_gtid); - if (th->th.th_info.ds.ds_gtid != KMP_GTID_MONITOR) { - KA_TRACE(10, ("__kmp_reap_monitor: monitor did not start, returning\n")); - return; - } - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - status = SetEvent(__kmp_monitor_ev); - if (status == FALSE) { - DWORD error = GetLastError(); - __kmp_fatal(KMP_MSG(CantSetEvent), KMP_ERR(error), __kmp_msg_null); - } - KA_TRACE(10, ("__kmp_reap_monitor: reaping thread (%d)\n", - th->th.th_info.ds.ds_gtid)); - __kmp_reap_common(th); - - __kmp_free_handle(__kmp_monitor_ev); - - KMP_MB(); /* Flush all pending memory write invalidates. */ -} -#endif - -void __kmp_reap_worker(kmp_info_t *th) { - KA_TRACE(10, ("__kmp_reap_worker: reaping thread (%d)\n", - th->th.th_info.ds.ds_gtid)); - __kmp_reap_common(th); -} - -#if KMP_HANDLE_SIGNALS - -static void __kmp_team_handler(int signo) { - if (__kmp_global.g.g_abort == 0) { - // Stage 1 signal handler, let's shut down all of the threads. - if (__kmp_debug_buf) { - __kmp_dump_debug_buffer(); - } - KMP_MB(); // Flush all pending memory write invalidates. - TCW_4(__kmp_global.g.g_abort, signo); - KMP_MB(); // Flush all pending memory write invalidates. - TCW_4(__kmp_global.g.g_done, TRUE); - KMP_MB(); // Flush all pending memory write invalidates. - } -} // __kmp_team_handler - -static sig_func_t __kmp_signal(int signum, sig_func_t handler) { - sig_func_t old = signal(signum, handler); - if (old == SIG_ERR) { - int error = errno; - __kmp_fatal(KMP_MSG(FunctionError, "signal"), KMP_ERR(error), - __kmp_msg_null); - } - return old; -} - -static void __kmp_install_one_handler(int sig, sig_func_t handler, - int parallel_init) { - sig_func_t old; - KMP_MB(); /* Flush all pending memory write invalidates. */ - KB_TRACE(60, ("__kmp_install_one_handler: called: sig=%d\n", sig)); - if (parallel_init) { - old = __kmp_signal(sig, handler); - // SIG_DFL on Windows* OS in NULL or 0. - if (old == __kmp_sighldrs[sig]) { - __kmp_siginstalled[sig] = 1; - } else { // Restore/keep user's handler if one previously installed. - old = __kmp_signal(sig, old); - } - } else { - // Save initial/system signal handlers to see if user handlers installed. - // 2009-09-23: It is a dead code. On Windows* OS __kmp_install_signals - // called once with parallel_init == TRUE. - old = __kmp_signal(sig, SIG_DFL); - __kmp_sighldrs[sig] = old; - __kmp_signal(sig, old); - } - KMP_MB(); /* Flush all pending memory write invalidates. */ -} // __kmp_install_one_handler - -static void __kmp_remove_one_handler(int sig) { - if (__kmp_siginstalled[sig]) { - sig_func_t old; - KMP_MB(); // Flush all pending memory write invalidates. - KB_TRACE(60, ("__kmp_remove_one_handler: called: sig=%d\n", sig)); - old = __kmp_signal(sig, __kmp_sighldrs[sig]); - if (old != __kmp_team_handler) { - KB_TRACE(10, ("__kmp_remove_one_handler: oops, not our handler, " - "restoring: sig=%d\n", - sig)); - old = __kmp_signal(sig, old); - } - __kmp_sighldrs[sig] = NULL; - __kmp_siginstalled[sig] = 0; - KMP_MB(); // Flush all pending memory write invalidates. 
-  }
-} // __kmp_remove_one_handler
-
-void __kmp_install_signals(int parallel_init) {
-  KB_TRACE(10, ("__kmp_install_signals: called\n"));
-  if (!__kmp_handle_signals) {
-    KB_TRACE(10, ("__kmp_install_signals: KMP_HANDLE_SIGNALS is false - "
-                  "handlers not installed\n"));
-    return;
-  }
-  __kmp_install_one_handler(SIGINT, __kmp_team_handler, parallel_init);
-  __kmp_install_one_handler(SIGILL, __kmp_team_handler, parallel_init);
-  __kmp_install_one_handler(SIGABRT, __kmp_team_handler, parallel_init);
-  __kmp_install_one_handler(SIGFPE, __kmp_team_handler, parallel_init);
-  __kmp_install_one_handler(SIGSEGV, __kmp_team_handler, parallel_init);
-  __kmp_install_one_handler(SIGTERM, __kmp_team_handler, parallel_init);
-} // __kmp_install_signals
-
-void __kmp_remove_signals(void) {
-  int sig;
-  KB_TRACE(10, ("__kmp_remove_signals: called\n"));
-  for (sig = 1; sig < NSIG; ++sig) {
-    __kmp_remove_one_handler(sig);
-  }
-} // __kmp_remove_signals
-
-#endif // KMP_HANDLE_SIGNALS
-
-/* Put the thread to sleep for a time period. */
-void __kmp_thread_sleep(int millis) {
-  DWORD status;
-
-  status = SleepEx((DWORD)millis, FALSE);
-  if (status) {
-    DWORD error = GetLastError();
-    __kmp_fatal(KMP_MSG(FunctionError, "SleepEx()"), KMP_ERR(error),
-                __kmp_msg_null);
-  }
-}
-
-// Determine whether the given address is mapped into the current address
-// space.
-int __kmp_is_address_mapped(void *addr) {
-  DWORD status;
-  MEMORY_BASIC_INFORMATION lpBuffer;
-  SIZE_T dwLength;
-
-  dwLength = sizeof(MEMORY_BASIC_INFORMATION);
-
-  status = VirtualQuery(addr, &lpBuffer, dwLength);
-
-  return !(((lpBuffer.State == MEM_RESERVE) || (lpBuffer.State == MEM_FREE)) ||
-           ((lpBuffer.Protect == PAGE_NOACCESS) ||
-            (lpBuffer.Protect == PAGE_EXECUTE)));
-}
-
-kmp_uint64 __kmp_hardware_timestamp(void) {
-  kmp_uint64 r = 0;
-
-  QueryPerformanceCounter((LARGE_INTEGER *)&r);
-  return r;
-}
-
-/* Free the handle and check the error code. */
-void __kmp_free_handle(kmp_thread_t tHandle) {
-  /* Also called with a parameter of type HANDLE; this assumes kmp_thread_t
-     is defined as HANDLE. */
-  BOOL rc;
-  rc = CloseHandle(tHandle);
-  if (!rc) {
-    DWORD error = GetLastError();
-    __kmp_fatal(KMP_MSG(CantCloseHandle), KMP_ERR(error), __kmp_msg_null);
-  }
-}
-
-int __kmp_get_load_balance(int max) {
-  static ULONG glb_buff_size = 100 * 1024;
-
-  // Saved count of the running threads for the thread balance algorithm.
-  static int glb_running_threads = 0;
-  static double glb_call_time = 0; /* Thread balance algorithm call time */
-
-  int running_threads = 0; // Number of running threads in the system.
-  NTSTATUS status = 0;
-  ULONG buff_size = 0;
-  ULONG info_size = 0;
-  void *buffer = NULL;
-  PSYSTEM_PROCESS_INFORMATION spi = NULL;
-  int first_time = 1;
-
-  double call_time = 0.0; // start, finish;
-
-  __kmp_elapsed(&call_time);
-
-  if (glb_call_time &&
-      (call_time - glb_call_time < __kmp_load_balance_interval)) {
-    running_threads = glb_running_threads;
-    goto finish;
-  }
-  glb_call_time = call_time;
-
-  // Do not spend time on running the algorithm if we have a permanent error.
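-  /* The query below uses the classic NtQuerySystemInformation() sizing
-     loop: the call keeps failing with STATUS_INFO_LENGTH_MISMATCH until the
-     caller supplies a large enough buffer, so the buffer is grown and the
-     call retried. In outline (hypothetical names, error handling elided,
-     not this file's code):
-
-       ULONG size = initial_guess, needed = 0;
-       void *buf = NULL;
-       NTSTATUS st;
-       do {
-         buf = realloc(buf, size);
-         st = NtQuerySystemInformation(SystemProcessInformation, buf, size,
-                                       &needed);
-         size *= 2; // grow geometrically for the next attempt
-       } while (st == STATUS_INFO_LENGTH_MISMATCH);
-  */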
- if (NtQuerySystemInformation == NULL) { - running_threads = -1; - goto finish; - } - - if (max <= 0) { - max = INT_MAX; - } - - do { - - if (first_time) { - buff_size = glb_buff_size; - } else { - buff_size = 2 * buff_size; - } - - buffer = KMP_INTERNAL_REALLOC(buffer, buff_size); - if (buffer == NULL) { - running_threads = -1; - goto finish; - } - status = NtQuerySystemInformation(SystemProcessInformation, buffer, - buff_size, &info_size); - first_time = 0; - - } while (status == STATUS_INFO_LENGTH_MISMATCH); - glb_buff_size = buff_size; - -#define CHECK(cond) \ - { \ - KMP_DEBUG_ASSERT(cond); \ - if (!(cond)) { \ - running_threads = -1; \ - goto finish; \ - } \ - } - - CHECK(buff_size >= info_size); - spi = PSYSTEM_PROCESS_INFORMATION(buffer); - for (;;) { - ptrdiff_t offset = uintptr_t(spi) - uintptr_t(buffer); - CHECK(0 <= offset && - offset + sizeof(SYSTEM_PROCESS_INFORMATION) < info_size); - HANDLE pid = spi->ProcessId; - ULONG num = spi->NumberOfThreads; - CHECK(num >= 1); - size_t spi_size = - sizeof(SYSTEM_PROCESS_INFORMATION) + sizeof(SYSTEM_THREAD) * (num - 1); - CHECK(offset + spi_size < - info_size); // Make sure process info record fits the buffer. - if (spi->NextEntryOffset != 0) { - CHECK(spi_size <= - spi->NextEntryOffset); // And do not overlap with the next record. - } - // pid == 0 corresponds to the System Idle Process. It always has running - // threads on all cores. So, we don't consider the running threads of this - // process. - if (pid != 0) { - for (int i = 0; i < num; ++i) { - THREAD_STATE state = spi->Threads[i].State; - // Count threads that have Ready or Running state. - // !!! TODO: Why comment does not match the code??? - if (state == StateRunning) { - ++running_threads; - // Stop counting running threads if the number is already greater than - // the number of available cores - if (running_threads >= max) { - goto finish; - } - } - } - } - if (spi->NextEntryOffset == 0) { - break; - } - spi = PSYSTEM_PROCESS_INFORMATION(uintptr_t(spi) + spi->NextEntryOffset); - } - -#undef CHECK - -finish: // Clean up and exit. - - if (buffer != NULL) { - KMP_INTERNAL_FREE(buffer); - } - - glb_running_threads = running_threads; - - return running_threads; -} //__kmp_get_load_balance() Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/z_Windows_NT_util.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_csupport.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_csupport.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_csupport.cpp (nonexistent) @@ -1,4164 +0,0 @@ -/* - * kmp_csupport.cpp -- kfront linkage support for OpenMP. - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-//
-//===----------------------------------------------------------------------===//
-
-#define __KMP_IMP
-#include "omp.h" /* extern "C" declarations of user-visible routines */
-#include "kmp.h"
-#include "kmp_error.h"
-#include "kmp_i18n.h"
-#include "kmp_itt.h"
-#include "kmp_lock.h"
-#include "kmp_stats.h"
-
-#if OMPT_SUPPORT
-#include "ompt-specific.h"
-#endif
-
-#define MAX_MESSAGE 512
-
-// flags will be used in the future, e.g. to implement openmp_strict library
-// restrictions
-
-/*!
- * @ingroup STARTUP_SHUTDOWN
- * @param loc in source location information
- * @param flags in for future use (currently ignored)
- *
- * Initialize the runtime library. This call is optional; if it is not made
- * then it will be implicitly called by attempts to use other library
- * functions.
- */
-void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
-  // By default __kmpc_begin() is a no-op.
-  char *env;
-  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
-      __kmp_str_match_true(env)) {
-    __kmp_middle_initialize();
-    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
-  } else if (__kmp_ignore_mppbeg() == FALSE) {
-    // By default __kmp_ignore_mppbeg() returns TRUE.
-    __kmp_internal_begin();
-    KC_TRACE(10, ("__kmpc_begin: called\n"));
-  }
-}
-
-/*!
- * @ingroup STARTUP_SHUTDOWN
- * @param loc source location information
- *
- * Shut down the runtime library. This is also optional, and even if called
- * will not do anything unless the `KMP_IGNORE_MPPEND` environment variable
- * is set to zero.
- */
-void __kmpc_end(ident_t *loc) {
-  // By default, __kmp_ignore_mppend() returns TRUE, which makes the
-  // __kmpc_end() call a no-op. However, this can be overridden with the
-  // KMP_IGNORE_MPPEND environment variable. If KMP_IGNORE_MPPEND is 0,
-  // __kmp_ignore_mppend() returns FALSE and __kmpc_end() will unregister
-  // this root (it can cause library shutdown).
-  if (__kmp_ignore_mppend() == FALSE) {
-    KC_TRACE(10, ("__kmpc_end: called\n"));
-    KA_TRACE(30, ("__kmpc_end\n"));
-
-    __kmp_internal_end_thread(-1);
-  }
-#if KMP_OS_WINDOWS && OMPT_SUPPORT
-  // Normal exit process on Windows does not allow worker threads of the final
-  // parallel region to finish reporting their events, so shutting down the
-  // library here fixes the issue at least for the cases where __kmpc_end() is
-  // placed properly.
-  if (ompt_enabled.enabled)
-    __kmp_internal_end_library(__kmp_gtid_get_specific());
-#endif
-}
-
-/*!
-@ingroup THREAD_STATES
-@param loc Source location information.
-@return The global thread index of the active thread.
-
-This function can be called in any context.
-
-If the runtime has only been entered at the outermost level from a
-single (necessarily non-OpenMP*) thread, then the thread number is
-that which would be returned by omp_get_thread_num() in the outermost
-active parallel construct. (Or zero if there is no active parallel
-construct, since the master thread is necessarily thread zero.)
-
-If multiple non-OpenMP threads all enter an OpenMP construct then this
-will be a unique thread identifier among all the threads created by
-the OpenMP runtime (but the value cannot be defined in terms of
-OpenMP thread ids returned by omp_get_thread_num()).
-*/
-kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
-  kmp_int32 gtid = __kmp_entry_gtid();
-
-  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
-
-  return gtid;
-}
-
-/*!
-@ingroup THREAD_STATES
-@param loc Source location information.
-@return The number of threads under control of the OpenMP* runtime - -This function can be called in any context. -It returns the total number of threads under the control of the OpenMP runtime. -That is not a number that can be determined by any OpenMP standard calls, since -the library may be called from more than one non-OpenMP thread, and this -reflects the total over all such calls. Similarly the runtime maintains -underlying threads even when they are not active (since the cost of creating -and destroying OS threads is high), this call counts all such threads even if -they are not waiting for work. -*/ -kmp_int32 __kmpc_global_num_threads(ident_t *loc) { - KC_TRACE(10, - ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth)); - - return TCR_4(__kmp_all_nth); -} - -/*! -@ingroup THREAD_STATES -@param loc Source location information. -@return The thread number of the calling thread in the innermost active parallel -construct. -*/ -kmp_int32 __kmpc_bound_thread_num(ident_t *loc) { - KC_TRACE(10, ("__kmpc_bound_thread_num: called\n")); - return __kmp_tid_from_gtid(__kmp_entry_gtid()); -} - -/*! -@ingroup THREAD_STATES -@param loc Source location information. -@return The number of threads in the innermost active parallel construct. -*/ -kmp_int32 __kmpc_bound_num_threads(ident_t *loc) { - KC_TRACE(10, ("__kmpc_bound_num_threads: called\n")); - - return __kmp_entry_thread()->th.th_team->t.t_nproc; -} - -/*! - * @ingroup DEPRECATED - * @param loc location description - * - * This function need not be called. It always returns TRUE. - */ -kmp_int32 __kmpc_ok_to_fork(ident_t *loc) { -#ifndef KMP_DEBUG - - return TRUE; - -#else - - const char *semi2; - const char *semi3; - int line_no; - - if (__kmp_par_range == 0) { - return TRUE; - } - semi2 = loc->psource; - if (semi2 == NULL) { - return TRUE; - } - semi2 = strchr(semi2, ';'); - if (semi2 == NULL) { - return TRUE; - } - semi2 = strchr(semi2 + 1, ';'); - if (semi2 == NULL) { - return TRUE; - } - if (__kmp_par_range_filename[0]) { - const char *name = semi2 - 1; - while ((name > loc->psource) && (*name != '/') && (*name != ';')) { - name--; - } - if ((*name == '/') || (*name == ';')) { - name++; - } - if (strncmp(__kmp_par_range_filename, name, semi2 - name)) { - return __kmp_par_range < 0; - } - } - semi3 = strchr(semi2 + 1, ';'); - if (__kmp_par_range_routine[0]) { - if ((semi3 != NULL) && (semi3 > semi2) && - (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) { - return __kmp_par_range < 0; - } - } - if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) { - if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) { - return __kmp_par_range > 0; - } - return __kmp_par_range < 0; - } - return TRUE; - -#endif /* KMP_DEBUG */ -} - -/*! -@ingroup THREAD_STATES -@param loc Source location information. -@return 1 if this thread is executing inside an active parallel region, zero if -not. -*/ -kmp_int32 __kmpc_in_parallel(ident_t *loc) { - return __kmp_entry_thread()->th.th_root->r.r_active; -} - -/*! -@ingroup PARALLEL -@param loc source location information -@param global_tid global thread number -@param num_threads number of threads requested for this parallel construct - -Set the number of threads to be used by the next fork spawned by this thread. -This call is only required if the parallel construct has a `num_threads` clause. 
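-
-A typical compiler lowering, shown schematically (the exact generated code is
-compiler-dependent, and `outlined_fn` is a placeholder for the compiler's
-outlined parallel region):
-@code
-// #pragma omp parallel num_threads(4)
-kmp_int32 gtid = __kmpc_global_thread_num(&loc);
-__kmpc_push_num_threads(&loc, gtid, 4);
-__kmpc_fork_call(&loc, 0, outlined_fn);
-@endcode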
-*/ -void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, - kmp_int32 num_threads) { - KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n", - global_tid, num_threads)); - - __kmp_push_num_threads(loc, global_tid, num_threads); -} - -void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) { - KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n")); - - /* the num_threads are automatically popped */ -} - -#if OMP_40_ENABLED - -void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, - kmp_int32 proc_bind) { - KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid, - proc_bind)); - - __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind); -} - -#endif /* OMP_40_ENABLED */ - -/*! -@ingroup PARALLEL -@param loc source location information -@param argc total number of arguments in the ellipsis -@param microtask pointer to callback routine consisting of outlined parallel -construct -@param ... pointers to shared variables that aren't global - -Do the actual fork and call the microtask in the relevant number of threads. -*/ -void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) { - int gtid = __kmp_entry_gtid(); - -#if (KMP_STATS_ENABLED) - // If we were in a serial region, then stop the serial timer, record - // the event, and start parallel region timer - stats_state_e previous_state = KMP_GET_THREAD_STATE(); - if (previous_state == stats_state_e::SERIAL_REGION) { - KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead); - } else { - KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead); - } - int inParallel = __kmpc_in_parallel(loc); - if (inParallel) { - KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL); - } else { - KMP_COUNT_BLOCK(OMP_PARALLEL); - } -#endif - - // maybe to save thr_state is enough here - { - va_list ap; - va_start(ap, microtask); - -#if OMPT_SUPPORT - ompt_frame_t *ompt_frame; - if (ompt_enabled.enabled) { - kmp_info_t *master_th = __kmp_threads[gtid]; - kmp_team_t *parent_team = master_th->th.th_team; - ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info; - if (lwt) - ompt_frame = &(lwt->ompt_task_info.frame); - else { - int tid = __kmp_tid_from_gtid(gtid); - ompt_frame = &( - parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame); - } - ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(gtid); - } -#endif - -#if INCLUDE_SSC_MARKS - SSC_MARK_FORKING(); -#endif - __kmp_fork_call(loc, gtid, fork_context_intel, argc, - VOLATILE_CAST(microtask_t) microtask, // "wrapped" task - VOLATILE_CAST(launch_t) __kmp_invoke_task_func, -/* TODO: revert workaround for Intel(R) 64 tracker #96 */ -#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX - &ap -#else - ap -#endif - ); -#if INCLUDE_SSC_MARKS - SSC_MARK_JOINING(); -#endif - __kmp_join_call(loc, gtid -#if OMPT_SUPPORT - , - fork_context_intel -#endif - ); - - va_end(ap); - } - -#if KMP_STATS_ENABLED - if (previous_state == stats_state_e::SERIAL_REGION) { - KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial); - } else { - KMP_POP_PARTITIONED_TIMER(); - } -#endif // KMP_STATS_ENABLED -} - -#if OMP_40_ENABLED -/*! -@ingroup PARALLEL -@param loc source location information -@param global_tid global thread number -@param num_teams number of teams requested for the teams construct -@param num_threads number of threads per team requested for the teams construct - -Set the number of teams to be used by the teams construct. 
-This call is only required if the teams construct has a `num_teams` clause -or a `thread_limit` clause (or both). -*/ -void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, - kmp_int32 num_teams, kmp_int32 num_threads) { - KA_TRACE(20, - ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n", - global_tid, num_teams, num_threads)); - - __kmp_push_num_teams(loc, global_tid, num_teams, num_threads); -} - -/*! -@ingroup PARALLEL -@param loc source location information -@param argc total number of arguments in the ellipsis -@param microtask pointer to callback routine consisting of outlined teams -construct -@param ... pointers to shared variables that aren't global - -Do the actual fork and call the microtask in the relevant number of threads. -*/ -void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, - ...) { - int gtid = __kmp_entry_gtid(); - kmp_info_t *this_thr = __kmp_threads[gtid]; - va_list ap; - va_start(ap, microtask); - - KMP_COUNT_BLOCK(OMP_TEAMS); - - // remember teams entry point and nesting level - this_thr->th.th_teams_microtask = microtask; - this_thr->th.th_teams_level = - this_thr->th.th_team->t.t_level; // AC: can be >0 on host - -#if OMPT_SUPPORT - kmp_team_t *parent_team = this_thr->th.th_team; - int tid = __kmp_tid_from_gtid(gtid); - if (ompt_enabled.enabled) { - parent_team->t.t_implicit_task_taskdata[tid] - .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - - // check if __kmpc_push_num_teams called, set default number of teams - // otherwise - if (this_thr->th.th_teams_size.nteams == 0) { - __kmp_push_num_teams(loc, gtid, 0, 0); - } - KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1); - KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1); - KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1); - - __kmp_fork_call(loc, gtid, fork_context_intel, argc, - VOLATILE_CAST(microtask_t) - __kmp_teams_master, // "wrapped" task - VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, -#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX - &ap -#else - ap -#endif - ); - __kmp_join_call(loc, gtid -#if OMPT_SUPPORT - , - fork_context_intel -#endif - ); - - this_thr->th.th_teams_microtask = NULL; - this_thr->th.th_teams_level = 0; - *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L; - va_end(ap); -} -#endif /* OMP_40_ENABLED */ - -// I don't think this function should ever have been exported. -// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated -// openmp code ever called it, but it's been exported from the RTL for so -// long that I'm afraid to remove the definition. -int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); } - -/*! -@ingroup PARALLEL -@param loc source location information -@param global_tid global thread number - -Enter a serialized parallel construct. This interface is used to handle a -conditional parallel region, like this, -@code -#pragma omp parallel if (condition) -@endcode -when the condition is false. -*/ -void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { -// The implementation is now in kmp_runtime.cpp so that it can share static -// functions with kmp_fork_call since the tasks to be done are similar in -// each case. -#if OMPT_SUPPORT - OMPT_STORE_RETURN_ADDRESS(global_tid); -#endif - __kmp_serialized_parallel(loc, global_tid); -} - -/*! 
-@ingroup PARALLEL -@param loc source location information -@param global_tid global thread number - -Leave a serialized parallel construct. -*/ -void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { - kmp_internal_control_t *top; - kmp_info_t *this_thr; - kmp_team_t *serial_team; - - KC_TRACE(10, - ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid)); - - /* skip all this code for autopar serialized loops since it results in - unacceptable overhead */ - if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR)) - return; - - // Not autopar code - if (!TCR_4(__kmp_init_parallel)) - __kmp_parallel_initialize(); - - this_thr = __kmp_threads[global_tid]; - serial_team = this_thr->th.th_serial_team; - -#if OMP_45_ENABLED - kmp_task_team_t *task_team = this_thr->th.th_task_team; - - // we need to wait for the proxy tasks before finishing the thread - if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) - __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL)); -#endif - - KMP_MB(); - KMP_DEBUG_ASSERT(serial_team); - KMP_ASSERT(serial_team->t.t_serialized); - KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team); - KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team); - KMP_DEBUG_ASSERT(serial_team->t.t_threads); - KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); - -#if OMPT_SUPPORT - if (ompt_enabled.enabled && - this_thr->th.ompt_thread_info.state != ompt_state_overhead) { - OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none; - if (ompt_enabled.ompt_callback_implicit_task) { - ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( - ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1, - OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit); - } - - // reset clear the task id only after unlinking the task - ompt_data_t *parent_task_data; - __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL); - - if (ompt_enabled.ompt_callback_parallel_end) { - ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( - &(serial_team->t.ompt_team_info.parallel_data), parent_task_data, - ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid)); - } - __ompt_lw_taskteam_unlink(this_thr); - this_thr->th.ompt_thread_info.state = ompt_state_overhead; - } -#endif - - /* If necessary, pop the internal control stack values and replace the team - * values */ - top = serial_team->t.t_control_stack_top; - if (top && top->serial_nesting_level == serial_team->t.t_serialized) { - copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top); - serial_team->t.t_control_stack_top = top->next; - __kmp_free(top); - } - - // if( serial_team -> t.t_serialized > 1 ) - serial_team->t.t_level--; - - /* pop dispatch buffers stack */ - KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer); - { - dispatch_private_info_t *disp_buffer = - serial_team->t.t_dispatch->th_disp_buffer; - serial_team->t.t_dispatch->th_disp_buffer = - serial_team->t.t_dispatch->th_disp_buffer->next; - __kmp_free(disp_buffer); - } -#if OMP_50_ENABLED - this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore -#endif - - --serial_team->t.t_serialized; - if (serial_team->t.t_serialized == 0) { - -/* return to the parallel section */ - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) { - __kmp_clear_x87_fpu_status_word(); - __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word); - __kmp_load_mxcsr(&serial_team->t.t_mxcsr); - 
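-      /* When KMP_INHERIT_FP_CONTROL is in effect, the x87 control word and
-         MXCSR captured at fork time are restored here, so floating-point
-         environment changes made inside the serialized region do not leak
-         out. The same effect with SSE intrinsics, as an illustration only:
-
-           unsigned int saved = _mm_getcsr(); // at region entry
-           // ... user code may change rounding/exception masks ...
-           _mm_setcsr(saved);                 // at region exit
-      */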
} -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - - this_thr->th.th_team = serial_team->t.t_parent; - this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid; - - /* restore values cached in the thread */ - this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */ - this_thr->th.th_team_master = - serial_team->t.t_parent->t.t_threads[0]; /* JPH */ - this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized; - - /* TODO the below shouldn't need to be adjusted for serialized teams */ - this_thr->th.th_dispatch = - &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid]; - - __kmp_pop_current_task_from_thread(this_thr); - - KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0); - this_thr->th.th_current_task->td_flags.executing = 1; - - if (__kmp_tasking_mode != tskm_immediate_exec) { - // Copy the task team from the new child / old parent team to the thread. - this_thr->th.th_task_team = - this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]; - KA_TRACE(20, - ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / " - "team %p\n", - global_tid, this_thr->th.th_task_team, this_thr->th.th_team)); - } - } else { - if (__kmp_tasking_mode != tskm_immediate_exec) { - KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting " - "depth of serial team %p to %d\n", - global_tid, serial_team, serial_team->t.t_serialized)); - } - } - - if (__kmp_env_consistency_check) - __kmp_pop_parallel(global_tid, NULL); -#if OMPT_SUPPORT - if (ompt_enabled.enabled) - this_thr->th.ompt_thread_info.state = - ((this_thr->th.th_team_serialized) ? ompt_state_work_serial - : ompt_state_work_parallel); -#endif -} - -/*! -@ingroup SYNCHRONIZATION -@param loc source location information. - -Execute flush. This is implemented as a full memory fence. (Though -depending on the memory ordering convention obeyed by the compiler -even that may not be necessary). -*/ -void __kmpc_flush(ident_t *loc) { - KC_TRACE(10, ("__kmpc_flush: called\n")); - - /* need explicit __mf() here since use volatile instead in library */ - KMP_MB(); /* Flush all pending memory write invalidates. */ - -#if (KMP_ARCH_X86 || KMP_ARCH_X86_64) -#if KMP_MIC -// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used. -// We shouldn't need it, though, since the ABI rules require that -// * If the compiler generates NGO stores it also generates the fence -// * If users hand-code NGO stores they should insert the fence -// therefore no incomplete unordered stores should be visible. -#else - // C74404 - // This is to address non-temporal store instructions (sfence needed). - // The clflush instruction is addressed either (mfence needed). - // Probably the non-temporal load monvtdqa instruction should also be - // addressed. - // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2. - if (!__kmp_cpuinfo.initialized) { - __kmp_query_cpuid(&__kmp_cpuinfo); - } - if (!__kmp_cpuinfo.sse2) { - // CPU cannot execute SSE2 instructions. - } else { -#if KMP_COMPILER_ICC - _mm_mfence(); -#elif KMP_COMPILER_MSVC - MemoryBarrier(); -#else - __sync_synchronize(); -#endif // KMP_COMPILER_ICC - } -#endif // KMP_MIC -#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64) -// Nothing to see here move along -#elif KMP_ARCH_PPC64 -// Nothing needed here (we have a real MB above). -#if KMP_OS_CNK - // The flushing thread needs to yield here; this prevents a - // busy-waiting thread from saturating the pipeline. 
flush is - // often used in loops like this: - // while (!flag) { - // #pragma omp flush(flag) - // } - // and adding the yield here is good for at least a 10x speedup - // when running >2 threads per core (on the NAS LU benchmark). - __kmp_yield(TRUE); -#endif -#else -#error Unknown or unsupported architecture -#endif - -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.ompt_callback_flush) { - ompt_callbacks.ompt_callback(ompt_callback_flush)( - __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0)); - } -#endif -} - -/* -------------------------------------------------------------------------- */ -/*! -@ingroup SYNCHRONIZATION -@param loc source location information -@param global_tid thread id. - -Execute a barrier. -*/ -void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) { - KMP_COUNT_BLOCK(OMP_BARRIER); - KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid)); - - if (!TCR_4(__kmp_init_parallel)) - __kmp_parallel_initialize(); - - if (__kmp_env_consistency_check) { - if (loc == 0) { - KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user? - } - - __kmp_check_barrier(global_tid, ct_barrier, loc); - } - -#if OMPT_SUPPORT - ompt_frame_t *ompt_frame; - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - if (ompt_frame->enter_frame.ptr == NULL) - ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(global_tid); - } -#endif - __kmp_threads[global_tid]->th.th_ident = loc; - // TODO: explicit barrier_wait_id: - // this function is called when 'barrier' directive is present or - // implicit barrier at the end of a worksharing construct. - // 1) better to add a per-thread barrier counter to a thread data structure - // 2) set to 0 when a new team is created - // 4) no sync is required - - __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - ompt_frame->enter_frame = ompt_data_none; - } -#endif -} - -/* The BARRIER for a MASTER section is always explicit */ -/*! -@ingroup WORK_SHARING -@param loc source location information. -@param global_tid global thread number . -@return 1 if this thread should execute the master block, 0 otherwise. -*/ -kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) { - int status = 0; - - KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid)); - - if (!TCR_4(__kmp_init_parallel)) - __kmp_parallel_initialize(); - - if (KMP_MASTER_GTID(global_tid)) { - KMP_COUNT_BLOCK(OMP_MASTER); - KMP_PUSH_PARTITIONED_TIMER(OMP_master); - status = 1; - } - -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (status) { - if (ompt_enabled.ompt_callback_master) { - kmp_info_t *this_thr = __kmp_threads[global_tid]; - kmp_team_t *team = this_thr->th.th_team; - - int tid = __kmp_tid_from_gtid(global_tid); - ompt_callbacks.ompt_callback(ompt_callback_master)( - ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), - &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), - OMPT_GET_RETURN_ADDRESS(0)); - } - } -#endif - - if (__kmp_env_consistency_check) { -#if KMP_USE_DYNAMIC_LOCK - if (status) - __kmp_push_sync(global_tid, ct_master, loc, NULL, 0); - else - __kmp_check_sync(global_tid, ct_master, loc, NULL, 0); -#else - if (status) - __kmp_push_sync(global_tid, ct_master, loc, NULL); - else - __kmp_check_sync(global_tid, ct_master, loc, NULL); -#endif - } - - return status; -} - -/*! -@ingroup WORK_SHARING -@param loc source location information. 
-@param global_tid global thread number.
-
-Mark the end of a master region. This should only be called by the
-thread that executes the master region.
-*/
-void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
-  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
-
-  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
-  KMP_POP_PARTITIONED_TIMER();
-
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  kmp_info_t *this_thr = __kmp_threads[global_tid];
-  kmp_team_t *team = this_thr->th.th_team;
-  if (ompt_enabled.ompt_callback_master) {
-    int tid = __kmp_tid_from_gtid(global_tid);
-    ompt_callbacks.ompt_callback(ompt_callback_master)(
-        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
-        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
-        OMPT_GET_RETURN_ADDRESS(0));
-  }
-#endif
-
-  if (__kmp_env_consistency_check) {
-    if (global_tid < 0)
-      KMP_WARNING(ThreadIdentInvalid);
-
-    if (KMP_MASTER_GTID(global_tid))
-      __kmp_pop_sync(global_tid, ct_master, loc);
-  }
-}
-
-/*!
-@ingroup WORK_SHARING
-@param loc source location information.
-@param gtid global thread number.
-
-Start execution of an ordered construct.
-*/
-void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
-  int cid = 0;
-  kmp_info_t *th;
-  KMP_DEBUG_ASSERT(__kmp_init_serial);
-
-  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
-
-  if (!TCR_4(__kmp_init_parallel))
-    __kmp_parallel_initialize();
-
-#if USE_ITT_BUILD
-  __kmp_itt_ordered_prep(gtid);
-// TODO: ordered_wait_id
-#endif /* USE_ITT_BUILD */
-
-  th = __kmp_threads[gtid];
-
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  kmp_team_t *team;
-  ompt_wait_id_t lck;
-  void *codeptr_ra;
-  if (ompt_enabled.enabled) {
-    OMPT_STORE_RETURN_ADDRESS(gtid);
-    team = __kmp_team_from_gtid(gtid);
-    lck = (ompt_wait_id_t)&team->t.t_ordered.dt.t_value;
-    /* OMPT state update */
-    th->th.ompt_thread_info.wait_id = lck;
-    th->th.ompt_thread_info.state = ompt_state_wait_ordered;
-
-    /* OMPT event callback */
-    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
-    if (ompt_enabled.ompt_callback_mutex_acquire) {
-      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
-          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin,
-          (ompt_wait_id_t)lck, codeptr_ra);
-    }
-  }
-#endif
-
-  if (th->th.th_dispatch->th_deo_fcn != 0)
-    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
-  else
-    __kmp_parallel_deo(&gtid, &cid, loc);
-
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  if (ompt_enabled.enabled) {
-    /* OMPT state update */
-    th->th.ompt_thread_info.state = ompt_state_work_parallel;
-    th->th.ompt_thread_info.wait_id = 0;
-
-    /* OMPT event callback */
-    if (ompt_enabled.ompt_callback_mutex_acquired) {
-      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
-          ompt_mutex_ordered, (ompt_wait_id_t)lck, codeptr_ra);
-    }
-  }
-#endif
-
-#if USE_ITT_BUILD
-  __kmp_itt_ordered_start(gtid);
-#endif /* USE_ITT_BUILD */
-}
-
-/*!
-@ingroup WORK_SHARING
-@param loc source location information.
-@param gtid global thread number.
-
-End execution of an ordered construct.
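
A typical pairing of these two entry points, shown as an editorial sketch
(assumed compiler output, not taken from this file; lb, ub and
body_ordered_part are hypothetical names):
@code
// inside the outlined body of an ordered worksharing loop chunk
for (kmp_int32 i = lb; i <= ub; ++i) {
  __kmpc_ordered(&loc, gtid);     // wait until it is this iteration's turn
  body_ordered_part(i);           // user code from the `ordered` region
  __kmpc_end_ordered(&loc, gtid); // pass the turn to the next iteration
}
@endcode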
-*/
-void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
-  int cid = 0;
-  kmp_info_t *th;
-
-  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
-
-#if USE_ITT_BUILD
-  __kmp_itt_ordered_end(gtid);
-// TODO: ordered_wait_id
-#endif /* USE_ITT_BUILD */
-
-  th = __kmp_threads[gtid];
-
-  if (th->th.th_dispatch->th_dxo_fcn != 0)
-    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
-  else
-    __kmp_parallel_dxo(&gtid, &cid, loc);
-
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  OMPT_STORE_RETURN_ADDRESS(gtid);
-  if (ompt_enabled.ompt_callback_mutex_released) {
-    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
-        ompt_mutex_ordered,
-        (ompt_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
-        OMPT_LOAD_RETURN_ADDRESS(gtid));
-  }
-#endif
-}
-
-#if KMP_USE_DYNAMIC_LOCK
-
-static __forceinline void
-__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
-                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
-  // Pointer to the allocated indirect lock is written to crit, while indexing
-  // is ignored.
-  void *idx;
-  kmp_indirect_lock_t **lck;
-  lck = (kmp_indirect_lock_t **)crit;
-  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
-  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
-  KMP_SET_I_LOCK_LOCATION(ilk, loc);
-  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
-  KA_TRACE(20,
-           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
-#if USE_ITT_BUILD
-  __kmp_itt_critical_creating(ilk->lock, loc);
-#endif
-  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
-  if (status == 0) {
-#if USE_ITT_BUILD
-    __kmp_itt_critical_destroyed(ilk->lock);
-#endif
-    // We don't really need to destroy the unclaimed lock here since it will be
-    // cleaned up at program exit.
-    // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
-  }
-  KMP_DEBUG_ASSERT(*lck != NULL);
-}
-
-// Fast-path acquire tas lock
-#define KMP_ACQUIRE_TAS_LOCK(lock, gtid)                                      \
-  {                                                                           \
-    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                               \
-    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                  \
-    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                        \
-    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                         \
-        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {   \
-      kmp_uint32 spins;                                                       \
-      KMP_FSYNC_PREPARE(l);                                                   \
-      KMP_INIT_YIELD(spins);                                                  \
-      if (TCR_4(__kmp_nth) >                                                  \
-          (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {              \
-        KMP_YIELD(TRUE);                                                      \
-      } else {                                                                \
-        KMP_YIELD_SPIN(spins);                                                \
-      }                                                                       \
-      kmp_backoff_t backoff = __kmp_spin_backoff_params;                      \
-      while (                                                                 \
-          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                       \
-          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
-        __kmp_spin_backoff(&backoff);                                         \
-        if (TCR_4(__kmp_nth) >                                                \
-            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {           \
-          KMP_YIELD(TRUE);                                                   \
-        } else {                                                              \
-          KMP_YIELD_SPIN(spins);                                              \
-        }                                                                     \
-      }                                                                       \
-    }                                                                         \
-    KMP_FSYNC_ACQUIRED(l);                                                    \
-  }
-
-// Fast-path test tas lock
-#define KMP_TEST_TAS_LOCK(lock, gtid, rc)                                     \
-  {                                                                           \
-    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                               \
-    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                  \
-    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                        \
-    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free &&                        \
-         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy);     \
-  }
-
-// Fast-path release tas lock
-#define KMP_RELEASE_TAS_LOCK(lock, gtid)                                      \
-  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
-
-#if KMP_USE_FUTEX
-
-#include <sys/syscall.h>
-#include <unistd.h>
-#ifndef FUTEX_WAIT
-#define FUTEX_WAIT 0
-#endif
-#ifndef FUTEX_WAKE
-#define FUTEX_WAKE 1
-#endif
-
-// Fast-path acquire futex lock
-#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid)                                    \
-  {                                                                           \
-    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                         \
-    kmp_int32 gtid_code = (gtid + 1) << 1;                                    \
-    KMP_MB();                                                                 \
-    KMP_FSYNC_PREPARE(ftx);                                                   \
-    kmp_int32 poll_val;                                                       \
-    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(                           \
-                &(ftx->lk.poll), KMP_LOCK_FREE(futex),                        \
-                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {  \
-      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                          \
-      if (!cond) {                                                            \
-        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val,           \
-                                         poll_val |                           \
-                                             KMP_LOCK_BUSY(1, futex))) {      \
-          continue;                                                           \
-        }                                                                     \
-        poll_val |= KMP_LOCK_BUSY(1, futex);                                  \
-      }                                                                       \
-      kmp_int32 rc;                                                           \
-      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val,    \
-                        NULL, NULL, 0)) != 0) {                               \
-        continue;                                                             \
-      }                                                                       \
-      gtid_code |= 1;                                                         \
-    }                                                                         \
-    KMP_FSYNC_ACQUIRED(ftx);                                                  \
-  }
-
-// Fast-path test futex lock
-#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                   \
-  {                                                                           \
-    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                         \
-    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),    \
-                                    KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { \
-      KMP_FSYNC_ACQUIRED(ftx);                                                \
-      rc = TRUE;                                                              \
-    } else {                                                                  \
-      rc = FALSE;                                                             \
-    }                                                                         \
-  }
-
-// Fast-path release futex lock
-#define KMP_RELEASE_FUTEX_LOCK(lock, gtid)                                    \
-  {                                                                           \
-    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                         \
-    KMP_MB();                                                                 \
-    KMP_FSYNC_RELEASING(ftx);                                                 \
-    kmp_int32 poll_val =                                                      \
-        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));              \
-    if (KMP_LOCK_STRIP(poll_val) & 1) {                                       \
-      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE,                        \
-              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);                        \
-    }                                                                         \
-    KMP_MB();                                                                 \
-    KMP_YIELD(TCR_4(__kmp_nth) >                                              \
-              (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));           \
-  }
-
-#endif // KMP_USE_FUTEX
-
-#else // KMP_USE_DYNAMIC_LOCK
-
-static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
-                                                      ident_t const *loc,
-                                                      kmp_int32 gtid) {
-  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
-
-  // Because of the double-check, the following load doesn't need to be
-  // volatile.
-  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
-
-  if (lck == NULL) {
-    void *idx;
-
-    // Allocate & initialize the lock.
-    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
-    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
-    __kmp_init_user_lock_with_checks(lck);
-    __kmp_set_user_lock_location(lck, loc);
-#if USE_ITT_BUILD
-    __kmp_itt_critical_creating(lck);
-// __kmp_itt_critical_creating() should be called *before* the first usage
-// of the underlying lock. It is the only place where we can guarantee it. There
-// are chances the lock will be destroyed with no usage, but it is not a
-// problem, because this is not a real event seen by the user but rather
-// sets a name for the object (lock). See more details in kmp_itt.h.
-#endif /* USE_ITT_BUILD */
-
-    // Use a cmpxchg instruction to slam the start of the critical section with
-    // the lock pointer. If another thread beat us to it, deallocate the lock,
-    // and use the lock that the other thread allocated.
-    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
-
-    if (status == 0) {
-// Deallocate the lock and reload the value.
-#if USE_ITT_BUILD
-      __kmp_itt_critical_destroyed(lck);
-// Let ITT know the lock is destroyed and the same memory location may be
-// reused for another purpose.
-#endif /* USE_ITT_BUILD */
-      __kmp_destroy_user_lock_with_checks(lck);
-      __kmp_user_lock_free(&idx, gtid, lck);
-      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
-      KMP_DEBUG_ASSERT(lck != NULL);
-    }
-  }
-  return lck;
-}
-
-#endif // KMP_USE_DYNAMIC_LOCK
-
-/*!
-@ingroup WORK_SHARING
-@param loc source location information.
-@param global_tid global thread number.
-@param crit identity of the critical section. This could be a pointer to a lock
-associated with the critical section, or some other suitably unique value.
-
-Enter code protected by a `critical` construct.
-This function blocks until the executing thread can enter the critical section.
-*/
-void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
-                     kmp_critical_name *crit) {
-#if KMP_USE_DYNAMIC_LOCK
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  OMPT_STORE_RETURN_ADDRESS(global_tid);
-#endif // OMPT_SUPPORT
-  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
-#else
-  KMP_COUNT_BLOCK(OMP_CRITICAL);
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  ompt_state_t prev_state = ompt_state_undefined;
-  ompt_thread_info_t ti;
-#endif
-  kmp_user_lock_p lck;
-
-  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
-
-  // TODO: add THR_OVHD_STATE
-
-  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
-  KMP_CHECK_USER_LOCK_INIT();
-
-  if ((__kmp_user_lock_kind == lk_tas) &&
-      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
-    lck = (kmp_user_lock_p)crit;
-  }
-#if KMP_USE_FUTEX
-  else if ((__kmp_user_lock_kind == lk_futex) &&
-           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
-    lck = (kmp_user_lock_p)crit;
-  }
-#endif
-  else { // ticket, queuing or drdpa
-    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
-  }
-
-  if (__kmp_env_consistency_check)
-    __kmp_push_sync(global_tid, ct_critical, loc, lck);
-
-// Since the critical directive binds to all threads, not just the current
-// team, we have to check this even if we are in a serialized team.
-// Also, even if we are the uber thread, we still have to acquire the lock,
-// as we have to contend with sibling threads.
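// Editorial sketch (not part of the original source): the lock-kind
// dispatch above relies on the compiler emitting zero-filled static
// storage for each critical section, e.g.
//   static kmp_critical_name my_crit = {0};  // hypothetical name
//   __kmpc_critical(&loc, global_tid, &my_crit);
// For the TAS/futex cases the lock word fits directly inside that
// kmp_critical_name storage; ticket/queuing/drdpa locks do not fit, so
// __kmp_get_critical_section_ptr() allocates a lock on first use and
// installs a pointer to it in *crit.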
-
-#if USE_ITT_BUILD
-  __kmp_itt_critical_acquiring(lck);
-#endif /* USE_ITT_BUILD */
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  OMPT_STORE_RETURN_ADDRESS(global_tid);
-  void *codeptr_ra = NULL;
-  if (ompt_enabled.enabled) {
-    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
-    /* OMPT state update */
-    prev_state = ti.state;
-    ti.wait_id = (ompt_wait_id_t)lck;
-    ti.state = ompt_state_wait_critical;
-
-    /* OMPT event callback */
-    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
-    if (ompt_enabled.ompt_callback_mutex_acquire) {
-      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
-          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
-          (ompt_wait_id_t)crit, codeptr_ra);
-    }
-  }
-#endif
-  // Value of 'crit' should be good for using as a critical_id of the critical
-  // section directive.
-  __kmp_acquire_user_lock_with_checks(lck, global_tid);
-
-#if USE_ITT_BUILD
-  __kmp_itt_critical_acquired(lck);
-#endif /* USE_ITT_BUILD */
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  if (ompt_enabled.enabled) {
-    /* OMPT state update */
-    ti.state = prev_state;
-    ti.wait_id = 0;
-
-    /* OMPT event callback */
-    if (ompt_enabled.ompt_callback_mutex_acquired) {
-      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
-          ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr_ra);
-    }
-  }
-#endif
-  KMP_POP_PARTITIONED_TIMER();
-
-  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
-  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
-#endif // KMP_USE_DYNAMIC_LOCK
-}
-
-#if KMP_USE_DYNAMIC_LOCK
-
-// Converts the given hint to an internal lock implementation
-static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
-#if KMP_USE_TSX
-#define KMP_TSX_LOCK(seq) lockseq_##seq
-#else
-#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
-#endif
-
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
-#else
-#define KMP_CPUINFO_RTM 0
-#endif
-
-  // Hints that do not require further logic
-  if (hint & kmp_lock_hint_hle)
-    return KMP_TSX_LOCK(hle);
-  if (hint & kmp_lock_hint_rtm)
-    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
-  if (hint & kmp_lock_hint_adaptive)
-    return KMP_CPUINFO_RTM ?
KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq; - - // Rule out conflicting hints first by returning the default lock - if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended)) - return __kmp_user_lock_seq; - if ((hint & omp_lock_hint_speculative) && - (hint & omp_lock_hint_nonspeculative)) - return __kmp_user_lock_seq; - - // Do not even consider speculation when it appears to be contended - if (hint & omp_lock_hint_contended) - return lockseq_queuing; - - // Uncontended lock without speculation - if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative)) - return lockseq_tas; - - // HLE lock for speculation - if (hint & omp_lock_hint_speculative) - return KMP_TSX_LOCK(hle); - - return __kmp_user_lock_seq; -} - -#if OMPT_SUPPORT && OMPT_OPTIONAL -#if KMP_USE_DYNAMIC_LOCK -static kmp_mutex_impl_t -__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) { - if (user_lock) { - switch (KMP_EXTRACT_D_TAG(user_lock)) { - case 0: - break; -#if KMP_USE_FUTEX - case locktag_futex: - return kmp_mutex_impl_queuing; -#endif - case locktag_tas: - return kmp_mutex_impl_spin; -#if KMP_USE_TSX - case locktag_hle: - return kmp_mutex_impl_speculative; -#endif - default: - return kmp_mutex_impl_none; - } - ilock = KMP_LOOKUP_I_LOCK(user_lock); - } - KMP_ASSERT(ilock); - switch (ilock->type) { -#if KMP_USE_TSX - case locktag_adaptive: - case locktag_rtm: - return kmp_mutex_impl_speculative; -#endif - case locktag_nested_tas: - return kmp_mutex_impl_spin; -#if KMP_USE_FUTEX - case locktag_nested_futex: -#endif - case locktag_ticket: - case locktag_queuing: - case locktag_drdpa: - case locktag_nested_ticket: - case locktag_nested_queuing: - case locktag_nested_drdpa: - return kmp_mutex_impl_queuing; - default: - return kmp_mutex_impl_none; - } -} -#else -// For locks without dynamic binding -static kmp_mutex_impl_t __ompt_get_mutex_impl_type() { - switch (__kmp_user_lock_kind) { - case lk_tas: - return kmp_mutex_impl_spin; -#if KMP_USE_FUTEX - case lk_futex: -#endif - case lk_ticket: - case lk_queuing: - case lk_drdpa: - return kmp_mutex_impl_queuing; -#if KMP_USE_TSX - case lk_hle: - case lk_rtm: - case lk_adaptive: - return kmp_mutex_impl_speculative; -#endif - default: - return kmp_mutex_impl_none; - } -} -#endif // KMP_USE_DYNAMIC_LOCK -#endif // OMPT_SUPPORT && OMPT_OPTIONAL - -/*! -@ingroup WORK_SHARING -@param loc source location information. -@param global_tid global thread number. -@param crit identity of the critical section. This could be a pointer to a lock -associated with the critical section, or some other suitably unique value. -@param hint the lock hint. - -Enter code protected by a `critical` construct with a hint. The hint value is -used to suggest a lock implementation. This function blocks until the executing -thread can enter the critical section unless the hint suggests use of -speculative execution and the hardware supports it. -*/ -void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, - kmp_critical_name *crit, uint32_t hint) { - KMP_COUNT_BLOCK(OMP_CRITICAL); - kmp_user_lock_p lck; -#if OMPT_SUPPORT && OMPT_OPTIONAL - ompt_state_t prev_state = ompt_state_undefined; - ompt_thread_info_t ti; - // This is the case, if called from __kmpc_critical: - void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); - if (!codeptr) - codeptr = OMPT_GET_RETURN_ADDRESS(0); -#endif - - KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); - - kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; - // Check if it is initialized. 
- KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); - if (*lk == 0) { - kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint); - if (KMP_IS_D_LOCK(lckseq)) { - KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, - KMP_GET_D_TAG(lckseq)); - } else { - __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq)); - } - } - // Branch for accessing the actual lock object and set operation. This - // branching is inevitable since this lock initialization does not follow the - // normal dispatch path (lock table is not used). - if (KMP_EXTRACT_D_TAG(lk) != 0) { - lck = (kmp_user_lock_p)lk; - if (__kmp_env_consistency_check) { - __kmp_push_sync(global_tid, ct_critical, loc, lck, - __kmp_map_hint_to_lock(hint)); - } -#if USE_ITT_BUILD - __kmp_itt_critical_acquiring(lck); -#endif -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - ti = __kmp_threads[global_tid]->th.ompt_thread_info; - /* OMPT state update */ - prev_state = ti.state; - ti.wait_id = (ompt_wait_id_t)lck; - ti.state = ompt_state_wait_critical; - - /* OMPT event callback */ - if (ompt_enabled.ompt_callback_mutex_acquire) { - ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( - ompt_mutex_critical, (unsigned int)hint, - __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)crit, codeptr); - } - } -#endif -#if KMP_USE_INLINED_TAS - if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { - KMP_ACQUIRE_TAS_LOCK(lck, global_tid); - } else -#elif KMP_USE_INLINED_FUTEX - if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) { - KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid); - } else -#endif - { - KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); - } - } else { - kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); - lck = ilk->lock; - if (__kmp_env_consistency_check) { - __kmp_push_sync(global_tid, ct_critical, loc, lck, - __kmp_map_hint_to_lock(hint)); - } -#if USE_ITT_BUILD - __kmp_itt_critical_acquiring(lck); -#endif -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - ti = __kmp_threads[global_tid]->th.ompt_thread_info; - /* OMPT state update */ - prev_state = ti.state; - ti.wait_id = (ompt_wait_id_t)lck; - ti.state = ompt_state_wait_critical; - - /* OMPT event callback */ - if (ompt_enabled.ompt_callback_mutex_acquire) { - ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( - ompt_mutex_critical, (unsigned int)hint, - __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)crit, codeptr); - } - } -#endif - KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); - } - KMP_POP_PARTITIONED_TIMER(); - -#if USE_ITT_BUILD - __kmp_itt_critical_acquired(lck); -#endif /* USE_ITT_BUILD */ -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - /* OMPT state update */ - ti.state = prev_state; - ti.wait_id = 0; - - /* OMPT event callback */ - if (ompt_enabled.ompt_callback_mutex_acquired) { - ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( - ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr); - } - } -#endif - - KMP_PUSH_PARTITIONED_TIMER(OMP_critical); - KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); -} // __kmpc_critical_with_hint - -#endif // KMP_USE_DYNAMIC_LOCK - -/*! -@ingroup WORK_SHARING -@param loc source location information. -@param global_tid global thread number . -@param crit identity of the critical section. This could be a pointer to a lock -associated with the critical section, or some other suitably unique value. - -Leave a critical section, releasing any lock that was held during its execution. 
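
An end-to-end lowering sketch (editorial illustration, not from this file;
crit_name and update_shared are hypothetical names) of how the enter/leave
pair is expected to bracket the protected region:
@code
static kmp_critical_name crit_name; // zero-initialized by the compiler

__kmpc_critical(&loc, gtid, &crit_name);
// ... or, when the source supplies a hint:
// __kmpc_critical_with_hint(&loc, gtid, &crit_name, omp_lock_hint_contended);
update_shared(); // the protected region
__kmpc_end_critical(&loc, gtid, &crit_name);
@endcode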
-*/ -void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, - kmp_critical_name *crit) { - kmp_user_lock_p lck; - - KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid)); - -#if KMP_USE_DYNAMIC_LOCK - if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { - lck = (kmp_user_lock_p)crit; - KMP_ASSERT(lck != NULL); - if (__kmp_env_consistency_check) { - __kmp_pop_sync(global_tid, ct_critical, loc); - } -#if USE_ITT_BUILD - __kmp_itt_critical_releasing(lck); -#endif -#if KMP_USE_INLINED_TAS - if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { - KMP_RELEASE_TAS_LOCK(lck, global_tid); - } else -#elif KMP_USE_INLINED_FUTEX - if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) { - KMP_RELEASE_FUTEX_LOCK(lck, global_tid); - } else -#endif - { - KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); - } - } else { - kmp_indirect_lock_t *ilk = - (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); - KMP_ASSERT(ilk != NULL); - lck = ilk->lock; - if (__kmp_env_consistency_check) { - __kmp_pop_sync(global_tid, ct_critical, loc); - } -#if USE_ITT_BUILD - __kmp_itt_critical_releasing(lck); -#endif - KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid); - } - -#else // KMP_USE_DYNAMIC_LOCK - - if ((__kmp_user_lock_kind == lk_tas) && - (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { - lck = (kmp_user_lock_p)crit; - } -#if KMP_USE_FUTEX - else if ((__kmp_user_lock_kind == lk_futex) && - (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { - lck = (kmp_user_lock_p)crit; - } -#endif - else { // ticket, queuing or drdpa - lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit)); - } - - KMP_ASSERT(lck != NULL); - - if (__kmp_env_consistency_check) - __kmp_pop_sync(global_tid, ct_critical, loc); - -#if USE_ITT_BUILD - __kmp_itt_critical_releasing(lck); -#endif /* USE_ITT_BUILD */ - // Value of 'crit' should be good for using as a critical_id of the critical - // section directive. - __kmp_release_user_lock_with_checks(lck, global_tid); - -#endif // KMP_USE_DYNAMIC_LOCK - -#if OMPT_SUPPORT && OMPT_OPTIONAL - /* OMPT release event triggers after lock is released; place here to trigger - * for all #if branches */ - OMPT_STORE_RETURN_ADDRESS(global_tid); - if (ompt_enabled.ompt_callback_mutex_released) { - ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( - ompt_mutex_critical, (ompt_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0)); - } -#endif - - KMP_POP_PARTITIONED_TIMER(); - KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid)); -} - -/*! -@ingroup SYNCHRONIZATION -@param loc source location information -@param global_tid thread id. -@return one if the thread should execute the master block, zero otherwise - -Start execution of a combined barrier and master. The barrier is executed inside -this function. 
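
The expected usage pattern, as an editorial sketch (not from this file;
master_only_work is a hypothetical name):
@code
if (__kmpc_barrier_master(&loc, gtid)) {
  master_only_work();                    // executed by one thread only
  __kmpc_end_barrier_master(&loc, gtid); // releases the waiting threads
}
@endcode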
-*/
-kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
-  int status;
-
-  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
-
-  if (!TCR_4(__kmp_init_parallel))
-    __kmp_parallel_initialize();
-
-  if (__kmp_env_consistency_check)
-    __kmp_check_barrier(global_tid, ct_barrier, loc);
-
-#if OMPT_SUPPORT
-  ompt_frame_t *ompt_frame;
-  if (ompt_enabled.enabled) {
-    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
-    if (ompt_frame->enter_frame.ptr == NULL)
-      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
-    OMPT_STORE_RETURN_ADDRESS(global_tid);
-  }
-#endif
-#if USE_ITT_NOTIFY
-  __kmp_threads[global_tid]->th.th_ident = loc;
-#endif
-  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  if (ompt_enabled.enabled) {
-    ompt_frame->enter_frame = ompt_data_none;
-  }
-#endif
-
-  return (status != 0) ? 0 : 1;
-}
-
-/*!
-@ingroup SYNCHRONIZATION
-@param loc source location information
-@param global_tid thread id.
-
-Complete the execution of a combined barrier and master. This function should
-only be called at the completion of the master code. Other threads will
-still be waiting at the barrier and this call releases them.
-*/
-void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
-  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
-
-  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
-}
-
-/*!
-@ingroup SYNCHRONIZATION
-@param loc source location information
-@param global_tid thread id.
-@return one if the thread should execute the master block, zero otherwise
-
-Start execution of a combined barrier and master(nowait) construct.
-The barrier is executed inside this function.
-There is no equivalent "end" function, since the master block ends without
-synchronization; the consistency-check bookkeeping that __kmpc_end_master()
-would otherwise perform is done at the end of this function instead.
-*/
-kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
-  kmp_int32 ret;
-
-  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
-
-  if (!TCR_4(__kmp_init_parallel))
-    __kmp_parallel_initialize();
-
-  if (__kmp_env_consistency_check) {
-    if (loc == 0) {
-      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
-    }
-    __kmp_check_barrier(global_tid, ct_barrier, loc);
-  }
-
-#if OMPT_SUPPORT
-  ompt_frame_t *ompt_frame;
-  if (ompt_enabled.enabled) {
-    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
-    if (ompt_frame->enter_frame.ptr == NULL)
-      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
-    OMPT_STORE_RETURN_ADDRESS(global_tid);
-  }
-#endif
-#if USE_ITT_NOTIFY
-  __kmp_threads[global_tid]->th.th_ident = loc;
-#endif
-  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  if (ompt_enabled.enabled) {
-    ompt_frame->enter_frame = ompt_data_none;
-  }
-#endif
-
-  ret = __kmpc_master(loc, global_tid);
-
-  if (__kmp_env_consistency_check) {
-    /* there's no __kmpc_end_master() called, so the (stats) */
-    /* actions of __kmpc_end_master are done here */
-
-    if (global_tid < 0) {
-      KMP_WARNING(ThreadIdentInvalid);
-    }
-    if (ret) {
-      /* only one thread should do the pop since only */
-      /* one did the push (see __kmpc_master()) */
-
-      __kmp_pop_sync(global_tid, ct_master, loc);
-    }
-  }
-
-  return (ret);
-}
-
-/* The BARRIER for a SINGLE process section is always explicit */
-/*!
-@ingroup WORK_SHARING
-@param loc source location information
-@param global_tid global thread number
-@return One if this thread should execute the single construct, zero otherwise.
- -Test whether to execute a single construct. -There are no implicit barriers in the two "single" calls, rather the compiler -should introduce an explicit barrier if it is required. -*/ - -kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { - kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE); - - if (rc) { - // We are going to execute the single statement, so we should count it. - KMP_COUNT_BLOCK(OMP_SINGLE); - KMP_PUSH_PARTITIONED_TIMER(OMP_single); - } - -#if OMPT_SUPPORT && OMPT_OPTIONAL - kmp_info_t *this_thr = __kmp_threads[global_tid]; - kmp_team_t *team = this_thr->th.th_team; - int tid = __kmp_tid_from_gtid(global_tid); - - if (ompt_enabled.enabled) { - if (rc) { - if (ompt_enabled.ompt_callback_work) { - ompt_callbacks.ompt_callback(ompt_callback_work)( - ompt_work_single_executor, ompt_scope_begin, - &(team->t.ompt_team_info.parallel_data), - &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), - 1, OMPT_GET_RETURN_ADDRESS(0)); - } - } else { - if (ompt_enabled.ompt_callback_work) { - ompt_callbacks.ompt_callback(ompt_callback_work)( - ompt_work_single_other, ompt_scope_begin, - &(team->t.ompt_team_info.parallel_data), - &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), - 1, OMPT_GET_RETURN_ADDRESS(0)); - ompt_callbacks.ompt_callback(ompt_callback_work)( - ompt_work_single_other, ompt_scope_end, - &(team->t.ompt_team_info.parallel_data), - &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), - 1, OMPT_GET_RETURN_ADDRESS(0)); - } - } - } -#endif - - return rc; -} - -/*! -@ingroup WORK_SHARING -@param loc source location information -@param global_tid global thread number - -Mark the end of a single construct. This function should -only be called by the thread that executed the block of code protected -by the `single` construct. -*/ -void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { - __kmp_exit_single(global_tid); - KMP_POP_PARTITIONED_TIMER(); - -#if OMPT_SUPPORT && OMPT_OPTIONAL - kmp_info_t *this_thr = __kmp_threads[global_tid]; - kmp_team_t *team = this_thr->th.th_team; - int tid = __kmp_tid_from_gtid(global_tid); - - if (ompt_enabled.ompt_callback_work) { - ompt_callbacks.ompt_callback(ompt_callback_work)( - ompt_work_single_executor, ompt_scope_end, - &(team->t.ompt_team_info.parallel_data), - &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, - OMPT_GET_RETURN_ADDRESS(0)); - } -#endif -} - -/*! -@ingroup WORK_SHARING -@param loc Source location -@param global_tid Global thread id - -Mark the end of a statically scheduled loop. -*/ -void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { - KMP_POP_PARTITIONED_TIMER(); - KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); - -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.ompt_callback_work) { - ompt_work_t ompt_work_type = ompt_work_loop; - ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); - ompt_task_info_t *task_info = __ompt_get_task_info_object(0); - // Determine workshare type - if (loc != NULL) { - if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { - ompt_work_type = ompt_work_loop; - } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { - ompt_work_type = ompt_work_sections; - } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { - ompt_work_type = ompt_work_distribute; - } else { - // use default set above. 
- // a warning about this case is provided in __kmpc_for_static_init - } - KMP_DEBUG_ASSERT(ompt_work_type); - } - ompt_callbacks.ompt_callback(ompt_callback_work)( - ompt_work_type, ompt_scope_end, &(team_info->parallel_data), - &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); - } -#endif - if (__kmp_env_consistency_check) - __kmp_pop_workshare(global_tid, ct_pdo, loc); -} - -// User routines which take C-style arguments (call by value) -// different from the Fortran equivalent routines - -void ompc_set_num_threads(int arg) { - // !!!!! TODO: check the per-task binding - __kmp_set_num_threads(arg, __kmp_entry_gtid()); -} - -void ompc_set_dynamic(int flag) { - kmp_info_t *thread; - - /* For the thread-private implementation of the internal controls */ - thread = __kmp_entry_thread(); - - __kmp_save_internal_controls(thread); - - set__dynamic(thread, flag ? TRUE : FALSE); -} - -void ompc_set_nested(int flag) { - kmp_info_t *thread; - - /* For the thread-private internal controls implementation */ - thread = __kmp_entry_thread(); - - __kmp_save_internal_controls(thread); - - set__nested(thread, flag ? TRUE : FALSE); -} - -void ompc_set_max_active_levels(int max_active_levels) { - /* TO DO */ - /* we want per-task implementation of this internal control */ - - /* For the per-thread internal controls implementation */ - __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); -} - -void ompc_set_schedule(omp_sched_t kind, int modifier) { - // !!!!! TODO: check the per-task binding - __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); -} - -int ompc_get_ancestor_thread_num(int level) { - return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); -} - -int ompc_get_team_size(int level) { - return __kmp_get_team_size(__kmp_entry_gtid(), level); -} - -#if OMP_50_ENABLED -/* OpenMP 5.0 Affinity Format API */ - -void ompc_set_affinity_format(char const *format) { - if (!__kmp_init_serial) { - __kmp_serial_initialize(); - } - __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, - format, KMP_STRLEN(format) + 1); -} - -size_t ompc_get_affinity_format(char *buffer, size_t size) { - size_t format_size; - if (!__kmp_init_serial) { - __kmp_serial_initialize(); - } - format_size = KMP_STRLEN(__kmp_affinity_format); - if (buffer && size) { - __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, - format_size + 1); - } - return format_size; -} - -void ompc_display_affinity(char const *format) { - int gtid; - if (!TCR_4(__kmp_init_middle)) { - __kmp_middle_initialize(); - } - gtid = __kmp_get_gtid(); - __kmp_aux_display_affinity(gtid, format); -} - -size_t ompc_capture_affinity(char *buffer, size_t buf_size, - char const *format) { - int gtid; - size_t num_required; - kmp_str_buf_t capture_buf; - if (!TCR_4(__kmp_init_middle)) { - __kmp_middle_initialize(); - } - gtid = __kmp_get_gtid(); - __kmp_str_buf_init(&capture_buf); - num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); - if (buffer && buf_size) { - __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, - capture_buf.used + 1); - } - __kmp_str_buf_free(&capture_buf); - return num_required; -} -#endif /* OMP_50_ENABLED */ - -void kmpc_set_stacksize(int arg) { - // __kmp_aux_set_stacksize initializes the library if needed - __kmp_aux_set_stacksize(arg); -} - -void kmpc_set_stacksize_s(size_t arg) { - // __kmp_aux_set_stacksize initializes the library if needed - __kmp_aux_set_stacksize(arg); -} - -void kmpc_set_blocktime(int arg) { - int gtid, tid; - kmp_info_t 
*thread; - - gtid = __kmp_entry_gtid(); - tid = __kmp_tid_from_gtid(gtid); - thread = __kmp_thread_from_gtid(gtid); - - __kmp_aux_set_blocktime(arg, thread, tid); -} - -void kmpc_set_library(int arg) { - // __kmp_user_set_library initializes the library if needed - __kmp_user_set_library((enum library_type)arg); -} - -void kmpc_set_defaults(char const *str) { - // __kmp_aux_set_defaults initializes the library if needed - __kmp_aux_set_defaults(str, KMP_STRLEN(str)); -} - -void kmpc_set_disp_num_buffers(int arg) { - // ignore after initialization because some teams have already - // allocated dispatch buffers - if (__kmp_init_serial == 0 && arg > 0) - __kmp_dispatch_num_buffers = arg; -} - -int kmpc_set_affinity_mask_proc(int proc, void **mask) { -#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return -1; -#else - if (!TCR_4(__kmp_init_middle)) { - __kmp_middle_initialize(); - } - return __kmp_aux_set_affinity_mask_proc(proc, mask); -#endif -} - -int kmpc_unset_affinity_mask_proc(int proc, void **mask) { -#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return -1; -#else - if (!TCR_4(__kmp_init_middle)) { - __kmp_middle_initialize(); - } - return __kmp_aux_unset_affinity_mask_proc(proc, mask); -#endif -} - -int kmpc_get_affinity_mask_proc(int proc, void **mask) { -#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return -1; -#else - if (!TCR_4(__kmp_init_middle)) { - __kmp_middle_initialize(); - } - return __kmp_aux_get_affinity_mask_proc(proc, mask); -#endif -} - -/* -------------------------------------------------------------------------- */ -/*! -@ingroup THREADPRIVATE -@param loc source location information -@param gtid global thread number -@param cpy_size size of the cpy_data buffer -@param cpy_data pointer to data to be copied -@param cpy_func helper function to call for copying data -@param didit flag variable: 1=single thread; 0=not single thread - -__kmpc_copyprivate implements the interface for the private data broadcast -needed for the copyprivate clause associated with a single region in an -OpenMP* program (both C and Fortran). -All threads participating in the parallel region call this routine. -One of the threads (called the single thread) should have the didit -variable set to 1 and all other threads should have that variable set to 0. -All threads pass a pointer to a data buffer (cpy_data) that they have built. - -The OpenMP specification forbids the use of nowait on the single region when a -copyprivate clause is present. However, @ref __kmpc_copyprivate implements a -barrier internally to avoid race conditions, so the code generation for the -single region should avoid generating a barrier after the call to @ref -__kmpc_copyprivate. - -The gtid parameter is the global thread id for the current thread. -The loc parameter is a pointer to source location information. - -Internal implementation: The single thread will first copy its descriptor -address (cpy_data) to a team-private location, then the other threads will each -call the function pointed to by the parameter cpy_func, which carries out the -copy by copying the data using the cpy_data buffer. - -The cpy_func routine used for the copy and the contents of the data area defined -by cpy_data and cpy_size may be built in any fashion that will allow the copy -to be done. For instance, the cpy_data buffer can hold the actual data to be -copied or it may hold a list of pointers to the data. The cpy_func routine must -interpret the cpy_data buffer appropriately. 
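
For instance, an editorial sketch of a trivial cpy_func (BROADCAST_SIZE,
my_buffer and my_cpy_func are hypothetical; each thread passes its own
buffer as cpy_data, and didit is 1 on the single thread, 0 elsewhere):
@code
static void my_cpy_func(void *destination, void *source) {
  // destination: cpy_data of a thread receiving the broadcast
  // source: cpy_data published by the single thread
  memcpy(destination, source, BROADCAST_SIZE);
}
// every thread in the team executes:
__kmpc_copyprivate(&loc, gtid, BROADCAST_SIZE, my_buffer, my_cpy_func, didit);
@endcode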
- -The interface to cpy_func is as follows: -@code -void cpy_func( void *destination, void *source ) -@endcode -where void *destination is the cpy_data pointer for the thread being copied to -and void *source is the cpy_data pointer for the thread being copied from. -*/ -void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, - void *cpy_data, void (*cpy_func)(void *, void *), - kmp_int32 didit) { - void **data_ptr; - - KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); - - KMP_MB(); - - data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; - - if (__kmp_env_consistency_check) { - if (loc == 0) { - KMP_WARNING(ConstructIdentInvalid); - } - } - - // ToDo: Optimize the following two barriers into some kind of split barrier - - if (didit) - *data_ptr = cpy_data; - -#if OMPT_SUPPORT - ompt_frame_t *ompt_frame; - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - if (ompt_frame->enter_frame.ptr == NULL) - ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(gtid); - } -#endif -/* This barrier is not a barrier region boundary */ -#if USE_ITT_NOTIFY - __kmp_threads[gtid]->th.th_ident = loc; -#endif - __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); - - if (!didit) - (*cpy_func)(cpy_data, *data_ptr); - -// Consider next barrier a user-visible barrier for barrier region boundaries -// Nesting checks are already handled by the single construct checks - -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - OMPT_STORE_RETURN_ADDRESS(gtid); - } -#endif -#if USE_ITT_NOTIFY - __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. -// tasks can overwrite the location) -#endif - __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - ompt_frame->enter_frame = ompt_data_none; - } -#endif -} - -/* -------------------------------------------------------------------------- */ - -#define INIT_LOCK __kmp_init_user_lock_with_checks -#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks -#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks -#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed -#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks -#define ACQUIRE_NESTED_LOCK_TIMED \ - __kmp_acquire_nested_user_lock_with_checks_timed -#define RELEASE_LOCK __kmp_release_user_lock_with_checks -#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks -#define TEST_LOCK __kmp_test_user_lock_with_checks -#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks -#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks -#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks - -// TODO: Make check abort messages use location info & pass it into -// with_checks routines - -#if KMP_USE_DYNAMIC_LOCK - -// internal lock initializer -static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, - kmp_dyna_lockseq_t seq) { - if (KMP_IS_D_LOCK(seq)) { - KMP_INIT_D_LOCK(lock, seq); -#if USE_ITT_BUILD - __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); -#endif - } else { - KMP_INIT_I_LOCK(lock, seq); -#if USE_ITT_BUILD - kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); - __kmp_itt_lock_creating(ilk->lock, loc); -#endif - } -} - -// internal nest lock initializer -static __forceinline void -__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock, - kmp_dyna_lockseq_t seq) { -#if KMP_USE_TSX - // Don't have nested 
lock implementation for speculative locks - if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive) - seq = __kmp_user_lock_seq; -#endif - switch (seq) { - case lockseq_tas: - seq = lockseq_nested_tas; - break; -#if KMP_USE_FUTEX - case lockseq_futex: - seq = lockseq_nested_futex; - break; -#endif - case lockseq_ticket: - seq = lockseq_nested_ticket; - break; - case lockseq_queuing: - seq = lockseq_nested_queuing; - break; - case lockseq_drdpa: - seq = lockseq_nested_drdpa; - break; - default: - seq = lockseq_nested_queuing; - } - KMP_INIT_I_LOCK(lock, seq); -#if USE_ITT_BUILD - kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); - __kmp_itt_lock_creating(ilk->lock, loc); -#endif -} - -/* initialize the lock with a hint */ -void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, - uintptr_t hint) { - KMP_DEBUG_ASSERT(__kmp_init_serial); - if (__kmp_env_consistency_check && user_lock == NULL) { - KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); - } - - __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); - -#if OMPT_SUPPORT && OMPT_OPTIONAL - // This is the case, if called from omp_init_lock_with_hint: - void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); - if (!codeptr) - codeptr = OMPT_GET_RETURN_ADDRESS(0); - if (ompt_enabled.ompt_callback_lock_init) { - ompt_callbacks.ompt_callback(ompt_callback_lock_init)( - ompt_mutex_lock, (omp_lock_hint_t)hint, - __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, - codeptr); - } -#endif -} - -/* initialize the lock with a hint */ -void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, - void **user_lock, uintptr_t hint) { - KMP_DEBUG_ASSERT(__kmp_init_serial); - if (__kmp_env_consistency_check && user_lock == NULL) { - KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); - } - - __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); - -#if OMPT_SUPPORT && OMPT_OPTIONAL - // This is the case, if called from omp_init_lock_with_hint: - void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); - if (!codeptr) - codeptr = OMPT_GET_RETURN_ADDRESS(0); - if (ompt_enabled.ompt_callback_lock_init) { - ompt_callbacks.ompt_callback(ompt_callback_lock_init)( - ompt_mutex_nest_lock, (omp_lock_hint_t)hint, - __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, - codeptr); - } -#endif -} - -#endif // KMP_USE_DYNAMIC_LOCK - -/* initialize the lock */ -void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { -#if KMP_USE_DYNAMIC_LOCK - - KMP_DEBUG_ASSERT(__kmp_init_serial); - if (__kmp_env_consistency_check && user_lock == NULL) { - KMP_FATAL(LockIsUninitialized, "omp_init_lock"); - } - __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); - -#if OMPT_SUPPORT && OMPT_OPTIONAL - // This is the case, if called from omp_init_lock_with_hint: - void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); - if (!codeptr) - codeptr = OMPT_GET_RETURN_ADDRESS(0); - if (ompt_enabled.ompt_callback_lock_init) { - ompt_callbacks.ompt_callback(ompt_callback_lock_init)( - ompt_mutex_lock, omp_lock_hint_none, - __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, - codeptr); - } -#endif - -#else // KMP_USE_DYNAMIC_LOCK - - static char const *const func = "omp_init_lock"; - kmp_user_lock_p lck; - KMP_DEBUG_ASSERT(__kmp_init_serial); - - if (__kmp_env_consistency_check) { - if (user_lock == NULL) { - KMP_FATAL(LockIsUninitialized, func); - } - } - - KMP_CHECK_USER_LOCK_INIT(); - - if ((__kmp_user_lock_kind == lk_tas) && - 
(sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { - lck = (kmp_user_lock_p)user_lock; - } -#if KMP_USE_FUTEX - else if ((__kmp_user_lock_kind == lk_futex) && - (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { - lck = (kmp_user_lock_p)user_lock; - } -#endif - else { - lck = __kmp_user_lock_allocate(user_lock, gtid, 0); - } - INIT_LOCK(lck); - __kmp_set_user_lock_location(lck, loc); - -#if OMPT_SUPPORT && OMPT_OPTIONAL - // This is the case, if called from omp_init_lock_with_hint: - void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); - if (!codeptr) - codeptr = OMPT_GET_RETURN_ADDRESS(0); - if (ompt_enabled.ompt_callback_lock_init) { - ompt_callbacks.ompt_callback(ompt_callback_lock_init)( - ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), - (ompt_wait_id_t)user_lock, codeptr); - } -#endif - -#if USE_ITT_BUILD - __kmp_itt_lock_creating(lck); -#endif /* USE_ITT_BUILD */ - -#endif // KMP_USE_DYNAMIC_LOCK -} // __kmpc_init_lock - -/* initialize the lock */ -void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { -#if KMP_USE_DYNAMIC_LOCK - - KMP_DEBUG_ASSERT(__kmp_init_serial); - if (__kmp_env_consistency_check && user_lock == NULL) { - KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); - } - __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); - -#if OMPT_SUPPORT && OMPT_OPTIONAL - // This is the case, if called from omp_init_lock_with_hint: - void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); - if (!codeptr) - codeptr = OMPT_GET_RETURN_ADDRESS(0); - if (ompt_enabled.ompt_callback_lock_init) { - ompt_callbacks.ompt_callback(ompt_callback_lock_init)( - ompt_mutex_nest_lock, omp_lock_hint_none, - __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, - codeptr); - } -#endif - -#else // KMP_USE_DYNAMIC_LOCK - - static char const *const func = "omp_init_nest_lock"; - kmp_user_lock_p lck; - KMP_DEBUG_ASSERT(__kmp_init_serial); - - if (__kmp_env_consistency_check) { - if (user_lock == NULL) { - KMP_FATAL(LockIsUninitialized, func); - } - } - - KMP_CHECK_USER_LOCK_INIT(); - - if ((__kmp_user_lock_kind == lk_tas) && - (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= - OMP_NEST_LOCK_T_SIZE)) { - lck = (kmp_user_lock_p)user_lock; - } -#if KMP_USE_FUTEX - else if ((__kmp_user_lock_kind == lk_futex) && - (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= - OMP_NEST_LOCK_T_SIZE)) { - lck = (kmp_user_lock_p)user_lock; - } -#endif - else { - lck = __kmp_user_lock_allocate(user_lock, gtid, 0); - } - - INIT_NESTED_LOCK(lck); - __kmp_set_user_lock_location(lck, loc); - -#if OMPT_SUPPORT && OMPT_OPTIONAL - // This is the case, if called from omp_init_lock_with_hint: - void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); - if (!codeptr) - codeptr = OMPT_GET_RETURN_ADDRESS(0); - if (ompt_enabled.ompt_callback_lock_init) { - ompt_callbacks.ompt_callback(ompt_callback_lock_init)( - ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), - (ompt_wait_id_t)user_lock, codeptr); - } -#endif - -#if USE_ITT_BUILD - __kmp_itt_lock_creating(lck); -#endif /* USE_ITT_BUILD */ - -#endif // KMP_USE_DYNAMIC_LOCK -} // __kmpc_init_nest_lock - -void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { -#if KMP_USE_DYNAMIC_LOCK - -#if USE_ITT_BUILD - kmp_user_lock_p lck; - if (KMP_EXTRACT_D_TAG(user_lock) == 0) { - lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; - } else { - lck = (kmp_user_lock_p)user_lock; - } - __kmp_itt_lock_destroyed(lck); -#endif -#if OMPT_SUPPORT && OMPT_OPTIONAL - // 
This is the case, if called from omp_init_lock_with_hint: - void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); - if (!codeptr) - codeptr = OMPT_GET_RETURN_ADDRESS(0); - if (ompt_enabled.ompt_callback_lock_destroy) { - kmp_user_lock_p lck; - if (KMP_EXTRACT_D_TAG(user_lock) == 0) { - lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; - } else { - lck = (kmp_user_lock_p)user_lock; - } - ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( - ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); - } -#endif - KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); -#else - kmp_user_lock_p lck; - - if ((__kmp_user_lock_kind == lk_tas) && - (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { - lck = (kmp_user_lock_p)user_lock; - } -#if KMP_USE_FUTEX - else if ((__kmp_user_lock_kind == lk_futex) && - (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { - lck = (kmp_user_lock_p)user_lock; - } -#endif - else { - lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); - } - -#if OMPT_SUPPORT && OMPT_OPTIONAL - // This is the case, if called from omp_init_lock_with_hint: - void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); - if (!codeptr) - codeptr = OMPT_GET_RETURN_ADDRESS(0); - if (ompt_enabled.ompt_callback_lock_destroy) { - ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( - ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); - } -#endif - -#if USE_ITT_BUILD - __kmp_itt_lock_destroyed(lck); -#endif /* USE_ITT_BUILD */ - DESTROY_LOCK(lck); - - if ((__kmp_user_lock_kind == lk_tas) && - (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { - ; - } -#if KMP_USE_FUTEX - else if ((__kmp_user_lock_kind == lk_futex) && - (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { - ; - } -#endif - else { - __kmp_user_lock_free(user_lock, gtid, lck); - } -#endif // KMP_USE_DYNAMIC_LOCK -} // __kmpc_destroy_lock - -/* destroy the lock */ -void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { -#if KMP_USE_DYNAMIC_LOCK - -#if USE_ITT_BUILD - kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); - __kmp_itt_lock_destroyed(ilk->lock); -#endif -#if OMPT_SUPPORT && OMPT_OPTIONAL - // This is the case, if called from omp_init_lock_with_hint: - void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); - if (!codeptr) - codeptr = OMPT_GET_RETURN_ADDRESS(0); - if (ompt_enabled.ompt_callback_lock_destroy) { - ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( - ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); - } -#endif - KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); - -#else // KMP_USE_DYNAMIC_LOCK - - kmp_user_lock_p lck; - - if ((__kmp_user_lock_kind == lk_tas) && - (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= - OMP_NEST_LOCK_T_SIZE)) { - lck = (kmp_user_lock_p)user_lock; - } -#if KMP_USE_FUTEX - else if ((__kmp_user_lock_kind == lk_futex) && - (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= - OMP_NEST_LOCK_T_SIZE)) { - lck = (kmp_user_lock_p)user_lock; - } -#endif - else { - lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); - } - -#if OMPT_SUPPORT && OMPT_OPTIONAL - // This is the case, if called from omp_init_lock_with_hint: - void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); - if (!codeptr) - codeptr = OMPT_GET_RETURN_ADDRESS(0); - if (ompt_enabled.ompt_callback_lock_destroy) { - ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( - ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); - } -#endif - -#if USE_ITT_BUILD - __kmp_itt_lock_destroyed(lck); -#endif /* 
USE_ITT_BUILD */ - - DESTROY_NESTED_LOCK(lck); - - if ((__kmp_user_lock_kind == lk_tas) && - (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= - OMP_NEST_LOCK_T_SIZE)) { - ; - } -#if KMP_USE_FUTEX - else if ((__kmp_user_lock_kind == lk_futex) && - (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= - OMP_NEST_LOCK_T_SIZE)) { - ; - } -#endif - else { - __kmp_user_lock_free(user_lock, gtid, lck); - } -#endif // KMP_USE_DYNAMIC_LOCK -} // __kmpc_destroy_nest_lock - -void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { - KMP_COUNT_BLOCK(OMP_set_lock); -#if KMP_USE_DYNAMIC_LOCK - int tag = KMP_EXTRACT_D_TAG(user_lock); -#if USE_ITT_BUILD - __kmp_itt_lock_acquiring( - (kmp_user_lock_p) - user_lock); // itt function will get to the right lock object. -#endif -#if OMPT_SUPPORT && OMPT_OPTIONAL - // This is the case, if called from omp_init_lock_with_hint: - void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); - if (!codeptr) - codeptr = OMPT_GET_RETURN_ADDRESS(0); - if (ompt_enabled.ompt_callback_mutex_acquire) { - ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( - ompt_mutex_lock, omp_lock_hint_none, - __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, - codeptr); - } -#endif -#if KMP_USE_INLINED_TAS - if (tag == locktag_tas && !__kmp_env_consistency_check) { - KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); - } else -#elif KMP_USE_INLINED_FUTEX - if (tag == locktag_futex && !__kmp_env_consistency_check) { - KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); - } else -#endif - { - __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); - } -#if USE_ITT_BUILD - __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); -#endif -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.ompt_callback_mutex_acquired) { - ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( - ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); - } -#endif - -#else // KMP_USE_DYNAMIC_LOCK - - kmp_user_lock_p lck; - - if ((__kmp_user_lock_kind == lk_tas) && - (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { - lck = (kmp_user_lock_p)user_lock; - } -#if KMP_USE_FUTEX - else if ((__kmp_user_lock_kind == lk_futex) && - (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { - lck = (kmp_user_lock_p)user_lock; - } -#endif - else { - lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); - } - -#if USE_ITT_BUILD - __kmp_itt_lock_acquiring(lck); -#endif /* USE_ITT_BUILD */ -#if OMPT_SUPPORT && OMPT_OPTIONAL - // This is the case, if called from omp_init_lock_with_hint: - void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); - if (!codeptr) - codeptr = OMPT_GET_RETURN_ADDRESS(0); - if (ompt_enabled.ompt_callback_mutex_acquire) { - ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( - ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), - (ompt_wait_id_t)lck, codeptr); - } -#endif - - ACQUIRE_LOCK(lck, gtid); - -#if USE_ITT_BUILD - __kmp_itt_lock_acquired(lck); -#endif /* USE_ITT_BUILD */ - -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.ompt_callback_mutex_acquired) { - ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( - ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr); - } -#endif - -#endif // KMP_USE_DYNAMIC_LOCK -} - -void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { -#if KMP_USE_DYNAMIC_LOCK - -#if USE_ITT_BUILD - __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); -#endif -#if OMPT_SUPPORT && OMPT_OPTIONAL - // This is the case, if called from omp_init_lock_with_hint: - void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); - 
if (!codeptr) - codeptr = OMPT_GET_RETURN_ADDRESS(0); - if (ompt_enabled.enabled) { - if (ompt_enabled.ompt_callback_mutex_acquire) { - ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( - ompt_mutex_nest_lock, omp_lock_hint_none, - __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, - codeptr); - } - } -#endif - int acquire_status = - KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid); - (void) acquire_status; -#if USE_ITT_BUILD - __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); -#endif - -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { - if (ompt_enabled.ompt_callback_mutex_acquired) { - // lock_first - ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( - ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); - } - } else { - if (ompt_enabled.ompt_callback_nest_lock) { - // lock_next - ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( - ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr); - } - } - } -#endif - -#else // KMP_USE_DYNAMIC_LOCK - int acquire_status; - kmp_user_lock_p lck; - - if ((__kmp_user_lock_kind == lk_tas) && - (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= - OMP_NEST_LOCK_T_SIZE)) { - lck = (kmp_user_lock_p)user_lock; - } -#if KMP_USE_FUTEX - else if ((__kmp_user_lock_kind == lk_futex) && - (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= - OMP_NEST_LOCK_T_SIZE)) { - lck = (kmp_user_lock_p)user_lock; - } -#endif - else { - lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); - } - -#if USE_ITT_BUILD - __kmp_itt_lock_acquiring(lck); -#endif /* USE_ITT_BUILD */ -#if OMPT_SUPPORT && OMPT_OPTIONAL - // This is the case, if called from omp_init_lock_with_hint: - void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); - if (!codeptr) - codeptr = OMPT_GET_RETURN_ADDRESS(0); - if (ompt_enabled.enabled) { - if (ompt_enabled.ompt_callback_mutex_acquire) { - ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( - ompt_mutex_nest_lock, omp_lock_hint_none, - __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr); - } - } -#endif - - ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); - -#if USE_ITT_BUILD - __kmp_itt_lock_acquired(lck); -#endif /* USE_ITT_BUILD */ - -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { - if (ompt_enabled.ompt_callback_mutex_acquired) { - // lock_first - ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( - ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr); - } - } else { - if (ompt_enabled.ompt_callback_nest_lock) { - // lock_next - ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( - ompt_scope_begin, (ompt_wait_id_t)lck, codeptr); - } - } - } -#endif - -#endif // KMP_USE_DYNAMIC_LOCK -} - -void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { -#if KMP_USE_DYNAMIC_LOCK - - int tag = KMP_EXTRACT_D_TAG(user_lock); -#if USE_ITT_BUILD - __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); -#endif -#if KMP_USE_INLINED_TAS - if (tag == locktag_tas && !__kmp_env_consistency_check) { - KMP_RELEASE_TAS_LOCK(user_lock, gtid); - } else -#elif KMP_USE_INLINED_FUTEX - if (tag == locktag_futex && !__kmp_env_consistency_check) { - KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); - } else -#endif - { - __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); - } - -#if OMPT_SUPPORT && OMPT_OPTIONAL - // This is the case, if called from omp_init_lock_with_hint: - void *codeptr = 
OMPT_LOAD_RETURN_ADDRESS(gtid); - if (!codeptr) - codeptr = OMPT_GET_RETURN_ADDRESS(0); - if (ompt_enabled.ompt_callback_mutex_released) { - ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( - ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); - } -#endif - -#else // KMP_USE_DYNAMIC_LOCK - - kmp_user_lock_p lck; - - /* Can't use serial interval since not block structured */ - /* release the lock */ - - if ((__kmp_user_lock_kind == lk_tas) && - (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { -#if KMP_OS_LINUX && \ - (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) -// "fast" path implemented to fix customer performance issue -#if USE_ITT_BUILD - __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); -#endif /* USE_ITT_BUILD */ - TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); - KMP_MB(); - -#if OMPT_SUPPORT && OMPT_OPTIONAL - // This is the case, if called from omp_init_lock_with_hint: - void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); - if (!codeptr) - codeptr = OMPT_GET_RETURN_ADDRESS(0); - if (ompt_enabled.ompt_callback_mutex_released) { - ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( - ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr); - } -#endif - - return; -#else - lck = (kmp_user_lock_p)user_lock; -#endif - } -#if KMP_USE_FUTEX - else if ((__kmp_user_lock_kind == lk_futex) && - (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { - lck = (kmp_user_lock_p)user_lock; - } -#endif - else { - lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); - } - -#if USE_ITT_BUILD - __kmp_itt_lock_releasing(lck); -#endif /* USE_ITT_BUILD */ - - RELEASE_LOCK(lck, gtid); - -#if OMPT_SUPPORT && OMPT_OPTIONAL - // This is the case, if called from omp_init_lock_with_hint: - void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); - if (!codeptr) - codeptr = OMPT_GET_RETURN_ADDRESS(0); - if (ompt_enabled.ompt_callback_mutex_released) { - ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( - ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr); - } -#endif - -#endif // KMP_USE_DYNAMIC_LOCK -} - -/* release the lock */ -void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { -#if KMP_USE_DYNAMIC_LOCK - -#if USE_ITT_BUILD - __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); -#endif - int release_status = - KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); - (void) release_status; - -#if OMPT_SUPPORT && OMPT_OPTIONAL - // This is the case, if called from omp_init_lock_with_hint: - void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); - if (!codeptr) - codeptr = OMPT_GET_RETURN_ADDRESS(0); - if (ompt_enabled.enabled) { - if (release_status == KMP_LOCK_RELEASED) { - if (ompt_enabled.ompt_callback_mutex_released) { - // release_lock_last - ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( - ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); - } - } else if (ompt_enabled.ompt_callback_nest_lock) { - // release_lock_prev - ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( - ompt_scope_end, (ompt_wait_id_t)user_lock, codeptr); - } - } -#endif - -#else // KMP_USE_DYNAMIC_LOCK - - kmp_user_lock_p lck; - - /* Can't use serial interval since not block structured */ - - if ((__kmp_user_lock_kind == lk_tas) && - (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= - OMP_NEST_LOCK_T_SIZE)) { -#if KMP_OS_LINUX && \ - (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - // "fast" path implemented to fix customer performance issue - kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; -#if 
USE_ITT_BUILD - __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); -#endif /* USE_ITT_BUILD */ - -#if OMPT_SUPPORT && OMPT_OPTIONAL - int release_status = KMP_LOCK_STILL_HELD; -#endif - - if (--(tl->lk.depth_locked) == 0) { - TCW_4(tl->lk.poll, 0); -#if OMPT_SUPPORT && OMPT_OPTIONAL - release_status = KMP_LOCK_RELEASED; -#endif - } - KMP_MB(); - -#if OMPT_SUPPORT && OMPT_OPTIONAL - // This is the case, if called from omp_init_lock_with_hint: - void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); - if (!codeptr) - codeptr = OMPT_GET_RETURN_ADDRESS(0); - if (ompt_enabled.enabled) { - if (release_status == KMP_LOCK_RELEASED) { - if (ompt_enabled.ompt_callback_mutex_released) { - // release_lock_last - ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( - ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr); - } - } else if (ompt_enabled.ompt_callback_nest_lock) { - // release_lock_previous - ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( - ompt_mutex_scope_end, (ompt_wait_id_t)lck, codeptr); - } - } -#endif - - return; -#else - lck = (kmp_user_lock_p)user_lock; -#endif - } -#if KMP_USE_FUTEX - else if ((__kmp_user_lock_kind == lk_futex) && - (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= - OMP_NEST_LOCK_T_SIZE)) { - lck = (kmp_user_lock_p)user_lock; - } -#endif - else { - lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock"); - } - -#if USE_ITT_BUILD - __kmp_itt_lock_releasing(lck); -#endif /* USE_ITT_BUILD */ - - int release_status; - release_status = RELEASE_NESTED_LOCK(lck, gtid); -#if OMPT_SUPPORT && OMPT_OPTIONAL - // This is the case, if called from omp_init_lock_with_hint: - void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); - if (!codeptr) - codeptr = OMPT_GET_RETURN_ADDRESS(0); - if (ompt_enabled.enabled) { - if (release_status == KMP_LOCK_RELEASED) { - if (ompt_enabled.ompt_callback_mutex_released) { - // release_lock_last - ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( - ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr); - } - } else if (ompt_enabled.ompt_callback_nest_lock) { - // release_lock_previous - ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( - ompt_mutex_scope_end, (ompt_wait_id_t)lck, codeptr); - } - } -#endif - -#endif // KMP_USE_DYNAMIC_LOCK -} - -/* try to acquire the lock */ -int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { - KMP_COUNT_BLOCK(OMP_test_lock); - -#if KMP_USE_DYNAMIC_LOCK - int rc; - int tag = KMP_EXTRACT_D_TAG(user_lock); -#if USE_ITT_BUILD - __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); -#endif -#if OMPT_SUPPORT && OMPT_OPTIONAL - // This is the case, if called from omp_init_lock_with_hint: - void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); - if (!codeptr) - codeptr = OMPT_GET_RETURN_ADDRESS(0); - if (ompt_enabled.ompt_callback_mutex_acquire) { - ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( - ompt_mutex_lock, omp_lock_hint_none, - __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, - codeptr); - } -#endif -#if KMP_USE_INLINED_TAS - if (tag == locktag_tas && !__kmp_env_consistency_check) { - KMP_TEST_TAS_LOCK(user_lock, gtid, rc); - } else -#elif KMP_USE_INLINED_FUTEX - if (tag == locktag_futex && !__kmp_env_consistency_check) { - KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); - } else -#endif - { - rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); - } - if (rc) { -#if USE_ITT_BUILD - __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); -#endif -#if OMPT_SUPPORT && OMPT_OPTIONAL - if 
(ompt_enabled.ompt_callback_mutex_acquired) { - ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( - ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); - } -#endif - return FTN_TRUE; - } else { -#if USE_ITT_BUILD - __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); -#endif - return FTN_FALSE; - } - -#else // KMP_USE_DYNAMIC_LOCK - - kmp_user_lock_p lck; - int rc; - - if ((__kmp_user_lock_kind == lk_tas) && - (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { - lck = (kmp_user_lock_p)user_lock; - } -#if KMP_USE_FUTEX - else if ((__kmp_user_lock_kind == lk_futex) && - (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { - lck = (kmp_user_lock_p)user_lock; - } -#endif - else { - lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); - } - -#if USE_ITT_BUILD - __kmp_itt_lock_acquiring(lck); -#endif /* USE_ITT_BUILD */ -#if OMPT_SUPPORT && OMPT_OPTIONAL - // This is the case, if called from omp_init_lock_with_hint: - void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); - if (!codeptr) - codeptr = OMPT_GET_RETURN_ADDRESS(0); - if (ompt_enabled.ompt_callback_mutex_acquire) { - ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( - ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), - (ompt_wait_id_t)lck, codeptr); - } -#endif - - rc = TEST_LOCK(lck, gtid); -#if USE_ITT_BUILD - if (rc) { - __kmp_itt_lock_acquired(lck); - } else { - __kmp_itt_lock_cancelled(lck); - } -#endif /* USE_ITT_BUILD */ -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (rc && ompt_enabled.ompt_callback_mutex_acquired) { - ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( - ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr); - } -#endif - - return (rc ? FTN_TRUE : FTN_FALSE); - -/* Can't use serial interval since not block structured */ - -#endif // KMP_USE_DYNAMIC_LOCK -} - -/* try to acquire the lock */ -int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { -#if KMP_USE_DYNAMIC_LOCK - int rc; -#if USE_ITT_BUILD - __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); -#endif -#if OMPT_SUPPORT && OMPT_OPTIONAL - // This is the case, if called from omp_init_lock_with_hint: - void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); - if (!codeptr) - codeptr = OMPT_GET_RETURN_ADDRESS(0); - if (ompt_enabled.ompt_callback_mutex_acquire) { - ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( - ompt_mutex_nest_lock, omp_lock_hint_none, - __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, - codeptr); - } -#endif - rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid); -#if USE_ITT_BUILD - if (rc) { - __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); - } else { - __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); - } -#endif -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled && rc) { - if (rc == 1) { - if (ompt_enabled.ompt_callback_mutex_acquired) { - // lock_first - ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( - ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); - } - } else { - if (ompt_enabled.ompt_callback_nest_lock) { - // lock_next - ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( - ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr); - } - } - } -#endif - return rc; - -#else // KMP_USE_DYNAMIC_LOCK - - kmp_user_lock_p lck; - int rc; - - if ((__kmp_user_lock_kind == lk_tas) && - (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= - OMP_NEST_LOCK_T_SIZE)) { - lck = (kmp_user_lock_p)user_lock; - } -#if KMP_USE_FUTEX - else if ((__kmp_user_lock_kind == lk_futex) && - 
(sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
-               OMP_NEST_LOCK_T_SIZE)) {
-    lck = (kmp_user_lock_p)user_lock;
-  }
-#endif
-  else {
-    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
-  }
-
-#if USE_ITT_BUILD
-  __kmp_itt_lock_acquiring(lck);
-#endif /* USE_ITT_BUILD */
-
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  // This is the case, if called from omp_init_lock_with_hint:
-  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
-  if (!codeptr)
-    codeptr = OMPT_GET_RETURN_ADDRESS(0);
-  if (ompt_enabled.enabled &&
-      ompt_enabled.ompt_callback_mutex_acquire) {
-    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
-        ompt_mutex_nest_lock, omp_lock_hint_none,
-        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
-  }
-#endif
-
-  rc = TEST_NESTED_LOCK(lck, gtid);
-#if USE_ITT_BUILD
-  if (rc) {
-    __kmp_itt_lock_acquired(lck);
-  } else {
-    __kmp_itt_lock_cancelled(lck);
-  }
-#endif /* USE_ITT_BUILD */
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  if (ompt_enabled.enabled && rc) {
-    if (rc == 1) {
-      if (ompt_enabled.ompt_callback_mutex_acquired) {
-        // lock_first
-        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
-            ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
-      }
-    } else {
-      if (ompt_enabled.ompt_callback_nest_lock) {
-        // lock_next
-        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
-            ompt_scope_begin, (ompt_wait_id_t)lck, codeptr);
-      }
-    }
-  }
-#endif
-  return rc;
-
-/* Can't use serial interval since not block structured */
-
-#endif // KMP_USE_DYNAMIC_LOCK
-}
-
-// Interface to fast scalable reduce methods routines
-
-// keep the selected method in a thread local structure for cross-function
-// usage: will be used in __kmpc_end_reduce* functions;
-// another solution: to re-determine the method one more time in
-// __kmpc_end_reduce* functions (new prototype required then)
-// AT: which solution is better?
-#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod)                              \
-  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
-
-#define __KMP_GET_REDUCTION_METHOD(gtid)                                       \
-  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
-
-// description of the packed_reduction_method variable: look at the macros in
-// kmp.h
-
-// used in a critical section reduce block
-static __forceinline void
-__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
-                                          kmp_critical_name *crit) {
-
-  // this lock was visible to a customer and to the threading profile tool as a
-  // serial overhead span (although it's used for an internal purpose only)
-  // why was it visible in previous implementation?
-  // should we keep it visible in new reduce block?
-  kmp_user_lock_p lck;
-
-#if KMP_USE_DYNAMIC_LOCK
-
-  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
-  // Check if it is initialized.
-  if (*lk == 0) {
-    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
-      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
-                                  KMP_GET_D_TAG(__kmp_user_lock_seq));
-    } else {
-      __kmp_init_indirect_csptr(crit, loc, global_tid,
-                                KMP_GET_I_TAG(__kmp_user_lock_seq));
-    }
-  }
-  // Branch for accessing the actual lock object and set operation. This
-  // branching is inevitable since this lock initialization does not follow the
-  // normal dispatch path (lock table is not used).
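  // A sketch of the dispatch below (hypothetical shorthand; the real macros
  // live in kmp_lock.h): the lock word acts as a tagged union, where
  // KMP_EXTRACT_D_TAG() yields a nonzero tag only when the word holds a
  // "direct" lock in place, and a zero tag means the word really stores a
  // kmp_indirect_lock_t * whose lock object is reached through ilk->lock:
  //   tag != 0  ->  treat lk itself as the kmp_user_lock_p and set it;
  //   tag == 0  ->  ilk = *(kmp_indirect_lock_t **)lk; set ilk->lock.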
- if (KMP_EXTRACT_D_TAG(lk) != 0) { - lck = (kmp_user_lock_p)lk; - KMP_DEBUG_ASSERT(lck != NULL); - if (__kmp_env_consistency_check) { - __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); - } - KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); - } else { - kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); - lck = ilk->lock; - KMP_DEBUG_ASSERT(lck != NULL); - if (__kmp_env_consistency_check) { - __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); - } - KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); - } - -#else // KMP_USE_DYNAMIC_LOCK - - // We know that the fast reduction code is only emitted by Intel compilers - // with 32 byte critical sections. If there isn't enough space, then we - // have to use a pointer. - if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { - lck = (kmp_user_lock_p)crit; - } else { - lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); - } - KMP_DEBUG_ASSERT(lck != NULL); - - if (__kmp_env_consistency_check) - __kmp_push_sync(global_tid, ct_critical, loc, lck); - - __kmp_acquire_user_lock_with_checks(lck, global_tid); - -#endif // KMP_USE_DYNAMIC_LOCK -} - -// used in a critical section reduce block -static __forceinline void -__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, - kmp_critical_name *crit) { - - kmp_user_lock_p lck; - -#if KMP_USE_DYNAMIC_LOCK - - if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { - lck = (kmp_user_lock_p)crit; - if (__kmp_env_consistency_check) - __kmp_pop_sync(global_tid, ct_critical, loc); - KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); - } else { - kmp_indirect_lock_t *ilk = - (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); - if (__kmp_env_consistency_check) - __kmp_pop_sync(global_tid, ct_critical, loc); - KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); - } - -#else // KMP_USE_DYNAMIC_LOCK - - // We know that the fast reduction code is only emitted by Intel compilers - // with 32 byte critical sections. If there isn't enough space, then we have - // to use a pointer. - if (__kmp_base_user_lock_size > 32) { - lck = *((kmp_user_lock_p *)crit); - KMP_ASSERT(lck != NULL); - } else { - lck = (kmp_user_lock_p)crit; - } - - if (__kmp_env_consistency_check) - __kmp_pop_sync(global_tid, ct_critical, loc); - - __kmp_release_user_lock_with_checks(lck, global_tid); - -#endif // KMP_USE_DYNAMIC_LOCK -} // __kmp_end_critical_section_reduce_block - -#if OMP_40_ENABLED -static __forceinline int -__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, - int *task_state) { - kmp_team_t *team; - - // Check if we are inside the teams construct? - if (th->th.th_teams_microtask) { - *team_p = team = th->th.th_team; - if (team->t.t_level == th->th.th_teams_level) { - // This is reduction at teams construct. - KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 - // Let's swap teams temporarily for the reduction. - th->th.th_info.ds.ds_tid = team->t.t_master_tid; - th->th.th_team = team->t.t_parent; - th->th.th_team_nproc = th->th.th_team->t.t_nproc; - th->th.th_task_team = th->th.th_team->t.t_task_team[0]; - *task_state = th->th.th_task_state; - th->th.th_task_state = 0; - - return 1; - } - } - return 0; -} - -static __forceinline void -__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { - // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 
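  // A sketch of the caller-side protocol (hypothetical locals; this mirrors
  // how __kmpc_reduce_nowait() and __kmpc_reduce() below use the pair):
  //   int task_state;
  //   kmp_team_t *team;
  //   int swapped =
  //       __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
  //   /* ... run the reduction against the parent team ... */
  //   if (swapped)
  //     __kmp_restore_swapped_teams(th, team, task_state);
  // Every thread field the swap rewrites (tid, team, nproc, task team and
  // task state) is written back below.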
- th->th.th_info.ds.ds_tid = 0; - th->th.th_team = team; - th->th.th_team_nproc = team->t.t_nproc; - th->th.th_task_team = team->t.t_task_team[task_state]; - th->th.th_task_state = task_state; -} -#endif - -/* 2.a.i. Reduce Block without a terminating barrier */ -/*! -@ingroup SYNCHRONIZATION -@param loc source location information -@param global_tid global thread number -@param num_vars number of items (variables) to be reduced -@param reduce_size size of data in bytes to be reduced -@param reduce_data pointer to data to be reduced -@param reduce_func callback function providing reduction operation on two -operands and returning result of reduction in lhs_data -@param lck pointer to the unique lock data structure -@result 1 for the master thread, 0 for all other team threads, 2 for all team -threads if atomic reduction needed - -The nowait version is used for a reduce clause with the nowait argument. -*/ -kmp_int32 -__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, - size_t reduce_size, void *reduce_data, - void (*reduce_func)(void *lhs_data, void *rhs_data), - kmp_critical_name *lck) { - - KMP_COUNT_BLOCK(REDUCE_nowait); - int retval = 0; - PACKED_REDUCTION_METHOD_T packed_reduction_method; -#if OMP_40_ENABLED - kmp_info_t *th; - kmp_team_t *team; - int teams_swapped = 0, task_state; -#endif - KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); - - // why do we need this initialization here at all? - // Reduction clause can not be used as a stand-alone directive. - - // do not call __kmp_serial_initialize(), it will be called by - // __kmp_parallel_initialize() if needed - // possible detection of false-positive race by the threadchecker ??? - if (!TCR_4(__kmp_init_parallel)) - __kmp_parallel_initialize(); - -// check correctness of reduce block nesting -#if KMP_USE_DYNAMIC_LOCK - if (__kmp_env_consistency_check) - __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); -#else - if (__kmp_env_consistency_check) - __kmp_push_sync(global_tid, ct_reduce, loc, NULL); -#endif - -#if OMP_40_ENABLED - th = __kmp_thread_from_gtid(global_tid); - teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); -#endif // OMP_40_ENABLED - - // packed_reduction_method value will be reused by __kmp_end_reduce* function, - // the value should be kept in a variable - // the variable should be either a construct-specific or thread-specific - // property, not a team specific property - // (a thread can reach the next reduce block on the next construct, reduce - // method may differ on the next construct) - // an ident_t "loc" parameter could be used as a construct-specific property - // (what if loc == 0?) 
-  // (if both construct-specific and team-specific variables were shared,
-  // then unnecessary extra syncs would be needed)
-  // a thread-specific variable is better regarding two issues above (next
-  // construct and extra syncs)
-  // a thread-specific "th_local.reduction_method" variable is used currently
-  // each thread executes 'determine' and 'set' lines (no need to execute by one
-  // thread, to avoid unnecessary extra syncs)
-
-  packed_reduction_method = __kmp_determine_reduction_method(
-      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
-  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
-
-  if (packed_reduction_method == critical_reduce_block) {
-
-    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
-    retval = 1;
-
-  } else if (packed_reduction_method == empty_reduce_block) {
-
-    // usage: if team size == 1, no synchronization is required ( Intel
-    // platforms only )
-    retval = 1;
-
-  } else if (packed_reduction_method == atomic_reduce_block) {
-
-    retval = 2;
-
-    // all threads should do this pop here (because __kmpc_end_reduce_nowait()
-    // won't be called by the code gen)
-    // (it's not quite good, because the checking block has been closed by
-    // this 'pop',
-    // but atomic operation has not been executed yet, will be executed
-    // slightly later, literally on next instruction)
-    if (__kmp_env_consistency_check)
-      __kmp_pop_sync(global_tid, ct_reduce, loc);
-
-  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
-                                   tree_reduce_block)) {
-
-// AT: performance issue: a real barrier here
-// AT: (if master goes slow, other threads are blocked here waiting for the
-// master to come and release them)
-// AT: (it's not what a customer might expect specifying NOWAIT clause)
-// AT: (specifying NOWAIT won't result in improvement of performance, it'll
-// be confusing to a customer)
-// AT: another implementation of *barrier_gather*nowait() (or some other design)
-// might go faster and be more in line with sense of NOWAIT
-// AT: TO DO: do epcc test and compare times
-
-// this barrier should be invisible to a customer and to the threading profile
-// tool (it's neither a terminating barrier nor customer's code, it's
-// used for an internal purpose)
-#if OMPT_SUPPORT
-    // JP: can this barrier potentially lead to task scheduling?
-    // JP: as long as there is a barrier in the implementation, OMPT should and
-    // will provide the barrier events
-    // so we set up the necessary frame/return addresses.
-    ompt_frame_t *ompt_frame;
-    if (ompt_enabled.enabled) {
-      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
-      if (ompt_frame->enter_frame.ptr == NULL)
-        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
-      OMPT_STORE_RETURN_ADDRESS(global_tid);
-    }
-#endif
-#if USE_ITT_NOTIFY
-    __kmp_threads[global_tid]->th.th_ident = loc;
-#endif
-    retval =
-        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
-                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
-    retval = (retval != 0) ?
(0) : (1); -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - ompt_frame->enter_frame = ompt_data_none; - } -#endif - - // all other workers except master should do this pop here - // ( none of other workers will get to __kmpc_end_reduce_nowait() ) - if (__kmp_env_consistency_check) { - if (retval == 0) { - __kmp_pop_sync(global_tid, ct_reduce, loc); - } - } - - } else { - - // should never reach this block - KMP_ASSERT(0); // "unexpected method" - } -#if OMP_40_ENABLED - if (teams_swapped) { - __kmp_restore_swapped_teams(th, team, task_state); - } -#endif - KA_TRACE( - 10, - ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", - global_tid, packed_reduction_method, retval)); - - return retval; -} - -/*! -@ingroup SYNCHRONIZATION -@param loc source location information -@param global_tid global thread id. -@param lck pointer to the unique lock data structure - -Finish the execution of a reduce nowait. -*/ -void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, - kmp_critical_name *lck) { - - PACKED_REDUCTION_METHOD_T packed_reduction_method; - - KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid)); - - packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); - - if (packed_reduction_method == critical_reduce_block) { - - __kmp_end_critical_section_reduce_block(loc, global_tid, lck); - - } else if (packed_reduction_method == empty_reduce_block) { - - // usage: if team size == 1, no synchronization is required ( on Intel - // platforms only ) - - } else if (packed_reduction_method == atomic_reduce_block) { - - // neither master nor other workers should get here - // (code gen does not generate this call in case 2: atomic reduce block) - // actually it's better to remove this elseif at all; - // after removal this value will checked by the 'else' and will assert - - } else if (TEST_REDUCTION_METHOD(packed_reduction_method, - tree_reduce_block)) { - - // only master gets here - - } else { - - // should never reach this block - KMP_ASSERT(0); // "unexpected method" - } - - if (__kmp_env_consistency_check) - __kmp_pop_sync(global_tid, ct_reduce, loc); - - KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", - global_tid, packed_reduction_method)); - - return; -} - -/* 2.a.ii. Reduce Block with a terminating barrier */ - -/*! -@ingroup SYNCHRONIZATION -@param loc source location information -@param global_tid global thread number -@param num_vars number of items (variables) to be reduced -@param reduce_size size of data in bytes to be reduced -@param reduce_data pointer to data to be reduced -@param reduce_func callback function providing reduction operation on two -operands and returning result of reduction in lhs_data -@param lck pointer to the unique lock data structure -@result 1 for the master thread, 0 for all other team threads, 2 for all team -threads if atomic reduction needed - -A blocking reduce that includes an implicit barrier. -*/ -kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, - size_t reduce_size, void *reduce_data, - void (*reduce_func)(void *lhs_data, void *rhs_data), - kmp_critical_name *lck) { - KMP_COUNT_BLOCK(REDUCE_wait); - int retval = 0; - PACKED_REDUCTION_METHOD_T packed_reduction_method; -#if OMP_40_ENABLED - kmp_info_t *th; - kmp_team_t *team; - int teams_swapped = 0, task_state; -#endif - - KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); - - // why do we need this initialization here at all? 
- // Reduction clause can not be a stand-alone directive. - - // do not call __kmp_serial_initialize(), it will be called by - // __kmp_parallel_initialize() if needed - // possible detection of false-positive race by the threadchecker ??? - if (!TCR_4(__kmp_init_parallel)) - __kmp_parallel_initialize(); - -// check correctness of reduce block nesting -#if KMP_USE_DYNAMIC_LOCK - if (__kmp_env_consistency_check) - __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); -#else - if (__kmp_env_consistency_check) - __kmp_push_sync(global_tid, ct_reduce, loc, NULL); -#endif - -#if OMP_40_ENABLED - th = __kmp_thread_from_gtid(global_tid); - teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); -#endif // OMP_40_ENABLED - - packed_reduction_method = __kmp_determine_reduction_method( - loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); - __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); - - if (packed_reduction_method == critical_reduce_block) { - - __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); - retval = 1; - - } else if (packed_reduction_method == empty_reduce_block) { - - // usage: if team size == 1, no synchronization is required ( Intel - // platforms only ) - retval = 1; - - } else if (packed_reduction_method == atomic_reduce_block) { - - retval = 2; - - } else if (TEST_REDUCTION_METHOD(packed_reduction_method, - tree_reduce_block)) { - -// case tree_reduce_block: -// this barrier should be visible to a customer and to the threading profile -// tool (it's a terminating barrier on constructs if NOWAIT not specified) -#if OMPT_SUPPORT - ompt_frame_t *ompt_frame; - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - if (ompt_frame->enter_frame.ptr == NULL) - ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(global_tid); - } -#endif -#if USE_ITT_NOTIFY - __kmp_threads[global_tid]->th.th_ident = - loc; // needed for correct notification of frames -#endif - retval = - __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), - global_tid, TRUE, reduce_size, reduce_data, reduce_func); - retval = (retval != 0) ? (0) : (1); -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - ompt_frame->enter_frame = ompt_data_none; - } -#endif - - // all other workers except master should do this pop here - // ( none of other workers except master will enter __kmpc_end_reduce() ) - if (__kmp_env_consistency_check) { - if (retval == 0) { // 0: all other workers; 1: master - __kmp_pop_sync(global_tid, ct_reduce, loc); - } - } - - } else { - - // should never reach this block - KMP_ASSERT(0); // "unexpected method" - } -#if OMP_40_ENABLED - if (teams_swapped) { - __kmp_restore_swapped_teams(th, team, task_state); - } -#endif - - KA_TRACE(10, - ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", - global_tid, packed_reduction_method, retval)); - - return retval; -} - -/*! -@ingroup SYNCHRONIZATION -@param loc source location information -@param global_tid global thread id. -@param lck pointer to the unique lock data structure - -Finish the execution of a blocking reduce. -The lck pointer must be the same as that used in the corresponding -start function. 
-*/ -void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, - kmp_critical_name *lck) { - - PACKED_REDUCTION_METHOD_T packed_reduction_method; -#if OMP_40_ENABLED - kmp_info_t *th; - kmp_team_t *team; - int teams_swapped = 0, task_state; -#endif - - KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); - -#if OMP_40_ENABLED - th = __kmp_thread_from_gtid(global_tid); - teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); -#endif // OMP_40_ENABLED - - packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); - - // this barrier should be visible to a customer and to the threading profile - // tool (it's a terminating barrier on constructs if NOWAIT not specified) - - if (packed_reduction_method == critical_reduce_block) { - - __kmp_end_critical_section_reduce_block(loc, global_tid, lck); - -// TODO: implicit barrier: should be exposed -#if OMPT_SUPPORT - ompt_frame_t *ompt_frame; - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - if (ompt_frame->enter_frame.ptr == NULL) - ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(global_tid); - } -#endif -#if USE_ITT_NOTIFY - __kmp_threads[global_tid]->th.th_ident = loc; -#endif - __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - ompt_frame->enter_frame = ompt_data_none; - } -#endif - - } else if (packed_reduction_method == empty_reduce_block) { - -// usage: if team size==1, no synchronization is required (Intel platforms only) - -// TODO: implicit barrier: should be exposed -#if OMPT_SUPPORT - ompt_frame_t *ompt_frame; - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - if (ompt_frame->enter_frame.ptr == NULL) - ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(global_tid); - } -#endif -#if USE_ITT_NOTIFY - __kmp_threads[global_tid]->th.th_ident = loc; -#endif - __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - ompt_frame->enter_frame = ompt_data_none; - } -#endif - - } else if (packed_reduction_method == atomic_reduce_block) { - -#if OMPT_SUPPORT - ompt_frame_t *ompt_frame; - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - if (ompt_frame->enter_frame.ptr == NULL) - ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(global_tid); - } -#endif -// TODO: implicit barrier: should be exposed -#if USE_ITT_NOTIFY - __kmp_threads[global_tid]->th.th_ident = loc; -#endif - __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - ompt_frame->enter_frame = ompt_data_none; - } -#endif - - } else if (TEST_REDUCTION_METHOD(packed_reduction_method, - tree_reduce_block)) { - - // only master executes here (master releases all other workers) - __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), - global_tid); - - } else { - - // should never reach this block - KMP_ASSERT(0); // "unexpected method" - } -#if OMP_40_ENABLED - if (teams_swapped) { - __kmp_restore_swapped_teams(th, team, task_state); - } -#endif - - if (__kmp_env_consistency_check) - __kmp_pop_sync(global_tid, ct_reduce, loc); - - KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n", - global_tid, 
packed_reduction_method)); - - return; -} - -#undef __KMP_GET_REDUCTION_METHOD -#undef __KMP_SET_REDUCTION_METHOD - -/* end of interface to fast scalable reduce routines */ - -kmp_uint64 __kmpc_get_taskid() { - - kmp_int32 gtid; - kmp_info_t *thread; - - gtid = __kmp_get_gtid(); - if (gtid < 0) { - return 0; - } - thread = __kmp_thread_from_gtid(gtid); - return thread->th.th_current_task->td_task_id; - -} // __kmpc_get_taskid - -kmp_uint64 __kmpc_get_parent_taskid() { - - kmp_int32 gtid; - kmp_info_t *thread; - kmp_taskdata_t *parent_task; - - gtid = __kmp_get_gtid(); - if (gtid < 0) { - return 0; - } - thread = __kmp_thread_from_gtid(gtid); - parent_task = thread->th.th_current_task->td_parent; - return (parent_task == NULL ? 0 : parent_task->td_task_id); - -} // __kmpc_get_parent_taskid - -#if OMP_45_ENABLED -/*! -@ingroup WORK_SHARING -@param loc source location information. -@param gtid global thread number. -@param num_dims number of associated doacross loops. -@param dims info on loops bounds. - -Initialize doacross loop information. -Expect compiler send us inclusive bounds, -e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2. -*/ -void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, - const struct kmp_dim *dims) { - int j, idx; - kmp_int64 last, trace_count; - kmp_info_t *th = __kmp_threads[gtid]; - kmp_team_t *team = th->th.th_team; - kmp_uint32 *flags; - kmp_disp_t *pr_buf = th->th.th_dispatch; - dispatch_shared_info_t *sh_buf; - - KA_TRACE( - 20, - ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n", - gtid, num_dims, !team->t.t_serialized)); - KMP_DEBUG_ASSERT(dims != NULL); - KMP_DEBUG_ASSERT(num_dims > 0); - - if (team->t.t_serialized) { - KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n")); - return; // no dependencies if team is serialized - } - KMP_DEBUG_ASSERT(team->t.t_nproc > 1); - idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for - // the next loop - sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; - - // Save bounds info into allocated private buffer - KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); - pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( - th, sizeof(kmp_int64) * (4 * num_dims + 1)); - KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); - pr_buf->th_doacross_info[0] = - (kmp_int64)num_dims; // first element is number of dimensions - // Save also address of num_done in order to access it later without knowing - // the buffer index - pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; - pr_buf->th_doacross_info[2] = dims[0].lo; - pr_buf->th_doacross_info[3] = dims[0].up; - pr_buf->th_doacross_info[4] = dims[0].st; - last = 5; - for (j = 1; j < num_dims; ++j) { - kmp_int64 - range_length; // To keep ranges of all dimensions but the first dims[0] - if (dims[j].st == 1) { // most common case - // AC: should we care of ranges bigger than LLONG_MAX? (not for now) - range_length = dims[j].up - dims[j].lo + 1; - } else { - if (dims[j].st > 0) { - KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); - range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; - } else { // negative increment - KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); - range_length = - (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; - } - } - pr_buf->th_doacross_info[last++] = range_length; - pr_buf->th_doacross_info[last++] = dims[j].lo; - pr_buf->th_doacross_info[last++] = dims[j].up; - pr_buf->th_doacross_info[last++] = dims[j].st; - } - - // Compute total trip count. 
- // Start with range of dims[0] which we don't need to keep in the buffer. - if (dims[0].st == 1) { // most common case - trace_count = dims[0].up - dims[0].lo + 1; - } else if (dims[0].st > 0) { - KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); - trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; - } else { // negative increment - KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); - trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; - } - for (j = 1; j < num_dims; ++j) { - trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges - } - KMP_DEBUG_ASSERT(trace_count > 0); - - // Check if shared buffer is not occupied by other loop (idx - - // __kmp_dispatch_num_buffers) - if (idx != sh_buf->doacross_buf_idx) { - // Shared buffer is occupied, wait for it to be free - __kmp_wait_yield_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx, - __kmp_eq_4, NULL); - } -#if KMP_32_BIT_ARCH - // Check if we are the first thread. After the CAS the first thread gets 0, - // others get 1 if initialization is in progress, allocated pointer otherwise. - // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. - flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( - (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); -#else - flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( - (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); -#endif - if (flags == NULL) { - // we are the first thread, allocate the array of flags - size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration - flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); - KMP_MB(); - sh_buf->doacross_flags = flags; - } else if (flags == (kmp_uint32 *)1) { -#if KMP_32_BIT_ARCH - // initialization is still in progress, need to wait - while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) -#else - while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) -#endif - KMP_YIELD(TRUE); - KMP_MB(); - } else { - KMP_MB(); - } - KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value - pr_buf->th_doacross_flags = - sh_buf->doacross_flags; // save private copy in order to not - // touch shared buffer on each iteration - KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid)); -} - -void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { - kmp_int32 shft, num_dims, i; - kmp_uint32 flag; - kmp_int64 iter_number; // iteration number of "collapsed" loop nest - kmp_info_t *th = __kmp_threads[gtid]; - kmp_team_t *team = th->th.th_team; - kmp_disp_t *pr_buf; - kmp_int64 lo, up, st; - - KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); - if (team->t.t_serialized) { - KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n")); - return; // no dependencies if team is serialized - } - - // calculate sequential iteration number and check out-of-bounds condition - pr_buf = th->th.th_dispatch; - KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); - num_dims = pr_buf->th_doacross_info[0]; - lo = pr_buf->th_doacross_info[2]; - up = pr_buf->th_doacross_info[3]; - st = pr_buf->th_doacross_info[4]; - if (st == 1) { // most common case - if (vec[0] < lo || vec[0] > up) { - KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " - "bounds [%lld,%lld]\n", - gtid, vec[0], lo, up)); - return; - } - iter_number = vec[0] - lo; - } else if (st > 0) { - if (vec[0] < lo || vec[0] > up) { - KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " - "bounds [%lld,%lld]\n", - gtid, 
vec[0], lo, up)); - return; - } - iter_number = (kmp_uint64)(vec[0] - lo) / st; - } else { // negative increment - if (vec[0] > lo || vec[0] < up) { - KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " - "bounds [%lld,%lld]\n", - gtid, vec[0], lo, up)); - return; - } - iter_number = (kmp_uint64)(lo - vec[0]) / (-st); - } - for (i = 1; i < num_dims; ++i) { - kmp_int64 iter, ln; - kmp_int32 j = i * 4; - ln = pr_buf->th_doacross_info[j + 1]; - lo = pr_buf->th_doacross_info[j + 2]; - up = pr_buf->th_doacross_info[j + 3]; - st = pr_buf->th_doacross_info[j + 4]; - if (st == 1) { - if (vec[i] < lo || vec[i] > up) { - KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " - "bounds [%lld,%lld]\n", - gtid, vec[i], lo, up)); - return; - } - iter = vec[i] - lo; - } else if (st > 0) { - if (vec[i] < lo || vec[i] > up) { - KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " - "bounds [%lld,%lld]\n", - gtid, vec[i], lo, up)); - return; - } - iter = (kmp_uint64)(vec[i] - lo) / st; - } else { // st < 0 - if (vec[i] > lo || vec[i] < up) { - KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " - "bounds [%lld,%lld]\n", - gtid, vec[i], lo, up)); - return; - } - iter = (kmp_uint64)(lo - vec[i]) / (-st); - } - iter_number = iter + ln * iter_number; - } - shft = iter_number % 32; // use 32-bit granularity - iter_number >>= 5; // divided by 32 - flag = 1 << shft; - while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { - KMP_YIELD(TRUE); - } - KMP_MB(); - KA_TRACE(20, - ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", - gtid, (iter_number << 5) + shft)); -} - -void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { - kmp_int32 shft, num_dims, i; - kmp_uint32 flag; - kmp_int64 iter_number; // iteration number of "collapsed" loop nest - kmp_info_t *th = __kmp_threads[gtid]; - kmp_team_t *team = th->th.th_team; - kmp_disp_t *pr_buf; - kmp_int64 lo, st; - - KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid)); - if (team->t.t_serialized) { - KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n")); - return; // no dependencies if team is serialized - } - - // calculate sequential iteration number (same as in "wait" but no - // out-of-bounds checks) - pr_buf = th->th.th_dispatch; - KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); - num_dims = pr_buf->th_doacross_info[0]; - lo = pr_buf->th_doacross_info[2]; - st = pr_buf->th_doacross_info[4]; - if (st == 1) { // most common case - iter_number = vec[0] - lo; - } else if (st > 0) { - iter_number = (kmp_uint64)(vec[0] - lo) / st; - } else { // negative increment - iter_number = (kmp_uint64)(lo - vec[0]) / (-st); - } - for (i = 1; i < num_dims; ++i) { - kmp_int64 iter, ln; - kmp_int32 j = i * 4; - ln = pr_buf->th_doacross_info[j + 1]; - lo = pr_buf->th_doacross_info[j + 2]; - st = pr_buf->th_doacross_info[j + 4]; - if (st == 1) { - iter = vec[i] - lo; - } else if (st > 0) { - iter = (kmp_uint64)(vec[i] - lo) / st; - } else { // st < 0 - iter = (kmp_uint64)(lo - vec[i]) / (-st); - } - iter_number = iter + ln * iter_number; - } - shft = iter_number % 32; // use 32-bit granularity - iter_number >>= 5; // divided by 32 - flag = 1 << shft; - KMP_MB(); - if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) - KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag); - KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, - (iter_number << 5) + shft)); -} - -void __kmpc_doacross_fini(ident_t *loc, int 
gtid) {
-  kmp_int32 num_done;
-  kmp_info_t *th = __kmp_threads[gtid];
-  kmp_team_t *team = th->th.th_team;
-  kmp_disp_t *pr_buf = th->th.th_dispatch;
-
-  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
-  if (team->t.t_serialized) {
-    KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
-    return; // nothing to do
-  }
-  num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
-  if (num_done == th->th.th_team_nproc) {
-    // we are the last thread, need to free shared resources
-    int idx = pr_buf->th_doacross_buf_idx - 1;
-    dispatch_shared_info_t *sh_buf =
-        &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
-    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
-                     (kmp_int64)&sh_buf->doacross_num_done);
-    KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
-    KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
-    __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
-    sh_buf->doacross_flags = NULL;
-    sh_buf->doacross_num_done = 0;
-    sh_buf->doacross_buf_idx +=
-        __kmp_dispatch_num_buffers; // free buffer for future re-use
-  }
-  // free private resources (need to keep buffer index forever)
-  pr_buf->th_doacross_flags = NULL;
-  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
-  pr_buf->th_doacross_info = NULL;
-  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
-}
-#endif
-
-#if OMP_50_ENABLED
-int __kmpc_get_target_offload(void) {
-  if (!__kmp_init_serial) {
-    __kmp_serial_initialize();
-  }
-  return __kmp_target_offload;
-}
-#endif // OMP_50_ENABLED
-
-// end of file //

Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_csupport.cpp
___________________________________________________________________
Deleted: svn:eol-style
## -1 +0,0 ##
-native
\ No newline at end of property
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Deleted: svn:mime-type
## -1 +0,0 ##
-text/plain
\ No newline at end of property
Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_settings.cpp
===================================================================
--- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_settings.cpp (revision 348960)
+++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_settings.cpp (nonexistent)
@@ -1,5832 +0,0 @@
-/*
- * kmp_settings.cpp -- Initialize environment variables
- */
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "kmp.h"
-#include "kmp_affinity.h"
-#include "kmp_atomic.h"
-#if KMP_USE_HIER_SCHED
-#include "kmp_dispatch_hier.h"
-#endif
-#include "kmp_environment.h"
-#include "kmp_i18n.h"
-#include "kmp_io.h"
-#include "kmp_itt.h"
-#include "kmp_lock.h"
-#include "kmp_settings.h"
-#include "kmp_str.h"
-#include "kmp_wrapper_getpid.h"
-#include <ctype.h> // toupper()
-
-static int __kmp_env_toPrint(char const *name, int flag);
-
-bool __kmp_env_format = 0; // 0 - old format; 1 - new format
-
-// -----------------------------------------------------------------------------
-// Helper string functions. Subject to move to kmp_str.
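// A reduced standalone model of the sentinel-terminated, case-insensitive
// matching idiom used by the helpers in this block (the name and exact
// semantics here are illustrative only; the runtime's versions follow):
#include <ctype.h>
static int example_match_upto_sentinel(const char *a, const char *b,
                                       char sentinel) {
  // Compare a against b until either string ends or b reaches the sentinel
  // (e.g. '=' or ',' when scanning a value like "routine=foo,range=1:9").
  while (*a && *b && *b != sentinel) {
    if (toupper((unsigned char)*a) != toupper((unsigned char)*b))
      return 0;
    ++a;
    ++b;
  }
  return *a == '\0' && (*b == '\0' || *b == sentinel);
}
// __kmp_match_with_sentinel() below additionally enforces a minimum matched
// length, and __kmp_strcasecmp_with_sentinel() returns an ordering instead of
// a yes/no answer.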
- -#ifdef USE_LOAD_BALANCE -static double __kmp_convert_to_double(char const *s) { - double result; - - if (KMP_SSCANF(s, "%lf", &result) < 1) { - result = 0.0; - } - - return result; -} -#endif - -#ifdef KMP_DEBUG -static unsigned int __kmp_readstr_with_sentinel(char *dest, char const *src, - size_t len, char sentinel) { - unsigned int i; - for (i = 0; i < len; i++) { - if ((*src == '\0') || (*src == sentinel)) { - break; - } - *(dest++) = *(src++); - } - *dest = '\0'; - return i; -} -#endif - -static int __kmp_match_with_sentinel(char const *a, char const *b, size_t len, - char sentinel) { - size_t l = 0; - - if (a == NULL) - a = ""; - if (b == NULL) - b = ""; - while (*a && *b && *b != sentinel) { - char ca = *a, cb = *b; - - if (ca >= 'a' && ca <= 'z') - ca -= 'a' - 'A'; - if (cb >= 'a' && cb <= 'z') - cb -= 'a' - 'A'; - if (ca != cb) - return FALSE; - ++l; - ++a; - ++b; - } - return l >= len; -} - -// Expected usage: -// token is the token to check for. -// buf is the string being parsed. -// *end returns the char after the end of the token. -// it is not modified unless a match occurs. -// -// Example 1: -// -// if (__kmp_match_str("token", buf, *end) { -// -// buf = end; -// } -// -// Example 2: -// -// if (__kmp_match_str("token", buf, *end) { -// char *save = **end; -// **end = sentinel; -// -// **end = save; -// buf = end; -// } - -static int __kmp_match_str(char const *token, char const *buf, - const char **end) { - - KMP_ASSERT(token != NULL); - KMP_ASSERT(buf != NULL); - KMP_ASSERT(end != NULL); - - while (*token && *buf) { - char ct = *token, cb = *buf; - - if (ct >= 'a' && ct <= 'z') - ct -= 'a' - 'A'; - if (cb >= 'a' && cb <= 'z') - cb -= 'a' - 'A'; - if (ct != cb) - return FALSE; - ++token; - ++buf; - } - if (*token) { - return FALSE; - } - *end = buf; - return TRUE; -} - -#if KMP_OS_DARWIN -static size_t __kmp_round4k(size_t size) { - size_t _4k = 4 * 1024; - if (size & (_4k - 1)) { - size &= ~(_4k - 1); - if (size <= KMP_SIZE_T_MAX - _4k) { - size += _4k; // Round up if there is no overflow. - } - } - return size; -} // __kmp_round4k -#endif - -/* Here, multipliers are like __kmp_convert_to_seconds, but floating-point - values are allowed, and the return value is in milliseconds. The default - multiplier is milliseconds. Returns INT_MAX only if the value specified - matches "infinit*". Returns -1 if specified string is invalid. 
*/ -int __kmp_convert_to_milliseconds(char const *data) { - int ret, nvalues, factor; - char mult, extra; - double value; - - if (data == NULL) - return (-1); - if (__kmp_str_match("infinit", -1, data)) - return (INT_MAX); - value = (double)0.0; - mult = '\0'; - nvalues = KMP_SSCANF(data, "%lf%c%c", &value, &mult, &extra); - if (nvalues < 1) - return (-1); - if (nvalues == 1) - mult = '\0'; - if (nvalues == 3) - return (-1); - - if (value < 0) - return (-1); - - switch (mult) { - case '\0': - /* default is milliseconds */ - factor = 1; - break; - case 's': - case 'S': - factor = 1000; - break; - case 'm': - case 'M': - factor = 1000 * 60; - break; - case 'h': - case 'H': - factor = 1000 * 60 * 60; - break; - case 'd': - case 'D': - factor = 1000 * 24 * 60 * 60; - break; - default: - return (-1); - } - - if (value >= ((INT_MAX - 1) / factor)) - ret = INT_MAX - 1; /* Don't allow infinite value here */ - else - ret = (int)(value * (double)factor); /* truncate to int */ - - return ret; -} - -static int __kmp_strcasecmp_with_sentinel(char const *a, char const *b, - char sentinel) { - if (a == NULL) - a = ""; - if (b == NULL) - b = ""; - while (*a && *b && *b != sentinel) { - char ca = *a, cb = *b; - - if (ca >= 'a' && ca <= 'z') - ca -= 'a' - 'A'; - if (cb >= 'a' && cb <= 'z') - cb -= 'a' - 'A'; - if (ca != cb) - return (int)(unsigned char)*a - (int)(unsigned char)*b; - ++a; - ++b; - } - return *a - ? (*b && *b != sentinel) - ? (int)(unsigned char)*a - (int)(unsigned char)*b - : 1 - : (*b && *b != sentinel) ? -1 : 0; -} - -// ============================================================================= -// Table structures and helper functions. - -typedef struct __kmp_setting kmp_setting_t; -typedef struct __kmp_stg_ss_data kmp_stg_ss_data_t; -typedef struct __kmp_stg_wp_data kmp_stg_wp_data_t; -typedef struct __kmp_stg_fr_data kmp_stg_fr_data_t; - -typedef void (*kmp_stg_parse_func_t)(char const *name, char const *value, - void *data); -typedef void (*kmp_stg_print_func_t)(kmp_str_buf_t *buffer, char const *name, - void *data); - -struct __kmp_setting { - char const *name; // Name of setting (environment variable). - kmp_stg_parse_func_t parse; // Parser function. - kmp_stg_print_func_t print; // Print function. - void *data; // Data passed to parser and printer. - int set; // Variable set during this "session" - // (__kmp_env_initialize() or kmp_set_defaults() call). - int defined; // Variable set in any "session". -}; // struct __kmp_setting - -struct __kmp_stg_ss_data { - size_t factor; // Default factor: 1 for KMP_STACKSIZE, 1024 for others. - kmp_setting_t **rivals; // Array of pointers to rivals (including itself). -}; // struct __kmp_stg_ss_data - -struct __kmp_stg_wp_data { - int omp; // 0 -- KMP_LIBRARY, 1 -- OMP_WAIT_POLICY. - kmp_setting_t **rivals; // Array of pointers to rivals (including itself). -}; // struct __kmp_stg_wp_data - -struct __kmp_stg_fr_data { - int force; // 0 -- KMP_DETERMINISTIC_REDUCTION, 1 -- KMP_FORCE_REDUCTION. - kmp_setting_t **rivals; // Array of pointers to rivals (including itself). -}; // struct __kmp_stg_fr_data - -static int __kmp_stg_check_rivals( // 0 -- Ok, 1 -- errors found. - char const *name, // Name of variable. - char const *value, // Value of the variable. - kmp_setting_t **rivals // List of rival settings (must include current one). - ); - -// ----------------------------------------------------------------------------- -// Helper parse functions. 
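// A minimal standalone sketch of the clamp-and-warn contract the parsers
// below share (hypothetical helper, not part of the runtime's API): invalid
// input never aborts the program; out-of-range values are clamped to the
// nearest bound instead.
#include <stdlib.h>
static int example_parse_int_clamped(const char *value, int min, int max,
                                     int fallback) {
  char *end = NULL;
  long v = strtol(value, &end, 10);
  if (end == value)
    return fallback; // not a number at all: keep the previous value
  if (v < min)
    return min; // too small: clamp (the real parser also prints a warning)
  if (v > max)
    return max; // too large: clamp (the real parser also prints a warning)
  return (int)v;
}
// __kmp_stg_parse_int() below routes the same decisions through
// __kmp_str_to_uint() and the KMP_WARNING()/KMP_INFORM() i18n machinery, and
// __kmp_stg_parse_size() adds size-suffix and (on Darwin) 4K-rounding logic.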
- -static void __kmp_stg_parse_bool(char const *name, char const *value, - int *out) { - if (__kmp_str_match_true(value)) { - *out = TRUE; - } else if (__kmp_str_match_false(value)) { - *out = FALSE; - } else { - __kmp_msg(kmp_ms_warning, KMP_MSG(BadBoolValue, name, value), - KMP_HNT(ValidBoolValues), __kmp_msg_null); - } -} // __kmp_stg_parse_bool - -static void __kmp_stg_parse_size(char const *name, char const *value, - size_t size_min, size_t size_max, - int *is_specified, size_t *out, - size_t factor) { - char const *msg = NULL; -#if KMP_OS_DARWIN - size_min = __kmp_round4k(size_min); - size_max = __kmp_round4k(size_max); -#endif // KMP_OS_DARWIN - if (value) { - if (is_specified != NULL) { - *is_specified = 1; - } - __kmp_str_to_size(value, out, factor, &msg); - if (msg == NULL) { - if (*out > size_max) { - *out = size_max; - msg = KMP_I18N_STR(ValueTooLarge); - } else if (*out < size_min) { - *out = size_min; - msg = KMP_I18N_STR(ValueTooSmall); - } else { -#if KMP_OS_DARWIN - size_t round4k = __kmp_round4k(*out); - if (*out != round4k) { - *out = round4k; - msg = KMP_I18N_STR(NotMultiple4K); - } -#endif - } - } else { - // If integer overflow occurred, * out == KMP_SIZE_T_MAX. Cut it to - // size_max silently. - if (*out < size_min) { - *out = size_max; - } else if (*out > size_max) { - *out = size_max; - } - } - if (msg != NULL) { - // Message is not empty. Print warning. - kmp_str_buf_t buf; - __kmp_str_buf_init(&buf); - __kmp_str_buf_print_size(&buf, *out); - KMP_WARNING(ParseSizeIntWarn, name, value, msg); - KMP_INFORM(Using_str_Value, name, buf.str); - __kmp_str_buf_free(&buf); - } - } -} // __kmp_stg_parse_size - -static void __kmp_stg_parse_str(char const *name, char const *value, - char **out) { - __kmp_str_free(out); - *out = __kmp_str_format("%s", value); -} // __kmp_stg_parse_str - -static void __kmp_stg_parse_int( - char const - *name, // I: Name of environment variable (used in warning messages). - char const *value, // I: Value of environment variable to parse. - int min, // I: Miminal allowed value. - int max, // I: Maximum allowed value. - int *out // O: Output (parsed) value. - ) { - char const *msg = NULL; - kmp_uint64 uint = *out; - __kmp_str_to_uint(value, &uint, &msg); - if (msg == NULL) { - if (uint < (unsigned int)min) { - msg = KMP_I18N_STR(ValueTooSmall); - uint = min; - } else if (uint > (unsigned int)max) { - msg = KMP_I18N_STR(ValueTooLarge); - uint = max; - } - } else { - // If overflow occurred msg contains error message and uint is very big. Cut - // tmp it to INT_MAX. - if (uint < (unsigned int)min) { - uint = min; - } else if (uint > (unsigned int)max) { - uint = max; - } - } - if (msg != NULL) { - // Message is not empty. Print warning. - kmp_str_buf_t buf; - KMP_WARNING(ParseSizeIntWarn, name, value, msg); - __kmp_str_buf_init(&buf); - __kmp_str_buf_print(&buf, "%" KMP_UINT64_SPEC "", uint); - KMP_INFORM(Using_uint64_Value, name, buf.str); - __kmp_str_buf_free(&buf); - } - *out = uint; -} // __kmp_stg_parse_int - -#if KMP_DEBUG_ADAPTIVE_LOCKS -static void __kmp_stg_parse_file(char const *name, char const *value, - const char *suffix, char **out) { - char buffer[256]; - char *t; - int hasSuffix; - __kmp_str_free(out); - t = (char *)strrchr(value, '.'); - hasSuffix = t && __kmp_str_eqf(t, suffix); - t = __kmp_str_format("%s%s", value, hasSuffix ? 
"" : suffix); - __kmp_expand_file_name(buffer, sizeof(buffer), t); - __kmp_str_free(&t); - *out = __kmp_str_format("%s", buffer); -} // __kmp_stg_parse_file -#endif - -#ifdef KMP_DEBUG -static char *par_range_to_print = NULL; - -static void __kmp_stg_parse_par_range(char const *name, char const *value, - int *out_range, char *out_routine, - char *out_file, int *out_lb, - int *out_ub) { - size_t len = KMP_STRLEN(value) + 1; - par_range_to_print = (char *)KMP_INTERNAL_MALLOC(len + 1); - KMP_STRNCPY_S(par_range_to_print, len + 1, value, len + 1); - __kmp_par_range = +1; - __kmp_par_range_lb = 0; - __kmp_par_range_ub = INT_MAX; - for (;;) { - unsigned int len; - if (*value == '\0') { - break; - } - if (!__kmp_strcasecmp_with_sentinel("routine", value, '=')) { - value = strchr(value, '=') + 1; - len = __kmp_readstr_with_sentinel(out_routine, value, - KMP_PAR_RANGE_ROUTINE_LEN - 1, ','); - if (len == 0) { - goto par_range_error; - } - value = strchr(value, ','); - if (value != NULL) { - value++; - } - continue; - } - if (!__kmp_strcasecmp_with_sentinel("filename", value, '=')) { - value = strchr(value, '=') + 1; - len = __kmp_readstr_with_sentinel(out_file, value, - KMP_PAR_RANGE_FILENAME_LEN - 1, ','); - if (len == 0) { - goto par_range_error; - } - value = strchr(value, ','); - if (value != NULL) { - value++; - } - continue; - } - if ((!__kmp_strcasecmp_with_sentinel("range", value, '=')) || - (!__kmp_strcasecmp_with_sentinel("incl_range", value, '='))) { - value = strchr(value, '=') + 1; - if (KMP_SSCANF(value, "%d:%d", out_lb, out_ub) != 2) { - goto par_range_error; - } - *out_range = +1; - value = strchr(value, ','); - if (value != NULL) { - value++; - } - continue; - } - if (!__kmp_strcasecmp_with_sentinel("excl_range", value, '=')) { - value = strchr(value, '=') + 1; - if (KMP_SSCANF(value, "%d:%d", out_lb, out_ub) != 2) { - goto par_range_error; - } - *out_range = -1; - value = strchr(value, ','); - if (value != NULL) { - value++; - } - continue; - } - par_range_error: - KMP_WARNING(ParRangeSyntax, name); - __kmp_par_range = 0; - break; - } -} // __kmp_stg_parse_par_range -#endif - -int __kmp_initial_threads_capacity(int req_nproc) { - int nth = 32; - - /* MIN( MAX( 32, 4 * $OMP_NUM_THREADS, 4 * omp_get_num_procs() ), - * __kmp_max_nth) */ - if (nth < (4 * req_nproc)) - nth = (4 * req_nproc); - if (nth < (4 * __kmp_xproc)) - nth = (4 * __kmp_xproc); - - if (nth > __kmp_max_nth) - nth = __kmp_max_nth; - - return nth; -} - -int __kmp_default_tp_capacity(int req_nproc, int max_nth, - int all_threads_specified) { - int nth = 128; - - if (all_threads_specified) - return max_nth; - /* MIN( MAX (128, 4 * $OMP_NUM_THREADS, 4 * omp_get_num_procs() ), - * __kmp_max_nth ) */ - if (nth < (4 * req_nproc)) - nth = (4 * req_nproc); - if (nth < (4 * __kmp_xproc)) - nth = (4 * __kmp_xproc); - - if (nth > __kmp_max_nth) - nth = __kmp_max_nth; - - return nth; -} - -// ----------------------------------------------------------------------------- -// Helper print functions. - -static void __kmp_stg_print_bool(kmp_str_buf_t *buffer, char const *name, - int value) { - if (__kmp_env_format) { - KMP_STR_BUF_PRINT_BOOL; - } else { - __kmp_str_buf_print(buffer, " %s=%s\n", name, value ? 
"true" : "false"); - } -} // __kmp_stg_print_bool - -static void __kmp_stg_print_int(kmp_str_buf_t *buffer, char const *name, - int value) { - if (__kmp_env_format) { - KMP_STR_BUF_PRINT_INT; - } else { - __kmp_str_buf_print(buffer, " %s=%d\n", name, value); - } -} // __kmp_stg_print_int - -#if USE_ITT_BUILD && USE_ITT_NOTIFY -static void __kmp_stg_print_uint64(kmp_str_buf_t *buffer, char const *name, - kmp_uint64 value) { - if (__kmp_env_format) { - KMP_STR_BUF_PRINT_UINT64; - } else { - __kmp_str_buf_print(buffer, " %s=%" KMP_UINT64_SPEC "\n", name, value); - } -} // __kmp_stg_print_uint64 -#endif - -static void __kmp_stg_print_str(kmp_str_buf_t *buffer, char const *name, - char const *value) { - if (__kmp_env_format) { - KMP_STR_BUF_PRINT_STR; - } else { - __kmp_str_buf_print(buffer, " %s=%s\n", name, value); - } -} // __kmp_stg_print_str - -static void __kmp_stg_print_size(kmp_str_buf_t *buffer, char const *name, - size_t value) { - if (__kmp_env_format) { - KMP_STR_BUF_PRINT_NAME_EX(name); - __kmp_str_buf_print_size(buffer, value); - __kmp_str_buf_print(buffer, "'\n"); - } else { - __kmp_str_buf_print(buffer, " %s=", name); - __kmp_str_buf_print_size(buffer, value); - __kmp_str_buf_print(buffer, "\n"); - return; - } -} // __kmp_stg_print_size - -// ============================================================================= -// Parse and print functions. - -// ----------------------------------------------------------------------------- -// KMP_DEVICE_THREAD_LIMIT, KMP_ALL_THREADS - -static void __kmp_stg_parse_device_thread_limit(char const *name, - char const *value, void *data) { - kmp_setting_t **rivals = (kmp_setting_t **)data; - int rc; - if (strcmp(name, "KMP_ALL_THREADS") == 0) { - KMP_INFORM(EnvVarDeprecated, name, "KMP_DEVICE_THREAD_LIMIT"); - } - rc = __kmp_stg_check_rivals(name, value, rivals); - if (rc) { - return; - } - if (!__kmp_strcasecmp_with_sentinel("all", value, 0)) { - __kmp_max_nth = __kmp_xproc; - __kmp_allThreadsSpecified = 1; - } else { - __kmp_stg_parse_int(name, value, 1, __kmp_sys_max_nth, &__kmp_max_nth); - __kmp_allThreadsSpecified = 0; - } - K_DIAG(1, ("__kmp_max_nth == %d\n", __kmp_max_nth)); - -} // __kmp_stg_parse_device_thread_limit - -static void __kmp_stg_print_device_thread_limit(kmp_str_buf_t *buffer, - char const *name, void *data) { - __kmp_stg_print_int(buffer, name, __kmp_max_nth); -} // __kmp_stg_print_device_thread_limit - -// ----------------------------------------------------------------------------- -// OMP_THREAD_LIMIT -static void __kmp_stg_parse_thread_limit(char const *name, char const *value, - void *data) { - __kmp_stg_parse_int(name, value, 1, __kmp_sys_max_nth, &__kmp_cg_max_nth); - K_DIAG(1, ("__kmp_cg_max_nth == %d\n", __kmp_cg_max_nth)); - -} // __kmp_stg_parse_thread_limit - -static void __kmp_stg_print_thread_limit(kmp_str_buf_t *buffer, - char const *name, void *data) { - __kmp_stg_print_int(buffer, name, __kmp_cg_max_nth); -} // __kmp_stg_print_thread_limit - -// ----------------------------------------------------------------------------- -// KMP_TEAMS_THREAD_LIMIT -static void __kmp_stg_parse_teams_thread_limit(char const *name, - char const *value, void *data) { - __kmp_stg_parse_int(name, value, 1, __kmp_sys_max_nth, &__kmp_teams_max_nth); -} // __kmp_stg_teams_thread_limit - -static void __kmp_stg_print_teams_thread_limit(kmp_str_buf_t *buffer, - char const *name, void *data) { - __kmp_stg_print_int(buffer, name, __kmp_teams_max_nth); -} // __kmp_stg_print_teams_thread_limit - -// 
----------------------------------------------------------------------------- -// KMP_BLOCKTIME - -static void __kmp_stg_parse_blocktime(char const *name, char const *value, - void *data) { - __kmp_dflt_blocktime = __kmp_convert_to_milliseconds(value); - if (__kmp_dflt_blocktime < 0) { - __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME; - __kmp_msg(kmp_ms_warning, KMP_MSG(InvalidValue, name, value), - __kmp_msg_null); - KMP_INFORM(Using_int_Value, name, __kmp_dflt_blocktime); - __kmp_env_blocktime = FALSE; // Revert to default as if var not set. - } else { - if (__kmp_dflt_blocktime < KMP_MIN_BLOCKTIME) { - __kmp_dflt_blocktime = KMP_MIN_BLOCKTIME; - __kmp_msg(kmp_ms_warning, KMP_MSG(SmallValue, name, value), - __kmp_msg_null); - KMP_INFORM(MinValueUsing, name, __kmp_dflt_blocktime); - } else if (__kmp_dflt_blocktime > KMP_MAX_BLOCKTIME) { - __kmp_dflt_blocktime = KMP_MAX_BLOCKTIME; - __kmp_msg(kmp_ms_warning, KMP_MSG(LargeValue, name, value), - __kmp_msg_null); - KMP_INFORM(MaxValueUsing, name, __kmp_dflt_blocktime); - } - __kmp_env_blocktime = TRUE; // KMP_BLOCKTIME was specified. - } -#if KMP_USE_MONITOR - // calculate number of monitor thread wakeup intervals corresponding to - // blocktime. - __kmp_monitor_wakeups = - KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups); - __kmp_bt_intervals = - KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups); -#endif - K_DIAG(1, ("__kmp_env_blocktime == %d\n", __kmp_env_blocktime)); - if (__kmp_env_blocktime) { - K_DIAG(1, ("__kmp_dflt_blocktime == %d\n", __kmp_dflt_blocktime)); - } -} // __kmp_stg_parse_blocktime - -static void __kmp_stg_print_blocktime(kmp_str_buf_t *buffer, char const *name, - void *data) { - __kmp_stg_print_int(buffer, name, __kmp_dflt_blocktime); -} // __kmp_stg_print_blocktime - -// ----------------------------------------------------------------------------- -// KMP_DUPLICATE_LIB_OK - -static void __kmp_stg_parse_duplicate_lib_ok(char const *name, - char const *value, void *data) { - /* actually this variable is not supported, put here for compatibility with - earlier builds and for static/dynamic combination */ - __kmp_stg_parse_bool(name, value, &__kmp_duplicate_library_ok); -} // __kmp_stg_parse_duplicate_lib_ok - -static void __kmp_stg_print_duplicate_lib_ok(kmp_str_buf_t *buffer, - char const *name, void *data) { - __kmp_stg_print_bool(buffer, name, __kmp_duplicate_library_ok); -} // __kmp_stg_print_duplicate_lib_ok - -// ----------------------------------------------------------------------------- -// KMP_INHERIT_FP_CONTROL - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - -static void __kmp_stg_parse_inherit_fp_control(char const *name, - char const *value, void *data) { - __kmp_stg_parse_bool(name, value, &__kmp_inherit_fp_control); -} // __kmp_stg_parse_inherit_fp_control - -static void __kmp_stg_print_inherit_fp_control(kmp_str_buf_t *buffer, - char const *name, void *data) { -#if KMP_DEBUG - __kmp_stg_print_bool(buffer, name, __kmp_inherit_fp_control); -#endif /* KMP_DEBUG */ -} // __kmp_stg_print_inherit_fp_control - -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -// Used for OMP_WAIT_POLICY -static char const *blocktime_str = NULL; - -// ----------------------------------------------------------------------------- -// KMP_LIBRARY, OMP_WAIT_POLICY - -static void __kmp_stg_parse_wait_policy(char const *name, char const *value, - void *data) { - - kmp_stg_wp_data_t *wait = (kmp_stg_wp_data_t *)data; - int rc; - - rc = __kmp_stg_check_rivals(name, value, wait->rivals); - if (rc) { - 
return; - } - - if (wait->omp) { - if (__kmp_str_match("ACTIVE", 1, value)) { - __kmp_library = library_turnaround; - if (blocktime_str == NULL) { - // KMP_BLOCKTIME not specified, so set default to "infinite". - __kmp_dflt_blocktime = KMP_MAX_BLOCKTIME; - } - } else if (__kmp_str_match("PASSIVE", 1, value)) { - __kmp_library = library_throughput; - if (blocktime_str == NULL) { - // KMP_BLOCKTIME not specified, so set default to 0. - __kmp_dflt_blocktime = 0; - } - } else { - KMP_WARNING(StgInvalidValue, name, value); - } - } else { - if (__kmp_str_match("serial", 1, value)) { /* S */ - __kmp_library = library_serial; - } else if (__kmp_str_match("throughput", 2, value)) { /* TH */ - __kmp_library = library_throughput; - } else if (__kmp_str_match("turnaround", 2, value)) { /* TU */ - __kmp_library = library_turnaround; - } else if (__kmp_str_match("dedicated", 1, value)) { /* D */ - __kmp_library = library_turnaround; - } else if (__kmp_str_match("multiuser", 1, value)) { /* M */ - __kmp_library = library_throughput; - } else { - KMP_WARNING(StgInvalidValue, name, value); - } - } - __kmp_aux_set_library(__kmp_library); - -} // __kmp_stg_parse_wait_policy - -static void __kmp_stg_print_wait_policy(kmp_str_buf_t *buffer, char const *name, - void *data) { - - kmp_stg_wp_data_t *wait = (kmp_stg_wp_data_t *)data; - char const *value = NULL; - - if (wait->omp) { - switch (__kmp_library) { - case library_turnaround: { - value = "ACTIVE"; - } break; - case library_throughput: { - value = "PASSIVE"; - } break; - } - } else { - switch (__kmp_library) { - case library_serial: { - value = "serial"; - } break; - case library_turnaround: { - value = "turnaround"; - } break; - case library_throughput: { - value = "throughput"; - } break; - } - } - if (value != NULL) { - __kmp_stg_print_str(buffer, name, value); - } - -} // __kmp_stg_print_wait_policy - -#if KMP_USE_MONITOR -// ----------------------------------------------------------------------------- -// KMP_MONITOR_STACKSIZE - -static void __kmp_stg_parse_monitor_stacksize(char const *name, - char const *value, void *data) { - __kmp_stg_parse_size(name, value, __kmp_sys_min_stksize, KMP_MAX_STKSIZE, - NULL, &__kmp_monitor_stksize, 1); -} // __kmp_stg_parse_monitor_stacksize - -static void __kmp_stg_print_monitor_stacksize(kmp_str_buf_t *buffer, - char const *name, void *data) { - if (__kmp_env_format) { - if (__kmp_monitor_stksize > 0) - KMP_STR_BUF_PRINT_NAME_EX(name); - else - KMP_STR_BUF_PRINT_NAME; - } else { - __kmp_str_buf_print(buffer, " %s", name); - } - if (__kmp_monitor_stksize > 0) { - __kmp_str_buf_print_size(buffer, __kmp_monitor_stksize); - } else { - __kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined)); - } - if (__kmp_env_format && __kmp_monitor_stksize) { - __kmp_str_buf_print(buffer, "'\n"); - } -} // __kmp_stg_print_monitor_stacksize -#endif // KMP_USE_MONITOR - -// ----------------------------------------------------------------------------- -// KMP_SETTINGS - -static void __kmp_stg_parse_settings(char const *name, char const *value, - void *data) { - __kmp_stg_parse_bool(name, value, &__kmp_settings); -} // __kmp_stg_parse_settings - -static void __kmp_stg_print_settings(kmp_str_buf_t *buffer, char const *name, - void *data) { - __kmp_stg_print_bool(buffer, name, __kmp_settings); -} // __kmp_stg_print_settings - -// ----------------------------------------------------------------------------- -// KMP_STACKPAD - -static void __kmp_stg_parse_stackpad(char const *name, char const *value, - void *data) { - 
__kmp_stg_parse_int(name, // Env var name - value, // Env var value - KMP_MIN_STKPADDING, // Min value - KMP_MAX_STKPADDING, // Max value - &__kmp_stkpadding // Var to initialize - ); -} // __kmp_stg_parse_stackpad - -static void __kmp_stg_print_stackpad(kmp_str_buf_t *buffer, char const *name, - void *data) { - __kmp_stg_print_int(buffer, name, __kmp_stkpadding); -} // __kmp_stg_print_stackpad - -// ----------------------------------------------------------------------------- -// KMP_STACKOFFSET - -static void __kmp_stg_parse_stackoffset(char const *name, char const *value, - void *data) { - __kmp_stg_parse_size(name, // Env var name - value, // Env var value - KMP_MIN_STKOFFSET, // Min value - KMP_MAX_STKOFFSET, // Max value - NULL, // - &__kmp_stkoffset, // Var to initialize - 1); -} // __kmp_stg_parse_stackoffset - -static void __kmp_stg_print_stackoffset(kmp_str_buf_t *buffer, char const *name, - void *data) { - __kmp_stg_print_size(buffer, name, __kmp_stkoffset); -} // __kmp_stg_print_stackoffset - -// ----------------------------------------------------------------------------- -// KMP_STACKSIZE, OMP_STACKSIZE, GOMP_STACKSIZE - -static void __kmp_stg_parse_stacksize(char const *name, char const *value, - void *data) { - - kmp_stg_ss_data_t *stacksize = (kmp_stg_ss_data_t *)data; - int rc; - - rc = __kmp_stg_check_rivals(name, value, stacksize->rivals); - if (rc) { - return; - } - __kmp_stg_parse_size(name, // Env var name - value, // Env var value - __kmp_sys_min_stksize, // Min value - KMP_MAX_STKSIZE, // Max value - &__kmp_env_stksize, // - &__kmp_stksize, // Var to initialize - stacksize->factor); - -} // __kmp_stg_parse_stacksize - -// This function is called for printing both KMP_STACKSIZE (factor is 1) and -// OMP_STACKSIZE (factor is 1024). Currently it is not possible to print -// OMP_STACKSIZE value in bytes. We can consider adding this possibility by a -// customer request in future. -static void __kmp_stg_print_stacksize(kmp_str_buf_t *buffer, char const *name, - void *data) { - kmp_stg_ss_data_t *stacksize = (kmp_stg_ss_data_t *)data; - if (__kmp_env_format) { - KMP_STR_BUF_PRINT_NAME_EX(name); - __kmp_str_buf_print_size(buffer, (__kmp_stksize % 1024) - ? __kmp_stksize / stacksize->factor - : __kmp_stksize); - __kmp_str_buf_print(buffer, "'\n"); - } else { - __kmp_str_buf_print(buffer, " %s=", name); - __kmp_str_buf_print_size(buffer, (__kmp_stksize % 1024) - ? 
__kmp_stksize / stacksize->factor - : __kmp_stksize); - __kmp_str_buf_print(buffer, "\n"); - } -} // __kmp_stg_print_stacksize - -// ----------------------------------------------------------------------------- -// KMP_VERSION - -static void __kmp_stg_parse_version(char const *name, char const *value, - void *data) { - __kmp_stg_parse_bool(name, value, &__kmp_version); -} // __kmp_stg_parse_version - -static void __kmp_stg_print_version(kmp_str_buf_t *buffer, char const *name, - void *data) { - __kmp_stg_print_bool(buffer, name, __kmp_version); -} // __kmp_stg_print_version - -// ----------------------------------------------------------------------------- -// KMP_WARNINGS - -static void __kmp_stg_parse_warnings(char const *name, char const *value, - void *data) { - __kmp_stg_parse_bool(name, value, &__kmp_generate_warnings); - if (__kmp_generate_warnings != kmp_warnings_off) { - // AC: only 0/1 values documented, so reset to explicit to distinguish from - // default setting - __kmp_generate_warnings = kmp_warnings_explicit; - } -} // __kmp_stg_parse_warnings - -static void __kmp_stg_print_warnings(kmp_str_buf_t *buffer, char const *name, - void *data) { - // AC: TODO: change to print_int? (needs documentation change) - __kmp_stg_print_bool(buffer, name, __kmp_generate_warnings); -} // __kmp_stg_print_warnings - -// ----------------------------------------------------------------------------- -// OMP_NESTED, OMP_NUM_THREADS - -static void __kmp_stg_parse_nested(char const *name, char const *value, - void *data) { - __kmp_stg_parse_bool(name, value, &__kmp_dflt_nested); -} // __kmp_stg_parse_nested - -static void __kmp_stg_print_nested(kmp_str_buf_t *buffer, char const *name, - void *data) { - __kmp_stg_print_bool(buffer, name, __kmp_dflt_nested); -} // __kmp_stg_print_nested - -static void __kmp_parse_nested_num_threads(const char *var, const char *env, - kmp_nested_nthreads_t *nth_array) { - const char *next = env; - const char *scan = next; - - int total = 0; // Count elements that were set. It'll be used as an array size - int prev_comma = FALSE; // For correct processing sequential commas - - // Count the number of values in the env. 
var string - for (;;) { - SKIP_WS(next); - - if (*next == '\0') { - break; - } - // Next character is not an integer or not a comma => end of list - if (((*next < '0') || (*next > '9')) && (*next != ',')) { - KMP_WARNING(NthSyntaxError, var, env); - return; - } - // The next character is ',' - if (*next == ',') { - // ',' is the fisrt character - if (total == 0 || prev_comma) { - total++; - } - prev_comma = TRUE; - next++; // skip ',' - SKIP_WS(next); - } - // Next character is a digit - if (*next >= '0' && *next <= '9') { - prev_comma = FALSE; - SKIP_DIGITS(next); - total++; - const char *tmp = next; - SKIP_WS(tmp); - if ((*next == ' ' || *next == '\t') && (*tmp >= '0' && *tmp <= '9')) { - KMP_WARNING(NthSpacesNotAllowed, var, env); - return; - } - } - } - KMP_DEBUG_ASSERT(total > 0); - if (total <= 0) { - KMP_WARNING(NthSyntaxError, var, env); - return; - } - - // Check if the nested nthreads array exists - if (!nth_array->nth) { - // Allocate an array of double size - nth_array->nth = (int *)KMP_INTERNAL_MALLOC(sizeof(int) * total * 2); - if (nth_array->nth == NULL) { - KMP_FATAL(MemoryAllocFailed); - } - nth_array->size = total * 2; - } else { - if (nth_array->size < total) { - // Increase the array size - do { - nth_array->size *= 2; - } while (nth_array->size < total); - - nth_array->nth = (int *)KMP_INTERNAL_REALLOC( - nth_array->nth, sizeof(int) * nth_array->size); - if (nth_array->nth == NULL) { - KMP_FATAL(MemoryAllocFailed); - } - } - } - nth_array->used = total; - int i = 0; - - prev_comma = FALSE; - total = 0; - // Save values in the array - for (;;) { - SKIP_WS(scan); - if (*scan == '\0') { - break; - } - // The next character is ',' - if (*scan == ',') { - // ',' in the beginning of the list - if (total == 0) { - // The value is supposed to be equal to __kmp_avail_proc but it is - // unknown at the moment. - // So let's put a placeholder (#threads = 0) to correct it later. - nth_array->nth[i++] = 0; - total++; - } else if (prev_comma) { - // Num threads is inherited from the previous level - nth_array->nth[i] = nth_array->nth[i - 1]; - i++; - total++; - } - prev_comma = TRUE; - scan++; // skip ',' - SKIP_WS(scan); - } - // Next character is a digit - if (*scan >= '0' && *scan <= '9') { - int num; - const char *buf = scan; - char const *msg = NULL; - prev_comma = FALSE; - SKIP_DIGITS(scan); - total++; - - num = __kmp_str_to_int(buf, *scan); - if (num < KMP_MIN_NTH) { - msg = KMP_I18N_STR(ValueTooSmall); - num = KMP_MIN_NTH; - } else if (num > __kmp_sys_max_nth) { - msg = KMP_I18N_STR(ValueTooLarge); - num = __kmp_sys_max_nth; - } - if (msg != NULL) { - // Message is not empty. Print warning. - KMP_WARNING(ParseSizeIntWarn, var, env, msg); - KMP_INFORM(Using_int_Value, var, num); - } - nth_array->nth[i++] = num; - } - } -} - -static void __kmp_stg_parse_num_threads(char const *name, char const *value, - void *data) { - // TODO: Remove this option. OMP_NUM_THREADS is a list of positive integers! 
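// For reference, __kmp_parse_nested_num_threads above gives the list these
// semantics:
//   "4,3,2" -> nth = {4, 3, 2}   (one value per nesting level)
//   ",4"    -> nth = {0, 4}      (leading comma: 0 is a placeholder replaced
//                                 by __kmp_avail_proc later)
//   "8,,2"  -> nth = {8, 8, 2}   (an empty item inherits the previous level)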
- if (!__kmp_strcasecmp_with_sentinel("all", value, 0)) { - // The array of 1 element - __kmp_nested_nth.nth = (int *)KMP_INTERNAL_MALLOC(sizeof(int)); - __kmp_nested_nth.size = __kmp_nested_nth.used = 1; - __kmp_nested_nth.nth[0] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = - __kmp_xproc; - } else { - __kmp_parse_nested_num_threads(name, value, &__kmp_nested_nth); - if (__kmp_nested_nth.nth) { - __kmp_dflt_team_nth = __kmp_nested_nth.nth[0]; - if (__kmp_dflt_team_nth_ub < __kmp_dflt_team_nth) { - __kmp_dflt_team_nth_ub = __kmp_dflt_team_nth; - } - } - } - K_DIAG(1, ("__kmp_dflt_team_nth == %d\n", __kmp_dflt_team_nth)); -} // __kmp_stg_parse_num_threads - -static void __kmp_stg_print_num_threads(kmp_str_buf_t *buffer, char const *name, - void *data) { - if (__kmp_env_format) { - KMP_STR_BUF_PRINT_NAME; - } else { - __kmp_str_buf_print(buffer, " %s", name); - } - if (__kmp_nested_nth.used) { - kmp_str_buf_t buf; - __kmp_str_buf_init(&buf); - for (int i = 0; i < __kmp_nested_nth.used; i++) { - __kmp_str_buf_print(&buf, "%d", __kmp_nested_nth.nth[i]); - if (i < __kmp_nested_nth.used - 1) { - __kmp_str_buf_print(&buf, ","); - } - } - __kmp_str_buf_print(buffer, "='%s'\n", buf.str); - __kmp_str_buf_free(&buf); - } else { - __kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined)); - } -} // __kmp_stg_print_num_threads - -// ----------------------------------------------------------------------------- -// OpenMP 3.0: KMP_TASKING, OMP_MAX_ACTIVE_LEVELS, - -static void __kmp_stg_parse_tasking(char const *name, char const *value, - void *data) { - __kmp_stg_parse_int(name, value, 0, (int)tskm_max, - (int *)&__kmp_tasking_mode); -} // __kmp_stg_parse_tasking - -static void __kmp_stg_print_tasking(kmp_str_buf_t *buffer, char const *name, - void *data) { - __kmp_stg_print_int(buffer, name, __kmp_tasking_mode); -} // __kmp_stg_print_tasking - -static void __kmp_stg_parse_task_stealing(char const *name, char const *value, - void *data) { - __kmp_stg_parse_int(name, value, 0, 1, - (int *)&__kmp_task_stealing_constraint); -} // __kmp_stg_parse_task_stealing - -static void __kmp_stg_print_task_stealing(kmp_str_buf_t *buffer, - char const *name, void *data) { - __kmp_stg_print_int(buffer, name, __kmp_task_stealing_constraint); -} // __kmp_stg_print_task_stealing - -static void __kmp_stg_parse_max_active_levels(char const *name, - char const *value, void *data) { - __kmp_stg_parse_int(name, value, 0, KMP_MAX_ACTIVE_LEVELS_LIMIT, - &__kmp_dflt_max_active_levels); -} // __kmp_stg_parse_max_active_levels - -static void __kmp_stg_print_max_active_levels(kmp_str_buf_t *buffer, - char const *name, void *data) { - __kmp_stg_print_int(buffer, name, __kmp_dflt_max_active_levels); -} // __kmp_stg_print_max_active_levels - -#if OMP_40_ENABLED -// ----------------------------------------------------------------------------- -// OpenMP 4.0: OMP_DEFAULT_DEVICE -static void __kmp_stg_parse_default_device(char const *name, char const *value, - void *data) { - __kmp_stg_parse_int(name, value, 0, KMP_MAX_DEFAULT_DEVICE_LIMIT, - &__kmp_default_device); -} // __kmp_stg_parse_default_device - -static void __kmp_stg_print_default_device(kmp_str_buf_t *buffer, - char const *name, void *data) { - __kmp_stg_print_int(buffer, name, __kmp_default_device); -} // __kmp_stg_print_default_device -#endif - -#if OMP_50_ENABLED -// ----------------------------------------------------------------------------- -// OpenMP 5.0: OMP_TARGET_OFFLOAD -static void __kmp_stg_parse_target_offload(char const *name, char const *value, - void 
*data) {
-  const char *next = value;
-  const char *scan = next;
-
-  __kmp_target_offload = tgt_default;
-  SKIP_WS(next);
-  if (*next == '\0')
-    return;
-  scan = next;
-  if (__kmp_match_str("MANDATORY", scan, &next)) {
-    __kmp_target_offload = tgt_mandatory;
-  } else if (__kmp_match_str("DISABLED", scan, &next)) {
-    __kmp_target_offload = tgt_disabled;
-  } else if (__kmp_match_str("DEFAULT", scan, &next)) {
-    __kmp_target_offload = tgt_default;
-  } else {
-    KMP_WARNING(SyntaxErrorUsing, name, "DEFAULT");
-  }
-
-} // __kmp_stg_parse_target_offload
-
-static void __kmp_stg_print_target_offload(kmp_str_buf_t *buffer,
-                                           char const *name, void *data) {
-  const char *value = NULL;
-  if (__kmp_target_offload == tgt_default)
-    value = "DEFAULT";
-  else if (__kmp_target_offload == tgt_mandatory)
-    value = "MANDATORY";
-  else if (__kmp_target_offload == tgt_disabled)
-    value = "DISABLED";
-  if (value) {
-    __kmp_str_buf_print(buffer, " %s=%s\n", name, value);
-  }
-} // __kmp_stg_print_target_offload
-#endif
-
-#if OMP_45_ENABLED
-// -----------------------------------------------------------------------------
-// OpenMP 4.5: OMP_MAX_TASK_PRIORITY
-static void __kmp_stg_parse_max_task_priority(char const *name,
-                                              char const *value, void *data) {
-  __kmp_stg_parse_int(name, value, 0, KMP_MAX_TASK_PRIORITY_LIMIT,
-                      &__kmp_max_task_priority);
-} // __kmp_stg_parse_max_task_priority
-
-static void __kmp_stg_print_max_task_priority(kmp_str_buf_t *buffer,
-                                              char const *name, void *data) {
-  __kmp_stg_print_int(buffer, name, __kmp_max_task_priority);
-} // __kmp_stg_print_max_task_priority
-
-// KMP_TASKLOOP_MIN_TASKS
-// taskloop threshold to switch from recursive to linear task creation
-static void __kmp_stg_parse_taskloop_min_tasks(char const *name,
-                                               char const *value, void *data) {
-  int tmp;
-  __kmp_stg_parse_int(name, value, 0, INT_MAX, &tmp);
-  __kmp_taskloop_min_tasks = tmp;
-} // __kmp_stg_parse_taskloop_min_tasks
-
-static void __kmp_stg_print_taskloop_min_tasks(kmp_str_buf_t *buffer,
-                                               char const *name, void *data) {
-  __kmp_stg_print_int(buffer, name, __kmp_taskloop_min_tasks);
-} // __kmp_stg_print_taskloop_min_tasks
-#endif // OMP_45_ENABLED
-
-// -----------------------------------------------------------------------------
-// KMP_DISP_NUM_BUFFERS
-static void __kmp_stg_parse_disp_buffers(char const *name, char const *value,
-                                         void *data) {
-  if (TCR_4(__kmp_init_serial)) {
-    KMP_WARNING(EnvSerialWarn, name);
-    return;
-  } // read value before serial initialization only
-  __kmp_stg_parse_int(name, value, 1, KMP_MAX_NTH, &__kmp_dispatch_num_buffers);
-} // __kmp_stg_parse_disp_buffers
-
-static void __kmp_stg_print_disp_buffers(kmp_str_buf_t *buffer,
-                                         char const *name, void *data) {
-  __kmp_stg_print_int(buffer, name, __kmp_dispatch_num_buffers);
-} // __kmp_stg_print_disp_buffers
-
-#if KMP_NESTED_HOT_TEAMS
-// -----------------------------------------------------------------------------
-// KMP_HOT_TEAMS_MAX_LEVEL, KMP_HOT_TEAMS_MODE
-
-static void __kmp_stg_parse_hot_teams_level(char const *name, char const *value,
-                                            void *data) {
-  if (TCR_4(__kmp_init_parallel)) {
-    KMP_WARNING(EnvParallelWarn, name);
-    return;
-  } // read value before first parallel only
-  __kmp_stg_parse_int(name, value, 0, KMP_MAX_ACTIVE_LEVELS_LIMIT,
-                      &__kmp_hot_teams_max_level);
-} // __kmp_stg_parse_hot_teams_level
-
-static void __kmp_stg_print_hot_teams_level(kmp_str_buf_t *buffer,
-                                            char const *name, void *data) {
-  __kmp_stg_print_int(buffer, name, __kmp_hot_teams_max_level);
-} // 
__kmp_stg_print_hot_teams_level - -static void __kmp_stg_parse_hot_teams_mode(char const *name, char const *value, - void *data) { - if (TCR_4(__kmp_init_parallel)) { - KMP_WARNING(EnvParallelWarn, name); - return; - } // read value before first parallel only - __kmp_stg_parse_int(name, value, 0, KMP_MAX_ACTIVE_LEVELS_LIMIT, - &__kmp_hot_teams_mode); -} // __kmp_stg_parse_hot_teams_mode - -static void __kmp_stg_print_hot_teams_mode(kmp_str_buf_t *buffer, - char const *name, void *data) { - __kmp_stg_print_int(buffer, name, __kmp_hot_teams_mode); -} // __kmp_stg_print_hot_teams_mode - -#endif // KMP_NESTED_HOT_TEAMS - -// ----------------------------------------------------------------------------- -// KMP_HANDLE_SIGNALS - -#if KMP_HANDLE_SIGNALS - -static void __kmp_stg_parse_handle_signals(char const *name, char const *value, - void *data) { - __kmp_stg_parse_bool(name, value, &__kmp_handle_signals); -} // __kmp_stg_parse_handle_signals - -static void __kmp_stg_print_handle_signals(kmp_str_buf_t *buffer, - char const *name, void *data) { - __kmp_stg_print_bool(buffer, name, __kmp_handle_signals); -} // __kmp_stg_print_handle_signals - -#endif // KMP_HANDLE_SIGNALS - -// ----------------------------------------------------------------------------- -// KMP_X_DEBUG, KMP_DEBUG, KMP_DEBUG_BUF_*, KMP_DIAG - -#ifdef KMP_DEBUG - -#define KMP_STG_X_DEBUG(x) \ - static void __kmp_stg_parse_##x##_debug(char const *name, char const *value, \ - void *data) { \ - __kmp_stg_parse_int(name, value, 0, INT_MAX, &kmp_##x##_debug); \ - } /* __kmp_stg_parse_x_debug */ \ - static void __kmp_stg_print_##x##_debug(kmp_str_buf_t *buffer, \ - char const *name, void *data) { \ - __kmp_stg_print_int(buffer, name, kmp_##x##_debug); \ - } /* __kmp_stg_print_x_debug */ - -KMP_STG_X_DEBUG(a) -KMP_STG_X_DEBUG(b) -KMP_STG_X_DEBUG(c) -KMP_STG_X_DEBUG(d) -KMP_STG_X_DEBUG(e) -KMP_STG_X_DEBUG(f) - -#undef KMP_STG_X_DEBUG - -static void __kmp_stg_parse_debug(char const *name, char const *value, - void *data) { - int debug = 0; - __kmp_stg_parse_int(name, value, 0, INT_MAX, &debug); - if (kmp_a_debug < debug) { - kmp_a_debug = debug; - } - if (kmp_b_debug < debug) { - kmp_b_debug = debug; - } - if (kmp_c_debug < debug) { - kmp_c_debug = debug; - } - if (kmp_d_debug < debug) { - kmp_d_debug = debug; - } - if (kmp_e_debug < debug) { - kmp_e_debug = debug; - } - if (kmp_f_debug < debug) { - kmp_f_debug = debug; - } -} // __kmp_stg_parse_debug - -static void __kmp_stg_parse_debug_buf(char const *name, char const *value, - void *data) { - __kmp_stg_parse_bool(name, value, &__kmp_debug_buf); - // !!! TODO: Move buffer initialization of of this file! It may works - // incorrectly if KMP_DEBUG_BUF is parsed before KMP_DEBUG_BUF_LINES or - // KMP_DEBUG_BUF_CHARS. 
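// Layout note for the allocation below: the buffer holds __kmp_debug_buf_lines
// slots of __kmp_debug_buf_chars bytes each, and the loop NUL-terminates the
// first byte of every slot so each line starts out as an empty C string.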
-  if (__kmp_debug_buf) {
-    int i;
-    int elements = __kmp_debug_buf_lines * __kmp_debug_buf_chars;
-
-    /* allocate and initialize all entries in debug buffer to empty */
-    __kmp_debug_buffer = (char *)__kmp_page_allocate(elements * sizeof(char));
-    for (i = 0; i < elements; i += __kmp_debug_buf_chars)
-      __kmp_debug_buffer[i] = '\0';
-
-    __kmp_debug_count = 0;
-  }
-  K_DIAG(1, ("__kmp_debug_buf = %d\n", __kmp_debug_buf));
-} // __kmp_stg_parse_debug_buf
-
-static void __kmp_stg_print_debug_buf(kmp_str_buf_t *buffer, char const *name,
-                                      void *data) {
-  __kmp_stg_print_bool(buffer, name, __kmp_debug_buf);
-} // __kmp_stg_print_debug_buf
-
-static void __kmp_stg_parse_debug_buf_atomic(char const *name,
-                                             char const *value, void *data) {
-  __kmp_stg_parse_bool(name, value, &__kmp_debug_buf_atomic);
-} // __kmp_stg_parse_debug_buf_atomic
-
-static void __kmp_stg_print_debug_buf_atomic(kmp_str_buf_t *buffer,
-                                             char const *name, void *data) {
-  __kmp_stg_print_bool(buffer, name, __kmp_debug_buf_atomic);
-} // __kmp_stg_print_debug_buf_atomic
-
-static void __kmp_stg_parse_debug_buf_chars(char const *name, char const *value,
-                                            void *data) {
-  __kmp_stg_parse_int(name, value, KMP_DEBUG_BUF_CHARS_MIN, INT_MAX,
-                      &__kmp_debug_buf_chars);
-} // __kmp_stg_parse_debug_buf_chars
-
-static void __kmp_stg_print_debug_buf_chars(kmp_str_buf_t *buffer,
-                                            char const *name, void *data) {
-  __kmp_stg_print_int(buffer, name, __kmp_debug_buf_chars);
-} // __kmp_stg_print_debug_buf_chars
-
-static void __kmp_stg_parse_debug_buf_lines(char const *name, char const *value,
-                                            void *data) {
-  __kmp_stg_parse_int(name, value, KMP_DEBUG_BUF_LINES_MIN, INT_MAX,
-                      &__kmp_debug_buf_lines);
-} // __kmp_stg_parse_debug_buf_lines
-
-static void __kmp_stg_print_debug_buf_lines(kmp_str_buf_t *buffer,
-                                            char const *name, void *data) {
-  __kmp_stg_print_int(buffer, name, __kmp_debug_buf_lines);
-} // __kmp_stg_print_debug_buf_lines
-
-static void __kmp_stg_parse_diag(char const *name, char const *value,
-                                 void *data) {
-  __kmp_stg_parse_int(name, value, 0, INT_MAX, &kmp_diag);
-} // __kmp_stg_parse_diag
-
-static void __kmp_stg_print_diag(kmp_str_buf_t *buffer, char const *name,
-                                 void *data) {
-  __kmp_stg_print_int(buffer, name, kmp_diag);
-} // __kmp_stg_print_diag
-
-#endif // KMP_DEBUG
-
-// -----------------------------------------------------------------------------
-// KMP_ALIGN_ALLOC
-
-static void __kmp_stg_parse_align_alloc(char const *name, char const *value,
-                                        void *data) {
-  __kmp_stg_parse_size(name, value, CACHE_LINE, INT_MAX, NULL,
-                       &__kmp_align_alloc, 1);
-} // __kmp_stg_parse_align_alloc
-
-static void __kmp_stg_print_align_alloc(kmp_str_buf_t *buffer, char const *name,
-                                        void *data) {
-  __kmp_stg_print_size(buffer, name, __kmp_align_alloc);
-} // __kmp_stg_print_align_alloc
-
-// -----------------------------------------------------------------------------
-// KMP_PLAIN_BARRIER, KMP_FORKJOIN_BARRIER, KMP_REDUCTION_BARRIER
-
-// TODO: Remove __kmp_barrier_branch_bit_env_name variable, remove loops from
-// parse and print functions, pass required info through data argument.
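The parser that follows splits each KMP_*_BARRIER value into a gather part and an optional release part. As a standalone illustration, here is a minimal sketch under simplified assumptions: parse_branch_bits is an invented name, strtoul stands in for __kmp_str_to_int, and a single default replaces the separate gather/release defaults used by the real code.

#include <cstdlib>

// Invented, standalone sketch of parsing "gather_bits[,release_bits]", e.g.
// "2,4". A missing release part falls back to the default, and out-of-range
// values are replaced by it, mirroring the KMP_MAX_BRANCH_BITS checks in the
// real parser (which warns before substituting).
struct branch_bits {
  unsigned gather;
  unsigned release;
};

static branch_bits parse_branch_bits(const char *value, unsigned dflt,
                                     unsigned max_bits) {
  branch_bits bb;
  char *rest = nullptr;
  bb.gather = (unsigned)strtoul(value, &rest, 10);
  bb.release =
      (rest && *rest == ',') ? (unsigned)strtoul(rest + 1, nullptr, 10) : dflt;
  if (bb.gather > max_bits)
    bb.gather = dflt;
  if (bb.release > max_bits)
    bb.release = dflt;
  return bb;
}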
- -static void __kmp_stg_parse_barrier_branch_bit(char const *name, - char const *value, void *data) { - const char *var; - - /* ---------- Barrier branch bit control ------------ */ - for (int i = bs_plain_barrier; i < bs_last_barrier; i++) { - var = __kmp_barrier_branch_bit_env_name[i]; - if ((strcmp(var, name) == 0) && (value != 0)) { - char *comma; - - comma = CCAST(char *, strchr(value, ',')); - __kmp_barrier_gather_branch_bits[i] = - (kmp_uint32)__kmp_str_to_int(value, ','); - /* is there a specified release parameter? */ - if (comma == NULL) { - __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt; - } else { - __kmp_barrier_release_branch_bits[i] = - (kmp_uint32)__kmp_str_to_int(comma + 1, 0); - - if (__kmp_barrier_release_branch_bits[i] > KMP_MAX_BRANCH_BITS) { - __kmp_msg(kmp_ms_warning, - KMP_MSG(BarrReleaseValueInvalid, name, comma + 1), - __kmp_msg_null); - __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt; - } - } - if (__kmp_barrier_gather_branch_bits[i] > KMP_MAX_BRANCH_BITS) { - KMP_WARNING(BarrGatherValueInvalid, name, value); - KMP_INFORM(Using_uint_Value, name, __kmp_barrier_gather_bb_dflt); - __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt; - } - } - K_DIAG(1, ("%s == %d,%d\n", __kmp_barrier_branch_bit_env_name[i], - __kmp_barrier_gather_branch_bits[i], - __kmp_barrier_release_branch_bits[i])) - } -} // __kmp_stg_parse_barrier_branch_bit - -static void __kmp_stg_print_barrier_branch_bit(kmp_str_buf_t *buffer, - char const *name, void *data) { - const char *var; - for (int i = bs_plain_barrier; i < bs_last_barrier; i++) { - var = __kmp_barrier_branch_bit_env_name[i]; - if (strcmp(var, name) == 0) { - if (__kmp_env_format) { - KMP_STR_BUF_PRINT_NAME_EX(__kmp_barrier_branch_bit_env_name[i]); - } else { - __kmp_str_buf_print(buffer, " %s='", - __kmp_barrier_branch_bit_env_name[i]); - } - __kmp_str_buf_print(buffer, "%d,%d'\n", - __kmp_barrier_gather_branch_bits[i], - __kmp_barrier_release_branch_bits[i]); - } - } -} // __kmp_stg_print_barrier_branch_bit - -// ---------------------------------------------------------------------------- -// KMP_PLAIN_BARRIER_PATTERN, KMP_FORKJOIN_BARRIER_PATTERN, -// KMP_REDUCTION_BARRIER_PATTERN - -// TODO: Remove __kmp_barrier_pattern_name variable, remove loops from parse and -// print functions, pass required data to functions through data argument. 
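The pattern variant below matches names instead of numbers. A minimal sketch, assuming a three-entry name table and exact (unabbreviated) matching; demo_lookup_pattern and demo_parse_pattern_pair are invented names, and this sketch reuses the gather pattern when the release part is missing, whereas the real code simply leaves the release pattern unchanged.

#include <cstring>

static const char *demo_pattern_names[] = {"linear", "tree", "hyper"};

// Resolve one pattern token against the name table; index 0 ("linear") is the
// fallback, matching the warning paths in the function below.
static int demo_lookup_pattern(const char *tok, size_t len) {
  for (int j = 0; j < 3; ++j)
    if (strlen(demo_pattern_names[j]) == len &&
        strncmp(demo_pattern_names[j], tok, len) == 0)
      return j;
  return 0;
}

static void demo_parse_pattern_pair(const char *value, int *gather,
                                    int *release) {
  const char *comma = strchr(value, ',');
  size_t glen = comma ? (size_t)(comma - value) : strlen(value);
  *gather = demo_lookup_pattern(value, glen);
  // Simplification: reuse the gather pattern when ",release" is absent.
  *release =
      comma ? demo_lookup_pattern(comma + 1, strlen(comma + 1)) : *gather;
}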
- -static void __kmp_stg_parse_barrier_pattern(char const *name, char const *value, - void *data) { - const char *var; - /* ---------- Barrier method control ------------ */ - - for (int i = bs_plain_barrier; i < bs_last_barrier; i++) { - var = __kmp_barrier_pattern_env_name[i]; - - if ((strcmp(var, name) == 0) && (value != 0)) { - int j; - char *comma = CCAST(char *, strchr(value, ',')); - - /* handle first parameter: gather pattern */ - for (j = bp_linear_bar; j < bp_last_bar; j++) { - if (__kmp_match_with_sentinel(__kmp_barrier_pattern_name[j], value, 1, - ',')) { - __kmp_barrier_gather_pattern[i] = (kmp_bar_pat_e)j; - break; - } - } - if (j == bp_last_bar) { - KMP_WARNING(BarrGatherValueInvalid, name, value); - KMP_INFORM(Using_str_Value, name, - __kmp_barrier_pattern_name[bp_linear_bar]); - } - - /* handle second parameter: release pattern */ - if (comma != NULL) { - for (j = bp_linear_bar; j < bp_last_bar; j++) { - if (__kmp_str_match(__kmp_barrier_pattern_name[j], 1, comma + 1)) { - __kmp_barrier_release_pattern[i] = (kmp_bar_pat_e)j; - break; - } - } - if (j == bp_last_bar) { - __kmp_msg(kmp_ms_warning, - KMP_MSG(BarrReleaseValueInvalid, name, comma + 1), - __kmp_msg_null); - KMP_INFORM(Using_str_Value, name, - __kmp_barrier_pattern_name[bp_linear_bar]); - } - } - } - } -} // __kmp_stg_parse_barrier_pattern - -static void __kmp_stg_print_barrier_pattern(kmp_str_buf_t *buffer, - char const *name, void *data) { - const char *var; - for (int i = bs_plain_barrier; i < bs_last_barrier; i++) { - var = __kmp_barrier_pattern_env_name[i]; - if (strcmp(var, name) == 0) { - int j = __kmp_barrier_gather_pattern[i]; - int k = __kmp_barrier_release_pattern[i]; - if (__kmp_env_format) { - KMP_STR_BUF_PRINT_NAME_EX(__kmp_barrier_pattern_env_name[i]); - } else { - __kmp_str_buf_print(buffer, " %s='", - __kmp_barrier_pattern_env_name[i]); - } - __kmp_str_buf_print(buffer, "%s,%s'\n", __kmp_barrier_pattern_name[j], - __kmp_barrier_pattern_name[k]); - } - } -} // __kmp_stg_print_barrier_pattern - -// ----------------------------------------------------------------------------- -// KMP_ABORT_DELAY - -static void __kmp_stg_parse_abort_delay(char const *name, char const *value, - void *data) { - // Units of KMP_DELAY_ABORT are seconds, units of __kmp_abort_delay is - // milliseconds. 
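// Worked example: KMP_ABORT_DELAY="3" is parsed below as delay = 3 (seconds)
// and stored as __kmp_abort_delay = 3000 (milliseconds); the print function
// reports the millisecond value.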
- int delay = __kmp_abort_delay / 1000; - __kmp_stg_parse_int(name, value, 0, INT_MAX / 1000, &delay); - __kmp_abort_delay = delay * 1000; -} // __kmp_stg_parse_abort_delay - -static void __kmp_stg_print_abort_delay(kmp_str_buf_t *buffer, char const *name, - void *data) { - __kmp_stg_print_int(buffer, name, __kmp_abort_delay); -} // __kmp_stg_print_abort_delay - -// ----------------------------------------------------------------------------- -// KMP_CPUINFO_FILE - -static void __kmp_stg_parse_cpuinfo_file(char const *name, char const *value, - void *data) { -#if KMP_AFFINITY_SUPPORTED - __kmp_stg_parse_str(name, value, &__kmp_cpuinfo_file); - K_DIAG(1, ("__kmp_cpuinfo_file == %s\n", __kmp_cpuinfo_file)); -#endif -} //__kmp_stg_parse_cpuinfo_file - -static void __kmp_stg_print_cpuinfo_file(kmp_str_buf_t *buffer, - char const *name, void *data) { -#if KMP_AFFINITY_SUPPORTED - if (__kmp_env_format) { - KMP_STR_BUF_PRINT_NAME; - } else { - __kmp_str_buf_print(buffer, " %s", name); - } - if (__kmp_cpuinfo_file) { - __kmp_str_buf_print(buffer, "='%s'\n", __kmp_cpuinfo_file); - } else { - __kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined)); - } -#endif -} //__kmp_stg_print_cpuinfo_file - -// ----------------------------------------------------------------------------- -// KMP_FORCE_REDUCTION, KMP_DETERMINISTIC_REDUCTION - -static void __kmp_stg_parse_force_reduction(char const *name, char const *value, - void *data) { - kmp_stg_fr_data_t *reduction = (kmp_stg_fr_data_t *)data; - int rc; - - rc = __kmp_stg_check_rivals(name, value, reduction->rivals); - if (rc) { - return; - } - if (reduction->force) { - if (value != 0) { - if (__kmp_str_match("critical", 0, value)) - __kmp_force_reduction_method = critical_reduce_block; - else if (__kmp_str_match("atomic", 0, value)) - __kmp_force_reduction_method = atomic_reduce_block; - else if (__kmp_str_match("tree", 0, value)) - __kmp_force_reduction_method = tree_reduce_block; - else { - KMP_FATAL(UnknownForceReduction, name, value); - } - } - } else { - __kmp_stg_parse_bool(name, value, &__kmp_determ_red); - if (__kmp_determ_red) { - __kmp_force_reduction_method = tree_reduce_block; - } else { - __kmp_force_reduction_method = reduction_method_not_defined; - } - } - K_DIAG(1, ("__kmp_force_reduction_method == %d\n", - __kmp_force_reduction_method)); -} // __kmp_stg_parse_force_reduction - -static void __kmp_stg_print_force_reduction(kmp_str_buf_t *buffer, - char const *name, void *data) { - - kmp_stg_fr_data_t *reduction = (kmp_stg_fr_data_t *)data; - if (reduction->force) { - if (__kmp_force_reduction_method == critical_reduce_block) { - __kmp_stg_print_str(buffer, name, "critical"); - } else if (__kmp_force_reduction_method == atomic_reduce_block) { - __kmp_stg_print_str(buffer, name, "atomic"); - } else if (__kmp_force_reduction_method == tree_reduce_block) { - __kmp_stg_print_str(buffer, name, "tree"); - } else { - if (__kmp_env_format) { - KMP_STR_BUF_PRINT_NAME; - } else { - __kmp_str_buf_print(buffer, " %s", name); - } - __kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined)); - } - } else { - __kmp_stg_print_bool(buffer, name, __kmp_determ_red); - } - -} // __kmp_stg_print_force_reduction - -// ----------------------------------------------------------------------------- -// KMP_STORAGE_MAP - -static void __kmp_stg_parse_storage_map(char const *name, char const *value, - void *data) { - if (__kmp_str_match("verbose", 1, value)) { - __kmp_storage_map = TRUE; - __kmp_storage_map_verbose = TRUE; - 
__kmp_storage_map_verbose_specified = TRUE; - - } else { - __kmp_storage_map_verbose = FALSE; - __kmp_stg_parse_bool(name, value, &__kmp_storage_map); // !!! - } -} // __kmp_stg_parse_storage_map - -static void __kmp_stg_print_storage_map(kmp_str_buf_t *buffer, char const *name, - void *data) { - if (__kmp_storage_map_verbose || __kmp_storage_map_verbose_specified) { - __kmp_stg_print_str(buffer, name, "verbose"); - } else { - __kmp_stg_print_bool(buffer, name, __kmp_storage_map); - } -} // __kmp_stg_print_storage_map - -// ----------------------------------------------------------------------------- -// KMP_ALL_THREADPRIVATE - -static void __kmp_stg_parse_all_threadprivate(char const *name, - char const *value, void *data) { - __kmp_stg_parse_int(name, value, - __kmp_allThreadsSpecified ? __kmp_max_nth : 1, - __kmp_max_nth, &__kmp_tp_capacity); -} // __kmp_stg_parse_all_threadprivate - -static void __kmp_stg_print_all_threadprivate(kmp_str_buf_t *buffer, - char const *name, void *data) { - __kmp_stg_print_int(buffer, name, __kmp_tp_capacity); -} - -// ----------------------------------------------------------------------------- -// KMP_FOREIGN_THREADS_THREADPRIVATE - -static void __kmp_stg_parse_foreign_threads_threadprivate(char const *name, - char const *value, - void *data) { - __kmp_stg_parse_bool(name, value, &__kmp_foreign_tp); -} // __kmp_stg_parse_foreign_threads_threadprivate - -static void __kmp_stg_print_foreign_threads_threadprivate(kmp_str_buf_t *buffer, - char const *name, - void *data) { - __kmp_stg_print_bool(buffer, name, __kmp_foreign_tp); -} // __kmp_stg_print_foreign_threads_threadprivate - -// ----------------------------------------------------------------------------- -// KMP_AFFINITY, GOMP_CPU_AFFINITY, KMP_TOPOLOGY_METHOD - -#if KMP_AFFINITY_SUPPORTED -// Parse the proc id list. Return TRUE if successful, FALSE otherwise. -static int __kmp_parse_affinity_proc_id_list(const char *var, const char *env, - const char **nextEnv, - char **proclist) { - const char *scan = env; - const char *next = scan; - int empty = TRUE; - - *proclist = NULL; - - for (;;) { - int start, end, stride; - - SKIP_WS(scan); - next = scan; - if (*next == '\0') { - break; - } - - if (*next == '{') { - int num; - next++; // skip '{' - SKIP_WS(next); - scan = next; - - // Read the first integer in the set. - if ((*next < '0') || (*next > '9')) { - KMP_WARNING(AffSyntaxError, var); - return FALSE; - } - SKIP_DIGITS(next); - num = __kmp_str_to_int(scan, *next); - KMP_ASSERT(num >= 0); - - for (;;) { - // Check for end of set. - SKIP_WS(next); - if (*next == '}') { - next++; // skip '}' - break; - } - - // Skip optional comma. - if (*next == ',') { - next++; - } - SKIP_WS(next); - - // Read the next integer in the set. - scan = next; - if ((*next < '0') || (*next > '9')) { - KMP_WARNING(AffSyntaxError, var); - return FALSE; - } - - SKIP_DIGITS(next); - num = __kmp_str_to_int(scan, *next); - KMP_ASSERT(num >= 0); - } - empty = FALSE; - - SKIP_WS(next); - if (*next == ',') { - next++; - } - scan = next; - continue; - } - - // Next character is not an integer => end of list - if ((*next < '0') || (*next > '9')) { - if (empty) { - KMP_WARNING(AffSyntaxError, var); - return FALSE; - } - break; - } - - // Read the first integer. - SKIP_DIGITS(next); - start = __kmp_str_to_int(scan, *next); - KMP_ASSERT(start >= 0); - SKIP_WS(next); - - // If this isn't a range, then go on. - if (*next != '-') { - empty = FALSE; - - // Skip optional comma. 
- if (*next == ',') { - next++; - } - scan = next; - continue; - } - - // This is a range. Skip over the '-' and read in the 2nd int. - next++; // skip '-' - SKIP_WS(next); - scan = next; - if ((*next < '0') || (*next > '9')) { - KMP_WARNING(AffSyntaxError, var); - return FALSE; - } - SKIP_DIGITS(next); - end = __kmp_str_to_int(scan, *next); - KMP_ASSERT(end >= 0); - - // Check for a stride parameter - stride = 1; - SKIP_WS(next); - if (*next == ':') { - // A stride is specified. Skip over the ':" and read the 3rd int. - int sign = +1; - next++; // skip ':' - SKIP_WS(next); - scan = next; - if (*next == '-') { - sign = -1; - next++; - SKIP_WS(next); - scan = next; - } - if ((*next < '0') || (*next > '9')) { - KMP_WARNING(AffSyntaxError, var); - return FALSE; - } - SKIP_DIGITS(next); - stride = __kmp_str_to_int(scan, *next); - KMP_ASSERT(stride >= 0); - stride *= sign; - } - - // Do some range checks. - if (stride == 0) { - KMP_WARNING(AffZeroStride, var); - return FALSE; - } - if (stride > 0) { - if (start > end) { - KMP_WARNING(AffStartGreaterEnd, var, start, end); - return FALSE; - } - } else { - if (start < end) { - KMP_WARNING(AffStrideLessZero, var, start, end); - return FALSE; - } - } - if ((end - start) / stride > 65536) { - KMP_WARNING(AffRangeTooBig, var, end, start, stride); - return FALSE; - } - - empty = FALSE; - - // Skip optional comma. - SKIP_WS(next); - if (*next == ',') { - next++; - } - scan = next; - } - - *nextEnv = next; - - { - int len = next - env; - char *retlist = (char *)__kmp_allocate((len + 1) * sizeof(char)); - KMP_MEMCPY_S(retlist, (len + 1) * sizeof(char), env, len * sizeof(char)); - retlist[len] = '\0'; - *proclist = retlist; - } - return TRUE; -} - -// If KMP_AFFINITY is specified without a type, then -// __kmp_affinity_notype should point to its setting. -static kmp_setting_t *__kmp_affinity_notype = NULL; - -static void __kmp_parse_affinity_env(char const *name, char const *value, - enum affinity_type *out_type, - char **out_proclist, int *out_verbose, - int *out_warn, int *out_respect, - enum affinity_gran *out_gran, - int *out_gran_levels, int *out_dups, - int *out_compact, int *out_offset) { - char *buffer = NULL; // Copy of env var value. - char *buf = NULL; // Buffer for strtok_r() function. - char *next = NULL; // end of token / start of next. - const char *start; // start of current token (for err msgs) - int count = 0; // Counter of parsed integer numbers. - int number[2]; // Parsed numbers. - - // Guards. - int type = 0; - int proclist = 0; - int verbose = 0; - int warnings = 0; - int respect = 0; - int gran = 0; - int dups = 0; - - KMP_ASSERT(value != NULL); - - if (TCR_4(__kmp_init_middle)) { - KMP_WARNING(EnvMiddleWarn, name); - __kmp_env_toPrint(name, 0); - return; - } - __kmp_env_toPrint(name, 1); - - buffer = - __kmp_str_format("%s", value); // Copy env var to keep original intact. - buf = buffer; - SKIP_WS(buf); - -// Helper macros. - -// If we see a parse error, emit a warning and scan to the next ",". -// -// FIXME - there's got to be a better way to print an error -// message, hopefully without overwritting peices of buf. 
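The EMIT_WARN macro defined next depends on one trick worth isolating: to print only the offending token, it temporarily NUL-terminates the buffer just past the token, emits the warning, then restores the saved character. A self-contained sketch (warn_token is an invented name):

#include <cstdio>

// Hypothetical standalone version of the save/terminate/print/restore trick
// used by EMIT_WARN below: `tok` points into a writable buffer, `end` points
// one past the token.
static void warn_token(char *tok, char *end) {
  char saved = *end;
  *end = '\0'; // temporarily cut the string at the token end
  fprintf(stderr, "invalid parameter: \"%s\"\n", tok);
  *end = saved; // restore the buffer for further scanning
}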
-#define EMIT_WARN(skip, errlist) \ - { \ - char ch; \ - if (skip) { \ - SKIP_TO(next, ','); \ - } \ - ch = *next; \ - *next = '\0'; \ - KMP_WARNING errlist; \ - *next = ch; \ - if (skip) { \ - if (ch == ',') \ - next++; \ - } \ - buf = next; \ - } - -#define _set_param(_guard, _var, _val) \ - { \ - if (_guard == 0) { \ - _var = _val; \ - } else { \ - EMIT_WARN(FALSE, (AffParamDefined, name, start)); \ - } \ - ++_guard; \ - } - -#define set_type(val) _set_param(type, *out_type, val) -#define set_verbose(val) _set_param(verbose, *out_verbose, val) -#define set_warnings(val) _set_param(warnings, *out_warn, val) -#define set_respect(val) _set_param(respect, *out_respect, val) -#define set_dups(val) _set_param(dups, *out_dups, val) -#define set_proclist(val) _set_param(proclist, *out_proclist, val) - -#define set_gran(val, levels) \ - { \ - if (gran == 0) { \ - *out_gran = val; \ - *out_gran_levels = levels; \ - } else { \ - EMIT_WARN(FALSE, (AffParamDefined, name, start)); \ - } \ - ++gran; \ - } - -#if OMP_40_ENABLED - KMP_DEBUG_ASSERT((__kmp_nested_proc_bind.bind_types != NULL) && - (__kmp_nested_proc_bind.used > 0)); -#endif - - while (*buf != '\0') { - start = next = buf; - - if (__kmp_match_str("none", buf, CCAST(const char **, &next))) { - set_type(affinity_none); -#if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; -#endif - buf = next; - } else if (__kmp_match_str("scatter", buf, CCAST(const char **, &next))) { - set_type(affinity_scatter); -#if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; -#endif - buf = next; - } else if (__kmp_match_str("compact", buf, CCAST(const char **, &next))) { - set_type(affinity_compact); -#if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; -#endif - buf = next; - } else if (__kmp_match_str("logical", buf, CCAST(const char **, &next))) { - set_type(affinity_logical); -#if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; -#endif - buf = next; - } else if (__kmp_match_str("physical", buf, CCAST(const char **, &next))) { - set_type(affinity_physical); -#if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; -#endif - buf = next; - } else if (__kmp_match_str("explicit", buf, CCAST(const char **, &next))) { - set_type(affinity_explicit); -#if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; -#endif - buf = next; - } else if (__kmp_match_str("balanced", buf, CCAST(const char **, &next))) { - set_type(affinity_balanced); -#if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; -#endif - buf = next; - } else if (__kmp_match_str("disabled", buf, CCAST(const char **, &next))) { - set_type(affinity_disabled); -#if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; -#endif - buf = next; - } else if (__kmp_match_str("verbose", buf, CCAST(const char **, &next))) { - set_verbose(TRUE); - buf = next; - } else if (__kmp_match_str("noverbose", buf, CCAST(const char **, &next))) { - set_verbose(FALSE); - buf = next; - } else if (__kmp_match_str("warnings", buf, CCAST(const char **, &next))) { - set_warnings(TRUE); - buf = next; - } else if (__kmp_match_str("nowarnings", buf, - CCAST(const char **, &next))) { - set_warnings(FALSE); - buf = next; - } else if (__kmp_match_str("respect", buf, CCAST(const char **, &next))) { - set_respect(TRUE); - buf = next; - } else if (__kmp_match_str("norespect", buf, CCAST(const char **, &next))) { - set_respect(FALSE); - buf = next; - } else if 
(__kmp_match_str("duplicates", buf, - CCAST(const char **, &next)) || - __kmp_match_str("dups", buf, CCAST(const char **, &next))) { - set_dups(TRUE); - buf = next; - } else if (__kmp_match_str("noduplicates", buf, - CCAST(const char **, &next)) || - __kmp_match_str("nodups", buf, CCAST(const char **, &next))) { - set_dups(FALSE); - buf = next; - } else if (__kmp_match_str("granularity", buf, - CCAST(const char **, &next)) || - __kmp_match_str("gran", buf, CCAST(const char **, &next))) { - SKIP_WS(next); - if (*next != '=') { - EMIT_WARN(TRUE, (AffInvalidParam, name, start)); - continue; - } - next++; // skip '=' - SKIP_WS(next); - - buf = next; - if (__kmp_match_str("fine", buf, CCAST(const char **, &next))) { - set_gran(affinity_gran_fine, -1); - buf = next; - } else if (__kmp_match_str("thread", buf, CCAST(const char **, &next))) { - set_gran(affinity_gran_thread, -1); - buf = next; - } else if (__kmp_match_str("core", buf, CCAST(const char **, &next))) { - set_gran(affinity_gran_core, -1); - buf = next; -#if KMP_USE_HWLOC - } else if (__kmp_match_str("tile", buf, CCAST(const char **, &next))) { - set_gran(affinity_gran_tile, -1); - buf = next; -#endif - } else if (__kmp_match_str("package", buf, CCAST(const char **, &next))) { - set_gran(affinity_gran_package, -1); - buf = next; - } else if (__kmp_match_str("node", buf, CCAST(const char **, &next))) { - set_gran(affinity_gran_node, -1); - buf = next; -#if KMP_GROUP_AFFINITY - } else if (__kmp_match_str("group", buf, CCAST(const char **, &next))) { - set_gran(affinity_gran_group, -1); - buf = next; -#endif /* KMP_GROUP AFFINITY */ - } else if ((*buf >= '0') && (*buf <= '9')) { - int n; - next = buf; - SKIP_DIGITS(next); - n = __kmp_str_to_int(buf, *next); - KMP_ASSERT(n >= 0); - buf = next; - set_gran(affinity_gran_default, n); - } else { - EMIT_WARN(TRUE, (AffInvalidParam, name, start)); - continue; - } - } else if (__kmp_match_str("proclist", buf, CCAST(const char **, &next))) { - char *temp_proclist; - - SKIP_WS(next); - if (*next != '=') { - EMIT_WARN(TRUE, (AffInvalidParam, name, start)); - continue; - } - next++; // skip '=' - SKIP_WS(next); - if (*next != '[') { - EMIT_WARN(TRUE, (AffInvalidParam, name, start)); - continue; - } - next++; // skip '[' - buf = next; - if (!__kmp_parse_affinity_proc_id_list( - name, buf, CCAST(const char **, &next), &temp_proclist)) { - // warning already emitted. - SKIP_TO(next, ']'); - if (*next == ']') - next++; - SKIP_TO(next, ','); - if (*next == ',') - next++; - buf = next; - continue; - } - if (*next != ']') { - EMIT_WARN(TRUE, (AffInvalidParam, name, start)); - continue; - } - next++; // skip ']' - set_proclist(temp_proclist); - } else if ((*buf >= '0') && (*buf <= '9')) { - // Parse integer numbers -- permute and offset. 
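// Worked example: in KMP_AFFINITY="compact,1,0" the two trailing integers land
// here; number[0] becomes the permute/compact level and number[1] the offset,
// applied in the switch on *out_type after this loop.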
- int n; - next = buf; - SKIP_DIGITS(next); - n = __kmp_str_to_int(buf, *next); - KMP_ASSERT(n >= 0); - buf = next; - if (count < 2) { - number[count] = n; - } else { - KMP_WARNING(AffManyParams, name, start); - } - ++count; - } else { - EMIT_WARN(TRUE, (AffInvalidParam, name, start)); - continue; - } - - SKIP_WS(next); - if (*next == ',') { - next++; - SKIP_WS(next); - } else if (*next != '\0') { - const char *temp = next; - EMIT_WARN(TRUE, (ParseExtraCharsWarn, name, temp)); - continue; - } - buf = next; - } // while - -#undef EMIT_WARN -#undef _set_param -#undef set_type -#undef set_verbose -#undef set_warnings -#undef set_respect -#undef set_granularity - - __kmp_str_free(&buffer); - - if (proclist) { - if (!type) { - KMP_WARNING(AffProcListNoType, name); - *out_type = affinity_explicit; -#if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; -#endif - } else if (*out_type != affinity_explicit) { - KMP_WARNING(AffProcListNotExplicit, name); - KMP_ASSERT(*out_proclist != NULL); - KMP_INTERNAL_FREE(*out_proclist); - *out_proclist = NULL; - } - } - switch (*out_type) { - case affinity_logical: - case affinity_physical: { - if (count > 0) { - *out_offset = number[0]; - } - if (count > 1) { - KMP_WARNING(AffManyParamsForLogic, name, number[1]); - } - } break; - case affinity_balanced: { - if (count > 0) { - *out_compact = number[0]; - } - if (count > 1) { - *out_offset = number[1]; - } - - if (__kmp_affinity_gran == affinity_gran_default) { -#if KMP_MIC_SUPPORTED - if (__kmp_mic_type != non_mic) { - if (__kmp_affinity_verbose || __kmp_affinity_warnings) { - KMP_WARNING(AffGranUsing, "KMP_AFFINITY", "fine"); - } - __kmp_affinity_gran = affinity_gran_fine; - } else -#endif - { - if (__kmp_affinity_verbose || __kmp_affinity_warnings) { - KMP_WARNING(AffGranUsing, "KMP_AFFINITY", "core"); - } - __kmp_affinity_gran = affinity_gran_core; - } - } - } break; - case affinity_scatter: - case affinity_compact: { - if (count > 0) { - *out_compact = number[0]; - } - if (count > 1) { - *out_offset = number[1]; - } - } break; - case affinity_explicit: { - if (*out_proclist == NULL) { - KMP_WARNING(AffNoProcList, name); - __kmp_affinity_type = affinity_none; - } - if (count > 0) { - KMP_WARNING(AffNoParam, name, "explicit"); - } - } break; - case affinity_none: { - if (count > 0) { - KMP_WARNING(AffNoParam, name, "none"); - } - } break; - case affinity_disabled: { - if (count > 0) { - KMP_WARNING(AffNoParam, name, "disabled"); - } - } break; - case affinity_default: { - if (count > 0) { - KMP_WARNING(AffNoParam, name, "default"); - } - } break; - default: { KMP_ASSERT(0); } - } -} // __kmp_parse_affinity_env - -static void __kmp_stg_parse_affinity(char const *name, char const *value, - void *data) { - kmp_setting_t **rivals = (kmp_setting_t **)data; - int rc; - - rc = __kmp_stg_check_rivals(name, value, rivals); - if (rc) { - return; - } - - __kmp_parse_affinity_env(name, value, &__kmp_affinity_type, - &__kmp_affinity_proclist, &__kmp_affinity_verbose, - &__kmp_affinity_warnings, - &__kmp_affinity_respect_mask, &__kmp_affinity_gran, - &__kmp_affinity_gran_levels, &__kmp_affinity_dups, - &__kmp_affinity_compact, &__kmp_affinity_offset); - -} // __kmp_stg_parse_affinity - -static void __kmp_stg_print_affinity(kmp_str_buf_t *buffer, char const *name, - void *data) { - if (__kmp_env_format) { - KMP_STR_BUF_PRINT_NAME_EX(name); - } else { - __kmp_str_buf_print(buffer, " %s='", name); - } - if (__kmp_affinity_verbose) { - __kmp_str_buf_print(buffer, "%s,", "verbose"); - } else { - 
__kmp_str_buf_print(buffer, "%s,", "noverbose"); - } - if (__kmp_affinity_warnings) { - __kmp_str_buf_print(buffer, "%s,", "warnings"); - } else { - __kmp_str_buf_print(buffer, "%s,", "nowarnings"); - } - if (KMP_AFFINITY_CAPABLE()) { - if (__kmp_affinity_respect_mask) { - __kmp_str_buf_print(buffer, "%s,", "respect"); - } else { - __kmp_str_buf_print(buffer, "%s,", "norespect"); - } - switch (__kmp_affinity_gran) { - case affinity_gran_default: - __kmp_str_buf_print(buffer, "%s", "granularity=default,"); - break; - case affinity_gran_fine: - __kmp_str_buf_print(buffer, "%s", "granularity=fine,"); - break; - case affinity_gran_thread: - __kmp_str_buf_print(buffer, "%s", "granularity=thread,"); - break; - case affinity_gran_core: - __kmp_str_buf_print(buffer, "%s", "granularity=core,"); - break; - case affinity_gran_package: - __kmp_str_buf_print(buffer, "%s", "granularity=package,"); - break; - case affinity_gran_node: - __kmp_str_buf_print(buffer, "%s", "granularity=node,"); - break; -#if KMP_GROUP_AFFINITY - case affinity_gran_group: - __kmp_str_buf_print(buffer, "%s", "granularity=group,"); - break; -#endif /* KMP_GROUP_AFFINITY */ - } - } - if (!KMP_AFFINITY_CAPABLE()) { - __kmp_str_buf_print(buffer, "%s", "disabled"); - } else - switch (__kmp_affinity_type) { - case affinity_none: - __kmp_str_buf_print(buffer, "%s", "none"); - break; - case affinity_physical: - __kmp_str_buf_print(buffer, "%s,%d", "physical", __kmp_affinity_offset); - break; - case affinity_logical: - __kmp_str_buf_print(buffer, "%s,%d", "logical", __kmp_affinity_offset); - break; - case affinity_compact: - __kmp_str_buf_print(buffer, "%s,%d,%d", "compact", __kmp_affinity_compact, - __kmp_affinity_offset); - break; - case affinity_scatter: - __kmp_str_buf_print(buffer, "%s,%d,%d", "scatter", __kmp_affinity_compact, - __kmp_affinity_offset); - break; - case affinity_explicit: - __kmp_str_buf_print(buffer, "%s=[%s],%s", "proclist", - __kmp_affinity_proclist, "explicit"); - break; - case affinity_balanced: - __kmp_str_buf_print(buffer, "%s,%d,%d", "balanced", - __kmp_affinity_compact, __kmp_affinity_offset); - break; - case affinity_disabled: - __kmp_str_buf_print(buffer, "%s", "disabled"); - break; - case affinity_default: - __kmp_str_buf_print(buffer, "%s", "default"); - break; - default: - __kmp_str_buf_print(buffer, "%s", ""); - break; - } - __kmp_str_buf_print(buffer, "'\n"); -} //__kmp_stg_print_affinity - -#ifdef KMP_GOMP_COMPAT - -static void __kmp_stg_parse_gomp_cpu_affinity(char const *name, - char const *value, void *data) { - const char *next = NULL; - char *temp_proclist; - kmp_setting_t **rivals = (kmp_setting_t **)data; - int rc; - - rc = __kmp_stg_check_rivals(name, value, rivals); - if (rc) { - return; - } - - if (TCR_4(__kmp_init_middle)) { - KMP_WARNING(EnvMiddleWarn, name); - __kmp_env_toPrint(name, 0); - return; - } - - __kmp_env_toPrint(name, 1); - - if (__kmp_parse_affinity_proc_id_list(name, value, &next, &temp_proclist)) { - SKIP_WS(next); - if (*next == '\0') { - // GOMP_CPU_AFFINITY => granularity=fine,explicit,proclist=... 
- __kmp_affinity_proclist = temp_proclist; - __kmp_affinity_type = affinity_explicit; - __kmp_affinity_gran = affinity_gran_fine; -#if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; -#endif - } else { - KMP_WARNING(AffSyntaxError, name); - if (temp_proclist != NULL) { - KMP_INTERNAL_FREE((void *)temp_proclist); - } - } - } else { - // Warning already emitted - __kmp_affinity_type = affinity_none; -#if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; -#endif - } -} // __kmp_stg_parse_gomp_cpu_affinity - -#endif /* KMP_GOMP_COMPAT */ - -#if OMP_40_ENABLED - -/*----------------------------------------------------------------------------- -The OMP_PLACES proc id list parser. Here is the grammar: - -place_list := place -place_list := place , place_list -place := num -place := place : num -place := place : num : signed -place := { subplace_list } -place := ! place // (lowest priority) -subplace_list := subplace -subplace_list := subplace , subplace_list -subplace := num -subplace := num : num -subplace := num : num : signed -signed := num -signed := + signed -signed := - signed ------------------------------------------------------------------------------*/ - -static int __kmp_parse_subplace_list(const char *var, const char **scan) { - const char *next; - - for (;;) { - int start, count, stride; - - // - // Read in the starting proc id - // - SKIP_WS(*scan); - if ((**scan < '0') || (**scan > '9')) { - KMP_WARNING(SyntaxErrorUsing, var, "\"threads\""); - return FALSE; - } - next = *scan; - SKIP_DIGITS(next); - start = __kmp_str_to_int(*scan, *next); - KMP_ASSERT(start >= 0); - *scan = next; - - // valid follow sets are ',' ':' and '}' - SKIP_WS(*scan); - if (**scan == '}') { - break; - } - if (**scan == ',') { - (*scan)++; // skip ',' - continue; - } - if (**scan != ':') { - KMP_WARNING(SyntaxErrorUsing, var, "\"threads\""); - return FALSE; - } - (*scan)++; // skip ':' - - // Read count parameter - SKIP_WS(*scan); - if ((**scan < '0') || (**scan > '9')) { - KMP_WARNING(SyntaxErrorUsing, var, "\"threads\""); - return FALSE; - } - next = *scan; - SKIP_DIGITS(next); - count = __kmp_str_to_int(*scan, *next); - KMP_ASSERT(count >= 0); - *scan = next; - - // valid follow sets are ',' ':' and '}' - SKIP_WS(*scan); - if (**scan == '}') { - break; - } - if (**scan == ',') { - (*scan)++; // skip ',' - continue; - } - if (**scan != ':') { - KMP_WARNING(SyntaxErrorUsing, var, "\"threads\""); - return FALSE; - } - (*scan)++; // skip ':' - - // Read stride parameter - int sign = +1; - for (;;) { - SKIP_WS(*scan); - if (**scan == '+') { - (*scan)++; // skip '+' - continue; - } - if (**scan == '-') { - sign *= -1; - (*scan)++; // skip '-' - continue; - } - break; - } - SKIP_WS(*scan); - if ((**scan < '0') || (**scan > '9')) { - KMP_WARNING(SyntaxErrorUsing, var, "\"threads\""); - return FALSE; - } - next = *scan; - SKIP_DIGITS(next); - stride = __kmp_str_to_int(*scan, *next); - KMP_ASSERT(stride >= 0); - *scan = next; - stride *= sign; - - // valid follow sets are ',' and '}' - SKIP_WS(*scan); - if (**scan == '}') { - break; - } - if (**scan == ',') { - (*scan)++; // skip ',' - continue; - } - - KMP_WARNING(SyntaxErrorUsing, var, "\"threads\""); - return FALSE; - } - return TRUE; -} - -static int __kmp_parse_place(const char *var, const char **scan) { - const char *next; - - // valid follow sets are '{' '!'
and num - SKIP_WS(*scan); - if (**scan == '{') { - (*scan)++; // skip '{' - if (!__kmp_parse_subplace_list(var, scan)) { - return FALSE; - } - if (**scan != '}') { - KMP_WARNING(SyntaxErrorUsing, var, "\"threads\""); - return FALSE; - } - (*scan)++; // skip '}' - } else if (**scan == '!') { - (*scan)++; // skip '!' - return __kmp_parse_place(var, scan); //'!' has lower precedence than ':' - } else if ((**scan >= '0') && (**scan <= '9')) { - next = *scan; - SKIP_DIGITS(next); - int proc = __kmp_str_to_int(*scan, *next); - KMP_ASSERT(proc >= 0); - *scan = next; - } else { - KMP_WARNING(SyntaxErrorUsing, var, "\"threads\""); - return FALSE; - } - return TRUE; -} - -static int __kmp_parse_place_list(const char *var, const char *env, - char **place_list) { - const char *scan = env; - const char *next = scan; - - for (;;) { - int count, stride; - - if (!__kmp_parse_place(var, &scan)) { - return FALSE; - } - - // valid follow sets are ',' ':' and EOL - SKIP_WS(scan); - if (*scan == '\0') { - break; - } - if (*scan == ',') { - scan++; // skip ',' - continue; - } - if (*scan != ':') { - KMP_WARNING(SyntaxErrorUsing, var, "\"threads\""); - return FALSE; - } - scan++; // skip ':' - - // Read count parameter - SKIP_WS(scan); - if ((*scan < '0') || (*scan > '9')) { - KMP_WARNING(SyntaxErrorUsing, var, "\"threads\""); - return FALSE; - } - next = scan; - SKIP_DIGITS(next); - count = __kmp_str_to_int(scan, *next); - KMP_ASSERT(count >= 0); - scan = next; - - // valid follow sets are ',' ':' and EOL - SKIP_WS(scan); - if (*scan == '\0') { - break; - } - if (*scan == ',') { - scan++; // skip ',' - continue; - } - if (*scan != ':') { - KMP_WARNING(SyntaxErrorUsing, var, "\"threads\""); - return FALSE; - } - scan++; // skip ':' - - // Read stride parameter - int sign = +1; - for (;;) { - SKIP_WS(scan); - if (*scan == '+') { - scan++; // skip '+' - continue; - } - if (*scan == '-') { - sign *= -1; - scan++; // skip '-' - continue; - } - break; - } - SKIP_WS(scan); - if ((*scan < '0') || (*scan > '9')) { - KMP_WARNING(SyntaxErrorUsing, var, "\"threads\""); - return FALSE; - } - next = scan; - SKIP_DIGITS(next); - stride = __kmp_str_to_int(scan, *next); - KMP_ASSERT(stride >= 0); - scan = next; - stride *= sign; - - // valid follow sets are ',' and EOL - SKIP_WS(scan); - if (*scan == '\0') { - break; - } - if (*scan == ',') { - scan++; // skip ',' - continue; - } - - KMP_WARNING(SyntaxErrorUsing, var, "\"threads\""); - return FALSE; - } - - { - int len = scan - env; - char *retlist = (char *)__kmp_allocate((len + 1) * sizeof(char)); - KMP_MEMCPY_S(retlist, (len + 1) * sizeof(char), env, len * sizeof(char)); - retlist[len] = '\0'; - *place_list = retlist; - } - return TRUE; -} - -static void __kmp_stg_parse_places(char const *name, char const *value, - void *data) { - int count; - const char *scan = value; - const char *next = scan; - const char *kind = "\"threads\""; - kmp_setting_t **rivals = (kmp_setting_t **)data; - int rc; - - rc = __kmp_stg_check_rivals(name, value, rivals); - if (rc) { - return; - } - - // If OMP_PROC_BIND is not specified but OMP_PLACES is, - // then let OMP_PROC_BIND default to true. 
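- // (Illustrative examples: OMP_PLACES=cores, OMP_PLACES="threads(4)", or an - // explicit list such as OMP_PLACES="{0,1},{2,3}"; an unset OMP_PROC_BIND - // defaults to true in all of these cases.)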
- if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default) { - __kmp_nested_proc_bind.bind_types[0] = proc_bind_true; - } - - //__kmp_affinity_num_places = 0; - - if (__kmp_match_str("threads", scan, &next)) { - scan = next; - __kmp_affinity_type = affinity_compact; - __kmp_affinity_gran = affinity_gran_thread; - __kmp_affinity_dups = FALSE; - kind = "\"threads\""; - } else if (__kmp_match_str("cores", scan, &next)) { - scan = next; - __kmp_affinity_type = affinity_compact; - __kmp_affinity_gran = affinity_gran_core; - __kmp_affinity_dups = FALSE; - kind = "\"cores\""; -#if KMP_USE_HWLOC - } else if (__kmp_match_str("tiles", scan, &next)) { - scan = next; - __kmp_affinity_type = affinity_compact; - __kmp_affinity_gran = affinity_gran_tile; - __kmp_affinity_dups = FALSE; - kind = "\"tiles\""; -#endif - } else if (__kmp_match_str("sockets", scan, &next)) { - scan = next; - __kmp_affinity_type = affinity_compact; - __kmp_affinity_gran = affinity_gran_package; - __kmp_affinity_dups = FALSE; - kind = "\"sockets\""; - } else { - if (__kmp_affinity_proclist != NULL) { - KMP_INTERNAL_FREE((void *)__kmp_affinity_proclist); - __kmp_affinity_proclist = NULL; - } - if (__kmp_parse_place_list(name, value, &__kmp_affinity_proclist)) { - __kmp_affinity_type = affinity_explicit; - __kmp_affinity_gran = affinity_gran_fine; - __kmp_affinity_dups = FALSE; - if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default) { - __kmp_nested_proc_bind.bind_types[0] = proc_bind_true; - } - } - return; - } - - if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default) { - __kmp_nested_proc_bind.bind_types[0] = proc_bind_true; - } - - SKIP_WS(scan); - if (*scan == '\0') { - return; - } - - // Parse option count parameter in parentheses - if (*scan != '(') { - KMP_WARNING(SyntaxErrorUsing, name, kind); - return; - } - scan++; // skip '(' - - SKIP_WS(scan); - next = scan; - SKIP_DIGITS(next); - count = __kmp_str_to_int(scan, *next); - KMP_ASSERT(count >= 0); - scan = next; - - SKIP_WS(scan); - if (*scan != ')') { - KMP_WARNING(SyntaxErrorUsing, name, kind); - return; - } - scan++; // skip ')' - - SKIP_WS(scan); - if (*scan != '\0') { - KMP_WARNING(ParseExtraCharsWarn, name, scan); - } - __kmp_affinity_num_places = count; -} - -static void __kmp_stg_print_places(kmp_str_buf_t *buffer, char const *name, - void *data) { - if (__kmp_env_format) { - KMP_STR_BUF_PRINT_NAME; - } else { - __kmp_str_buf_print(buffer, " %s", name); - } - if ((__kmp_nested_proc_bind.used == 0) || - (__kmp_nested_proc_bind.bind_types == NULL) || - (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) { - __kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined)); - } else if (__kmp_affinity_type == affinity_explicit) { - if (__kmp_affinity_proclist != NULL) { - __kmp_str_buf_print(buffer, "='%s'\n", __kmp_affinity_proclist); - } else { - __kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined)); - } - } else if (__kmp_affinity_type == affinity_compact) { - int num; - if (__kmp_affinity_num_masks > 0) { - num = __kmp_affinity_num_masks; - } else if (__kmp_affinity_num_places > 0) { - num = __kmp_affinity_num_places; - } else { - num = 0; - } - if (__kmp_affinity_gran == affinity_gran_thread) { - if (num > 0) { - __kmp_str_buf_print(buffer, "='threads(%d)'\n", num); - } else { - __kmp_str_buf_print(buffer, "='threads'\n"); - } - } else if (__kmp_affinity_gran == affinity_gran_core) { - if (num > 0) { - __kmp_str_buf_print(buffer, "='cores(%d)' \n", num); - } else { - __kmp_str_buf_print(buffer, "='cores'\n"); - } -#if 
KMP_USE_HWLOC - } else if (__kmp_affinity_gran == affinity_gran_tile) { - if (num > 0) { - __kmp_str_buf_print(buffer, "='tiles(%d)' \n", num); - } else { - __kmp_str_buf_print(buffer, "='tiles'\n"); - } -#endif - } else if (__kmp_affinity_gran == affinity_gran_package) { - if (num > 0) { - __kmp_str_buf_print(buffer, "='sockets(%d)'\n", num); - } else { - __kmp_str_buf_print(buffer, "='sockets'\n"); - } - } else { - __kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined)); - } - } else { - __kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined)); - } -} - -#endif /* OMP_40_ENABLED */ - -#if (!OMP_40_ENABLED) - -static void __kmp_stg_parse_proc_bind(char const *name, char const *value, - void *data) { - int enabled; - kmp_setting_t **rivals = (kmp_setting_t **)data; - int rc; - - rc = __kmp_stg_check_rivals(name, value, rivals); - if (rc) { - return; - } - - // In OMP 3.1, OMP_PROC_BIND is strictly a boolean - __kmp_stg_parse_bool(name, value, &enabled); - if (enabled) { - // OMP_PROC_BIND => granularity=fine,scatter on MIC - // OMP_PROC_BIND => granularity=core,scatter elsewhere - __kmp_affinity_type = affinity_scatter; -#if KMP_MIC_SUPPORTED - if (__kmp_mic_type != non_mic) - __kmp_affinity_gran = affinity_gran_fine; - else -#endif - __kmp_affinity_gran = affinity_gran_core; - } else { - __kmp_affinity_type = affinity_none; - } -} // __kmp_parse_proc_bind - -#endif /* if (! OMP_40_ENABLED) */ - -static void __kmp_stg_parse_topology_method(char const *name, char const *value, - void *data) { - if (__kmp_str_match("all", 1, value)) { - __kmp_affinity_top_method = affinity_top_method_all; - } -#if KMP_USE_HWLOC - else if (__kmp_str_match("hwloc", 1, value)) { - __kmp_affinity_top_method = affinity_top_method_hwloc; - } -#endif -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - else if (__kmp_str_match("x2apic id", 9, value) || - __kmp_str_match("x2apic_id", 9, value) || - __kmp_str_match("x2apic-id", 9, value) || - __kmp_str_match("x2apicid", 8, value) || - __kmp_str_match("cpuid leaf 11", 13, value) || - __kmp_str_match("cpuid_leaf_11", 13, value) || - __kmp_str_match("cpuid-leaf-11", 13, value) || - __kmp_str_match("cpuid leaf11", 12, value) || - __kmp_str_match("cpuid_leaf11", 12, value) || - __kmp_str_match("cpuid-leaf11", 12, value) || - __kmp_str_match("cpuidleaf 11", 12, value) || - __kmp_str_match("cpuidleaf_11", 12, value) || - __kmp_str_match("cpuidleaf-11", 12, value) || - __kmp_str_match("cpuidleaf11", 11, value) || - __kmp_str_match("cpuid 11", 8, value) || - __kmp_str_match("cpuid_11", 8, value) || - __kmp_str_match("cpuid-11", 8, value) || - __kmp_str_match("cpuid11", 7, value) || - __kmp_str_match("leaf 11", 7, value) || - __kmp_str_match("leaf_11", 7, value) || - __kmp_str_match("leaf-11", 7, value) || - __kmp_str_match("leaf11", 6, value)) { - __kmp_affinity_top_method = affinity_top_method_x2apicid; - } else if (__kmp_str_match("apic id", 7, value) || - __kmp_str_match("apic_id", 7, value) || - __kmp_str_match("apic-id", 7, value) || - __kmp_str_match("apicid", 6, value) || - __kmp_str_match("cpuid leaf 4", 12, value) || - __kmp_str_match("cpuid_leaf_4", 12, value) || - __kmp_str_match("cpuid-leaf-4", 12, value) || - __kmp_str_match("cpuid leaf4", 11, value) || - __kmp_str_match("cpuid_leaf4", 11, value) || - __kmp_str_match("cpuid-leaf4", 11, value) || - __kmp_str_match("cpuidleaf 4", 11, value) || - __kmp_str_match("cpuidleaf_4", 11, value) || - __kmp_str_match("cpuidleaf-4", 11, value) || - __kmp_str_match("cpuidleaf4", 10, value) || - __kmp_str_match("cpuid 4", 7, 
value) || - __kmp_str_match("cpuid_4", 7, value) || - __kmp_str_match("cpuid-4", 7, value) || - __kmp_str_match("cpuid4", 6, value) || - __kmp_str_match("leaf 4", 6, value) || - __kmp_str_match("leaf_4", 6, value) || - __kmp_str_match("leaf-4", 6, value) || - __kmp_str_match("leaf4", 5, value)) { - __kmp_affinity_top_method = affinity_top_method_apicid; - } -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - else if (__kmp_str_match("/proc/cpuinfo", 2, value) || - __kmp_str_match("cpuinfo", 5, value)) { - __kmp_affinity_top_method = affinity_top_method_cpuinfo; - } -#if KMP_GROUP_AFFINITY - else if (__kmp_str_match("group", 1, value)) { - __kmp_affinity_top_method = affinity_top_method_group; - } -#endif /* KMP_GROUP_AFFINITY */ - else if (__kmp_str_match("flat", 1, value)) { - __kmp_affinity_top_method = affinity_top_method_flat; - } else { - KMP_WARNING(StgInvalidValue, name, value); - } -} // __kmp_stg_parse_topology_method - -static void __kmp_stg_print_topology_method(kmp_str_buf_t *buffer, - char const *name, void *data) { - char const *value = NULL; - - switch (__kmp_affinity_top_method) { - case affinity_top_method_default: - value = "default"; - break; - - case affinity_top_method_all: - value = "all"; - break; - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - case affinity_top_method_x2apicid: - value = "x2APIC id"; - break; - - case affinity_top_method_apicid: - value = "APIC id"; - break; -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -#if KMP_USE_HWLOC - case affinity_top_method_hwloc: - value = "hwloc"; - break; -#endif - - case affinity_top_method_cpuinfo: - value = "cpuinfo"; - break; - -#if KMP_GROUP_AFFINITY - case affinity_top_method_group: - value = "group"; - break; -#endif /* KMP_GROUP_AFFINITY */ - - case affinity_top_method_flat: - value = "flat"; - break; - } - - if (value != NULL) { - __kmp_stg_print_str(buffer, name, value); - } -} // __kmp_stg_print_topology_method - -#endif /* KMP_AFFINITY_SUPPORTED */ - -#if OMP_40_ENABLED - -// OMP_PROC_BIND / bind-var is functional on all 4.0 builds, including OS X* -// OMP_PLACES / place-partition-var is not. -static void __kmp_stg_parse_proc_bind(char const *name, char const *value, - void *data) { - kmp_setting_t **rivals = (kmp_setting_t **)data; - int rc; - - rc = __kmp_stg_check_rivals(name, value, rivals); - if (rc) { - return; - } - - // In OMP 4.0 OMP_PROC_BIND is a vector of proc_bind types. 
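- // e.g. OMP_PROC_BIND=spread,close (illustrative) requests spread binding at - // the outermost level and close binding one nesting level in; each - // comma-separated element becomes one bind_types[] entry below.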
- KMP_DEBUG_ASSERT((__kmp_nested_proc_bind.bind_types != NULL) && - (__kmp_nested_proc_bind.used > 0)); - - const char *buf = value; - const char *next; - int num; - SKIP_WS(buf); - if ((*buf >= '0') && (*buf <= '9')) { - next = buf; - SKIP_DIGITS(next); - num = __kmp_str_to_int(buf, *next); - KMP_ASSERT(num >= 0); - buf = next; - SKIP_WS(buf); - } else { - num = -1; - } - - next = buf; - if (__kmp_match_str("disabled", buf, &next)) { - buf = next; - SKIP_WS(buf); -#if KMP_AFFINITY_SUPPORTED - __kmp_affinity_type = affinity_disabled; -#endif /* KMP_AFFINITY_SUPPORTED */ - __kmp_nested_proc_bind.used = 1; - __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; - } else if ((num == (int)proc_bind_false) || - __kmp_match_str("false", buf, &next)) { - buf = next; - SKIP_WS(buf); -#if KMP_AFFINITY_SUPPORTED - __kmp_affinity_type = affinity_none; -#endif /* KMP_AFFINITY_SUPPORTED */ - __kmp_nested_proc_bind.used = 1; - __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; - } else if ((num == (int)proc_bind_true) || - __kmp_match_str("true", buf, &next)) { - buf = next; - SKIP_WS(buf); - __kmp_nested_proc_bind.used = 1; - __kmp_nested_proc_bind.bind_types[0] = proc_bind_true; - } else { - // Count the number of values in the env var string - const char *scan; - int nelem = 1; - for (scan = buf; *scan != '\0'; scan++) { - if (*scan == ',') { - nelem++; - } - } - - // Create / expand the nested proc_bind array as needed - if (__kmp_nested_proc_bind.size < nelem) { - __kmp_nested_proc_bind.bind_types = - (kmp_proc_bind_t *)KMP_INTERNAL_REALLOC( - __kmp_nested_proc_bind.bind_types, - sizeof(kmp_proc_bind_t) * nelem); - if (__kmp_nested_proc_bind.bind_types == NULL) { - KMP_FATAL(MemoryAllocFailed); - } - __kmp_nested_proc_bind.size = nelem; - } - __kmp_nested_proc_bind.used = nelem; - - // Save values in the nested proc_bind array - int i = 0; - for (;;) { - enum kmp_proc_bind_t bind; - - if ((num == (int)proc_bind_master) || - __kmp_match_str("master", buf, &next)) { - buf = next; - SKIP_WS(buf); - bind = proc_bind_master; - } else if ((num == (int)proc_bind_close) || - __kmp_match_str("close", buf, &next)) { - buf = next; - SKIP_WS(buf); - bind = proc_bind_close; - } else if ((num == (int)proc_bind_spread) || - __kmp_match_str("spread", buf, &next)) { - buf = next; - SKIP_WS(buf); - bind = proc_bind_spread; - } else { - KMP_WARNING(StgInvalidValue, name, value); - __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; - __kmp_nested_proc_bind.used = 1; - return; - } - - __kmp_nested_proc_bind.bind_types[i++] = bind; - if (i >= nelem) { - break; - } - KMP_DEBUG_ASSERT(*buf == ','); - buf++; - SKIP_WS(buf); - - // Read next value if it was specified as an integer - if ((*buf >= '0') && (*buf <= '9')) { - next = buf; - SKIP_DIGITS(next); - num = __kmp_str_to_int(buf, *next); - KMP_ASSERT(num >= 0); - buf = next; - SKIP_WS(buf); - } else { - num = -1; - } - } - SKIP_WS(buf); - } - if (*buf != '\0') { - KMP_WARNING(ParseExtraCharsWarn, name, buf); - } -} - -static void __kmp_stg_print_proc_bind(kmp_str_buf_t *buffer, char const *name, - void *data) { - int nelem = __kmp_nested_proc_bind.used; - if (__kmp_env_format) { - KMP_STR_BUF_PRINT_NAME; - } else { - __kmp_str_buf_print(buffer, " %s", name); - } - if (nelem == 0) { - __kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined)); - } else { - int i; - __kmp_str_buf_print(buffer, "='", name); - for (i = 0; i < nelem; i++) { - switch (__kmp_nested_proc_bind.bind_types[i]) { - case proc_bind_false: - __kmp_str_buf_print(buffer, "false"); - 
break; - - case proc_bind_true: - __kmp_str_buf_print(buffer, "true"); - break; - - case proc_bind_master: - __kmp_str_buf_print(buffer, "master"); - break; - - case proc_bind_close: - __kmp_str_buf_print(buffer, "close"); - break; - - case proc_bind_spread: - __kmp_str_buf_print(buffer, "spread"); - break; - - case proc_bind_intel: - __kmp_str_buf_print(buffer, "intel"); - break; - - case proc_bind_default: - __kmp_str_buf_print(buffer, "default"); - break; - } - if (i < nelem - 1) { - __kmp_str_buf_print(buffer, ","); - } - } - __kmp_str_buf_print(buffer, "'\n"); - } -} - -#endif /* OMP_40_ENABLED */ - -#if OMP_50_ENABLED -static void __kmp_stg_parse_display_affinity(char const *name, - char const *value, void *data) { - __kmp_stg_parse_bool(name, value, &__kmp_display_affinity); -} -static void __kmp_stg_print_display_affinity(kmp_str_buf_t *buffer, - char const *name, void *data) { - __kmp_stg_print_bool(buffer, name, __kmp_display_affinity); -} -static void __kmp_stg_parse_affinity_format(char const *name, char const *value, - void *data) { - size_t length = KMP_STRLEN(value); - __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, value, - length); -} -static void __kmp_stg_print_affinity_format(kmp_str_buf_t *buffer, - char const *name, void *data) { - if (__kmp_env_format) { - KMP_STR_BUF_PRINT_NAME_EX(name); - } else { - __kmp_str_buf_print(buffer, " %s='", name); - } - __kmp_str_buf_print(buffer, "%s'\n", __kmp_affinity_format); -} -// OMP_ALLOCATOR sets default allocator -static void __kmp_stg_parse_allocator(char const *name, char const *value, - void *data) { - /* - The value can be any predefined allocator: - omp_default_mem_alloc = 1; - omp_large_cap_mem_alloc = 2; - omp_const_mem_alloc = 3; - omp_high_bw_mem_alloc = 4; - omp_low_lat_mem_alloc = 5; - omp_cgroup_mem_alloc = 6; - omp_pteam_mem_alloc = 7; - omp_thread_mem_alloc = 8; - Acceptable value is either a digit or a string. 
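- For example, OMP_ALLOCATOR=4 and OMP_ALLOCATOR=omp_high_bw_mem_alloc are - treated the same below: both select the high-bandwidth allocator when it is - available and otherwise warn and fall back to omp_default_mem_alloc.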
- */ - const char *buf = value; - const char *next; - int num; - SKIP_WS(buf); - if ((*buf > '0') && (*buf < '9')) { - next = buf; - SKIP_DIGITS(next); - num = __kmp_str_to_int(buf, *next); - KMP_ASSERT(num > 0); - switch (num) { - case 4: - if (__kmp_hbw_mem_available) { - __kmp_def_allocator = omp_high_bw_mem_alloc; - } else { - __kmp_msg(kmp_ms_warning, - KMP_MSG(OmpNoAllocator, "omp_high_bw_mem_alloc"), - __kmp_msg_null); - __kmp_def_allocator = omp_default_mem_alloc; - } - break; - case 1: - __kmp_def_allocator = omp_default_mem_alloc; - break; - case 2: - __kmp_msg(kmp_ms_warning, - KMP_MSG(OmpNoAllocator, "omp_large_cap_mem_alloc"), - __kmp_msg_null); - __kmp_def_allocator = omp_default_mem_alloc; - break; - case 3: - __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_const_mem_alloc"), - __kmp_msg_null); - __kmp_def_allocator = omp_default_mem_alloc; - break; - case 5: - __kmp_msg(kmp_ms_warning, - KMP_MSG(OmpNoAllocator, "omp_low_lat_mem_alloc"), - __kmp_msg_null); - __kmp_def_allocator = omp_default_mem_alloc; - break; - case 6: - __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_cgroup_mem_alloc"), - __kmp_msg_null); - __kmp_def_allocator = omp_default_mem_alloc; - break; - case 7: - __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_pteam_mem_alloc"), - __kmp_msg_null); - __kmp_def_allocator = omp_default_mem_alloc; - break; - case 8: - __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_thread_mem_alloc"), - __kmp_msg_null); - __kmp_def_allocator = omp_default_mem_alloc; - break; - } - return; - } - next = buf; - if (__kmp_match_str("omp_high_bw_mem_alloc", buf, &next)) { - if (__kmp_hbw_mem_available) { - __kmp_def_allocator = omp_high_bw_mem_alloc; - } else { - __kmp_msg(kmp_ms_warning, - KMP_MSG(OmpNoAllocator, "omp_high_bw_mem_alloc"), - __kmp_msg_null); - __kmp_def_allocator = omp_default_mem_alloc; - } - } else if (__kmp_match_str("omp_default_mem_alloc", buf, &next)) { - __kmp_def_allocator = omp_default_mem_alloc; - } else if (__kmp_match_str("omp_large_cap_mem_alloc", buf, &next)) { - __kmp_msg(kmp_ms_warning, - KMP_MSG(OmpNoAllocator, "omp_large_cap_mem_alloc"), - __kmp_msg_null); - __kmp_def_allocator = omp_default_mem_alloc; - } else if (__kmp_match_str("omp_const_mem_alloc", buf, &next)) { - __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_const_mem_alloc"), - __kmp_msg_null); - __kmp_def_allocator = omp_default_mem_alloc; - } else if (__kmp_match_str("omp_low_lat_mem_alloc", buf, &next)) { - __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_low_lat_mem_alloc"), - __kmp_msg_null); - __kmp_def_allocator = omp_default_mem_alloc; - } else if (__kmp_match_str("omp_cgroup_mem_alloc", buf, &next)) { - __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_cgroup_mem_alloc"), - __kmp_msg_null); - __kmp_def_allocator = omp_default_mem_alloc; - } else if (__kmp_match_str("omp_pteam_mem_alloc", buf, &next)) { - __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_pteam_mem_alloc"), - __kmp_msg_null); - __kmp_def_allocator = omp_default_mem_alloc; - } else if (__kmp_match_str("omp_thread_mem_alloc", buf, &next)) { - __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_thread_mem_alloc"), - __kmp_msg_null); - __kmp_def_allocator = omp_default_mem_alloc; - } - buf = next; - SKIP_WS(buf); - if (*buf != '\0') { - KMP_WARNING(ParseExtraCharsWarn, name, buf); - } -} - -static void __kmp_stg_print_allocator(kmp_str_buf_t *buffer, char const *name, - void *data) { - if (__kmp_def_allocator == omp_default_mem_alloc) { - __kmp_stg_print_str(buffer, name, 
"omp_default_mem_alloc"); - } else if (__kmp_def_allocator == omp_high_bw_mem_alloc) { - __kmp_stg_print_str(buffer, name, "omp_high_bw_mem_alloc"); - } else if (__kmp_def_allocator == omp_large_cap_mem_alloc) { - __kmp_stg_print_str(buffer, name, "omp_large_cap_mem_alloc"); - } else if (__kmp_def_allocator == omp_const_mem_alloc) { - __kmp_stg_print_str(buffer, name, "omp_const_mem_alloc"); - } else if (__kmp_def_allocator == omp_low_lat_mem_alloc) { - __kmp_stg_print_str(buffer, name, "omp_low_lat_mem_alloc"); - } else if (__kmp_def_allocator == omp_cgroup_mem_alloc) { - __kmp_stg_print_str(buffer, name, "omp_cgroup_mem_alloc"); - } else if (__kmp_def_allocator == omp_pteam_mem_alloc) { - __kmp_stg_print_str(buffer, name, "omp_pteam_mem_alloc"); - } else if (__kmp_def_allocator == omp_thread_mem_alloc) { - __kmp_stg_print_str(buffer, name, "omp_thread_mem_alloc"); - } -} - -#endif /* OMP_50_ENABLED */ - -// ----------------------------------------------------------------------------- -// OMP_DYNAMIC - -static void __kmp_stg_parse_omp_dynamic(char const *name, char const *value, - void *data) { - __kmp_stg_parse_bool(name, value, &(__kmp_global.g.g_dynamic)); -} // __kmp_stg_parse_omp_dynamic - -static void __kmp_stg_print_omp_dynamic(kmp_str_buf_t *buffer, char const *name, - void *data) { - __kmp_stg_print_bool(buffer, name, __kmp_global.g.g_dynamic); -} // __kmp_stg_print_omp_dynamic - -static void __kmp_stg_parse_kmp_dynamic_mode(char const *name, - char const *value, void *data) { - if (TCR_4(__kmp_init_parallel)) { - KMP_WARNING(EnvParallelWarn, name); - __kmp_env_toPrint(name, 0); - return; - } -#ifdef USE_LOAD_BALANCE - else if (__kmp_str_match("load balance", 2, value) || - __kmp_str_match("load_balance", 2, value) || - __kmp_str_match("load-balance", 2, value) || - __kmp_str_match("loadbalance", 2, value) || - __kmp_str_match("balance", 1, value)) { - __kmp_global.g.g_dynamic_mode = dynamic_load_balance; - } -#endif /* USE_LOAD_BALANCE */ - else if (__kmp_str_match("thread limit", 1, value) || - __kmp_str_match("thread_limit", 1, value) || - __kmp_str_match("thread-limit", 1, value) || - __kmp_str_match("threadlimit", 1, value) || - __kmp_str_match("limit", 2, value)) { - __kmp_global.g.g_dynamic_mode = dynamic_thread_limit; - } else if (__kmp_str_match("random", 1, value)) { - __kmp_global.g.g_dynamic_mode = dynamic_random; - } else { - KMP_WARNING(StgInvalidValue, name, value); - } -} //__kmp_stg_parse_kmp_dynamic_mode - -static void __kmp_stg_print_kmp_dynamic_mode(kmp_str_buf_t *buffer, - char const *name, void *data) { -#if KMP_DEBUG - if (__kmp_global.g.g_dynamic_mode == dynamic_default) { - __kmp_str_buf_print(buffer, " %s: %s \n", name, KMP_I18N_STR(NotDefined)); - } -#ifdef USE_LOAD_BALANCE - else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) { - __kmp_stg_print_str(buffer, name, "load balance"); - } -#endif /* USE_LOAD_BALANCE */ - else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) { - __kmp_stg_print_str(buffer, name, "thread limit"); - } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) { - __kmp_stg_print_str(buffer, name, "random"); - } else { - KMP_ASSERT(0); - } -#endif /* KMP_DEBUG */ -} // __kmp_stg_print_kmp_dynamic_mode - -#ifdef USE_LOAD_BALANCE - -// ----------------------------------------------------------------------------- -// KMP_LOAD_BALANCE_INTERVAL - -static void __kmp_stg_parse_ld_balance_interval(char const *name, - char const *value, void *data) { - double interval = __kmp_convert_to_double(value); - if (interval 
>= 0) { - __kmp_load_balance_interval = interval; - } else { - KMP_WARNING(StgInvalidValue, name, value); - } -} // __kmp_stg_parse_load_balance_interval - -static void __kmp_stg_print_ld_balance_interval(kmp_str_buf_t *buffer, - char const *name, void *data) { -#if KMP_DEBUG - __kmp_str_buf_print(buffer, " %s=%8.6f\n", name, - __kmp_load_balance_interval); -#endif /* KMP_DEBUG */ -} // __kmp_stg_print_load_balance_interval - -#endif /* USE_LOAD_BALANCE */ - -// ----------------------------------------------------------------------------- -// KMP_INIT_AT_FORK - -static void __kmp_stg_parse_init_at_fork(char const *name, char const *value, - void *data) { - __kmp_stg_parse_bool(name, value, &__kmp_need_register_atfork); - if (__kmp_need_register_atfork) { - __kmp_need_register_atfork_specified = TRUE; - } -} // __kmp_stg_parse_init_at_fork - -static void __kmp_stg_print_init_at_fork(kmp_str_buf_t *buffer, - char const *name, void *data) { - __kmp_stg_print_bool(buffer, name, __kmp_need_register_atfork_specified); -} // __kmp_stg_print_init_at_fork - -// ----------------------------------------------------------------------------- -// KMP_SCHEDULE - -static void __kmp_stg_parse_schedule(char const *name, char const *value, - void *data) { - - if (value != NULL) { - size_t length = KMP_STRLEN(value); - if (length > INT_MAX) { - KMP_WARNING(LongValue, name); - } else { - const char *semicolon; - if (value[length - 1] == '"' || value[length - 1] == '\'') - KMP_WARNING(UnbalancedQuotes, name); - do { - char sentinel; - - semicolon = strchr(value, ';'); - if (*value && semicolon != value) { - const char *comma = strchr(value, ','); - - if (comma) { - ++comma; - sentinel = ','; - } else - sentinel = ';'; - if (!__kmp_strcasecmp_with_sentinel("static", value, sentinel)) { - if (!__kmp_strcasecmp_with_sentinel("greedy", comma, ';')) { - __kmp_static = kmp_sch_static_greedy; - continue; - } else if (!__kmp_strcasecmp_with_sentinel("balanced", comma, - ';')) { - __kmp_static = kmp_sch_static_balanced; - continue; - } - } else if (!__kmp_strcasecmp_with_sentinel("guided", value, - sentinel)) { - if (!__kmp_strcasecmp_with_sentinel("iterative", comma, ';')) { - __kmp_guided = kmp_sch_guided_iterative_chunked; - continue; - } else if (!__kmp_strcasecmp_with_sentinel("analytical", comma, - ';')) { - /* analytical not allowed for too many threads */ - __kmp_guided = kmp_sch_guided_analytical_chunked; - continue; - } - } - KMP_WARNING(InvalidClause, name, value); - } else - KMP_WARNING(EmptyClause, name); - } while ((value = semicolon ? 
semicolon + 1 : NULL)); - } - } - -} // __kmp_stg_parse__schedule - -static void __kmp_stg_print_schedule(kmp_str_buf_t *buffer, char const *name, - void *data) { - if (__kmp_env_format) { - KMP_STR_BUF_PRINT_NAME_EX(name); - } else { - __kmp_str_buf_print(buffer, " %s='", name); - } - if (__kmp_static == kmp_sch_static_greedy) { - __kmp_str_buf_print(buffer, "%s", "static,greedy"); - } else if (__kmp_static == kmp_sch_static_balanced) { - __kmp_str_buf_print(buffer, "%s", "static,balanced"); - } - if (__kmp_guided == kmp_sch_guided_iterative_chunked) { - __kmp_str_buf_print(buffer, ";%s'\n", "guided,iterative"); - } else if (__kmp_guided == kmp_sch_guided_analytical_chunked) { - __kmp_str_buf_print(buffer, ";%s'\n", "guided,analytical"); - } -} // __kmp_stg_print_schedule - -// ----------------------------------------------------------------------------- -// OMP_SCHEDULE - -static inline void __kmp_omp_schedule_restore() { -#if KMP_USE_HIER_SCHED - __kmp_hier_scheds.deallocate(); -#endif - __kmp_chunk = 0; - __kmp_sched = kmp_sch_default; -} - -static const char *__kmp_parse_single_omp_schedule(const char *name, - const char *value, - bool parse_hier = false) { - /* get the specified scheduling style */ - const char *ptr = value; - const char *comma = strchr(ptr, ','); - const char *delim; - int chunk = 0; - enum sched_type sched = kmp_sch_default; - if (*ptr == '\0') - return NULL; -#if KMP_USE_HIER_SCHED - kmp_hier_layer_e layer = kmp_hier_layer_e::LAYER_THREAD; - if (parse_hier) { - if (!__kmp_strcasecmp_with_sentinel("L1", ptr, ',')) { - layer = kmp_hier_layer_e::LAYER_L1; - } else if (!__kmp_strcasecmp_with_sentinel("L2", ptr, ',')) { - layer = kmp_hier_layer_e::LAYER_L2; - } else if (!__kmp_strcasecmp_with_sentinel("L3", ptr, ',')) { - layer = kmp_hier_layer_e::LAYER_L3; - } else if (!__kmp_strcasecmp_with_sentinel("NUMA", ptr, ',')) { - layer = kmp_hier_layer_e::LAYER_NUMA; - } - if (layer != kmp_hier_layer_e::LAYER_THREAD && !comma) { - // If there is no comma after the layer, then this schedule is invalid - KMP_WARNING(StgInvalidValue, name, value); - __kmp_omp_schedule_restore(); - return NULL; - } else if (layer != kmp_hier_layer_e::LAYER_THREAD) { - ptr = ++comma; - comma = strchr(ptr, ','); - } - } - delim = ptr; - while (*delim != ',' && *delim != ':' && *delim != '\0') - delim++; -#else // KMP_USE_HIER_SCHED - delim = ptr; - while (*delim != ',' && *delim != '\0') - delim++; -#endif // KMP_USE_HIER_SCHED - if (!__kmp_strcasecmp_with_sentinel("dynamic", ptr, *delim)) /* DYNAMIC */ - sched = kmp_sch_dynamic_chunked; - else if (!__kmp_strcasecmp_with_sentinel("guided", ptr, *delim)) /* GUIDED */ - sched = kmp_sch_guided_chunked; - // AC: TODO: add AUTO schedule, and probably remove TRAPEZOIDAL (OMP 3.0 does - // not allow it) - else if (!__kmp_strcasecmp_with_sentinel("auto", ptr, *delim)) { /* AUTO */ - sched = kmp_sch_auto; - if (comma) { - __kmp_msg(kmp_ms_warning, KMP_MSG(IgnoreChunk, name, comma), - __kmp_msg_null); - comma = NULL; - } - } else if (!__kmp_strcasecmp_with_sentinel("trapezoidal", ptr, - *delim)) /* TRAPEZOIDAL */ - sched = kmp_sch_trapezoidal; - else if (!__kmp_strcasecmp_with_sentinel("static", ptr, *delim)) /* STATIC */ - sched = kmp_sch_static; -#if KMP_STATIC_STEAL_ENABLED - else if (!__kmp_strcasecmp_with_sentinel("static_steal", ptr, *delim)) - sched = kmp_sch_static_steal; -#endif - else { - KMP_WARNING(StgInvalidValue, name, value); - __kmp_omp_schedule_restore(); - return NULL; - } - if (ptr && comma && *comma == *delim) { - ptr = comma + 1; - 
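- // e.g. for OMP_SCHEDULE=dynamic,4 (illustrative) ptr points at the chunk - // digits here; SKIP_DIGITS() advances it so that *ptr is the sentinel - // character for the __kmp_str_to_int() conversion below.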
SKIP_DIGITS(ptr); - - if (sched == kmp_sch_static) - sched = kmp_sch_static_chunked; - ++comma; - chunk = __kmp_str_to_int(comma, *ptr); - if (chunk < 1) { - chunk = KMP_DEFAULT_CHUNK; - __kmp_msg(kmp_ms_warning, KMP_MSG(InvalidChunk, name, comma), - __kmp_msg_null); - KMP_INFORM(Using_int_Value, name, __kmp_chunk); - // AC: next block commented out until KMP_DEFAULT_CHUNK != KMP_MIN_CHUNK - // (to improve code coverage :) - // The default chunk size is 1 according to the standard, thus making - // KMP_MIN_CHUNK != 1 would introduce a mess: - // a wrong chunk becomes 1, but it would be impossible to explicitly set - // 1, because it becomes KMP_MIN_CHUNK... - // } else if ( chunk < KMP_MIN_CHUNK ) { - // chunk = KMP_MIN_CHUNK; - } else if (chunk > KMP_MAX_CHUNK) { - chunk = KMP_MAX_CHUNK; - __kmp_msg(kmp_ms_warning, KMP_MSG(LargeChunk, name, comma), - __kmp_msg_null); - KMP_INFORM(Using_int_Value, name, chunk); - } - } else if (ptr) { - SKIP_TOKEN(ptr); - } -#if KMP_USE_HIER_SCHED - if (layer != kmp_hier_layer_e::LAYER_THREAD) { - __kmp_hier_scheds.append(sched, chunk, layer); - } else -#endif - { - __kmp_chunk = chunk; - __kmp_sched = sched; - } - return ptr; -} - -static void __kmp_stg_parse_omp_schedule(char const *name, char const *value, - void *data) { - size_t length; - const char *ptr = value; - SKIP_WS(ptr); - if (value) { - length = KMP_STRLEN(value); - if (length) { - if (value[length - 1] == '"' || value[length - 1] == '\'') - KMP_WARNING(UnbalancedQuotes, name); -/* get the specified scheduling style */ -#if KMP_USE_HIER_SCHED - if (!__kmp_strcasecmp_with_sentinel("EXPERIMENTAL", ptr, ' ')) { - SKIP_TOKEN(ptr); - SKIP_WS(ptr); - while ((ptr = __kmp_parse_single_omp_schedule(name, ptr, true))) { - while (*ptr == ' ' || *ptr == '\t' || *ptr == ':') - ptr++; - } - } else -#endif - __kmp_parse_single_omp_schedule(name, ptr); - } else - KMP_WARNING(EmptyString, name); - } -#if KMP_USE_HIER_SCHED - __kmp_hier_scheds.sort(); -#endif - K_DIAG(1, ("__kmp_static == %d\n", __kmp_static)) - K_DIAG(1, ("__kmp_guided == %d\n", __kmp_guided)) - K_DIAG(1, ("__kmp_sched == %d\n", __kmp_sched)) - K_DIAG(1, ("__kmp_chunk == %d\n", __kmp_chunk)) -} // __kmp_stg_parse_omp_schedule - -static void __kmp_stg_print_omp_schedule(kmp_str_buf_t *buffer, - char const *name, void *data) { - if (__kmp_env_format) { - KMP_STR_BUF_PRINT_NAME_EX(name); - } else { - __kmp_str_buf_print(buffer, " %s='", name); - } - if (__kmp_chunk) { - switch (__kmp_sched) { - case kmp_sch_dynamic_chunked: - __kmp_str_buf_print(buffer, "%s,%d'\n", "dynamic", __kmp_chunk); - break; - case kmp_sch_guided_iterative_chunked: - case kmp_sch_guided_analytical_chunked: - __kmp_str_buf_print(buffer, "%s,%d'\n", "guided", __kmp_chunk); - break; - case kmp_sch_trapezoidal: - __kmp_str_buf_print(buffer, "%s,%d'\n", "trapezoidal", __kmp_chunk); - break; - case kmp_sch_static: - case kmp_sch_static_chunked: - case kmp_sch_static_balanced: - case kmp_sch_static_greedy: - __kmp_str_buf_print(buffer, "%s,%d'\n", "static", __kmp_chunk); - break; - case kmp_sch_static_steal: - __kmp_str_buf_print(buffer, "%s,%d'\n", "static_steal", __kmp_chunk); - break; - case kmp_sch_auto: - __kmp_str_buf_print(buffer, "%s,%d'\n", "auto", __kmp_chunk); - break; - } - } else { - switch (__kmp_sched) { - case kmp_sch_dynamic_chunked: - __kmp_str_buf_print(buffer, "%s'\n", "dynamic"); - break; - case kmp_sch_guided_iterative_chunked: - case kmp_sch_guided_analytical_chunked: - __kmp_str_buf_print(buffer, "%s'\n", "guided"); - break; - case kmp_sch_trapezoidal: -
__kmp_str_buf_print(buffer, "%s'\n", "trapezoidal"); - break; - case kmp_sch_static: - case kmp_sch_static_chunked: - case kmp_sch_static_balanced: - case kmp_sch_static_greedy: - __kmp_str_buf_print(buffer, "%s'\n", "static"); - break; - case kmp_sch_static_steal: - __kmp_str_buf_print(buffer, "%s'\n", "static_steal"); - break; - case kmp_sch_auto: - __kmp_str_buf_print(buffer, "%s'\n", "auto"); - break; - } - } -} // __kmp_stg_print_omp_schedule - -#if KMP_USE_HIER_SCHED -// ----------------------------------------------------------------------------- -// KMP_DISP_HAND_THREAD -static void __kmp_stg_parse_kmp_hand_thread(char const *name, char const *value, - void *data) { - __kmp_stg_parse_bool(name, value, &(__kmp_dispatch_hand_threading)); -} // __kmp_stg_parse_kmp_hand_thread - -static void __kmp_stg_print_kmp_hand_thread(kmp_str_buf_t *buffer, - char const *name, void *data) { - __kmp_stg_print_bool(buffer, name, __kmp_dispatch_hand_threading); -} // __kmp_stg_print_kmp_hand_thread -#endif - -// ----------------------------------------------------------------------------- -// KMP_ATOMIC_MODE - -static void __kmp_stg_parse_atomic_mode(char const *name, char const *value, - void *data) { - // Modes: 0 -- do not change default; 1 -- Intel perf mode, 2 -- GOMP - // compatibility mode. - int mode = 0; - int max = 1; -#ifdef KMP_GOMP_COMPAT - max = 2; -#endif /* KMP_GOMP_COMPAT */ - __kmp_stg_parse_int(name, value, 0, max, &mode); - // TODO; parse_int is not very suitable for this case. In case of overflow it - // is better to use - // 0 rather that max value. - if (mode > 0) { - __kmp_atomic_mode = mode; - } -} // __kmp_stg_parse_atomic_mode - -static void __kmp_stg_print_atomic_mode(kmp_str_buf_t *buffer, char const *name, - void *data) { - __kmp_stg_print_int(buffer, name, __kmp_atomic_mode); -} // __kmp_stg_print_atomic_mode - -// ----------------------------------------------------------------------------- -// KMP_CONSISTENCY_CHECK - -static void __kmp_stg_parse_consistency_check(char const *name, - char const *value, void *data) { - if (!__kmp_strcasecmp_with_sentinel("all", value, 0)) { - // Note, this will not work from kmp_set_defaults because th_cons stack was - // not allocated - // for existed thread(s) thus the first __kmp_push_ will break - // with assertion. - // TODO: allocate th_cons if called from kmp_set_defaults. - __kmp_env_consistency_check = TRUE; - } else if (!__kmp_strcasecmp_with_sentinel("none", value, 0)) { - __kmp_env_consistency_check = FALSE; - } else { - KMP_WARNING(StgInvalidValue, name, value); - } -} // __kmp_stg_parse_consistency_check - -static void __kmp_stg_print_consistency_check(kmp_str_buf_t *buffer, - char const *name, void *data) { -#if KMP_DEBUG - const char *value = NULL; - - if (__kmp_env_consistency_check) { - value = "all"; - } else { - value = "none"; - } - - if (value != NULL) { - __kmp_stg_print_str(buffer, name, value); - } -#endif /* KMP_DEBUG */ -} // __kmp_stg_print_consistency_check - -#if USE_ITT_BUILD -// ----------------------------------------------------------------------------- -// KMP_ITT_PREPARE_DELAY - -#if USE_ITT_NOTIFY - -static void __kmp_stg_parse_itt_prepare_delay(char const *name, - char const *value, void *data) { - // Experimental code: KMP_ITT_PREPARE_DELAY specifies numbert of loop - // iterations. 
- int delay = 0; - __kmp_stg_parse_int(name, value, 0, INT_MAX, &delay); - __kmp_itt_prepare_delay = delay; -} // __kmp_str_parse_itt_prepare_delay - -static void __kmp_stg_print_itt_prepare_delay(kmp_str_buf_t *buffer, - char const *name, void *data) { - __kmp_stg_print_uint64(buffer, name, __kmp_itt_prepare_delay); - -} // __kmp_str_print_itt_prepare_delay - -#endif // USE_ITT_NOTIFY -#endif /* USE_ITT_BUILD */ - -// ----------------------------------------------------------------------------- -// KMP_MALLOC_POOL_INCR - -static void __kmp_stg_parse_malloc_pool_incr(char const *name, - char const *value, void *data) { - __kmp_stg_parse_size(name, value, KMP_MIN_MALLOC_POOL_INCR, - KMP_MAX_MALLOC_POOL_INCR, NULL, &__kmp_malloc_pool_incr, - 1); -} // __kmp_stg_parse_malloc_pool_incr - -static void __kmp_stg_print_malloc_pool_incr(kmp_str_buf_t *buffer, - char const *name, void *data) { - __kmp_stg_print_size(buffer, name, __kmp_malloc_pool_incr); - -} // _kmp_stg_print_malloc_pool_incr - -#ifdef KMP_DEBUG - -// ----------------------------------------------------------------------------- -// KMP_PAR_RANGE - -static void __kmp_stg_parse_par_range_env(char const *name, char const *value, - void *data) { - __kmp_stg_parse_par_range(name, value, &__kmp_par_range, - __kmp_par_range_routine, __kmp_par_range_filename, - &__kmp_par_range_lb, &__kmp_par_range_ub); -} // __kmp_stg_parse_par_range_env - -static void __kmp_stg_print_par_range_env(kmp_str_buf_t *buffer, - char const *name, void *data) { - if (__kmp_par_range != 0) { - __kmp_stg_print_str(buffer, name, par_range_to_print); - } -} // __kmp_stg_print_par_range_env - -// ----------------------------------------------------------------------------- -// KMP_YIELD_CYCLE, KMP_YIELD_ON, KMP_YIELD_OFF - -static void __kmp_stg_parse_yield_cycle(char const *name, char const *value, - void *data) { - int flag = __kmp_yield_cycle; - __kmp_stg_parse_bool(name, value, &flag); - __kmp_yield_cycle = flag; -} // __kmp_stg_parse_yield_cycle - -static void __kmp_stg_print_yield_cycle(kmp_str_buf_t *buffer, char const *name, - void *data) { - __kmp_stg_print_bool(buffer, name, __kmp_yield_cycle); -} // __kmp_stg_print_yield_cycle - -static void __kmp_stg_parse_yield_on(char const *name, char const *value, - void *data) { - __kmp_stg_parse_int(name, value, 2, INT_MAX, &__kmp_yield_on_count); -} // __kmp_stg_parse_yield_on - -static void __kmp_stg_print_yield_on(kmp_str_buf_t *buffer, char const *name, - void *data) { - __kmp_stg_print_int(buffer, name, __kmp_yield_on_count); -} // __kmp_stg_print_yield_on - -static void __kmp_stg_parse_yield_off(char const *name, char const *value, - void *data) { - __kmp_stg_parse_int(name, value, 2, INT_MAX, &__kmp_yield_off_count); -} // __kmp_stg_parse_yield_off - -static void __kmp_stg_print_yield_off(kmp_str_buf_t *buffer, char const *name, - void *data) { - __kmp_stg_print_int(buffer, name, __kmp_yield_off_count); -} // __kmp_stg_print_yield_off - -#endif - -// ----------------------------------------------------------------------------- -// KMP_INIT_WAIT, KMP_NEXT_WAIT - -static void __kmp_stg_parse_init_wait(char const *name, char const *value, - void *data) { - int wait; - KMP_ASSERT((__kmp_init_wait & 1) == 0); - wait = __kmp_init_wait / 2; - __kmp_stg_parse_int(name, value, KMP_MIN_INIT_WAIT, KMP_MAX_INIT_WAIT, &wait); - __kmp_init_wait = wait * 2; - KMP_ASSERT((__kmp_init_wait & 1) == 0); - __kmp_yield_init = __kmp_init_wait; -} // __kmp_stg_parse_init_wait - -static void __kmp_stg_print_init_wait(kmp_str_buf_t 
*buffer, char const *name, - void *data) { - __kmp_stg_print_int(buffer, name, __kmp_init_wait); -} // __kmp_stg_print_init_wait - -static void __kmp_stg_parse_next_wait(char const *name, char const *value, - void *data) { - int wait; - KMP_ASSERT((__kmp_next_wait & 1) == 0); - wait = __kmp_next_wait / 2; - __kmp_stg_parse_int(name, value, KMP_MIN_NEXT_WAIT, KMP_MAX_NEXT_WAIT, &wait); - __kmp_next_wait = wait * 2; - KMP_ASSERT((__kmp_next_wait & 1) == 0); - __kmp_yield_next = __kmp_next_wait; -} // __kmp_stg_parse_next_wait - -static void __kmp_stg_print_next_wait(kmp_str_buf_t *buffer, char const *name, - void *data) { - __kmp_stg_print_int(buffer, name, __kmp_next_wait); -} // __kmp_stg_print_next_wait - -// ----------------------------------------------------------------------------- -// KMP_GTID_MODE - -static void __kmp_stg_parse_gtid_mode(char const *name, char const *value, - void *data) { - // Modes: - // 0 -- do not change default - // 1 -- sp search - // 2 -- use "keyed" TLS var, i.e. - // pthread_getspecific(Linux* OS/OS X*) or TlsGetValue(Windows* OS) - // 3 -- __declspec(thread) TLS var in tdata section - int mode = 0; - int max = 2; -#ifdef KMP_TDATA_GTID - max = 3; -#endif /* KMP_TDATA_GTID */ - __kmp_stg_parse_int(name, value, 0, max, &mode); - // TODO: parse_int is not very suitable for this case. In case of overflow it - // is better to use 0 rather than the max value. - if (mode == 0) { - __kmp_adjust_gtid_mode = TRUE; - } else { - __kmp_gtid_mode = mode; - __kmp_adjust_gtid_mode = FALSE; - } -} // __kmp_stg_parse_gtid_mode - -static void __kmp_stg_print_gtid_mode(kmp_str_buf_t *buffer, char const *name, - void *data) { - if (__kmp_adjust_gtid_mode) { - __kmp_stg_print_int(buffer, name, 0); - } else { - __kmp_stg_print_int(buffer, name, __kmp_gtid_mode); - } -} // __kmp_stg_print_gtid_mode - -// ----------------------------------------------------------------------------- -// KMP_NUM_LOCKS_IN_BLOCK - -static void __kmp_stg_parse_lock_block(char const *name, char const *value, - void *data) { - __kmp_stg_parse_int(name, value, 0, KMP_INT_MAX, &__kmp_num_locks_in_block); -} // __kmp_stg_parse_lock_block - -static void __kmp_stg_print_lock_block(kmp_str_buf_t *buffer, char const *name, - void *data) { - __kmp_stg_print_int(buffer, name, __kmp_num_locks_in_block); -} // __kmp_stg_print_lock_block - -// ----------------------------------------------------------------------------- -// KMP_LOCK_KIND - -#if KMP_USE_DYNAMIC_LOCK -#define KMP_STORE_LOCK_SEQ(a) (__kmp_user_lock_seq = lockseq_##a) -#else -#define KMP_STORE_LOCK_SEQ(a) -#endif - -static void __kmp_stg_parse_lock_kind(char const *name, char const *value, - void *data) { - if (__kmp_init_user_locks) { - KMP_WARNING(EnvLockWarn, name); - return; - } - - if (__kmp_str_match("tas", 2, value) || - __kmp_str_match("test and set", 2, value) || - __kmp_str_match("test_and_set", 2, value) || - __kmp_str_match("test-and-set", 2, value) || - __kmp_str_match("test andset", 2, value) || - __kmp_str_match("test_andset", 2, value) || - __kmp_str_match("test-andset", 2, value) || - __kmp_str_match("testand set", 2, value) || - __kmp_str_match("testand_set", 2, value) || - __kmp_str_match("testand-set", 2, value) || - __kmp_str_match("testandset", 2, value)) { - __kmp_user_lock_kind = lk_tas; - KMP_STORE_LOCK_SEQ(tas); - } -#if KMP_USE_FUTEX - else if (__kmp_str_match("futex", 1, value)) { - if (__kmp_futex_determine_capable()) { - __kmp_user_lock_kind = lk_futex; - KMP_STORE_LOCK_SEQ(futex); - } else { - KMP_WARNING(FutexNotSupported,
name, value); - } - } -#endif - else if (__kmp_str_match("ticket", 2, value)) { - __kmp_user_lock_kind = lk_ticket; - KMP_STORE_LOCK_SEQ(ticket); - } else if (__kmp_str_match("queuing", 1, value) || - __kmp_str_match("queue", 1, value)) { - __kmp_user_lock_kind = lk_queuing; - KMP_STORE_LOCK_SEQ(queuing); - } else if (__kmp_str_match("drdpa ticket", 1, value) || - __kmp_str_match("drdpa_ticket", 1, value) || - __kmp_str_match("drdpa-ticket", 1, value) || - __kmp_str_match("drdpaticket", 1, value) || - __kmp_str_match("drdpa", 1, value)) { - __kmp_user_lock_kind = lk_drdpa; - KMP_STORE_LOCK_SEQ(drdpa); - } -#if KMP_USE_ADAPTIVE_LOCKS - else if (__kmp_str_match("adaptive", 1, value)) { - if (__kmp_cpuinfo.rtm) { // ??? Is cpuinfo available here? - __kmp_user_lock_kind = lk_adaptive; - KMP_STORE_LOCK_SEQ(adaptive); - } else { - KMP_WARNING(AdaptiveNotSupported, name, value); - __kmp_user_lock_kind = lk_queuing; - KMP_STORE_LOCK_SEQ(queuing); - } - } -#endif // KMP_USE_ADAPTIVE_LOCKS -#if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX - else if (__kmp_str_match("rtm", 1, value)) { - if (__kmp_cpuinfo.rtm) { - __kmp_user_lock_kind = lk_rtm; - KMP_STORE_LOCK_SEQ(rtm); - } else { - KMP_WARNING(AdaptiveNotSupported, name, value); - __kmp_user_lock_kind = lk_queuing; - KMP_STORE_LOCK_SEQ(queuing); - } - } else if (__kmp_str_match("hle", 1, value)) { - __kmp_user_lock_kind = lk_hle; - KMP_STORE_LOCK_SEQ(hle); - } -#endif - else { - KMP_WARNING(StgInvalidValue, name, value); - } -} - -static void __kmp_stg_print_lock_kind(kmp_str_buf_t *buffer, char const *name, - void *data) { - const char *value = NULL; - - switch (__kmp_user_lock_kind) { - case lk_default: - value = "default"; - break; - - case lk_tas: - value = "tas"; - break; - -#if KMP_USE_FUTEX - case lk_futex: - value = "futex"; - break; -#endif - -#if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX - case lk_rtm: - value = "rtm"; - break; - - case lk_hle: - value = "hle"; - break; -#endif - - case lk_ticket: - value = "ticket"; - break; - - case lk_queuing: - value = "queuing"; - break; - - case lk_drdpa: - value = "drdpa"; - break; -#if KMP_USE_ADAPTIVE_LOCKS - case lk_adaptive: - value = "adaptive"; - break; -#endif - } - - if (value != NULL) { - __kmp_stg_print_str(buffer, name, value); - } -} - -// ----------------------------------------------------------------------------- -// KMP_SPIN_BACKOFF_PARAMS - -// KMP_SPIN_BACKOFF_PARAMS=max_backoff[,min_tick] (max backoff size, min tick -// for machine pause) -static void __kmp_stg_parse_spin_backoff_params(const char *name, - const char *value, void *data) { - const char *next = value; - - int total = 0; // Count elements that were set. 
It'll be used as an array size - int prev_comma = FALSE; // For correct processing of sequential commas - int i; - - kmp_uint32 max_backoff = __kmp_spin_backoff_params.max_backoff; - kmp_uint32 min_tick = __kmp_spin_backoff_params.min_tick; - - // Run only 3 iterations because it is enough to read two values or find a - // syntax error - for (i = 0; i < 3; i++) { - SKIP_WS(next); - - if (*next == '\0') { - break; - } - // Next character is not an integer or not a comma OR number of values > 2 - // => end of list - if (((*next < '0' || *next > '9') && *next != ',') || total > 2) { - KMP_WARNING(EnvSyntaxError, name, value); - return; - } - // The next character is ',' - if (*next == ',') { - // ',' is the first character - if (total == 0 || prev_comma) { - total++; - } - prev_comma = TRUE; - next++; // skip ',' - SKIP_WS(next); - } - // Next character is a digit - if (*next >= '0' && *next <= '9') { - int num; - const char *buf = next; - char const *msg = NULL; - prev_comma = FALSE; - SKIP_DIGITS(next); - total++; - - const char *tmp = next; - SKIP_WS(tmp); - if ((*next == ' ' || *next == '\t') && (*tmp >= '0' && *tmp <= '9')) { - KMP_WARNING(EnvSpacesNotAllowed, name, value); - return; - } - - num = __kmp_str_to_int(buf, *next); - if (num <= 0) { // The number of retries should be > 0 - msg = KMP_I18N_STR(ValueTooSmall); - num = 1; - } else if (num > KMP_INT_MAX) { - msg = KMP_I18N_STR(ValueTooLarge); - num = KMP_INT_MAX; - } - if (msg != NULL) { - // Message is not empty. Print warning. - KMP_WARNING(ParseSizeIntWarn, name, value, msg); - KMP_INFORM(Using_int_Value, name, num); - } - if (total == 1) { - max_backoff = num; - } else if (total == 2) { - min_tick = num; - } - } - } - KMP_DEBUG_ASSERT(total > 0); - if (total <= 0) { - KMP_WARNING(EnvSyntaxError, name, value); - return; - } - __kmp_spin_backoff_params.max_backoff = max_backoff; - __kmp_spin_backoff_params.min_tick = min_tick; -} - -static void __kmp_stg_print_spin_backoff_params(kmp_str_buf_t *buffer, - char const *name, void *data) { - if (__kmp_env_format) { - KMP_STR_BUF_PRINT_NAME_EX(name); - } else { - __kmp_str_buf_print(buffer, " %s='", name); - } - __kmp_str_buf_print(buffer, "%d,%d'\n", __kmp_spin_backoff_params.max_backoff, - __kmp_spin_backoff_params.min_tick); -} - -#if KMP_USE_ADAPTIVE_LOCKS - -// ----------------------------------------------------------------------------- -// KMP_ADAPTIVE_LOCK_PROPS, KMP_SPECULATIVE_STATSFILE - -// Parse out values for the tunable parameters from a string of the form -// KMP_ADAPTIVE_LOCK_PROPS=max_soft_retries[,max_badness] -static void __kmp_stg_parse_adaptive_lock_props(const char *name, - const char *value, void *data) { - int max_retries = 0; - int max_badness = 0; - - const char *next = value; - - int total = 0; // Count elements that were set.
-
-#if KMP_USE_ADAPTIVE_LOCKS
-
-// -----------------------------------------------------------------------------
-// KMP_ADAPTIVE_LOCK_PROPS, KMP_SPECULATIVE_STATSFILE
-
-// Parse out values for the tunable parameters from a string of the form
-// KMP_ADAPTIVE_LOCK_PROPS=max_soft_retries[,max_badness]
-static void __kmp_stg_parse_adaptive_lock_props(const char *name,
-                                                const char *value, void *data) {
-  int max_retries = 0;
-  int max_badness = 0;
-
-  const char *next = value;
-
-  int total = 0; // Count elements that were set. It'll be used as an array size
-  int prev_comma = FALSE; // For correct processing sequential commas
-  int i;
-
-  // Save values in the structure __kmp_speculative_backoff_params
-  // Run only 3 iterations because it is enough to read two values or find a
-  // syntax error
-  for (i = 0; i < 3; i++) {
-    SKIP_WS(next);
-
-    if (*next == '\0') {
-      break;
-    }
-    // Next character is not an integer or not a comma OR number of values > 2
-    // => end of list
-    if (((*next < '0' || *next > '9') && *next != ',') || total > 2) {
-      KMP_WARNING(EnvSyntaxError, name, value);
-      return;
-    }
-    // The next character is ','
-    if (*next == ',') {
-      // ',' is the first character
-      if (total == 0 || prev_comma) {
-        total++;
-      }
-      prev_comma = TRUE;
-      next++; // skip ','
-      SKIP_WS(next);
-    }
-    // Next character is a digit
-    if (*next >= '0' && *next <= '9') {
-      int num;
-      const char *buf = next;
-      char const *msg = NULL;
-      prev_comma = FALSE;
-      SKIP_DIGITS(next);
-      total++;
-
-      const char *tmp = next;
-      SKIP_WS(tmp);
-      if ((*next == ' ' || *next == '\t') && (*tmp >= '0' && *tmp <= '9')) {
-        KMP_WARNING(EnvSpacesNotAllowed, name, value);
-        return;
-      }
-
-      num = __kmp_str_to_int(buf, *next);
-      if (num < 0) { // The number of retries should be >= 0
-        msg = KMP_I18N_STR(ValueTooSmall);
-        num = 1;
-      } else if (num > KMP_INT_MAX) {
-        msg = KMP_I18N_STR(ValueTooLarge);
-        num = KMP_INT_MAX;
-      }
-      if (msg != NULL) {
-        // Message is not empty. Print warning.
-        KMP_WARNING(ParseSizeIntWarn, name, value, msg);
-        KMP_INFORM(Using_int_Value, name, num);
-      }
-      if (total == 1) {
-        max_retries = num;
-      } else if (total == 2) {
-        max_badness = num;
-      }
-    }
-  }
-  KMP_DEBUG_ASSERT(total > 0);
-  if (total <= 0) {
-    KMP_WARNING(EnvSyntaxError, name, value);
-    return;
-  }
-  __kmp_adaptive_backoff_params.max_soft_retries = max_retries;
-  __kmp_adaptive_backoff_params.max_badness = max_badness;
-}
-
-static void __kmp_stg_print_adaptive_lock_props(kmp_str_buf_t *buffer,
-                                                char const *name, void *data) {
-  if (__kmp_env_format) {
-    KMP_STR_BUF_PRINT_NAME_EX(name);
-  } else {
-    __kmp_str_buf_print(buffer, " %s='", name);
-  }
-  __kmp_str_buf_print(buffer, "%d,%d'\n",
-                      __kmp_adaptive_backoff_params.max_soft_retries,
-                      __kmp_adaptive_backoff_params.max_badness);
-} // __kmp_stg_print_adaptive_lock_props
-
-#if KMP_DEBUG_ADAPTIVE_LOCKS
-
-static void __kmp_stg_parse_speculative_statsfile(char const *name,
-                                                  char const *value,
-                                                  void *data) {
-  __kmp_stg_parse_file(name, value, "", CCAST(char**, &__kmp_speculative_statsfile));
-} // __kmp_stg_parse_speculative_statsfile
-
-static void __kmp_stg_print_speculative_statsfile(kmp_str_buf_t *buffer,
-                                                  char const *name,
-                                                  void *data) {
-  if (__kmp_str_match("-", 0, __kmp_speculative_statsfile)) {
-    __kmp_stg_print_str(buffer, name, "stdout");
-  } else {
-    __kmp_stg_print_str(buffer, name, __kmp_speculative_statsfile);
-  }
-
-} // __kmp_stg_print_speculative_statsfile
-
-#endif // KMP_DEBUG_ADAPTIVE_LOCKS
-
-#endif // KMP_USE_ADAPTIVE_LOCKS
-
-// -----------------------------------------------------------------------------
-// KMP_HW_SUBSET (was KMP_PLACE_THREADS)
-
-// The longest observable sequence of items is
-// Socket-Node-Tile-Core-Thread
-// So, let's limit to 5 levels for now
-// The input string is usually short enough, let's use 512 limit for now
-#define MAX_T_LEVEL 5
-#define MAX_STR_LEN 512
-static void __kmp_stg_parse_hw_subset(char const *name, char const *value,
-                                      void *data) {
-  // Value example: 1s,5c@3,2T
-  // Which means "use 1 socket, 5 cores with offset 3, 2 threads per core"
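A few more values this grammar accepts, for illustration (added here; not part of the original source):

// KMP_HW_SUBSET=2s,4c        -> 2 sockets, 4 cores per socket
// KMP_HW_SUBSET=1s@2,8c,2t   -> 1 socket with offset 2, 8 cores, 2 threads
// KMP_HW_SUBSET=1n,2L2       -> 1 NUMA node, 2 tiles (an L2 domain is a tile)
// KMP_HW_SUBSET=:2s          -> a leading ':' sets the absolute-numbering
//                               flag (__kmp_hws_abs_flag)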
-  kmp_setting_t **rivals = (kmp_setting_t **)data;
-  if (strcmp(name, "KMP_PLACE_THREADS") == 0) {
-    KMP_INFORM(EnvVarDeprecated, name, "KMP_HW_SUBSET");
-  }
-  if (__kmp_stg_check_rivals(name, value, rivals)) {
-    return;
-  }
-
-  char *components[MAX_T_LEVEL];
-  char const *digits = "0123456789";
-  char input[MAX_STR_LEN];
-  size_t len = 0, mlen = MAX_STR_LEN;
-  int level = 0;
-  // Canonicalize the string (remove spaces, unify delimiters, etc.)
-  char *pos = CCAST(char *, value);
-  while (*pos && mlen) {
-    if (*pos != ' ') { // skip spaces
-      if (len == 0 && *pos == ':') {
-        __kmp_hws_abs_flag = 1; // if the first symbol is ":", skip it
-      } else {
-        input[len] = toupper(*pos);
-        if (input[len] == 'X')
-          input[len] = ','; // unify delimiters of levels
-        if (input[len] == 'O' && strchr(digits, *(pos + 1)))
-          input[len] = '@'; // unify delimiters of offset
-        len++;
-      }
-    }
-    mlen--;
-    pos++;
-  }
-  if (len == 0 || mlen == 0)
-    goto err; // content is either empty or too long
-  input[len] = '\0';
-  __kmp_hws_requested = 1; // mark that subset requested
-  // Split by delimiter
-  pos = input;
-  components[level++] = pos;
-  while ((pos = strchr(pos, ','))) {
-    *pos = '\0'; // modify input and avoid more copying
-    components[level++] = ++pos; // expect something after ","
-    if (level > MAX_T_LEVEL)
-      goto err; // too many components provided
-  }
-  // Check each component
-  for (int i = 0; i < level; ++i) {
-    int offset = 0;
-    int num = atoi(components[i]); // each component should start with a number
-    if ((pos = strchr(components[i], '@'))) {
-      offset = atoi(pos + 1); // save offset
-      *pos = '\0'; // cut the offset from the component
-    }
-    pos = components[i] + strspn(components[i], digits);
-    if (pos == components[i])
-      goto err;
-    // detect the component type
-    switch (*pos) {
-    case 'S': // Socket
-      if (__kmp_hws_socket.num > 0)
-        goto err; // duplicate is not allowed
-      __kmp_hws_socket.num = num;
-      __kmp_hws_socket.offset = offset;
-      break;
-    case 'N': // NUMA Node
-      if (__kmp_hws_node.num > 0)
-        goto err; // duplicate is not allowed
-      __kmp_hws_node.num = num;
-      __kmp_hws_node.offset = offset;
-      break;
-    case 'L': // Cache
-      if (*(pos + 1) == '2') { // L2 - Tile
-        if (__kmp_hws_tile.num > 0)
-          goto err; // duplicate is not allowed
-        __kmp_hws_tile.num = num;
-        __kmp_hws_tile.offset = offset;
-      } else if (*(pos + 1) == '3') { // L3 - Socket
-        if (__kmp_hws_socket.num > 0)
-          goto err; // duplicate is not allowed
-        __kmp_hws_socket.num = num;
-        __kmp_hws_socket.offset = offset;
-      } else if (*(pos + 1) == '1') { // L1 - Core
-        if (__kmp_hws_core.num > 0)
-          goto err; // duplicate is not allowed
-        __kmp_hws_core.num = num;
-        __kmp_hws_core.offset = offset;
-      }
-      break;
-    case 'C': // Core (or Cache?)
- if (*(pos + 1) != 'A') { - if (__kmp_hws_core.num > 0) - goto err; // duplicate is not allowed - __kmp_hws_core.num = num; - __kmp_hws_core.offset = offset; - } else { // Cache - char *d = pos + strcspn(pos, digits); // find digit - if (*d == '2') { // L2 - Tile - if (__kmp_hws_tile.num > 0) - goto err; // duplicate is not allowed - __kmp_hws_tile.num = num; - __kmp_hws_tile.offset = offset; - } else if (*d == '3') { // L3 - Socket - if (__kmp_hws_socket.num > 0) - goto err; // duplicate is not allowed - __kmp_hws_socket.num = num; - __kmp_hws_socket.offset = offset; - } else if (*d == '1') { // L1 - Core - if (__kmp_hws_core.num > 0) - goto err; // duplicate is not allowed - __kmp_hws_core.num = num; - __kmp_hws_core.offset = offset; - } else { - goto err; - } - } - break; - case 'T': // Thread - if (__kmp_hws_proc.num > 0) - goto err; // duplicate is not allowed - __kmp_hws_proc.num = num; - __kmp_hws_proc.offset = offset; - break; - default: - goto err; - } - } - return; -err: - KMP_WARNING(AffHWSubsetInvalid, name, value); - __kmp_hws_requested = 0; // mark that subset not requested - return; -} - -static void __kmp_stg_print_hw_subset(kmp_str_buf_t *buffer, char const *name, - void *data) { - if (__kmp_hws_requested) { - int comma = 0; - kmp_str_buf_t buf; - __kmp_str_buf_init(&buf); - if (__kmp_env_format) - KMP_STR_BUF_PRINT_NAME_EX(name); - else - __kmp_str_buf_print(buffer, " %s='", name); - if (__kmp_hws_socket.num) { - __kmp_str_buf_print(&buf, "%ds", __kmp_hws_socket.num); - if (__kmp_hws_socket.offset) - __kmp_str_buf_print(&buf, "@%d", __kmp_hws_socket.offset); - comma = 1; - } - if (__kmp_hws_node.num) { - __kmp_str_buf_print(&buf, "%s%dn", comma ? "," : "", __kmp_hws_node.num); - if (__kmp_hws_node.offset) - __kmp_str_buf_print(&buf, "@%d", __kmp_hws_node.offset); - comma = 1; - } - if (__kmp_hws_tile.num) { - __kmp_str_buf_print(&buf, "%s%dL2", comma ? "," : "", __kmp_hws_tile.num); - if (__kmp_hws_tile.offset) - __kmp_str_buf_print(&buf, "@%d", __kmp_hws_tile.offset); - comma = 1; - } - if (__kmp_hws_core.num) { - __kmp_str_buf_print(&buf, "%s%dc", comma ? "," : "", __kmp_hws_core.num); - if (__kmp_hws_core.offset) - __kmp_str_buf_print(&buf, "@%d", __kmp_hws_core.offset); - comma = 1; - } - if (__kmp_hws_proc.num) - __kmp_str_buf_print(&buf, "%s%dt", comma ? 
"," : "", __kmp_hws_proc.num); - __kmp_str_buf_print(buffer, "%s'\n", buf.str); - __kmp_str_buf_free(&buf); - } -} - -#if USE_ITT_BUILD -// ----------------------------------------------------------------------------- -// KMP_FORKJOIN_FRAMES - -static void __kmp_stg_parse_forkjoin_frames(char const *name, char const *value, - void *data) { - __kmp_stg_parse_bool(name, value, &__kmp_forkjoin_frames); -} // __kmp_stg_parse_forkjoin_frames - -static void __kmp_stg_print_forkjoin_frames(kmp_str_buf_t *buffer, - char const *name, void *data) { - __kmp_stg_print_bool(buffer, name, __kmp_forkjoin_frames); -} // __kmp_stg_print_forkjoin_frames - -// ----------------------------------------------------------------------------- -// KMP_FORKJOIN_FRAMES_MODE - -static void __kmp_stg_parse_forkjoin_frames_mode(char const *name, - char const *value, - void *data) { - __kmp_stg_parse_int(name, value, 0, 3, &__kmp_forkjoin_frames_mode); -} // __kmp_stg_parse_forkjoin_frames - -static void __kmp_stg_print_forkjoin_frames_mode(kmp_str_buf_t *buffer, - char const *name, void *data) { - __kmp_stg_print_int(buffer, name, __kmp_forkjoin_frames_mode); -} // __kmp_stg_print_forkjoin_frames -#endif /* USE_ITT_BUILD */ - -// ----------------------------------------------------------------------------- -// OMP_DISPLAY_ENV - -#if OMP_40_ENABLED - -static void __kmp_stg_parse_omp_display_env(char const *name, char const *value, - void *data) { - if (__kmp_str_match("VERBOSE", 1, value)) { - __kmp_display_env_verbose = TRUE; - } else { - __kmp_stg_parse_bool(name, value, &__kmp_display_env); - } - -} // __kmp_stg_parse_omp_display_env - -static void __kmp_stg_print_omp_display_env(kmp_str_buf_t *buffer, - char const *name, void *data) { - if (__kmp_display_env_verbose) { - __kmp_stg_print_str(buffer, name, "VERBOSE"); - } else { - __kmp_stg_print_bool(buffer, name, __kmp_display_env); - } -} // __kmp_stg_print_omp_display_env - -static void __kmp_stg_parse_omp_cancellation(char const *name, - char const *value, void *data) { - if (TCR_4(__kmp_init_parallel)) { - KMP_WARNING(EnvParallelWarn, name); - return; - } // read value before first parallel only - __kmp_stg_parse_bool(name, value, &__kmp_omp_cancellation); -} // __kmp_stg_parse_omp_cancellation - -static void __kmp_stg_print_omp_cancellation(kmp_str_buf_t *buffer, - char const *name, void *data) { - __kmp_stg_print_bool(buffer, name, __kmp_omp_cancellation); -} // __kmp_stg_print_omp_cancellation - -#endif - -#if OMP_50_ENABLED && OMPT_SUPPORT -static int __kmp_tool = 1; - -static void __kmp_stg_parse_omp_tool(char const *name, char const *value, - void *data) { - __kmp_stg_parse_bool(name, value, &__kmp_tool); -} // __kmp_stg_parse_omp_tool - -static void __kmp_stg_print_omp_tool(kmp_str_buf_t *buffer, char const *name, - void *data) { - if (__kmp_env_format) { - KMP_STR_BUF_PRINT_BOOL_EX(name, __kmp_tool, "enabled", "disabled"); - } else { - __kmp_str_buf_print(buffer, " %s=%s\n", name, - __kmp_tool ? 
"enabled" : "disabled"); - } -} // __kmp_stg_print_omp_tool - -static char *__kmp_tool_libraries = NULL; - -static void __kmp_stg_parse_omp_tool_libraries(char const *name, - char const *value, void *data) { - __kmp_stg_parse_str(name, value, &__kmp_tool_libraries); -} // __kmp_stg_parse_omp_tool_libraries - -static void __kmp_stg_print_omp_tool_libraries(kmp_str_buf_t *buffer, - char const *name, void *data) { - if (__kmp_tool_libraries) - __kmp_stg_print_str(buffer, name, __kmp_tool_libraries); - else { - if (__kmp_env_format) { - KMP_STR_BUF_PRINT_NAME; - } else { - __kmp_str_buf_print(buffer, " %s", name); - } - __kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined)); - } -} // __kmp_stg_print_omp_tool_libraries - -#endif - -// Table. - -static kmp_setting_t __kmp_stg_table[] = { - - {"KMP_ALL_THREADS", __kmp_stg_parse_device_thread_limit, NULL, NULL, 0, 0}, - {"KMP_BLOCKTIME", __kmp_stg_parse_blocktime, __kmp_stg_print_blocktime, - NULL, 0, 0}, - {"KMP_DUPLICATE_LIB_OK", __kmp_stg_parse_duplicate_lib_ok, - __kmp_stg_print_duplicate_lib_ok, NULL, 0, 0}, - {"KMP_LIBRARY", __kmp_stg_parse_wait_policy, __kmp_stg_print_wait_policy, - NULL, 0, 0}, - {"KMP_DEVICE_THREAD_LIMIT", __kmp_stg_parse_device_thread_limit, - __kmp_stg_print_device_thread_limit, NULL, 0, 0}, -#if KMP_USE_MONITOR - {"KMP_MONITOR_STACKSIZE", __kmp_stg_parse_monitor_stacksize, - __kmp_stg_print_monitor_stacksize, NULL, 0, 0}, -#endif - {"KMP_SETTINGS", __kmp_stg_parse_settings, __kmp_stg_print_settings, NULL, - 0, 0}, - {"KMP_STACKOFFSET", __kmp_stg_parse_stackoffset, - __kmp_stg_print_stackoffset, NULL, 0, 0}, - {"KMP_STACKSIZE", __kmp_stg_parse_stacksize, __kmp_stg_print_stacksize, - NULL, 0, 0}, - {"KMP_STACKPAD", __kmp_stg_parse_stackpad, __kmp_stg_print_stackpad, NULL, - 0, 0}, - {"KMP_VERSION", __kmp_stg_parse_version, __kmp_stg_print_version, NULL, 0, - 0}, - {"KMP_WARNINGS", __kmp_stg_parse_warnings, __kmp_stg_print_warnings, NULL, - 0, 0}, - - {"OMP_NESTED", __kmp_stg_parse_nested, __kmp_stg_print_nested, NULL, 0, 0}, - {"OMP_NUM_THREADS", __kmp_stg_parse_num_threads, - __kmp_stg_print_num_threads, NULL, 0, 0}, - {"OMP_STACKSIZE", __kmp_stg_parse_stacksize, __kmp_stg_print_stacksize, - NULL, 0, 0}, - - {"KMP_TASKING", __kmp_stg_parse_tasking, __kmp_stg_print_tasking, NULL, 0, - 0}, - {"KMP_TASK_STEALING_CONSTRAINT", __kmp_stg_parse_task_stealing, - __kmp_stg_print_task_stealing, NULL, 0, 0}, - {"OMP_MAX_ACTIVE_LEVELS", __kmp_stg_parse_max_active_levels, - __kmp_stg_print_max_active_levels, NULL, 0, 0}, -#if OMP_40_ENABLED - {"OMP_DEFAULT_DEVICE", __kmp_stg_parse_default_device, - __kmp_stg_print_default_device, NULL, 0, 0}, -#endif -#if OMP_50_ENABLED - {"OMP_TARGET_OFFLOAD", __kmp_stg_parse_target_offload, - __kmp_stg_print_target_offload, NULL, 0, 0}, -#endif -#if OMP_45_ENABLED - {"OMP_MAX_TASK_PRIORITY", __kmp_stg_parse_max_task_priority, - __kmp_stg_print_max_task_priority, NULL, 0, 0}, - {"KMP_TASKLOOP_MIN_TASKS", __kmp_stg_parse_taskloop_min_tasks, - __kmp_stg_print_taskloop_min_tasks, NULL, 0, 0}, -#endif - {"OMP_THREAD_LIMIT", __kmp_stg_parse_thread_limit, - __kmp_stg_print_thread_limit, NULL, 0, 0}, - {"KMP_TEAMS_THREAD_LIMIT", __kmp_stg_parse_teams_thread_limit, - __kmp_stg_print_teams_thread_limit, NULL, 0, 0}, - {"OMP_WAIT_POLICY", __kmp_stg_parse_wait_policy, - __kmp_stg_print_wait_policy, NULL, 0, 0}, - {"KMP_DISP_NUM_BUFFERS", __kmp_stg_parse_disp_buffers, - __kmp_stg_print_disp_buffers, NULL, 0, 0}, -#if KMP_NESTED_HOT_TEAMS - {"KMP_HOT_TEAMS_MAX_LEVEL", __kmp_stg_parse_hot_teams_level, 
- __kmp_stg_print_hot_teams_level, NULL, 0, 0}, - {"KMP_HOT_TEAMS_MODE", __kmp_stg_parse_hot_teams_mode, - __kmp_stg_print_hot_teams_mode, NULL, 0, 0}, -#endif // KMP_NESTED_HOT_TEAMS - -#if KMP_HANDLE_SIGNALS - {"KMP_HANDLE_SIGNALS", __kmp_stg_parse_handle_signals, - __kmp_stg_print_handle_signals, NULL, 0, 0}, -#endif - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - {"KMP_INHERIT_FP_CONTROL", __kmp_stg_parse_inherit_fp_control, - __kmp_stg_print_inherit_fp_control, NULL, 0, 0}, -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -#ifdef KMP_GOMP_COMPAT - {"GOMP_STACKSIZE", __kmp_stg_parse_stacksize, NULL, NULL, 0, 0}, -#endif - -#ifdef KMP_DEBUG - {"KMP_A_DEBUG", __kmp_stg_parse_a_debug, __kmp_stg_print_a_debug, NULL, 0, - 0}, - {"KMP_B_DEBUG", __kmp_stg_parse_b_debug, __kmp_stg_print_b_debug, NULL, 0, - 0}, - {"KMP_C_DEBUG", __kmp_stg_parse_c_debug, __kmp_stg_print_c_debug, NULL, 0, - 0}, - {"KMP_D_DEBUG", __kmp_stg_parse_d_debug, __kmp_stg_print_d_debug, NULL, 0, - 0}, - {"KMP_E_DEBUG", __kmp_stg_parse_e_debug, __kmp_stg_print_e_debug, NULL, 0, - 0}, - {"KMP_F_DEBUG", __kmp_stg_parse_f_debug, __kmp_stg_print_f_debug, NULL, 0, - 0}, - {"KMP_DEBUG", __kmp_stg_parse_debug, NULL, /* no print */ NULL, 0, 0}, - {"KMP_DEBUG_BUF", __kmp_stg_parse_debug_buf, __kmp_stg_print_debug_buf, - NULL, 0, 0}, - {"KMP_DEBUG_BUF_ATOMIC", __kmp_stg_parse_debug_buf_atomic, - __kmp_stg_print_debug_buf_atomic, NULL, 0, 0}, - {"KMP_DEBUG_BUF_CHARS", __kmp_stg_parse_debug_buf_chars, - __kmp_stg_print_debug_buf_chars, NULL, 0, 0}, - {"KMP_DEBUG_BUF_LINES", __kmp_stg_parse_debug_buf_lines, - __kmp_stg_print_debug_buf_lines, NULL, 0, 0}, - {"KMP_DIAG", __kmp_stg_parse_diag, __kmp_stg_print_diag, NULL, 0, 0}, - - {"KMP_PAR_RANGE", __kmp_stg_parse_par_range_env, - __kmp_stg_print_par_range_env, NULL, 0, 0}, - {"KMP_YIELD_CYCLE", __kmp_stg_parse_yield_cycle, - __kmp_stg_print_yield_cycle, NULL, 0, 0}, - {"KMP_YIELD_ON", __kmp_stg_parse_yield_on, __kmp_stg_print_yield_on, NULL, - 0, 0}, - {"KMP_YIELD_OFF", __kmp_stg_parse_yield_off, __kmp_stg_print_yield_off, - NULL, 0, 0}, -#endif // KMP_DEBUG - - {"KMP_ALIGN_ALLOC", __kmp_stg_parse_align_alloc, - __kmp_stg_print_align_alloc, NULL, 0, 0}, - - {"KMP_PLAIN_BARRIER", __kmp_stg_parse_barrier_branch_bit, - __kmp_stg_print_barrier_branch_bit, NULL, 0, 0}, - {"KMP_PLAIN_BARRIER_PATTERN", __kmp_stg_parse_barrier_pattern, - __kmp_stg_print_barrier_pattern, NULL, 0, 0}, - {"KMP_FORKJOIN_BARRIER", __kmp_stg_parse_barrier_branch_bit, - __kmp_stg_print_barrier_branch_bit, NULL, 0, 0}, - {"KMP_FORKJOIN_BARRIER_PATTERN", __kmp_stg_parse_barrier_pattern, - __kmp_stg_print_barrier_pattern, NULL, 0, 0}, -#if KMP_FAST_REDUCTION_BARRIER - {"KMP_REDUCTION_BARRIER", __kmp_stg_parse_barrier_branch_bit, - __kmp_stg_print_barrier_branch_bit, NULL, 0, 0}, - {"KMP_REDUCTION_BARRIER_PATTERN", __kmp_stg_parse_barrier_pattern, - __kmp_stg_print_barrier_pattern, NULL, 0, 0}, -#endif - - {"KMP_ABORT_DELAY", __kmp_stg_parse_abort_delay, - __kmp_stg_print_abort_delay, NULL, 0, 0}, - {"KMP_CPUINFO_FILE", __kmp_stg_parse_cpuinfo_file, - __kmp_stg_print_cpuinfo_file, NULL, 0, 0}, - {"KMP_FORCE_REDUCTION", __kmp_stg_parse_force_reduction, - __kmp_stg_print_force_reduction, NULL, 0, 0}, - {"KMP_DETERMINISTIC_REDUCTION", __kmp_stg_parse_force_reduction, - __kmp_stg_print_force_reduction, NULL, 0, 0}, - {"KMP_STORAGE_MAP", __kmp_stg_parse_storage_map, - __kmp_stg_print_storage_map, NULL, 0, 0}, - {"KMP_ALL_THREADPRIVATE", __kmp_stg_parse_all_threadprivate, - __kmp_stg_print_all_threadprivate, NULL, 0, 0}, - 
{"KMP_FOREIGN_THREADS_THREADPRIVATE", - __kmp_stg_parse_foreign_threads_threadprivate, - __kmp_stg_print_foreign_threads_threadprivate, NULL, 0, 0}, - -#if KMP_AFFINITY_SUPPORTED - {"KMP_AFFINITY", __kmp_stg_parse_affinity, __kmp_stg_print_affinity, NULL, - 0, 0}, -#ifdef KMP_GOMP_COMPAT - {"GOMP_CPU_AFFINITY", __kmp_stg_parse_gomp_cpu_affinity, NULL, - /* no print */ NULL, 0, 0}, -#endif /* KMP_GOMP_COMPAT */ -#if OMP_40_ENABLED - {"OMP_PROC_BIND", __kmp_stg_parse_proc_bind, __kmp_stg_print_proc_bind, - NULL, 0, 0}, - {"OMP_PLACES", __kmp_stg_parse_places, __kmp_stg_print_places, NULL, 0, 0}, -#else - {"OMP_PROC_BIND", __kmp_stg_parse_proc_bind, NULL, /* no print */ NULL, 0, - 0}, -#endif /* OMP_40_ENABLED */ - {"KMP_TOPOLOGY_METHOD", __kmp_stg_parse_topology_method, - __kmp_stg_print_topology_method, NULL, 0, 0}, - -#else - -// KMP_AFFINITY is not supported on OS X*, nor is OMP_PLACES. -// OMP_PROC_BIND and proc-bind-var are supported, however. -#if OMP_40_ENABLED - {"OMP_PROC_BIND", __kmp_stg_parse_proc_bind, __kmp_stg_print_proc_bind, - NULL, 0, 0}, -#endif - -#endif // KMP_AFFINITY_SUPPORTED -#if OMP_50_ENABLED - {"OMP_DISPLAY_AFFINITY", __kmp_stg_parse_display_affinity, - __kmp_stg_print_display_affinity, NULL, 0, 0}, - {"OMP_AFFINITY_FORMAT", __kmp_stg_parse_affinity_format, - __kmp_stg_print_affinity_format, NULL, 0, 0}, -#endif - {"KMP_INIT_AT_FORK", __kmp_stg_parse_init_at_fork, - __kmp_stg_print_init_at_fork, NULL, 0, 0}, - {"KMP_SCHEDULE", __kmp_stg_parse_schedule, __kmp_stg_print_schedule, NULL, - 0, 0}, - {"OMP_SCHEDULE", __kmp_stg_parse_omp_schedule, __kmp_stg_print_omp_schedule, - NULL, 0, 0}, -#if KMP_USE_HIER_SCHED - {"KMP_DISP_HAND_THREAD", __kmp_stg_parse_kmp_hand_thread, - __kmp_stg_print_kmp_hand_thread, NULL, 0, 0}, -#endif - {"KMP_ATOMIC_MODE", __kmp_stg_parse_atomic_mode, - __kmp_stg_print_atomic_mode, NULL, 0, 0}, - {"KMP_CONSISTENCY_CHECK", __kmp_stg_parse_consistency_check, - __kmp_stg_print_consistency_check, NULL, 0, 0}, - -#if USE_ITT_BUILD && USE_ITT_NOTIFY - {"KMP_ITT_PREPARE_DELAY", __kmp_stg_parse_itt_prepare_delay, - __kmp_stg_print_itt_prepare_delay, NULL, 0, 0}, -#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ - {"KMP_MALLOC_POOL_INCR", __kmp_stg_parse_malloc_pool_incr, - __kmp_stg_print_malloc_pool_incr, NULL, 0, 0}, - {"KMP_INIT_WAIT", __kmp_stg_parse_init_wait, __kmp_stg_print_init_wait, - NULL, 0, 0}, - {"KMP_NEXT_WAIT", __kmp_stg_parse_next_wait, __kmp_stg_print_next_wait, - NULL, 0, 0}, - {"KMP_GTID_MODE", __kmp_stg_parse_gtid_mode, __kmp_stg_print_gtid_mode, - NULL, 0, 0}, - {"OMP_DYNAMIC", __kmp_stg_parse_omp_dynamic, __kmp_stg_print_omp_dynamic, - NULL, 0, 0}, - {"KMP_DYNAMIC_MODE", __kmp_stg_parse_kmp_dynamic_mode, - __kmp_stg_print_kmp_dynamic_mode, NULL, 0, 0}, - -#ifdef USE_LOAD_BALANCE - {"KMP_LOAD_BALANCE_INTERVAL", __kmp_stg_parse_ld_balance_interval, - __kmp_stg_print_ld_balance_interval, NULL, 0, 0}, -#endif - - {"KMP_NUM_LOCKS_IN_BLOCK", __kmp_stg_parse_lock_block, - __kmp_stg_print_lock_block, NULL, 0, 0}, - {"KMP_LOCK_KIND", __kmp_stg_parse_lock_kind, __kmp_stg_print_lock_kind, - NULL, 0, 0}, - {"KMP_SPIN_BACKOFF_PARAMS", __kmp_stg_parse_spin_backoff_params, - __kmp_stg_print_spin_backoff_params, NULL, 0, 0}, -#if KMP_USE_ADAPTIVE_LOCKS - {"KMP_ADAPTIVE_LOCK_PROPS", __kmp_stg_parse_adaptive_lock_props, - __kmp_stg_print_adaptive_lock_props, NULL, 0, 0}, -#if KMP_DEBUG_ADAPTIVE_LOCKS - {"KMP_SPECULATIVE_STATSFILE", __kmp_stg_parse_speculative_statsfile, - __kmp_stg_print_speculative_statsfile, NULL, 0, 0}, -#endif -#endif // 
KMP_USE_ADAPTIVE_LOCKS - {"KMP_PLACE_THREADS", __kmp_stg_parse_hw_subset, __kmp_stg_print_hw_subset, - NULL, 0, 0}, - {"KMP_HW_SUBSET", __kmp_stg_parse_hw_subset, __kmp_stg_print_hw_subset, - NULL, 0, 0}, -#if USE_ITT_BUILD - {"KMP_FORKJOIN_FRAMES", __kmp_stg_parse_forkjoin_frames, - __kmp_stg_print_forkjoin_frames, NULL, 0, 0}, - {"KMP_FORKJOIN_FRAMES_MODE", __kmp_stg_parse_forkjoin_frames_mode, - __kmp_stg_print_forkjoin_frames_mode, NULL, 0, 0}, -#endif - -#if OMP_40_ENABLED - {"OMP_DISPLAY_ENV", __kmp_stg_parse_omp_display_env, - __kmp_stg_print_omp_display_env, NULL, 0, 0}, - {"OMP_CANCELLATION", __kmp_stg_parse_omp_cancellation, - __kmp_stg_print_omp_cancellation, NULL, 0, 0}, -#endif - -#if OMP_50_ENABLED - {"OMP_ALLOCATOR", __kmp_stg_parse_allocator, __kmp_stg_print_allocator, - NULL, 0, 0}, -#endif - -#if OMP_50_ENABLED && OMPT_SUPPORT - {"OMP_TOOL", __kmp_stg_parse_omp_tool, __kmp_stg_print_omp_tool, NULL, 0, - 0}, - {"OMP_TOOL_LIBRARIES", __kmp_stg_parse_omp_tool_libraries, - __kmp_stg_print_omp_tool_libraries, NULL, 0, 0}, -#endif - - {"", NULL, NULL, NULL, 0, 0}}; // settings - -static int const __kmp_stg_count = - sizeof(__kmp_stg_table) / sizeof(kmp_setting_t); - -static inline kmp_setting_t *__kmp_stg_find(char const *name) { - - int i; - if (name != NULL) { - for (i = 0; i < __kmp_stg_count; ++i) { - if (strcmp(__kmp_stg_table[i].name, name) == 0) { - return &__kmp_stg_table[i]; - } - } - } - return NULL; - -} // __kmp_stg_find - -static int __kmp_stg_cmp(void const *_a, void const *_b) { - const kmp_setting_t *a = RCAST(const kmp_setting_t *, _a); - const kmp_setting_t *b = RCAST(const kmp_setting_t *, _b); - - // Process KMP_AFFINITY last. - // It needs to come after OMP_PLACES and GOMP_CPU_AFFINITY. - if (strcmp(a->name, "KMP_AFFINITY") == 0) { - if (strcmp(b->name, "KMP_AFFINITY") == 0) { - return 0; - } - return 1; - } else if (strcmp(b->name, "KMP_AFFINITY") == 0) { - return -1; - } - return strcmp(a->name, b->name); -} // __kmp_stg_cmp - -static void __kmp_stg_init(void) { - - static int initialized = 0; - - if (!initialized) { - - // Sort table. - qsort(__kmp_stg_table, __kmp_stg_count - 1, sizeof(kmp_setting_t), - __kmp_stg_cmp); - - { // Initialize *_STACKSIZE data. - kmp_setting_t *kmp_stacksize = - __kmp_stg_find("KMP_STACKSIZE"); // 1st priority. -#ifdef KMP_GOMP_COMPAT - kmp_setting_t *gomp_stacksize = - __kmp_stg_find("GOMP_STACKSIZE"); // 2nd priority. -#endif - kmp_setting_t *omp_stacksize = - __kmp_stg_find("OMP_STACKSIZE"); // 3rd priority. - - // !!! volatile keyword is Intel(R) C Compiler bug CQ49908 workaround. - // !!! Compiler does not understand rivals is used and optimizes out - // assignments - // !!! rivals[ i ++ ] = ...; - static kmp_setting_t *volatile rivals[4]; - static kmp_stg_ss_data_t kmp_data = {1, CCAST(kmp_setting_t **, rivals)}; -#ifdef KMP_GOMP_COMPAT - static kmp_stg_ss_data_t gomp_data = {1024, - CCAST(kmp_setting_t **, rivals)}; -#endif - static kmp_stg_ss_data_t omp_data = {1024, - CCAST(kmp_setting_t **, rivals)}; - int i = 0; - - rivals[i++] = kmp_stacksize; -#ifdef KMP_GOMP_COMPAT - if (gomp_stacksize != NULL) { - rivals[i++] = gomp_stacksize; - } -#endif - rivals[i++] = omp_stacksize; - rivals[i++] = NULL; - - kmp_stacksize->data = &kmp_data; -#ifdef KMP_GOMP_COMPAT - if (gomp_stacksize != NULL) { - gomp_stacksize->data = &gomp_data; - } -#endif - omp_stacksize->data = &omp_data; - } - - { // Initialize KMP_LIBRARY and OMP_WAIT_POLICY data. - kmp_setting_t *kmp_library = - __kmp_stg_find("KMP_LIBRARY"); // 1st priority. 
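One concrete consequence of this rival list, sketched for illustration (not a note from the original source):

// If the environment sets both KMP_LIBRARY=throughput and
// OMP_WAIT_POLICY=passive, KMP_LIBRARY comes first in rivals[] (higher
// priority), so when OMP_WAIT_POLICY is parsed the rival check finds
// KMP_LIBRARY already set and ignores OMP_WAIT_POLICY with a StgIgnored
// warning.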
- kmp_setting_t *omp_wait_policy = - __kmp_stg_find("OMP_WAIT_POLICY"); // 2nd priority. - - // !!! volatile keyword is Intel(R) C Compiler bug CQ49908 workaround. - static kmp_setting_t *volatile rivals[3]; - static kmp_stg_wp_data_t kmp_data = {0, CCAST(kmp_setting_t **, rivals)}; - static kmp_stg_wp_data_t omp_data = {1, CCAST(kmp_setting_t **, rivals)}; - int i = 0; - - rivals[i++] = kmp_library; - if (omp_wait_policy != NULL) { - rivals[i++] = omp_wait_policy; - } - rivals[i++] = NULL; - - kmp_library->data = &kmp_data; - if (omp_wait_policy != NULL) { - omp_wait_policy->data = &omp_data; - } - } - - { // Initialize KMP_DEVICE_THREAD_LIMIT and KMP_ALL_THREADS - kmp_setting_t *kmp_device_thread_limit = - __kmp_stg_find("KMP_DEVICE_THREAD_LIMIT"); // 1st priority. - kmp_setting_t *kmp_all_threads = - __kmp_stg_find("KMP_ALL_THREADS"); // 2nd priority. - - // !!! volatile keyword is Intel(R) C Compiler bug CQ49908 workaround. - static kmp_setting_t *volatile rivals[3]; - int i = 0; - - rivals[i++] = kmp_device_thread_limit; - rivals[i++] = kmp_all_threads; - rivals[i++] = NULL; - - kmp_device_thread_limit->data = CCAST(kmp_setting_t **, rivals); - kmp_all_threads->data = CCAST(kmp_setting_t **, rivals); - } - - { // Initialize KMP_HW_SUBSET and KMP_PLACE_THREADS - // 1st priority - kmp_setting_t *kmp_hw_subset = __kmp_stg_find("KMP_HW_SUBSET"); - // 2nd priority - kmp_setting_t *kmp_place_threads = __kmp_stg_find("KMP_PLACE_THREADS"); - - // !!! volatile keyword is Intel(R) C Compiler bug CQ49908 workaround. - static kmp_setting_t *volatile rivals[3]; - int i = 0; - - rivals[i++] = kmp_hw_subset; - rivals[i++] = kmp_place_threads; - rivals[i++] = NULL; - - kmp_hw_subset->data = CCAST(kmp_setting_t **, rivals); - kmp_place_threads->data = CCAST(kmp_setting_t **, rivals); - } - -#if KMP_AFFINITY_SUPPORTED - { // Initialize KMP_AFFINITY, GOMP_CPU_AFFINITY, and OMP_PROC_BIND data. - kmp_setting_t *kmp_affinity = - __kmp_stg_find("KMP_AFFINITY"); // 1st priority. - KMP_DEBUG_ASSERT(kmp_affinity != NULL); - -#ifdef KMP_GOMP_COMPAT - kmp_setting_t *gomp_cpu_affinity = - __kmp_stg_find("GOMP_CPU_AFFINITY"); // 2nd priority. - KMP_DEBUG_ASSERT(gomp_cpu_affinity != NULL); -#endif - - kmp_setting_t *omp_proc_bind = - __kmp_stg_find("OMP_PROC_BIND"); // 3rd priority. - KMP_DEBUG_ASSERT(omp_proc_bind != NULL); - - // !!! volatile keyword is Intel(R) C Compiler bug CQ49908 workaround. - static kmp_setting_t *volatile rivals[4]; - int i = 0; - - rivals[i++] = kmp_affinity; - -#ifdef KMP_GOMP_COMPAT - rivals[i++] = gomp_cpu_affinity; - gomp_cpu_affinity->data = CCAST(kmp_setting_t **, rivals); -#endif - - rivals[i++] = omp_proc_bind; - omp_proc_bind->data = CCAST(kmp_setting_t **, rivals); - rivals[i++] = NULL; - -#if OMP_40_ENABLED - static kmp_setting_t *volatile places_rivals[4]; - i = 0; - - kmp_setting_t *omp_places = __kmp_stg_find("OMP_PLACES"); // 3rd priority. - KMP_DEBUG_ASSERT(omp_places != NULL); - - places_rivals[i++] = kmp_affinity; -#ifdef KMP_GOMP_COMPAT - places_rivals[i++] = gomp_cpu_affinity; -#endif - places_rivals[i++] = omp_places; - omp_places->data = CCAST(kmp_setting_t **, places_rivals); - places_rivals[i++] = NULL; -#endif - } -#else -// KMP_AFFINITY not supported, so OMP_PROC_BIND has no rivals. -// OMP_PLACES not supported yet. -#endif // KMP_AFFINITY_SUPPORTED - - { // Initialize KMP_DETERMINISTIC_REDUCTION and KMP_FORCE_REDUCTION data. - kmp_setting_t *kmp_force_red = - __kmp_stg_find("KMP_FORCE_REDUCTION"); // 1st priority. 
- kmp_setting_t *kmp_determ_red = - __kmp_stg_find("KMP_DETERMINISTIC_REDUCTION"); // 2nd priority. - - // !!! volatile keyword is Intel(R) C Compiler bug CQ49908 workaround. - static kmp_setting_t *volatile rivals[3]; - static kmp_stg_fr_data_t force_data = {1, - CCAST(kmp_setting_t **, rivals)}; - static kmp_stg_fr_data_t determ_data = {0, - CCAST(kmp_setting_t **, rivals)}; - int i = 0; - - rivals[i++] = kmp_force_red; - if (kmp_determ_red != NULL) { - rivals[i++] = kmp_determ_red; - } - rivals[i++] = NULL; - - kmp_force_red->data = &force_data; - if (kmp_determ_red != NULL) { - kmp_determ_red->data = &determ_data; - } - } - - initialized = 1; - } - - // Reset flags. - int i; - for (i = 0; i < __kmp_stg_count; ++i) { - __kmp_stg_table[i].set = 0; - } - -} // __kmp_stg_init - -static void __kmp_stg_parse(char const *name, char const *value) { - // On Windows* OS there are some nameless variables like "C:=C:\" (yeah, - // really nameless, they are presented in environment block as - // "=C:=C\\\x00=D:=D:\\\x00...", so let us skip them. - if (name[0] == 0) { - return; - } - - if (value != NULL) { - kmp_setting_t *setting = __kmp_stg_find(name); - if (setting != NULL) { - setting->parse(name, value, setting->data); - setting->defined = 1; - } - } - -} // __kmp_stg_parse - -static int __kmp_stg_check_rivals( // 0 -- Ok, 1 -- errors found. - char const *name, // Name of variable. - char const *value, // Value of the variable. - kmp_setting_t **rivals // List of rival settings (must include current one). - ) { - - if (rivals == NULL) { - return 0; - } - - // Loop thru higher priority settings (listed before current). - int i = 0; - for (; strcmp(rivals[i]->name, name) != 0; i++) { - KMP_DEBUG_ASSERT(rivals[i] != NULL); - -#if KMP_AFFINITY_SUPPORTED - if (rivals[i] == __kmp_affinity_notype) { - // If KMP_AFFINITY is specified without a type name, - // it does not rival OMP_PROC_BIND or GOMP_CPU_AFFINITY. - continue; - } -#endif - - if (rivals[i]->set) { - KMP_WARNING(StgIgnored, name, rivals[i]->name); - return 1; - } - } - - ++i; // Skip current setting. - return 0; - -} // __kmp_stg_check_rivals - -static int __kmp_env_toPrint(char const *name, int flag) { - int rc = 0; - kmp_setting_t *setting = __kmp_stg_find(name); - if (setting != NULL) { - rc = setting->defined; - if (flag >= 0) { - setting->defined = flag; - } - } - return rc; -} - -static void __kmp_aux_env_initialize(kmp_env_blk_t *block) { - - char const *value; - - /* OMP_NUM_THREADS */ - value = __kmp_env_blk_var(block, "OMP_NUM_THREADS"); - if (value) { - ompc_set_num_threads(__kmp_dflt_team_nth); - } - - /* KMP_BLOCKTIME */ - value = __kmp_env_blk_var(block, "KMP_BLOCKTIME"); - if (value) { - kmpc_set_blocktime(__kmp_dflt_blocktime); - } - - /* OMP_NESTED */ - value = __kmp_env_blk_var(block, "OMP_NESTED"); - if (value) { - ompc_set_nested(__kmp_dflt_nested); - } - - /* OMP_DYNAMIC */ - value = __kmp_env_blk_var(block, "OMP_DYNAMIC"); - if (value) { - ompc_set_dynamic(__kmp_global.g.g_dynamic); - } -} - -void __kmp_env_initialize(char const *string) { - - kmp_env_blk_t block; - int i; - - __kmp_stg_init(); - - // Hack!!! 
-  if (string == NULL) {
-    // __kmp_max_nth = __kmp_sys_max_nth;
-    __kmp_threads_capacity =
-        __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
-  }
-  __kmp_env_blk_init(&block, string);
-
-  // update the set flag on all entries that have an env var
-  for (i = 0; i < block.count; ++i) {
-    if ((block.vars[i].name == NULL) || (*block.vars[i].name == '\0')) {
-      continue;
-    }
-    if (block.vars[i].value == NULL) {
-      continue;
-    }
-    kmp_setting_t *setting = __kmp_stg_find(block.vars[i].name);
-    if (setting != NULL) {
-      setting->set = 1;
-    }
-  }
-
-  // We need to know if blocktime was set when processing OMP_WAIT_POLICY
-  blocktime_str = __kmp_env_blk_var(&block, "KMP_BLOCKTIME");
-
-  // Special case. If we parse environment, not a string, process KMP_WARNINGS
-  // first.
-  if (string == NULL) {
-    char const *name = "KMP_WARNINGS";
-    char const *value = __kmp_env_blk_var(&block, name);
-    __kmp_stg_parse(name, value);
-  }
-
-#if KMP_AFFINITY_SUPPORTED
-  // Special case. KMP_AFFINITY is not a rival to other affinity env vars
-  // if no affinity type is specified. We want to allow
-  // KMP_AFFINITY=[no],verbose/[no]warnings/etc. to be enabled when
-  // specifying the affinity type via GOMP_CPU_AFFINITY or the OMP 4.0
-  // affinity mechanism.
-  __kmp_affinity_notype = NULL;
-  char const *aff_str = __kmp_env_blk_var(&block, "KMP_AFFINITY");
-  if (aff_str != NULL) {
-// Check if the KMP_AFFINITY type is specified in the string.
-// We just search the string for "compact", "scatter", etc.
-// without really parsing the string. The syntax of the
-// KMP_AFFINITY env var is such that none of the affinity
-// type names can appear anywhere other than the type
-// specifier, even as substrings.
-//
-// I can't find a case-insensitive version of strstr on Windows* OS.
-// Use the case-sensitive version for now.
-
-#if KMP_OS_WINDOWS
-#define FIND strstr
-#else
-#define FIND strcasestr
-#endif
-
-    if ((FIND(aff_str, "none") == NULL) &&
-        (FIND(aff_str, "physical") == NULL) &&
-        (FIND(aff_str, "logical") == NULL) &&
-        (FIND(aff_str, "compact") == NULL) &&
-        (FIND(aff_str, "scatter") == NULL) &&
-        (FIND(aff_str, "explicit") == NULL) &&
-        (FIND(aff_str, "balanced") == NULL) &&
-        (FIND(aff_str, "disabled") == NULL)) {
-      __kmp_affinity_notype = __kmp_stg_find("KMP_AFFINITY");
-    } else {
-      // A new affinity type is specified.
-      // Reset the affinity flags to their default values,
-      // in case this is called from kmp_set_defaults().
-      __kmp_affinity_type = affinity_default;
-      __kmp_affinity_gran = affinity_gran_default;
-      __kmp_affinity_top_method = affinity_top_method_default;
-      __kmp_affinity_respect_mask = affinity_respect_mask_default;
-    }
-#undef FIND
-
-#if OMP_40_ENABLED
-    // Also reset the affinity flags if OMP_PROC_BIND is specified.
-    aff_str = __kmp_env_blk_var(&block, "OMP_PROC_BIND");
-    if (aff_str != NULL) {
-      __kmp_affinity_type = affinity_default;
-      __kmp_affinity_gran = affinity_gran_default;
-      __kmp_affinity_top_method = affinity_top_method_default;
-      __kmp_affinity_respect_mask = affinity_respect_mask_default;
-    }
-#endif /* OMP_40_ENABLED */
-  }
-
-#endif /* KMP_AFFINITY_SUPPORTED */
-
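Two concrete cases for the type check above (illustrative, not in the original source):

// KMP_AFFINITY=granularity=fine,compact  -> "compact" is found, so the
//   affinity flags are reset to defaults and the full string is re-parsed.
// KMP_AFFINITY=verbose                   -> no type name is found, so
//   __kmp_affinity_notype is set and this setting does not rival
//   OMP_PROC_BIND or GOMP_CPU_AFFINITY.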
-#if OMP_40_ENABLED
-  // Set up the nested proc bind type vector.
-  if (__kmp_nested_proc_bind.bind_types == NULL) {
-    __kmp_nested_proc_bind.bind_types =
-        (kmp_proc_bind_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_proc_bind_t));
-    if (__kmp_nested_proc_bind.bind_types == NULL) {
-      KMP_FATAL(MemoryAllocFailed);
-    }
-    __kmp_nested_proc_bind.size = 1;
-    __kmp_nested_proc_bind.used = 1;
-#if KMP_AFFINITY_SUPPORTED
-    __kmp_nested_proc_bind.bind_types[0] = proc_bind_default;
-#else
-    // default proc bind is false if affinity not supported
-    __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
-#endif
-  }
-#endif /* OMP_40_ENABLED */
-
-#if OMP_50_ENABLED
-  // Set up the affinity format ICV
-  // Grab the default affinity format string from the message catalog
-  kmp_msg_t m =
-      __kmp_msg_format(kmp_i18n_msg_AffFormatDefault, "%P", "%i", "%n", "%A");
-  KMP_DEBUG_ASSERT(KMP_STRLEN(m.str) < KMP_AFFINITY_FORMAT_SIZE);
-
-  if (__kmp_affinity_format == NULL) {
-    __kmp_affinity_format =
-        (char *)KMP_INTERNAL_MALLOC(sizeof(char) * KMP_AFFINITY_FORMAT_SIZE);
-  }
-  KMP_STRCPY_S(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, m.str);
-  __kmp_str_free(&m.str);
-#endif
-
-  // Now process all of the settings.
-  for (i = 0; i < block.count; ++i) {
-    __kmp_stg_parse(block.vars[i].name, block.vars[i].value);
-  }
-
-  // If user locks have already been allocated, don't reset the lock vptr table.
-  if (!__kmp_init_user_locks) {
-    if (__kmp_user_lock_kind == lk_default) {
-      __kmp_user_lock_kind = lk_queuing;
-    }
-#if KMP_USE_DYNAMIC_LOCK
-    __kmp_init_dynamic_user_locks();
-#else
-    __kmp_set_user_lock_vptrs(__kmp_user_lock_kind);
-#endif
-  } else {
-    KMP_DEBUG_ASSERT(string != NULL); // kmp_set_defaults() was called
-    KMP_DEBUG_ASSERT(__kmp_user_lock_kind != lk_default);
-// Binds lock functions again to follow the transition between different
-// KMP_CONSISTENCY_CHECK values. Calling this again is harmless as long
-// as we do not allow lock kind changes after making a call to any
-// user lock functions (true).
-#if KMP_USE_DYNAMIC_LOCK
-    __kmp_init_dynamic_user_locks();
-#else
-    __kmp_set_user_lock_vptrs(__kmp_user_lock_kind);
-#endif
-  }
-
-#if KMP_AFFINITY_SUPPORTED
-
-  if (!TCR_4(__kmp_init_middle)) {
-#if KMP_USE_HWLOC
-    // Force using hwloc when either tiles or numa nodes requested within
-    // KMP_HW_SUBSET and no other topology method is requested
-    if ((__kmp_hws_node.num > 0 || __kmp_hws_tile.num > 0 ||
-         __kmp_affinity_gran == affinity_gran_tile) &&
-        (__kmp_affinity_top_method == affinity_top_method_default)) {
-      __kmp_affinity_top_method = affinity_top_method_hwloc;
-    }
-#endif
-    // Determine if the machine/OS is actually capable of supporting
-    // affinity.
-    const char *var = "KMP_AFFINITY";
-    KMPAffinity::pick_api();
-#if KMP_USE_HWLOC
-    // If Hwloc topology discovery was requested but affinity was also disabled,
-    // then tell user that Hwloc request is being ignored and use default
-    // topology discovery method.
- if (__kmp_affinity_top_method == affinity_top_method_hwloc && - __kmp_affinity_dispatch->get_api_type() != KMPAffinity::HWLOC) { - KMP_WARNING(AffIgnoringHwloc, var); - __kmp_affinity_top_method = affinity_top_method_all; - } -#endif - if (__kmp_affinity_type == affinity_disabled) { - KMP_AFFINITY_DISABLE(); - } else if (!KMP_AFFINITY_CAPABLE()) { - __kmp_affinity_dispatch->determine_capable(var); - if (!KMP_AFFINITY_CAPABLE()) { - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && - (__kmp_affinity_type != affinity_default) && - (__kmp_affinity_type != affinity_none) && - (__kmp_affinity_type != affinity_disabled))) { - KMP_WARNING(AffNotSupported, var); - } - __kmp_affinity_type = affinity_disabled; - __kmp_affinity_respect_mask = 0; - __kmp_affinity_gran = affinity_gran_fine; - } - } - -#if OMP_40_ENABLED - if (__kmp_affinity_type == affinity_disabled) { - __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; - } else if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_true) { - // OMP_PROC_BIND=true maps to OMP_PROC_BIND=spread. - __kmp_nested_proc_bind.bind_types[0] = proc_bind_spread; - } -#endif /* OMP_40_ENABLED */ - - if (KMP_AFFINITY_CAPABLE()) { - -#if KMP_GROUP_AFFINITY - // This checks to see if the initial affinity mask is equal - // to a single windows processor group. If it is, then we do - // not respect the initial affinity mask and instead, use the - // entire machine. - bool exactly_one_group = false; - if (__kmp_num_proc_groups > 1) { - int group; - bool within_one_group; - // Get the initial affinity mask and determine if it is - // contained within a single group. - kmp_affin_mask_t *init_mask; - KMP_CPU_ALLOC(init_mask); - __kmp_get_system_affinity(init_mask, TRUE); - group = __kmp_get_proc_group(init_mask); - within_one_group = (group >= 0); - // If the initial affinity is within a single group, - // then determine if it is equal to that single group. - if (within_one_group) { - DWORD num_bits_in_group = __kmp_GetActiveProcessorCount(group); - DWORD num_bits_in_mask = 0; - for (int bit = init_mask->begin(); bit != init_mask->end(); - bit = init_mask->next(bit)) - num_bits_in_mask++; - exactly_one_group = (num_bits_in_group == num_bits_in_mask); - } - KMP_CPU_FREE(init_mask); - } - - // Handle the Win 64 group affinity stuff if there are multiple - // processor groups, or if the user requested it, and OMP 4.0 - // affinity is not in effect. 
- if (((__kmp_num_proc_groups > 1) && - (__kmp_affinity_type == affinity_default) -#if OMP_40_ENABLED - && (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default)) -#endif - || (__kmp_affinity_top_method == affinity_top_method_group)) { - if (__kmp_affinity_respect_mask == affinity_respect_mask_default && - exactly_one_group) { - __kmp_affinity_respect_mask = FALSE; - } - if (__kmp_affinity_type == affinity_default) { - __kmp_affinity_type = affinity_compact; -#if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; -#endif - } - if (__kmp_affinity_top_method == affinity_top_method_default) { - if (__kmp_affinity_gran == affinity_gran_default) { - __kmp_affinity_top_method = affinity_top_method_group; - __kmp_affinity_gran = affinity_gran_group; - } else if (__kmp_affinity_gran == affinity_gran_group) { - __kmp_affinity_top_method = affinity_top_method_group; - } else { - __kmp_affinity_top_method = affinity_top_method_all; - } - } else if (__kmp_affinity_top_method == affinity_top_method_group) { - if (__kmp_affinity_gran == affinity_gran_default) { - __kmp_affinity_gran = affinity_gran_group; - } else if ((__kmp_affinity_gran != affinity_gran_group) && - (__kmp_affinity_gran != affinity_gran_fine) && - (__kmp_affinity_gran != affinity_gran_thread)) { - const char *str = NULL; - switch (__kmp_affinity_gran) { - case affinity_gran_core: - str = "core"; - break; - case affinity_gran_package: - str = "package"; - break; - case affinity_gran_node: - str = "node"; - break; - case affinity_gran_tile: - str = "tile"; - break; - default: - KMP_DEBUG_ASSERT(0); - } - KMP_WARNING(AffGranTopGroup, var, str); - __kmp_affinity_gran = affinity_gran_fine; - } - } else { - if (__kmp_affinity_gran == affinity_gran_default) { - __kmp_affinity_gran = affinity_gran_core; - } else if (__kmp_affinity_gran == affinity_gran_group) { - const char *str = NULL; - switch (__kmp_affinity_type) { - case affinity_physical: - str = "physical"; - break; - case affinity_logical: - str = "logical"; - break; - case affinity_compact: - str = "compact"; - break; - case affinity_scatter: - str = "scatter"; - break; - case affinity_explicit: - str = "explicit"; - break; - // No MIC on windows, so no affinity_balanced case - default: - KMP_DEBUG_ASSERT(0); - } - KMP_WARNING(AffGranGroupType, var, str); - __kmp_affinity_gran = affinity_gran_core; - } - } - } else - -#endif /* KMP_GROUP_AFFINITY */ - - { - if (__kmp_affinity_respect_mask == affinity_respect_mask_default) { -#if KMP_GROUP_AFFINITY - if (__kmp_num_proc_groups > 1 && exactly_one_group) { - __kmp_affinity_respect_mask = FALSE; - } else -#endif /* KMP_GROUP_AFFINITY */ - { - __kmp_affinity_respect_mask = TRUE; - } - } -#if OMP_40_ENABLED - if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) && - (__kmp_nested_proc_bind.bind_types[0] != proc_bind_default)) { - if (__kmp_affinity_type == affinity_default) { - __kmp_affinity_type = affinity_compact; - __kmp_affinity_dups = FALSE; - } - } else -#endif /* OMP_40_ENABLED */ - if (__kmp_affinity_type == affinity_default) { -#if OMP_40_ENABLED -#if KMP_MIC_SUPPORTED - if (__kmp_mic_type != non_mic) { - __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; - } else -#endif - { - __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; - } -#endif /* OMP_40_ENABLED */ -#if KMP_MIC_SUPPORTED - if (__kmp_mic_type != non_mic) { - __kmp_affinity_type = affinity_scatter; - } else -#endif - { - __kmp_affinity_type = affinity_none; - } - } - if ((__kmp_affinity_gran == affinity_gran_default) && - 
(__kmp_affinity_gran_levels < 0)) { -#if KMP_MIC_SUPPORTED - if (__kmp_mic_type != non_mic) { - __kmp_affinity_gran = affinity_gran_fine; - } else -#endif - { - __kmp_affinity_gran = affinity_gran_core; - } - } - if (__kmp_affinity_top_method == affinity_top_method_default) { - __kmp_affinity_top_method = affinity_top_method_all; - } - } - } - - K_DIAG(1, ("__kmp_affinity_type == %d\n", __kmp_affinity_type)); - K_DIAG(1, ("__kmp_affinity_compact == %d\n", __kmp_affinity_compact)); - K_DIAG(1, ("__kmp_affinity_offset == %d\n", __kmp_affinity_offset)); - K_DIAG(1, ("__kmp_affinity_verbose == %d\n", __kmp_affinity_verbose)); - K_DIAG(1, ("__kmp_affinity_warnings == %d\n", __kmp_affinity_warnings)); - K_DIAG(1, ("__kmp_affinity_respect_mask == %d\n", - __kmp_affinity_respect_mask)); - K_DIAG(1, ("__kmp_affinity_gran == %d\n", __kmp_affinity_gran)); - - KMP_DEBUG_ASSERT(__kmp_affinity_type != affinity_default); -#if OMP_40_ENABLED - KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.bind_types[0] != proc_bind_default); - K_DIAG(1, ("__kmp_nested_proc_bind.bind_types[0] == %d\n", - __kmp_nested_proc_bind.bind_types[0])); -#endif - } - -#endif /* KMP_AFFINITY_SUPPORTED */ - - if (__kmp_version) { - __kmp_print_version_1(); - } - - // Post-initialization step: some env. vars need their value's further - // processing - if (string != NULL) { // kmp_set_defaults() was called - __kmp_aux_env_initialize(&block); - } - - __kmp_env_blk_free(&block); - - KMP_MB(); - -} // __kmp_env_initialize - -void __kmp_env_print() { - - kmp_env_blk_t block; - int i; - kmp_str_buf_t buffer; - - __kmp_stg_init(); - __kmp_str_buf_init(&buffer); - - __kmp_env_blk_init(&block, NULL); - __kmp_env_blk_sort(&block); - - // Print real environment values. - __kmp_str_buf_print(&buffer, "\n%s\n\n", KMP_I18N_STR(UserSettings)); - for (i = 0; i < block.count; ++i) { - char const *name = block.vars[i].name; - char const *value = block.vars[i].value; - if ((KMP_STRLEN(name) > 4 && strncmp(name, "KMP_", 4) == 0) || - strncmp(name, "OMP_", 4) == 0 -#ifdef KMP_GOMP_COMPAT - || strncmp(name, "GOMP_", 5) == 0 -#endif // KMP_GOMP_COMPAT - ) { - __kmp_str_buf_print(&buffer, " %s=%s\n", name, value); - } - } - __kmp_str_buf_print(&buffer, "\n"); - - // Print internal (effective) settings. 
-  __kmp_str_buf_print(&buffer, "%s\n\n", KMP_I18N_STR(EffectiveSettings));
-  for (int i = 0; i < __kmp_stg_count; ++i) {
-    if (__kmp_stg_table[i].print != NULL) {
-      __kmp_stg_table[i].print(&buffer, __kmp_stg_table[i].name,
-                               __kmp_stg_table[i].data);
-    }
-  }
-
-  __kmp_printf("%s", buffer.str);
-
-  __kmp_env_blk_free(&block);
-  __kmp_str_buf_free(&buffer);
-
-  __kmp_printf("\n");
-
-} // __kmp_env_print
-
-#if OMP_40_ENABLED
-void __kmp_env_print_2() {
-
-  kmp_env_blk_t block;
-  kmp_str_buf_t buffer;
-
-  __kmp_env_format = 1;
-
-  __kmp_stg_init();
-  __kmp_str_buf_init(&buffer);
-
-  __kmp_env_blk_init(&block, NULL);
-  __kmp_env_blk_sort(&block);
-
-  __kmp_str_buf_print(&buffer, "\n%s\n", KMP_I18N_STR(DisplayEnvBegin));
-  __kmp_str_buf_print(&buffer, " _OPENMP='%d'\n", __kmp_openmp_version);
-
-  for (int i = 0; i < __kmp_stg_count; ++i) {
-    if (__kmp_stg_table[i].print != NULL &&
-        ((__kmp_display_env &&
-          strncmp(__kmp_stg_table[i].name, "OMP_", 4) == 0) ||
-         __kmp_display_env_verbose)) {
-      __kmp_stg_table[i].print(&buffer, __kmp_stg_table[i].name,
-                               __kmp_stg_table[i].data);
-    }
-  }
-
-  __kmp_str_buf_print(&buffer, "%s\n", KMP_I18N_STR(DisplayEnvEnd));
-  __kmp_str_buf_print(&buffer, "\n");
-
-  __kmp_printf("%s", buffer.str);
-
-  __kmp_env_blk_free(&block);
-  __kmp_str_buf_free(&buffer);
-
-  __kmp_printf("\n");
-
-} // __kmp_env_print_2
-#endif // OMP_40_ENABLED
-
-// end of file

Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_settings.cpp
___________________________________________________________________
Deleted: svn:eol-style
## -1 +0,0 ##
-native
\ No newline at end of property
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Deleted: svn:mime-type
## -1 +0,0 ##
-text/plain
\ No newline at end of property
Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_taskdeps.cpp
===================================================================
--- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_taskdeps.cpp	(revision 348960)
+++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_taskdeps.cpp	(nonexistent)
@@ -1,664 +0,0 @@
-/*
- * kmp_taskdeps.cpp
- */
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-//#define KMP_SUPPORT_GRAPH_OUTPUT 1
-
-#include "kmp.h"
-#include "kmp_io.h"
-#include "kmp_wait_release.h"
-#include "kmp_taskdeps.h"
-#if OMPT_SUPPORT
-#include "ompt-specific.h"
-#endif
-
-#if OMP_40_ENABLED
-
-// TODO: Improve memory allocation? keep a list of pre-allocated structures?
-// allocate in blocks? re-use finished list entries?
-// TODO: don't use atomic ref counters for stack-allocated nodes.
-// TODO: find an alternate to atomic refs for heap-allocated nodes?
-// TODO: Finish graph output support
-// TODO: kmp_lock_t seems a tad too big (and heavy weight) for this. Check other
-// runtime locks
-// TODO: Any ITT support needed?
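To ground what the deleted code below implements: the runtime bookkeeping behind OpenMP "depend" clauses. A minimal sketch of the user-level construct this file serves (illustrative example, not part of the original diff):

#include <cstdio>
int main() {
  int x = 0;
#pragma omp parallel
#pragma omp single
  {
#pragma omp task depend(out : x) // predecessor: writes x
    x = 42;
#pragma omp task depend(in : x) // successor: may not start until the writer finishes
    std::printf("%d\n", x);
  }
  return 0;
}

Each unique base address (&x here) is tracked in a kmp_dephash_t entry, and each task with dependences carries a kmp_depnode_t whose predecessor count gates when the task may be queued.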
-
-#ifdef KMP_SUPPORT_GRAPH_OUTPUT
-static std::atomic<kmp_int32> kmp_node_id_seed = ATOMIC_VAR_INIT(0);
-#endif
-
-static void __kmp_init_node(kmp_depnode_t *node) {
-  node->dn.successors = NULL;
-  node->dn.task = NULL; // will point to the right task
-  // once dependences have been processed
-  for (int i = 0; i < MAX_MTX_DEPS; ++i)
-    node->dn.mtx_locks[i] = NULL;
-  node->dn.mtx_num_locks = 0;
-  __kmp_init_lock(&node->dn.lock);
-  KMP_ATOMIC_ST_RLX(&node->dn.nrefs, 1); // init creates the first reference
-#ifdef KMP_SUPPORT_GRAPH_OUTPUT
-  node->dn.id = KMP_ATOMIC_INC(&kmp_node_id_seed);
-#endif
-}
-
-static inline kmp_depnode_t *__kmp_node_ref(kmp_depnode_t *node) {
-  KMP_ATOMIC_INC(&node->dn.nrefs);
-  return node;
-}
-
-enum { KMP_DEPHASH_OTHER_SIZE = 97, KMP_DEPHASH_MASTER_SIZE = 997 };
-
-static inline kmp_int32 __kmp_dephash_hash(kmp_intptr_t addr, size_t hsize) {
-  // TODO alternate to try: set = (((Addr64)(addrUsefulBits * 9.618)) %
-  // m_num_sets );
-  return ((addr >> 6) ^ (addr >> 2)) % hsize;
-}
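A quick sanity check of the hash mixing above (illustrative arithmetic, not in the original source):

// For a 64-byte-aligned address addr = 0x1000:
//   addr >> 6 = 0x40 and addr >> 2 = 0x400, so the XOR gives 0x440 = 1088,
//   and 1088 % 97 (KMP_DEPHASH_OTHER_SIZE) selects bucket 21.
// Shifting first drops the low bits, which are mostly zero for aligned
// addresses and would otherwise cluster entries into a few buckets.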
-
-static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
-                                           kmp_taskdata_t *current_task) {
-  kmp_dephash_t *h;
-
-  size_t h_size;
-
-  if (current_task->td_flags.tasktype == TASK_IMPLICIT)
-    h_size = KMP_DEPHASH_MASTER_SIZE;
-  else
-    h_size = KMP_DEPHASH_OTHER_SIZE;
-
-  kmp_int32 size =
-      h_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t);
-
-#if USE_FAST_MEMORY
-  h = (kmp_dephash_t *)__kmp_fast_allocate(thread, size);
-#else
-  h = (kmp_dephash_t *)__kmp_thread_malloc(thread, size);
-#endif
-  h->size = h_size;
-
-#ifdef KMP_DEBUG
-  h->nelements = 0;
-  h->nconflicts = 0;
-#endif
-  h->buckets = (kmp_dephash_entry **)(h + 1);
-
-  for (size_t i = 0; i < h_size; i++)
-    h->buckets[i] = 0;
-
-  return h;
-}
-
-#define ENTRY_LAST_INS 0
-#define ENTRY_LAST_MTXS 1
-
-static kmp_dephash_entry *
-__kmp_dephash_find(kmp_info_t *thread, kmp_dephash_t *h, kmp_intptr_t addr) {
-  kmp_int32 bucket = __kmp_dephash_hash(addr, h->size);
-
-  kmp_dephash_entry_t *entry;
-  for (entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket)
-    if (entry->addr == addr)
-      break;
-
-  if (entry == NULL) {
-// create entry. This is only done by one thread so no locking required
-#if USE_FAST_MEMORY
-    entry = (kmp_dephash_entry_t *)__kmp_fast_allocate(
-        thread, sizeof(kmp_dephash_entry_t));
-#else
-    entry = (kmp_dephash_entry_t *)__kmp_thread_malloc(
-        thread, sizeof(kmp_dephash_entry_t));
-#endif
-    entry->addr = addr;
-    entry->last_out = NULL;
-    entry->last_ins = NULL;
-    entry->last_mtxs = NULL;
-    entry->last_flag = ENTRY_LAST_INS;
-    entry->mtx_lock = NULL;
-    entry->next_in_bucket = h->buckets[bucket];
-    h->buckets[bucket] = entry;
-#ifdef KMP_DEBUG
-    h->nelements++;
-    if (entry->next_in_bucket)
-      h->nconflicts++;
-#endif
-  }
-  return entry;
-}
-
-static kmp_depnode_list_t *__kmp_add_node(kmp_info_t *thread,
-                                          kmp_depnode_list_t *list,
-                                          kmp_depnode_t *node) {
-  kmp_depnode_list_t *new_head;
-
-#if USE_FAST_MEMORY
-  new_head = (kmp_depnode_list_t *)__kmp_fast_allocate(
-      thread, sizeof(kmp_depnode_list_t));
-#else
-  new_head = (kmp_depnode_list_t *)__kmp_thread_malloc(
-      thread, sizeof(kmp_depnode_list_t));
-#endif
-
-  new_head->node = __kmp_node_ref(node);
-  new_head->next = list;
-
-  return new_head;
-}
-
-static inline void __kmp_track_dependence(kmp_depnode_t *source,
-                                          kmp_depnode_t *sink,
-                                          kmp_task_t *sink_task) {
-#ifdef KMP_SUPPORT_GRAPH_OUTPUT
-  kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
-  // do not use sink->dn.task as that is only filled after the dependencies
-  // are already processed!
-  kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);
-
-  __kmp_printf("%d(%s) -> %d(%s)\n", source->dn.id,
-               task_source->td_ident->psource, sink->dn.id,
-               task_sink->td_ident->psource);
-#endif
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  /* OMPT tracks dependences between tasks (a=source, b=sink) in which
-     task a blocks the execution of b through the ompt_new_dependence_callback
-  */
-  if (ompt_enabled.ompt_callback_task_dependence) {
-    kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
-    kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);
-
-    ompt_callbacks.ompt_callback(ompt_callback_task_dependence)(
-        &(task_source->ompt_task_info.task_data),
-        &(task_sink->ompt_task_info.task_data));
-  }
-#endif /* OMPT_SUPPORT && OMPT_OPTIONAL */
-}
-
-static inline kmp_int32
-__kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread,
-                             kmp_task_t *task, kmp_depnode_t *node,
-                             kmp_depnode_list_t *plist) {
-  if (!plist)
-    return 0;
-  kmp_int32 npredecessors = 0;
-  // link node as successor of list elements
-  for (kmp_depnode_list_t *p = plist; p; p = p->next) {
-    kmp_depnode_t *dep = p->node;
-    if (dep->dn.task) {
-      KMP_ACQUIRE_DEPNODE(gtid, dep);
-      if (dep->dn.task) {
-        __kmp_track_dependence(dep, node, task);
-        dep->dn.successors = __kmp_add_node(thread, dep->dn.successors, node);
-        KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
-                      "%p\n",
-                      gtid, KMP_TASK_TO_TASKDATA(dep->dn.task),
-                      KMP_TASK_TO_TASKDATA(task)));
-        npredecessors++;
-      }
-      KMP_RELEASE_DEPNODE(gtid, dep);
-    }
-  }
-  return npredecessors;
-}
-
-static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
-                                                     kmp_info_t *thread,
-                                                     kmp_task_t *task,
-                                                     kmp_depnode_t *source,
-                                                     kmp_depnode_t *sink) {
-  if (!sink)
-    return 0;
-  kmp_int32 npredecessors = 0;
-  if (sink->dn.task) {
-    // synchronously add source to sink's list of successors
-    KMP_ACQUIRE_DEPNODE(gtid, sink);
-    if (sink->dn.task) {
-      __kmp_track_dependence(sink, source, task);
-      sink->dn.successors = __kmp_add_node(thread, sink->dn.successors, source);
-      KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
from %p to " - "%p\n", - gtid, KMP_TASK_TO_TASKDATA(sink->dn.task), - KMP_TASK_TO_TASKDATA(task))); - npredecessors++; - } - KMP_RELEASE_DEPNODE(gtid, sink); - } - return npredecessors; -} - -template -static inline kmp_int32 -__kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash, - bool dep_barrier, kmp_int32 ndeps, - kmp_depend_info_t *dep_list, kmp_task_t *task) { - KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d processing %d dependencies : " - "dep_barrier = %d\n", - filter, gtid, ndeps, dep_barrier)); - - kmp_info_t *thread = __kmp_threads[gtid]; - kmp_int32 npredecessors = 0; - for (kmp_int32 i = 0; i < ndeps; i++) { - const kmp_depend_info_t *dep = &dep_list[i]; - - if (filter && dep->base_addr == 0) - continue; // skip filtered entries - - kmp_dephash_entry_t *info = - __kmp_dephash_find(thread, hash, dep->base_addr); - kmp_depnode_t *last_out = info->last_out; - kmp_depnode_list_t *last_ins = info->last_ins; - kmp_depnode_list_t *last_mtxs = info->last_mtxs; - - if (dep->flags.out) { // out --> clean lists of ins and mtxs if any - if (last_ins || last_mtxs) { - if (info->last_flag == ENTRY_LAST_INS) { // INS were last - npredecessors += - __kmp_depnode_link_successor(gtid, thread, task, node, last_ins); - } else { // MTXS were last - npredecessors += - __kmp_depnode_link_successor(gtid, thread, task, node, last_mtxs); - } - __kmp_depnode_list_free(thread, last_ins); - __kmp_depnode_list_free(thread, last_mtxs); - info->last_ins = NULL; - info->last_mtxs = NULL; - } else { - npredecessors += - __kmp_depnode_link_successor(gtid, thread, task, node, last_out); - } - __kmp_node_deref(thread, last_out); - if (dep_barrier) { - // if this is a sync point in the serial sequence, then the previous - // outputs are guaranteed to be completed after the execution of this - // task so the previous output nodes can be cleared. 
- info->last_out = NULL; - } else { - info->last_out = __kmp_node_ref(node); - } - } else if (dep->flags.in) { - // in --> link node to either last_out or last_mtxs, clean earlier deps - if (last_mtxs) { - npredecessors += - __kmp_depnode_link_successor(gtid, thread, task, node, last_mtxs); - __kmp_node_deref(thread, last_out); - info->last_out = NULL; - if (info->last_flag == ENTRY_LAST_MTXS && last_ins) { // MTXS were last - // clean old INS before creating new list - __kmp_depnode_list_free(thread, last_ins); - info->last_ins = NULL; - } - } else { - // link node as successor of the last_out if any - npredecessors += - __kmp_depnode_link_successor(gtid, thread, task, node, last_out); - } - info->last_flag = ENTRY_LAST_INS; - info->last_ins = __kmp_add_node(thread, info->last_ins, node); - } else { - KMP_DEBUG_ASSERT(dep->flags.mtx == 1); - // mtx --> link node to either last_out or last_ins, clean earlier deps - if (last_ins) { - npredecessors += - __kmp_depnode_link_successor(gtid, thread, task, node, last_ins); - __kmp_node_deref(thread, last_out); - info->last_out = NULL; - if (info->last_flag == ENTRY_LAST_INS && last_mtxs) { // INS were last - // clean old MTXS before creating new list - __kmp_depnode_list_free(thread, last_mtxs); - info->last_mtxs = NULL; - } - } else { - // link node as successor of the last_out if any - npredecessors += - __kmp_depnode_link_successor(gtid, thread, task, node, last_out); - } - info->last_flag = ENTRY_LAST_MTXS; - info->last_mtxs = __kmp_add_node(thread, info->last_mtxs, node); - if (info->mtx_lock == NULL) { - info->mtx_lock = (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t)); - __kmp_init_lock(info->mtx_lock); - } - KMP_DEBUG_ASSERT(node->dn.mtx_num_locks < MAX_MTX_DEPS); - kmp_int32 m; - // Save lock in node's array - for (m = 0; m < MAX_MTX_DEPS; ++m) { - // sort pointers in decreasing order to avoid potential livelock - if (node->dn.mtx_locks[m] < info->mtx_lock) { - KMP_DEBUG_ASSERT(node->dn.mtx_locks[node->dn.mtx_num_locks] == NULL); - for (int n = node->dn.mtx_num_locks; n > m; --n) { - // shift right all lesser non-NULL pointers - KMP_DEBUG_ASSERT(node->dn.mtx_locks[n - 1] != NULL); - node->dn.mtx_locks[n] = node->dn.mtx_locks[n - 1]; - } - node->dn.mtx_locks[m] = info->mtx_lock; - break; - } - } - KMP_DEBUG_ASSERT(m < MAX_MTX_DEPS); // must break from loop - node->dn.mtx_num_locks++; - } - } - KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter, - gtid, npredecessors)); - return npredecessors; -} - -#define NO_DEP_BARRIER (false) -#define DEP_BARRIER (true) - -// returns true if the task has any outstanding dependence -static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node, - kmp_task_t *task, kmp_dephash_t *hash, - bool dep_barrier, kmp_int32 ndeps, - kmp_depend_info_t *dep_list, - kmp_int32 ndeps_noalias, - kmp_depend_info_t *noalias_dep_list) { - int i, n_mtxs = 0; -#if KMP_DEBUG - kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); -#endif - KA_TRACE(20, ("__kmp_check_deps: T#%d checking dependencies for task %p : %d " - "possibly aliased dependencies, %d non-aliased dependencies : " - "dep_barrier=%d .\n", - gtid, taskdata, ndeps, ndeps_noalias, dep_barrier)); - - // Filter deps in dep_list - // TODO: Different algorithm for large dep_list ( > 10 ?
) - for (i = 0; i < ndeps; i++) { - if (dep_list[i].base_addr != 0) { - for (int j = i + 1; j < ndeps; j++) { - if (dep_list[i].base_addr == dep_list[j].base_addr) { - dep_list[i].flags.in |= dep_list[j].flags.in; - dep_list[i].flags.out |= - (dep_list[j].flags.out || - (dep_list[i].flags.in && dep_list[j].flags.mtx) || - (dep_list[i].flags.mtx && dep_list[j].flags.in)); - dep_list[i].flags.mtx = - dep_list[i].flags.mtx | dep_list[j].flags.mtx && - !dep_list[i].flags.out; - dep_list[j].base_addr = 0; // Mark j element as void - } - } - if (dep_list[i].flags.mtx) { - // limit number of mtx deps to MAX_MTX_DEPS per node - if (n_mtxs < MAX_MTX_DEPS && task != NULL) { - ++n_mtxs; - } else { - dep_list[i].flags.in = 1; // downgrade mutexinoutset to inout - dep_list[i].flags.out = 1; - dep_list[i].flags.mtx = 0; - } - } - } - } - - // doesn't need to be atomic as no other thread is going to be accessing this - // node just yet. - // npredecessors is set to -1 to ensure that none of the releasing tasks queues - // this task before we have finished processing all the dependencies - node->dn.npredecessors = -1; - - // used to pack all npredecessors additions into a single atomic operation at - // the end - int npredecessors; - - npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier, ndeps, - dep_list, task); - npredecessors += __kmp_process_deps<false>( - gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list, task); - - node->dn.task = task; - KMP_MB(); - - // Account for our initial fake value - npredecessors++; - - // Update predecessors and obtain current value to check if there are still - // any outstanding dependences (some tasks may have finished while we processed - // the dependences) - npredecessors = - node->dn.npredecessors.fetch_add(npredecessors) + npredecessors; - - KA_TRACE(20, ("__kmp_check_deps: T#%d found %d predecessors for task %p \n", - gtid, npredecessors, taskdata)); - - // beyond this point the task could be queued (and executed) by a releasing - // task... - return npredecessors > 0 ? true : false; -} - -/*!
-@ingroup TASKING -@param loc_ref location of the original task directive -@param gtid Global Thread ID of encountering thread -@param new_task task thunk allocated by __kmp_omp_task_alloc() for the ''new -task'' -@param ndeps Number of depend items with possible aliasing -@param dep_list List of depend items with possible aliasing -@param ndeps_noalias Number of depend items with no aliasing -@param noalias_dep_list List of depend items with no aliasing - -@return Returns either TASK_CURRENT_NOT_QUEUED if the current task was not -suspended and queued, or TASK_CURRENT_QUEUED if it was suspended and queued - -Schedule a non-thread-switchable task with dependences for execution -*/ -kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, - kmp_task_t *new_task, kmp_int32 ndeps, - kmp_depend_info_t *dep_list, - kmp_int32 ndeps_noalias, - kmp_depend_info_t *noalias_dep_list) { - - kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task); - KA_TRACE(10, ("__kmpc_omp_task_with_deps(enter): T#%d loc=%p task=%p\n", gtid, - loc_ref, new_taskdata)); - - kmp_info_t *thread = __kmp_threads[gtid]; - kmp_taskdata_t *current_task = thread->th.th_current_task; - -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - OMPT_STORE_RETURN_ADDRESS(gtid); - if (!current_task->ompt_task_info.frame.enter_frame.ptr) - current_task->ompt_task_info.frame.enter_frame.ptr = - OMPT_GET_FRAME_ADDRESS(0); - if (ompt_enabled.ompt_callback_task_create) { - ompt_data_t task_data = ompt_data_none; - ompt_callbacks.ompt_callback(ompt_callback_task_create)( - current_task ? &(current_task->ompt_task_info.task_data) : &task_data, - current_task ? &(current_task->ompt_task_info.frame) : NULL, - &(new_taskdata->ompt_task_info.task_data), - ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 1, - OMPT_LOAD_RETURN_ADDRESS(gtid)); - } - - new_taskdata->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } - -#if OMPT_OPTIONAL - /* OMPT grabs all dependences if requested by the tool */ - if (ndeps + ndeps_noalias > 0 && - ompt_enabled.ompt_callback_dependences) { - kmp_int32 i; - - new_taskdata->ompt_task_info.ndeps = ndeps + ndeps_noalias; - new_taskdata->ompt_task_info.deps = - (ompt_dependence_t *)KMP_OMPT_DEPS_ALLOC( - thread, (ndeps + ndeps_noalias) * sizeof(ompt_dependence_t)); - - KMP_ASSERT(new_taskdata->ompt_task_info.deps != NULL); - - for (i = 0; i < ndeps; i++) { - new_taskdata->ompt_task_info.deps[i].variable.ptr = - (void *)dep_list[i].base_addr; - if (dep_list[i].flags.in && dep_list[i].flags.out) - new_taskdata->ompt_task_info.deps[i].dependence_type = - ompt_dependence_type_inout; - else if (dep_list[i].flags.out) - new_taskdata->ompt_task_info.deps[i].dependence_type = - ompt_dependence_type_out; - else if (dep_list[i].flags.in) - new_taskdata->ompt_task_info.deps[i].dependence_type = - ompt_dependence_type_in; - } - for (i = 0; i < ndeps_noalias; i++) { - new_taskdata->ompt_task_info.deps[ndeps + i].variable.ptr = - (void *)noalias_dep_list[i].base_addr; - if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out) - new_taskdata->ompt_task_info.deps[ndeps + i].dependence_type = - ompt_dependence_type_inout; - else if (noalias_dep_list[i].flags.out) - new_taskdata->ompt_task_info.deps[ndeps + i].dependence_type = - ompt_dependence_type_out; - else if (noalias_dep_list[i].flags.in) - new_taskdata->ompt_task_info.deps[ndeps + i].dependence_type = - ompt_dependence_type_in; - } - ompt_callbacks.ompt_callback(ompt_callback_dependences)( -
&(new_taskdata->ompt_task_info.task_data), - new_taskdata->ompt_task_info.deps, new_taskdata->ompt_task_info.ndeps); - /* We can now free the allocated memory for the dependencies */ - /* For OMPD we might want to delay the free until task_end */ - KMP_OMPT_DEPS_FREE(thread, new_taskdata->ompt_task_info.deps); - new_taskdata->ompt_task_info.deps = NULL; - new_taskdata->ompt_task_info.ndeps = 0; - } -#endif /* OMPT_OPTIONAL */ -#endif /* OMPT_SUPPORT */ - - bool serial = current_task->td_flags.team_serial || - current_task->td_flags.tasking_ser || - current_task->td_flags.final; -#if OMP_45_ENABLED - kmp_task_team_t *task_team = thread->th.th_task_team; - serial = serial && !(task_team && task_team->tt.tt_found_proxy_tasks); -#endif - - if (!serial && (ndeps > 0 || ndeps_noalias > 0)) { - /* if no dependencies have been tracked yet, create the dependence hash */ - if (current_task->td_dephash == NULL) - current_task->td_dephash = __kmp_dephash_create(thread, current_task); - -#if USE_FAST_MEMORY - kmp_depnode_t *node = - (kmp_depnode_t *)__kmp_fast_allocate(thread, sizeof(kmp_depnode_t)); -#else - kmp_depnode_t *node = - (kmp_depnode_t *)__kmp_thread_malloc(thread, sizeof(kmp_depnode_t)); -#endif - - __kmp_init_node(node); - new_taskdata->td_depnode = node; - - if (__kmp_check_deps(gtid, node, new_task, current_task->td_dephash, - NO_DEP_BARRIER, ndeps, dep_list, ndeps_noalias, - noalias_dep_list)) { - KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking " - "dependencies: " - "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", - gtid, loc_ref, new_taskdata)); -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - current_task->ompt_task_info.frame.enter_frame = ompt_data_none; - } -#endif - return TASK_CURRENT_NOT_QUEUED; - } - } else { - KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d ignored dependencies " - "for task (serialized) " - "loc=%p task=%p\n", - gtid, loc_ref, new_taskdata)); - } - - KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had no blocking " - "dependencies : " - "loc=%p task=%p, transferring to __kmp_omp_task\n", - gtid, loc_ref, new_taskdata)); - - kmp_int32 ret = __kmp_omp_task(gtid, new_task, true); -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - current_task->ompt_task_info.frame.enter_frame = ompt_data_none; - } -#endif - return ret; -} - -/*! -@ingroup TASKING -@param loc_ref location of the original task directive -@param gtid Global Thread ID of encountering thread -@param ndeps Number of depend items with possible aliasing -@param dep_list List of depend items with possible aliasing -@param ndeps_noalias Number of depend items with no aliasing -@param noalias_dep_list List of depend items with no aliasing - -Blocks the current task until all specified dependencies have been fulfilled.
-*/ -void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, - kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, - kmp_depend_info_t *noalias_dep_list) { - KA_TRACE(10, ("__kmpc_omp_wait_deps(enter): T#%d loc=%p\n", gtid, loc_ref)); - - if (ndeps == 0 && ndeps_noalias == 0) { - KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no dependencies to " - "wait upon : loc=%p\n", - gtid, loc_ref)); - return; - } - - kmp_info_t *thread = __kmp_threads[gtid]; - kmp_taskdata_t *current_task = thread->th.th_current_task; - - // We can return immediately as: - // - dependences are not computed in serial teams (except with proxy tasks) - // - if the dephash is not yet created it means we have nothing to wait for - bool ignore = current_task->td_flags.team_serial || - current_task->td_flags.tasking_ser || - current_task->td_flags.final; -#if OMP_45_ENABLED - ignore = ignore && thread->th.th_task_team != NULL && - thread->th.th_task_team->tt.tt_found_proxy_tasks == FALSE; -#endif - ignore = ignore || current_task->td_dephash == NULL; - - if (ignore) { - KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking " - "dependencies : loc=%p\n", - gtid, loc_ref)); - return; - } - - kmp_depnode_t node = {0}; - __kmp_init_node(&node); - - if (!__kmp_check_deps(gtid, &node, NULL, current_task->td_dephash, - DEP_BARRIER, ndeps, dep_list, ndeps_noalias, - noalias_dep_list)) { - KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking " - "dependencies : loc=%p\n", - gtid, loc_ref)); - return; - } - - int thread_finished = FALSE; - kmp_flag_32 flag((std::atomic<kmp_uint32> *)&node.dn.npredecessors, 0U); - while (node.dn.npredecessors > 0) { - flag.execute_tasks(thread, gtid, FALSE, - &thread_finished USE_ITT_BUILD_ARG(NULL), - __kmp_task_stealing_constraint); - } - - KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d finished waiting : loc=%p\n", - gtid, loc_ref)); -} - -#endif /* OMP_40_ENABLED */ Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_taskdeps.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_environment.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_environment.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_environment.h (nonexistent) @@ -1,78 +0,0 @@ -/* - * kmp_environment.h -- Handle environment variables OS-independently. - */ - -//===----------------------------------------------------------------------===// - -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#ifndef KMP_ENVIRONMENT_H -#define KMP_ENVIRONMENT_H - -#ifdef __cplusplus -extern "C" { -#endif - -// Return a copy of the value of the environment variable or NULL if the -// variable does not exist. -// *Note*: Returned pointer *must* be freed after use with __kmp_env_free(). -char *__kmp_env_get(char const *name); -void __kmp_env_free(char const **value); - -// Return 1 if the environment variable exists or 0 if it does not exist.
-int __kmp_env_exists(char const *name); - -// Set the environment variable. -void __kmp_env_set(char const *name, char const *value, int overwrite); - -// Unset (remove) environment variable. -void __kmp_env_unset(char const *name); - -// ----------------------------------------------------------------------------- -// Working with environment blocks. - -/* kmp_env_blk_t is read-only collection of environment variables (or - environment-like). Usage: - -kmp_env_blk_t block; -__kmp_env_blk_init( & block, NULL ); // Initialize block from process - // environment. -// or -__kmp_env_blk_init( & block, "KMP_WARNING=1|KMP_AFFINITY=none" ); // from string -__kmp_env_blk_sort( & block ); // Optionally, sort list. -for ( i = 0; i < block.count; ++ i ) { - // Process block.vars[ i ].name and block.vars[ i ].value... -} -__kmp_env_block_free( & block ); -*/ - -struct __kmp_env_var { - char *name; - char *value; -}; -typedef struct __kmp_env_var kmp_env_var_t; - -struct __kmp_env_blk { - char *bulk; - kmp_env_var_t *vars; - int count; -}; -typedef struct __kmp_env_blk kmp_env_blk_t; - -void __kmp_env_blk_init(kmp_env_blk_t *block, char const *bulk); -void __kmp_env_blk_free(kmp_env_blk_t *block); -void __kmp_env_blk_sort(kmp_env_blk_t *block); -char const *__kmp_env_blk_var(kmp_env_blk_t *block, char const *name); - -#ifdef __cplusplus -} -#endif - -#endif // KMP_ENVIRONMENT_H - -// end of file // Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_environment.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_gsupport.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_gsupport.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_gsupport.cpp (nonexistent) @@ -1,2000 +0,0 @@ -/* - * kmp_gsupport.cpp - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - -#include "kmp.h" -#include "kmp_atomic.h" - -#if OMPT_SUPPORT -#include "ompt-specific.h" -#endif - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -#define MKLOC(loc, routine) \ - static ident_t(loc) = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"}; - -#include "kmp_ftn_os.h" - -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_BARRIER)(void) { - int gtid = __kmp_entry_gtid(); - MKLOC(loc, "GOMP_barrier"); - KA_TRACE(20, ("GOMP_barrier: T#%d\n", gtid)); -#if OMPT_SUPPORT && OMPT_OPTIONAL - ompt_frame_t *ompt_frame; - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(gtid); - } -#endif - __kmpc_barrier(&loc, gtid); -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - ompt_frame->enter_frame = ompt_data_none; - } -#endif -} - -// Mutual exclusion - -// The symbol that icc/ifort generates for unnamed critical sections -// - .gomp_critical_user_ - is defined using .comm in any object that references it. -// We can't reference it directly here in C code, as the symbol contains a ".". -// -// The RTL contains an assembly language definition of .gomp_critical_user_ -// with another symbol __kmp_unnamed_critical_addr initialized with its -// address. -extern kmp_critical_name *__kmp_unnamed_critical_addr; - -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_START)(void) { - int gtid = __kmp_entry_gtid(); - MKLOC(loc, "GOMP_critical_start"); - KA_TRACE(20, ("GOMP_critical_start: T#%d\n", gtid)); -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - __kmpc_critical(&loc, gtid, __kmp_unnamed_critical_addr); -} - -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_END)(void) { - int gtid = __kmp_get_gtid(); - MKLOC(loc, "GOMP_critical_end"); - KA_TRACE(20, ("GOMP_critical_end: T#%d\n", gtid)); -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - __kmpc_end_critical(&loc, gtid, __kmp_unnamed_critical_addr); -} - -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_NAME_START)(void **pptr) { - int gtid = __kmp_entry_gtid(); - MKLOC(loc, "GOMP_critical_name_start"); - KA_TRACE(20, ("GOMP_critical_name_start: T#%d\n", gtid)); - __kmpc_critical(&loc, gtid, (kmp_critical_name *)pptr); -} - -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_NAME_END)(void **pptr) { - int gtid = __kmp_get_gtid(); - MKLOC(loc, "GOMP_critical_name_end"); - KA_TRACE(20, ("GOMP_critical_name_end: T#%d\n", gtid)); - __kmpc_end_critical(&loc, gtid, (kmp_critical_name *)pptr); -} - -// The Gnu codegen tries to use locked operations to perform atomic updates -// inline. If it can't, then it calls GOMP_atomic_start() before performing -// the update and GOMP_atomic_end() afterward, regardless of the data type.
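For illustration only (an editor's sketch, not part of the upstream file): the call sequence gcc conceptually emits around an atomic update it cannot inline; the function and variables below are hypothetical.

  // Hypothetical lowering of:  #pragma omp atomic
  //                            *sum += x;
  extern void GOMP_atomic_start(void);
  extern void GOMP_atomic_end(void);
  static void atomic_update(long double *sum, long double x) {
    GOMP_atomic_start(); // serializes all callers on __kmp_atomic_lock
    *sum += x;           // plain update, protected only by the lock
    GOMP_atomic_end();   // releases the lock for the next updater
  }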
-void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ATOMIC_START)(void) { - int gtid = __kmp_entry_gtid(); - KA_TRACE(20, ("GOMP_atomic_start: T#%d\n", gtid)); - -#if OMPT_SUPPORT - __ompt_thread_assign_wait_id(0); -#endif - - __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); -} - -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ATOMIC_END)(void) { - int gtid = __kmp_get_gtid(); - KA_TRACE(20, ("GOMP_atomic_end: T#%d\n", gtid)); - __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); -} - -int KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SINGLE_START)(void) { - int gtid = __kmp_entry_gtid(); - MKLOC(loc, "GOMP_single_start"); - KA_TRACE(20, ("GOMP_single_start: T#%d\n", gtid)); - - if (!TCR_4(__kmp_init_parallel)) - __kmp_parallel_initialize(); - - // 3rd parameter == FALSE prevents kmp_enter_single from pushing a - // workshare when USE_CHECKS is defined. We need to avoid the push, - // as there is no corresponding GOMP_single_end() call. - kmp_int32 rc = __kmp_enter_single(gtid, &loc, FALSE); - -#if OMPT_SUPPORT && OMPT_OPTIONAL - kmp_info_t *this_thr = __kmp_threads[gtid]; - kmp_team_t *team = this_thr->th.th_team; - int tid = __kmp_tid_from_gtid(gtid); - - if (ompt_enabled.enabled) { - if (rc) { - if (ompt_enabled.ompt_callback_work) { - ompt_callbacks.ompt_callback(ompt_callback_work)( - ompt_work_single_executor, ompt_scope_begin, - &(team->t.ompt_team_info.parallel_data), - &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), - 1, OMPT_GET_RETURN_ADDRESS(0)); - } - } else { - if (ompt_enabled.ompt_callback_work) { - ompt_callbacks.ompt_callback(ompt_callback_work)( - ompt_work_single_other, ompt_scope_begin, - &(team->t.ompt_team_info.parallel_data), - &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), - 1, OMPT_GET_RETURN_ADDRESS(0)); - ompt_callbacks.ompt_callback(ompt_callback_work)( - ompt_work_single_other, ompt_scope_end, - &(team->t.ompt_team_info.parallel_data), - &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), - 1, OMPT_GET_RETURN_ADDRESS(0)); - } - } - } -#endif - - return rc; -} - -void *KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SINGLE_COPY_START)(void) { - void *retval; - int gtid = __kmp_entry_gtid(); - MKLOC(loc, "GOMP_single_copy_start"); - KA_TRACE(20, ("GOMP_single_copy_start: T#%d\n", gtid)); - - if (!TCR_4(__kmp_init_parallel)) - __kmp_parallel_initialize(); - - // If this is the first thread to enter, return NULL. The generated code will - // then call GOMP_single_copy_end() for this thread only, with the - // copyprivate data pointer as an argument. - if (__kmp_enter_single(gtid, &loc, FALSE)) - return NULL; - -// Wait for the first thread to set the copyprivate data pointer, -// and for all other threads to reach this point. - -#if OMPT_SUPPORT && OMPT_OPTIONAL - ompt_frame_t *ompt_frame; - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(gtid); - } -#endif - __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); - - // Retrieve the value of the copyprivate data point, and wait for all - // threads to do likewise, then return. 
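// Editor's note (hedged sketch of the copyprivate handshake): the single
// thread that owns the construct calls GOMP_single_copy_end(&its_data),
// which publishes the pointer and passes two barriers; every other thread
// waits in the barrier above and then picks the pointer up from
// t_copypriv_data below ("its_data" is a hypothetical broadcast variable).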
- retval = __kmp_team_from_gtid(gtid)->t.t_copypriv_data; -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - OMPT_STORE_RETURN_ADDRESS(gtid); - } -#endif - __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - ompt_frame->enter_frame = ompt_data_none; - } -#endif - return retval; -} - -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SINGLE_COPY_END)(void *data) { - int gtid = __kmp_get_gtid(); - KA_TRACE(20, ("GOMP_single_copy_end: T#%d\n", gtid)); - - // Set the copyprivate data pointer for the team, then hit the barrier so that - // the other threads will continue on and read it. Hit another barrier before - // continuing, so that they know that the copyprivate data pointer has been - // propagated to all threads before trying to reuse the t_copypriv_data field. - __kmp_team_from_gtid(gtid)->t.t_copypriv_data = data; -#if OMPT_SUPPORT && OMPT_OPTIONAL - ompt_frame_t *ompt_frame; - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(gtid); - } -#endif - __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - OMPT_STORE_RETURN_ADDRESS(gtid); - } -#endif - __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - ompt_frame->enter_frame = ompt_data_none; - } -#endif -} - -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ORDERED_START)(void) { - int gtid = __kmp_entry_gtid(); - MKLOC(loc, "GOMP_ordered_start"); - KA_TRACE(20, ("GOMP_ordered_start: T#%d\n", gtid)); -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - __kmpc_ordered(&loc, gtid); -} - -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ORDERED_END)(void) { - int gtid = __kmp_get_gtid(); - MKLOC(loc, "GOMP_ordered_end"); - KA_TRACE(20, ("GOMP_ordered_end: T#%d\n", gtid)); -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - __kmpc_end_ordered(&loc, gtid); -} - -// Dispatch macro defs -// -// They come in two flavors: 64-bit unsigned, and either 32-bit signed -// (IA-32 architecture) or 64-bit signed (Intel(R) 64).
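// Editor's note (hedged): two flavors are needed because the GOMP loop API
// traffics in C "long", which is 4 bytes on the ILP32 targets matched by the
// first branch below and 8 bytes on LP64 targets, while the GOMP_loop_ull_*
// entry points always use unsigned 64-bit bounds and so always bind to the
// _8u dispatch routines.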
- -#if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS -#define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_4 -#define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_4 -#define KMP_DISPATCH_NEXT __kmpc_dispatch_next_4 -#else -#define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_8 -#define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_8 -#define KMP_DISPATCH_NEXT __kmpc_dispatch_next_8 -#endif /* KMP_ARCH_X86 */ - -#define KMP_DISPATCH_INIT_ULL __kmp_aux_dispatch_init_8u -#define KMP_DISPATCH_FINI_CHUNK_ULL __kmp_aux_dispatch_fini_chunk_8u -#define KMP_DISPATCH_NEXT_ULL __kmpc_dispatch_next_8u - -// The parallel contruct - -#ifndef KMP_DEBUG -static -#endif /* KMP_DEBUG */ - void - __kmp_GOMP_microtask_wrapper(int *gtid, int *npr, void (*task)(void *), - void *data) { -#if OMPT_SUPPORT - kmp_info_t *thr; - ompt_frame_t *ompt_frame; - ompt_state_t enclosing_state; - - if (ompt_enabled.enabled) { - // get pointer to thread data structure - thr = __kmp_threads[*gtid]; - - // save enclosing task state; set current state for task - enclosing_state = thr->th.ompt_thread_info.state; - thr->th.ompt_thread_info.state = ompt_state_work_parallel; - - // set task frame - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - ompt_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } -#endif - - task(data); - -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - // clear task frame - ompt_frame->exit_frame = ompt_data_none; - - // restore enclosing state - thr->th.ompt_thread_info.state = enclosing_state; - } -#endif -} - -#ifndef KMP_DEBUG -static -#endif /* KMP_DEBUG */ - void - __kmp_GOMP_parallel_microtask_wrapper(int *gtid, int *npr, - void (*task)(void *), void *data, - unsigned num_threads, ident_t *loc, - enum sched_type schedule, long start, - long end, long incr, - long chunk_size) { - // Intialize the loop worksharing construct. - - KMP_DISPATCH_INIT(loc, *gtid, schedule, start, end, incr, chunk_size, - schedule != kmp_sch_static); - -#if OMPT_SUPPORT - kmp_info_t *thr; - ompt_frame_t *ompt_frame; - ompt_state_t enclosing_state; - - if (ompt_enabled.enabled) { - thr = __kmp_threads[*gtid]; - // save enclosing task state; set current state for task - enclosing_state = thr->th.ompt_thread_info.state; - thr->th.ompt_thread_info.state = ompt_state_work_parallel; - - // set task frame - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - ompt_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } -#endif - - // Now invoke the microtask. - task(data); - -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - // clear task frame - ompt_frame->exit_frame = ompt_data_none; - - // reset enclosing state - thr->th.ompt_thread_info.state = enclosing_state; - } -#endif -} - -#ifndef KMP_DEBUG -static -#endif /* KMP_DEBUG */ - void - __kmp_GOMP_fork_call(ident_t *loc, int gtid, void (*unwrapped_task)(void *), - microtask_t wrapper, int argc, ...) 
{ - int rc; - kmp_info_t *thr = __kmp_threads[gtid]; - kmp_team_t *team = thr->th.th_team; - int tid = __kmp_tid_from_gtid(gtid); - - va_list ap; - va_start(ap, argc); - - rc = __kmp_fork_call(loc, gtid, fork_context_gnu, argc, wrapper, - __kmp_invoke_task_func, -#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX - &ap -#else - ap -#endif - ); - - va_end(ap); - - if (rc) { - __kmp_run_before_invoked_task(gtid, tid, thr, team); - } - -#if OMPT_SUPPORT - int ompt_team_size; - if (ompt_enabled.enabled) { - ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); - ompt_task_info_t *task_info = __ompt_get_task_info_object(0); - - // implicit task callback - if (ompt_enabled.ompt_callback_implicit_task) { - ompt_team_size = __kmp_team_from_gtid(gtid)->t.t_nproc; - ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( - ompt_scope_begin, &(team_info->parallel_data), - &(task_info->task_data), ompt_team_size, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial? - task_info->thread_num = __kmp_tid_from_gtid(gtid); - } - thr->th.ompt_thread_info.state = ompt_state_work_parallel; - } -#endif -} - -static void __kmp_GOMP_serialized_parallel(ident_t *loc, kmp_int32 gtid, - void (*task)(void *)) { -#if OMPT_SUPPORT - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - __kmp_serialized_parallel(loc, gtid); -} - -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), - void *data, - unsigned num_threads) { - int gtid = __kmp_entry_gtid(); - -#if OMPT_SUPPORT - ompt_frame_t *parent_frame, *frame; - - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL); - parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(gtid); - } -#endif - - MKLOC(loc, "GOMP_parallel_start"); - KA_TRACE(20, ("GOMP_parallel_start: T#%d\n", gtid)); - - if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { - if (num_threads != 0) { - __kmp_push_num_threads(&loc, gtid, num_threads); - } - __kmp_GOMP_fork_call(&loc, gtid, task, - (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task, - data); - } else { - __kmp_GOMP_serialized_parallel(&loc, gtid, task); - } - -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &frame, NULL, NULL); - frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } -#endif -} - -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(void) { - int gtid = __kmp_get_gtid(); - kmp_info_t *thr; - - thr = __kmp_threads[gtid]; - - MKLOC(loc, "GOMP_parallel_end"); - KA_TRACE(20, ("GOMP_parallel_end: T#%d\n", gtid)); - - if (!thr->th.th_team->t.t_serialized) { - __kmp_run_after_invoked_task(gtid, __kmp_tid_from_gtid(gtid), thr, - thr->th.th_team); - -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - // Implicit task is finished here, in the barrier we might schedule - // deferred tasks, - // these don't see the implicit task on the stack - OMPT_CUR_TASK_INFO(thr)->frame.exit_frame = ompt_data_none; - } -#endif - - __kmp_join_call(&loc, gtid -#if OMPT_SUPPORT - , - fork_context_gnu -#endif - ); - } else { - __kmpc_end_serialized_parallel(&loc, gtid); - } -} - -// Loop worksharing constructs - -// The Gnu codegen passes in an exclusive upper bound for the overall range, -// but the libguide dispatch code expects an inclusive upper bound, hence the -// "end - incr" 5th argument to KMP_DISPATCH_INIT (and the " ub - str" 11th -// argument to __kmp_GOMP_fork_call). 
-// -// Conversely, KMP_DISPATCH_NEXT returns an inclusive upper bound in *p_ub, -// but the Gnu codegen expects an exclusive upper bound, so the adjustment -// "*p_ub += stride" compensates for the discrepancy. -// -// Correction: the gnu codegen always adjusts the upper bound by +-1, not the -// stride value. We adjust the dispatch parameters accordingly (by +-1), but -// we still adjust p_ub by the actual stride value. -// -// The "runtime" versions do not take a chunk_sz parameter. -// -// The profile lib cannot support construct checking of unordered loops that -// are predetermined by the compiler to be statically scheduled, as the gcc -// codegen will not always emit calls to GOMP_loop_static_next() to get the -// next iteration. Instead, it emits inline code to call omp_get_thread_num() -// and calculate the iteration space using the result. It doesn't do this -// with ordered static loops, so they can be checked. - -#if OMPT_SUPPORT -#define IF_OMPT_SUPPORT(code) code -#else -#define IF_OMPT_SUPPORT(code) -#endif - -#define LOOP_START(func, schedule) \ - int func(long lb, long ub, long str, long chunk_sz, long *p_lb, \ - long *p_ub) { \ - int status; \ - long stride; \ - int gtid = __kmp_entry_gtid(); \ - MKLOC(loc, KMP_STR(func)); \ - KA_TRACE( \ - 20, \ - (KMP_STR( \ - func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \ - gtid, lb, ub, str, chunk_sz)); \ - \ - if ((str > 0) ? (lb < ub) : (lb > ub)) { \ - IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ - KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ - (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ - (schedule) != kmp_sch_static); \ - IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ - status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ - (kmp_int *)p_ub, (kmp_int *)&stride); \ - if (status) { \ - KMP_DEBUG_ASSERT(stride == str); \ - *p_ub += (str > 0) ? 1 : -1; \ - } \ - } else { \ - status = 0; \ - } \ - \ - KA_TRACE( \ - 20, \ - (KMP_STR( \ - func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n", \ - gtid, *p_lb, *p_ub, status)); \ - return status; \ - } - -#define LOOP_RUNTIME_START(func, schedule) \ - int func(long lb, long ub, long str, long *p_lb, long *p_ub) { \ - int status; \ - long stride; \ - long chunk_sz = 0; \ - int gtid = __kmp_entry_gtid(); \ - MKLOC(loc, KMP_STR(func)); \ - KA_TRACE( \ - 20, \ - (KMP_STR(func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz %d\n", \ - gtid, lb, ub, str, chunk_sz)); \ - \ - if ((str > 0) ? (lb < ub) : (lb > ub)) { \ - IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ - KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ - (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, TRUE); \ - IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ - status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ - (kmp_int *)p_ub, (kmp_int *)&stride); \ - if (status) { \ - KMP_DEBUG_ASSERT(stride == str); \ - *p_ub += (str > 0) ? 
1 : -1; \ - } \ - } else { \ - status = 0; \ - } \ - \ - KA_TRACE( \ - 20, \ - (KMP_STR( \ - func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n", \ - gtid, *p_lb, *p_ub, status)); \ - return status; \ - } - -#if OMP_45_ENABLED -#define KMP_DOACROSS_FINI(status, gtid) \ - if (!status && __kmp_threads[gtid]->th.th_dispatch->th_doacross_flags) { \ - __kmpc_doacross_fini(NULL, gtid); \ - } -#else -#define KMP_DOACROSS_FINI(status, gtid) /* Nothing */ -#endif - -#define LOOP_NEXT(func, fini_code) \ - int func(long *p_lb, long *p_ub) { \ - int status; \ - long stride; \ - int gtid = __kmp_get_gtid(); \ - MKLOC(loc, KMP_STR(func)); \ - KA_TRACE(20, (KMP_STR(func) ": T#%d\n", gtid)); \ - \ - IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ - fini_code status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ - (kmp_int *)p_ub, (kmp_int *)&stride); \ - if (status) { \ - *p_ub += (stride > 0) ? 1 : -1; \ - } \ - KMP_DOACROSS_FINI(status, gtid) \ - \ - KA_TRACE( \ - 20, \ - (KMP_STR(func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, stride 0x%lx, " \ - "returning %d\n", \ - gtid, *p_lb, *p_ub, stride, status)); \ - return status; \ - } - -LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_STATIC_START), kmp_sch_static) -LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT), {}) -LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START), - kmp_sch_dynamic_chunked) -LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT), {}) -LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_GUIDED_START), - kmp_sch_guided_chunked) -LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT), {}) -LOOP_RUNTIME_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_START), - kmp_sch_runtime) -LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT), {}) - -LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START), - kmp_ord_static) -LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT), - { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); }) -LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START), - kmp_ord_dynamic_chunked) -LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT), - { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); }) -LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START), - kmp_ord_guided_chunked) -LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT), - { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); }) -LOOP_RUNTIME_START( - KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START), - kmp_ord_runtime) -LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT), - { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); }) - -#if OMP_45_ENABLED -#define LOOP_DOACROSS_START(func, schedule) \ - bool func(unsigned ncounts, long *counts, long chunk_sz, long *p_lb, \ - long *p_ub) { \ - int status; \ - long stride, lb, ub, str; \ - int gtid = __kmp_entry_gtid(); \ - struct kmp_dim *dims = \ - (struct kmp_dim *)__kmp_allocate(sizeof(struct kmp_dim) * ncounts); \ - MKLOC(loc, KMP_STR(func)); \ - for (unsigned i = 0; i < ncounts; ++i) { \ - dims[i].lo = 0; \ - dims[i].up = counts[i] - 1; \ - dims[i].st = 1; \ - } \ - __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims); \ - lb = 0; \ - ub = counts[0]; \ - str = 1; \ - KA_TRACE(20, (KMP_STR(func) ": T#%d, ncounts %u, lb 0x%lx, ub 0x%lx, str " \ - "0x%lx, chunk_sz " \ - "0x%lx\n", \ - gtid, ncounts, lb, ub, str, chunk_sz)); \ - \ - if ((str > 0) ? (lb < ub) : (lb > ub)) { \ - KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ - (str > 0) ? 
(ub - 1) : (ub + 1), str, chunk_sz, \ - (schedule) != kmp_sch_static); \ - status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ - (kmp_int *)p_ub, (kmp_int *)&stride); \ - if (status) { \ - KMP_DEBUG_ASSERT(stride == str); \ - *p_ub += (str > 0) ? 1 : -1; \ - } \ - } else { \ - status = 0; \ - } \ - KMP_DOACROSS_FINI(status, gtid); \ - \ - KA_TRACE( \ - 20, \ - (KMP_STR( \ - func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n", \ - gtid, *p_lb, *p_ub, status)); \ - __kmp_free(dims); \ - return status; \ - } - -#define LOOP_DOACROSS_RUNTIME_START(func, schedule) \ - int func(unsigned ncounts, long *counts, long *p_lb, long *p_ub) { \ - int status; \ - long stride, lb, ub, str; \ - long chunk_sz = 0; \ - int gtid = __kmp_entry_gtid(); \ - struct kmp_dim *dims = \ - (struct kmp_dim *)__kmp_allocate(sizeof(struct kmp_dim) * ncounts); \ - MKLOC(loc, KMP_STR(func)); \ - for (unsigned i = 0; i < ncounts; ++i) { \ - dims[i].lo = 0; \ - dims[i].up = counts[i] - 1; \ - dims[i].st = 1; \ - } \ - __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims); \ - lb = 0; \ - ub = counts[0]; \ - str = 1; \ - KA_TRACE( \ - 20, \ - (KMP_STR(func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz %d\n", \ - gtid, lb, ub, str, chunk_sz)); \ - \ - if ((str > 0) ? (lb < ub) : (lb > ub)) { \ - KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ - (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, TRUE); \ - status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ - (kmp_int *)p_ub, (kmp_int *)&stride); \ - if (status) { \ - KMP_DEBUG_ASSERT(stride == str); \ - *p_ub += (str > 0) ? 1 : -1; \ - } \ - } else { \ - status = 0; \ - } \ - KMP_DOACROSS_FINI(status, gtid); \ - \ - KA_TRACE( \ - 20, \ - (KMP_STR( \ - func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n", \ - gtid, *p_lb, *p_ub, status)); \ - __kmp_free(dims); \ - return status; \ - } - -LOOP_DOACROSS_START( - KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_STATIC_START), - kmp_sch_static) -LOOP_DOACROSS_START( - KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_DYNAMIC_START), - kmp_sch_dynamic_chunked) -LOOP_DOACROSS_START( - KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_GUIDED_START), - kmp_sch_guided_chunked) -LOOP_DOACROSS_RUNTIME_START( - KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_RUNTIME_START), - kmp_sch_runtime) -#endif // OMP_45_ENABLED - -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_END)(void) { - int gtid = __kmp_get_gtid(); - KA_TRACE(20, ("GOMP_loop_end: T#%d\n", gtid)) - -#if OMPT_SUPPORT && OMPT_OPTIONAL - ompt_frame_t *ompt_frame; - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(gtid); - } -#endif - __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - ompt_frame->enter_frame = ompt_data_none; - } -#endif - - KA_TRACE(20, ("GOMP_loop_end exit: T#%d\n", gtid)) -} - -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_END_NOWAIT)(void) { - KA_TRACE(20, ("GOMP_loop_end_nowait: T#%d\n", __kmp_get_gtid())) -} - -// Unsigned long long loop worksharing constructs -// -// These are new with gcc 4.4 - -#define LOOP_START_ULL(func, schedule) \ - int func(int up, unsigned long long lb, unsigned long long ub, \ - unsigned long long str, unsigned long long chunk_sz, \ - unsigned long long *p_lb, unsigned long long *p_ub) { \ - int status; \ - long long str2 = up ? 
((long long)str) : -((long long)str); \ - long long stride; \ - int gtid = __kmp_entry_gtid(); \ - MKLOC(loc, KMP_STR(func)); \ - \ - KA_TRACE(20, (KMP_STR(func) ": T#%d, up %d, lb 0x%llx, ub 0x%llx, str " \ - "0x%llx, chunk_sz 0x%llx\n", \ - gtid, up, lb, ub, str, chunk_sz)); \ - \ - if ((str > 0) ? (lb < ub) : (lb > ub)) { \ - KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, \ - (str2 > 0) ? (ub - 1) : (ub + 1), str2, chunk_sz, \ - (schedule) != kmp_sch_static); \ - status = \ - KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb, \ - (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \ - if (status) { \ - KMP_DEBUG_ASSERT(stride == str2); \ - *p_ub += (str > 0) ? 1 : -1; \ - } \ - } else { \ - status = 0; \ - } \ - \ - KA_TRACE( \ - 20, \ - (KMP_STR( \ - func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n", \ - gtid, *p_lb, *p_ub, status)); \ - return status; \ - } - -#define LOOP_RUNTIME_START_ULL(func, schedule) \ - int func(int up, unsigned long long lb, unsigned long long ub, \ - unsigned long long str, unsigned long long *p_lb, \ - unsigned long long *p_ub) { \ - int status; \ - long long str2 = up ? ((long long)str) : -((long long)str); \ - unsigned long long stride; \ - unsigned long long chunk_sz = 0; \ - int gtid = __kmp_entry_gtid(); \ - MKLOC(loc, KMP_STR(func)); \ - \ - KA_TRACE(20, (KMP_STR(func) ": T#%d, up %d, lb 0x%llx, ub 0x%llx, str " \ - "0x%llx, chunk_sz 0x%llx\n", \ - gtid, up, lb, ub, str, chunk_sz)); \ - \ - if ((str > 0) ? (lb < ub) : (lb > ub)) { \ - KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, \ - (str2 > 0) ? (ub - 1) : (ub + 1), str2, chunk_sz, \ - TRUE); \ - status = \ - KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb, \ - (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \ - if (status) { \ - KMP_DEBUG_ASSERT((long long)stride == str2); \ - *p_ub += (str > 0) ? 1 : -1; \ - } \ - } else { \ - status = 0; \ - } \ - \ - KA_TRACE( \ - 20, \ - (KMP_STR( \ - func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n", \ - gtid, *p_lb, *p_ub, status)); \ - return status; \ - } - -#define LOOP_NEXT_ULL(func, fini_code) \ - int func(unsigned long long *p_lb, unsigned long long *p_ub) { \ - int status; \ - long long stride; \ - int gtid = __kmp_get_gtid(); \ - MKLOC(loc, KMP_STR(func)); \ - KA_TRACE(20, (KMP_STR(func) ": T#%d\n", gtid)); \ - \ - fini_code status = \ - KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb, \ - (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \ - if (status) { \ - *p_ub += (stride > 0) ? 
1 : -1; \ - } \ - \ - KA_TRACE( \ - 20, \ - (KMP_STR( \ - func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, stride 0x%llx, " \ - "returning %d\n", \ - gtid, *p_lb, *p_ub, stride, status)); \ - return status; \ - } - -LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START), - kmp_sch_static) -LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT), {}) -LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START), - kmp_sch_dynamic_chunked) -LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT), {}) -LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START), - kmp_sch_guided_chunked) -LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT), {}) -LOOP_RUNTIME_START_ULL( - KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START), kmp_sch_runtime) -LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT), {}) - -LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START), - kmp_ord_static) -LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT), - { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); }) -LOOP_START_ULL( - KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START), - kmp_ord_dynamic_chunked) -LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT), - { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); }) -LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START), - kmp_ord_guided_chunked) -LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT), - { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); }) -LOOP_RUNTIME_START_ULL( - KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START), - kmp_ord_runtime) -LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT), - { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); }) - -#if OMP_45_ENABLED -#define LOOP_DOACROSS_START_ULL(func, schedule) \ - int func(unsigned ncounts, unsigned long long *counts, \ - unsigned long long chunk_sz, unsigned long long *p_lb, \ - unsigned long long *p_ub) { \ - int status; \ - long long stride, str, lb, ub; \ - int gtid = __kmp_entry_gtid(); \ - struct kmp_dim *dims = \ - (struct kmp_dim *)__kmp_allocate(sizeof(struct kmp_dim) * ncounts); \ - MKLOC(loc, KMP_STR(func)); \ - for (unsigned i = 0; i < ncounts; ++i) { \ - dims[i].lo = 0; \ - dims[i].up = counts[i] - 1; \ - dims[i].st = 1; \ - } \ - __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims); \ - lb = 0; \ - ub = counts[0]; \ - str = 1; \ - \ - KA_TRACE(20, (KMP_STR(func) ": T#%d, lb 0x%llx, ub 0x%llx, str " \ - "0x%llx, chunk_sz 0x%llx\n", \ - gtid, lb, ub, str, chunk_sz)); \ - \ - if ((str > 0) ? (lb < ub) : (lb > ub)) { \ - KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, \ - (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ - (schedule) != kmp_sch_static); \ - status = \ - KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb, \ - (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \ - if (status) { \ - KMP_DEBUG_ASSERT(stride == str); \ - *p_ub += (str > 0) ? 
1 : -1; \ - } \ - } else { \ - status = 0; \ - } \ - KMP_DOACROSS_FINI(status, gtid); \ - \ - KA_TRACE( \ - 20, \ - (KMP_STR( \ - func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n", \ - gtid, *p_lb, *p_ub, status)); \ - __kmp_free(dims); \ - return status; \ - } - -#define LOOP_DOACROSS_RUNTIME_START_ULL(func, schedule) \ - int func(unsigned ncounts, unsigned long long *counts, \ - unsigned long long *p_lb, unsigned long long *p_ub) { \ - int status; \ - unsigned long long stride, str, lb, ub; \ - unsigned long long chunk_sz = 0; \ - int gtid = __kmp_entry_gtid(); \ - struct kmp_dim *dims = \ - (struct kmp_dim *)__kmp_allocate(sizeof(struct kmp_dim) * ncounts); \ - MKLOC(loc, KMP_STR(func)); \ - for (unsigned i = 0; i < ncounts; ++i) { \ - dims[i].lo = 0; \ - dims[i].up = counts[i] - 1; \ - dims[i].st = 1; \ - } \ - __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims); \ - lb = 0; \ - ub = counts[0]; \ - str = 1; \ - KA_TRACE(20, (KMP_STR(func) ": T#%d, lb 0x%llx, ub 0x%llx, str " \ - "0x%llx, chunk_sz 0x%llx\n", \ - gtid, lb, ub, str, chunk_sz)); \ - \ - if ((str > 0) ? (lb < ub) : (lb > ub)) { \ - KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, \ - (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ - TRUE); \ - status = \ - KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb, \ - (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \ - if (status) { \ - KMP_DEBUG_ASSERT(stride == str); \ - *p_ub += (str > 0) ? 1 : -1; \ - } \ - } else { \ - status = 0; \ - } \ - KMP_DOACROSS_FINI(status, gtid); \ - \ - KA_TRACE( \ - 20, \ - (KMP_STR( \ - func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n", \ - gtid, *p_lb, *p_ub, status)); \ - __kmp_free(dims); \ - return status; \ - } - -LOOP_DOACROSS_START_ULL( - KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_STATIC_START), - kmp_sch_static) -LOOP_DOACROSS_START_ULL( - KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_DYNAMIC_START), - kmp_sch_dynamic_chunked) -LOOP_DOACROSS_START_ULL( - KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_GUIDED_START), - kmp_sch_guided_chunked) -LOOP_DOACROSS_RUNTIME_START_ULL( - KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START), - kmp_sch_runtime) -#endif - -// Combined parallel / loop worksharing constructs -// -// There are no ull versions (yet). - -#define PARALLEL_LOOP_START(func, schedule, ompt_pre, ompt_post) \ - void func(void (*task)(void *), void *data, unsigned num_threads, long lb, \ - long ub, long str, long chunk_sz) { \ - int gtid = __kmp_entry_gtid(); \ - MKLOC(loc, KMP_STR(func)); \ - KA_TRACE( \ - 20, \ - (KMP_STR( \ - func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \ - gtid, lb, ub, str, chunk_sz)); \ - \ - ompt_pre(); \ - \ - if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { \ - if (num_threads != 0) { \ - __kmp_push_num_threads(&loc, gtid, num_threads); \ - } \ - __kmp_GOMP_fork_call(&loc, gtid, task, \ - (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, \ - 9, task, data, num_threads, &loc, (schedule), lb, \ - (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \ - IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid)); \ - } else { \ - __kmp_GOMP_serialized_parallel(&loc, gtid, task); \ - IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid)); \ - } \ - \ - KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ - (str > 0) ? 
(ub - 1) : (ub + 1), str, chunk_sz, \ - (schedule) != kmp_sch_static); \ - \ - ompt_post(); \ - \ - KA_TRACE(20, (KMP_STR(func) " exit: T#%d\n", gtid)); \ - } - -#if OMPT_SUPPORT && OMPT_OPTIONAL - -#define OMPT_LOOP_PRE() \ - ompt_frame_t *parent_frame; \ - if (ompt_enabled.enabled) { \ - __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL); \ - parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); \ - OMPT_STORE_RETURN_ADDRESS(gtid); \ - } - -#define OMPT_LOOP_POST() \ - if (ompt_enabled.enabled) { \ - parent_frame->enter_frame = ompt_data_none; \ - } - -#else - -#define OMPT_LOOP_PRE() - -#define OMPT_LOOP_POST() - -#endif - -PARALLEL_LOOP_START( - KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START), - kmp_sch_static, OMPT_LOOP_PRE, OMPT_LOOP_POST) -PARALLEL_LOOP_START( - KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START), - kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST) -PARALLEL_LOOP_START( - KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START), - kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST) -PARALLEL_LOOP_START( - KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START), - kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST) - -// Tasking constructs - -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data, - void (*copy_func)(void *, void *), - long arg_size, long arg_align, - bool if_cond, unsigned gomp_flags -#if OMP_40_ENABLED - , - void **depend -#endif - ) { - MKLOC(loc, "GOMP_task"); - int gtid = __kmp_entry_gtid(); - kmp_int32 flags = 0; - kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags; - - KA_TRACE(20, ("GOMP_task: T#%d\n", gtid)); - - // The low-order bit is the "untied" flag - if (!(gomp_flags & 1)) { - input_flags->tiedness = 1; - } - // The second low-order bit is the "final" flag - if (gomp_flags & 2) { - input_flags->final = 1; - } - input_flags->native = 1; - // __kmp_task_alloc() sets up all other flags - - if (!if_cond) { - arg_size = 0; - } - - kmp_task_t *task = __kmp_task_alloc( - &loc, gtid, input_flags, sizeof(kmp_task_t), - arg_size ? arg_size + arg_align - 1 : 0, (kmp_routine_entry_t)func); - - if (arg_size > 0) { - if (arg_align > 0) { - task->shareds = (void *)((((size_t)task->shareds) + arg_align - 1) / - arg_align * arg_align); - } - // else error?? 
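// Editor's note (worked example of the round-up above, hypothetical
// numbers): with task->shareds == 0x1003 and arg_align == 8,
// (0x1003 + 7) / 8 * 8 == 0x1008, i.e. the pointer advances to the next
// 8-byte boundary; the arg_align - 1 extra bytes passed to
// __kmp_task_alloc keep the aligned pointer inside the allocation.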
- - if (copy_func) { - (*copy_func)(task->shareds, data); - } else { - KMP_MEMCPY(task->shareds, data, arg_size); - } - } - -#if OMPT_SUPPORT - kmp_taskdata_t *current_task; - if (ompt_enabled.enabled) { - OMPT_STORE_RETURN_ADDRESS(gtid); - current_task = __kmp_threads[gtid]->th.th_current_task; - current_task->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } -#endif - - if (if_cond) { -#if OMP_40_ENABLED - if (gomp_flags & 8) { - KMP_ASSERT(depend); - const size_t ndeps = (kmp_intptr_t)depend[0]; - const size_t nout = (kmp_intptr_t)depend[1]; - kmp_depend_info_t dep_list[ndeps]; - - for (size_t i = 0U; i < ndeps; i++) { - dep_list[i].base_addr = (kmp_intptr_t)depend[2U + i]; - dep_list[i].len = 0U; - dep_list[i].flags.in = 1; - dep_list[i].flags.out = (i < nout); - } - __kmpc_omp_task_with_deps(&loc, gtid, task, ndeps, dep_list, 0, NULL); - } else { -#endif - __kmpc_omp_task(&loc, gtid, task); - } - } else { -#if OMPT_SUPPORT - ompt_thread_info_t oldInfo; - kmp_info_t *thread; - kmp_taskdata_t *taskdata; - if (ompt_enabled.enabled) { - // Store the thread's state and restore it after the task - thread = __kmp_threads[gtid]; - taskdata = KMP_TASK_TO_TASKDATA(task); - oldInfo = thread->th.ompt_thread_info; - thread->th.ompt_thread_info.wait_id = 0; - thread->th.ompt_thread_info.state = ompt_state_work_parallel; - taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(gtid); - } -#endif - - __kmpc_omp_task_begin_if0(&loc, gtid, task); - func(data); - __kmpc_omp_task_complete_if0(&loc, gtid, task); - -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - thread->th.ompt_thread_info = oldInfo; - taskdata->ompt_task_info.frame.exit_frame = ompt_data_none; - } -#endif - } -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - current_task->ompt_task_info.frame.enter_frame = ompt_data_none; - } -#endif - - KA_TRACE(20, ("GOMP_task exit: T#%d\n", gtid)); -} - -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKWAIT)(void) { - MKLOC(loc, "GOMP_taskwait"); - int gtid = __kmp_entry_gtid(); - -#if OMPT_SUPPORT - if (ompt_enabled.enabled) - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - - KA_TRACE(20, ("GOMP_taskwait: T#%d\n", gtid)); - - __kmpc_omp_taskwait(&loc, gtid); - - KA_TRACE(20, ("GOMP_taskwait exit: T#%d\n", gtid)); -} - -// Sections worksharing constructs -// -// For the sections construct, we initialize a dynamically scheduled loop -// worksharing construct with lb 1 and stride 1, and use the iteration #'s -// that it returns as section ids. -// -// There are no special entry points for ordered sections, so we always use -// the dynamically scheduled workshare, even if the sections aren't ordered.
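A hedged usage sketch (editor's addition, not upstream code) of the protocol these entry points implement; run_section is a hypothetical dispatcher for the section bodies.

  extern unsigned GOMP_sections_start(unsigned count);
  extern unsigned GOMP_sections_next(void);
  extern void GOMP_sections_end(void);
  extern void run_section(unsigned id); // hypothetical: runs section #id
  static void sections_construct(void) {
    // Conceptually what gcc emits for a construct with 3 sections:
    // ids are 1-based loop iterations, 0 means "no section left".
    for (unsigned id = GOMP_sections_start(3); id != 0;
         id = GOMP_sections_next())
      run_section(id);
    GOMP_sections_end(); // implicit barrier at the end of the construct
  }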
- -unsigned KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_START)(unsigned count) { - int status; - kmp_int lb, ub, stride; - int gtid = __kmp_entry_gtid(); - MKLOC(loc, "GOMP_sections_start"); - KA_TRACE(20, ("GOMP_sections_start: T#%d\n", gtid)); - - KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE); - - status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, &lb, &ub, &stride); - if (status) { - KMP_DEBUG_ASSERT(stride == 1); - KMP_DEBUG_ASSERT(lb > 0); - KMP_ASSERT(lb == ub); - } else { - lb = 0; - } - - KA_TRACE(20, ("GOMP_sections_start exit: T#%d returning %u\n", gtid, - (unsigned)lb)); - return (unsigned)lb; -} - -unsigned KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_NEXT)(void) { - int status; - kmp_int lb, ub, stride; - int gtid = __kmp_get_gtid(); - MKLOC(loc, "GOMP_sections_next"); - KA_TRACE(20, ("GOMP_sections_next: T#%d\n", gtid)); - -#if OMPT_SUPPORT - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - - status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, &lb, &ub, &stride); - if (status) { - KMP_DEBUG_ASSERT(stride == 1); - KMP_DEBUG_ASSERT(lb > 0); - KMP_ASSERT(lb == ub); - } else { - lb = 0; - } - - KA_TRACE( - 20, ("GOMP_sections_next exit: T#%d returning %u\n", gtid, (unsigned)lb)); - return (unsigned)lb; -} - -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)( - void (*task)(void *), void *data, unsigned num_threads, unsigned count) { - int gtid = __kmp_entry_gtid(); - -#if OMPT_SUPPORT - ompt_frame_t *parent_frame; - - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL); - parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(gtid); - } -#endif - - MKLOC(loc, "GOMP_parallel_sections_start"); - KA_TRACE(20, ("GOMP_parallel_sections_start: T#%d\n", gtid)); - - if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { - if (num_threads != 0) { - __kmp_push_num_threads(&loc, gtid, num_threads); - } - __kmp_GOMP_fork_call(&loc, gtid, task, - (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, - task, data, num_threads, &loc, kmp_nm_dynamic_chunked, - (kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1); - } else { - __kmp_GOMP_serialized_parallel(&loc, gtid, task); - } - -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - parent_frame->enter_frame = ompt_data_none; - } -#endif - - KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE); - - KA_TRACE(20, ("GOMP_parallel_sections_start exit: T#%d\n", gtid)); -} - -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_END)(void) { - int gtid = __kmp_get_gtid(); - KA_TRACE(20, ("GOMP_sections_end: T#%d\n", gtid)) - -#if OMPT_SUPPORT - ompt_frame_t *ompt_frame; - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(gtid); - } -#endif - __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - ompt_frame->enter_frame = ompt_data_none; - } -#endif - - KA_TRACE(20, ("GOMP_sections_end exit: T#%d\n", gtid)) -} - -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT)(void) { - KA_TRACE(20, ("GOMP_sections_end_nowait: T#%d\n", __kmp_get_gtid())) -} - -// libgomp has an empty function for GOMP_taskyield as of 2013-10-10 -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKYIELD)(void) { - KA_TRACE(20, ("GOMP_taskyield: T#%d\n", __kmp_get_gtid())) - return; -} - -#if OMP_40_ENABLED // these are new GOMP_4.0 entry points - -void 
KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL)(void (*task)(void *), - void *data, - unsigned num_threads, - unsigned int flags) { - int gtid = __kmp_entry_gtid(); - MKLOC(loc, "GOMP_parallel"); - KA_TRACE(20, ("GOMP_parallel: T#%d\n", gtid)); - -#if OMPT_SUPPORT - ompt_task_info_t *parent_task_info, *task_info; - if (ompt_enabled.enabled) { - parent_task_info = __ompt_get_task_info_object(0); - parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(gtid); - } -#endif - if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { - if (num_threads != 0) { - __kmp_push_num_threads(&loc, gtid, num_threads); - } - if (flags != 0) { - __kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags); - } - __kmp_GOMP_fork_call(&loc, gtid, task, - (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task, - data); - } else { - __kmp_GOMP_serialized_parallel(&loc, gtid, task); - } -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - task_info = __ompt_get_task_info_object(0); - task_info->frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } -#endif - task(data); -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - OMPT_STORE_RETURN_ADDRESS(gtid); - } -#endif - KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(); -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - task_info->frame.exit_frame = ompt_data_none; - parent_task_info->frame.enter_frame = ompt_data_none; - } -#endif -} - -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task)(void *), - void *data, - unsigned num_threads, - unsigned count, - unsigned flags) { - int gtid = __kmp_entry_gtid(); - MKLOC(loc, "GOMP_parallel_sections"); - KA_TRACE(20, ("GOMP_parallel_sections: T#%d\n", gtid)); - -#if OMPT_SUPPORT - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - - if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { - if (num_threads != 0) { - __kmp_push_num_threads(&loc, gtid, num_threads); - } - if (flags != 0) { - __kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags); - } - __kmp_GOMP_fork_call(&loc, gtid, task, - (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, - task, data, num_threads, &loc, kmp_nm_dynamic_chunked, - (kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1); - } else { - __kmp_GOMP_serialized_parallel(&loc, gtid, task); - } - -#if OMPT_SUPPORT - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - - KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE); - - task(data); - KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(); - KA_TRACE(20, ("GOMP_parallel_sections exit: T#%d\n", gtid)); -} - -#define PARALLEL_LOOP(func, schedule, ompt_pre, ompt_post) \ - void func(void (*task)(void *), void *data, unsigned num_threads, long lb, \ - long ub, long str, long chunk_sz, unsigned flags) { \ - int gtid = __kmp_entry_gtid(); \ - MKLOC(loc, KMP_STR(func)); \ - KA_TRACE( \ - 20, \ - (KMP_STR( \ - func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \ - gtid, lb, ub, str, chunk_sz)); \ - \ - ompt_pre(); \ - if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { \ - if (num_threads != 0) { \ - __kmp_push_num_threads(&loc, gtid, num_threads); \ - } \ - if (flags != 0) { \ - __kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags); \ - } \ - __kmp_GOMP_fork_call(&loc, gtid, task, \ - (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, \ - 9, task, data, num_threads, &loc, (schedule), lb, \ - (str > 0) ? 
(ub - 1) : (ub + 1), str, chunk_sz); \ - } else { \ - __kmp_GOMP_serialized_parallel(&loc, gtid, task); \ - } \ - \ - IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ - KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ - (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ - (schedule) != kmp_sch_static); \ - task(data); \ - KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(); \ - ompt_post(); \ - \ - KA_TRACE(20, (KMP_STR(func) " exit: T#%d\n", gtid)); \ - } - -PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC), - kmp_sch_static, OMPT_LOOP_PRE, OMPT_LOOP_POST) -PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC), - kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST) -PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED), - kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST) -PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME), - kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST) - -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_START)(void) { - int gtid = __kmp_entry_gtid(); - MKLOC(loc, "GOMP_taskgroup_start"); - KA_TRACE(20, ("GOMP_taskgroup_start: T#%d\n", gtid)); - -#if OMPT_SUPPORT - if (ompt_enabled.enabled) - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - - __kmpc_taskgroup(&loc, gtid); - - return; -} - -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_END)(void) { - int gtid = __kmp_get_gtid(); - MKLOC(loc, "GOMP_taskgroup_end"); - KA_TRACE(20, ("GOMP_taskgroup_end: T#%d\n", gtid)); - -#if OMPT_SUPPORT - if (ompt_enabled.enabled) - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - - __kmpc_end_taskgroup(&loc, gtid); - - return; -} - -#ifndef KMP_DEBUG -static -#endif /* KMP_DEBUG */ - kmp_int32 - __kmp_gomp_to_omp_cancellation_kind(int gomp_kind) { - kmp_int32 cncl_kind = 0; - switch (gomp_kind) { - case 1: - cncl_kind = cancel_parallel; - break; - case 2: - cncl_kind = cancel_loop; - break; - case 4: - cncl_kind = cancel_sections; - break; - case 8: - cncl_kind = cancel_taskgroup; - break; - } - return cncl_kind; -} - -bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CANCELLATION_POINT)(int which) { - if (__kmp_omp_cancellation) { - KMP_FATAL(NoGompCancellation); - } - int gtid = __kmp_get_gtid(); - MKLOC(loc, "GOMP_cancellation_point"); - KA_TRACE(20, ("GOMP_cancellation_point: T#%d\n", gtid)); - - kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which); - - return __kmpc_cancellationpoint(&loc, gtid, cncl_kind); -} - -bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_BARRIER_CANCEL)(void) { - if (__kmp_omp_cancellation) { - KMP_FATAL(NoGompCancellation); - } - KMP_FATAL(NoGompCancellation); - int gtid = __kmp_get_gtid(); - MKLOC(loc, "GOMP_barrier_cancel"); - KA_TRACE(20, ("GOMP_barrier_cancel: T#%d\n", gtid)); - - return __kmpc_cancel_barrier(&loc, gtid); -} - -bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CANCEL)(int which, bool do_cancel) { - if (__kmp_omp_cancellation) { - KMP_FATAL(NoGompCancellation); - } else { - return FALSE; - } - - int gtid = __kmp_get_gtid(); - MKLOC(loc, "GOMP_cancel"); - KA_TRACE(20, ("GOMP_cancel: T#%d\n", gtid)); - - kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which); - - if (do_cancel == FALSE) { - return KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CANCELLATION_POINT)(which); - } else { - return __kmpc_cancel(&loc, gtid, cncl_kind); - } -} - -bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_END_CANCEL)(void) { - if (__kmp_omp_cancellation) { - KMP_FATAL(NoGompCancellation); - } - int gtid = __kmp_get_gtid(); - MKLOC(loc, "GOMP_sections_end_cancel"); - KA_TRACE(20, ("GOMP_sections_end_cancel: 
T#%d\n", gtid)); - - return __kmpc_cancel_barrier(&loc, gtid); -} - -bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_END_CANCEL)(void) { - if (__kmp_omp_cancellation) { - KMP_FATAL(NoGompCancellation); - } - int gtid = __kmp_get_gtid(); - MKLOC(loc, "GOMP_loop_end_cancel"); - KA_TRACE(20, ("GOMP_loop_end_cancel: T#%d\n", gtid)); - - return __kmpc_cancel_barrier(&loc, gtid); -} - -// All target functions are empty as of 2014-05-29 -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TARGET)(int device, void (*fn)(void *), - const void *openmp_target, - size_t mapnum, void **hostaddrs, - size_t *sizes, - unsigned char *kinds) { - return; -} - -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TARGET_DATA)( - int device, const void *openmp_target, size_t mapnum, void **hostaddrs, - size_t *sizes, unsigned char *kinds) { - return; -} - -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TARGET_END_DATA)(void) { return; } - -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TARGET_UPDATE)( - int device, const void *openmp_target, size_t mapnum, void **hostaddrs, - size_t *sizes, unsigned char *kinds) { - return; -} - -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TEAMS)(unsigned int num_teams, - unsigned int thread_limit) { - return; -} -#endif // OMP_40_ENABLED - -#if OMP_45_ENABLED - -// Task duplication function which copies src to dest (both are -// preallocated task structures) -static void __kmp_gomp_task_dup(kmp_task_t *dest, kmp_task_t *src, - kmp_int32 last_private) { - kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(src); - if (taskdata->td_copy_func) { - (taskdata->td_copy_func)(dest->shareds, src->shareds); - } -} - -#ifdef __cplusplus -} // extern "C" -#endif - -template -void __GOMP_taskloop(void (*func)(void *), void *data, - void (*copy_func)(void *, void *), long arg_size, - long arg_align, unsigned gomp_flags, - unsigned long num_tasks, int priority, T start, T end, - T step) { - typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32); - MKLOC(loc, "GOMP_taskloop"); - int sched; - T *loop_bounds; - int gtid = __kmp_entry_gtid(); - kmp_int32 flags = 0; - int if_val = gomp_flags & (1u << 10); - int nogroup = gomp_flags & (1u << 11); - int up = gomp_flags & (1u << 8); - p_task_dup_t task_dup = NULL; - kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags; -#ifdef KMP_DEBUG - { - char *buff; - buff = __kmp_str_format( - "GOMP_taskloop: T#%%d: func:%%p data:%%p copy_func:%%p " - "arg_size:%%ld arg_align:%%ld gomp_flags:0x%%x num_tasks:%%lu " - "priority:%%d start:%%%s end:%%%s step:%%%s\n", - traits_t::spec, traits_t::spec, traits_t::spec); - KA_TRACE(20, (buff, gtid, func, data, copy_func, arg_size, arg_align, - gomp_flags, num_tasks, priority, start, end, step)); - __kmp_str_free(&buff); - } -#endif - KMP_ASSERT((size_t)arg_size >= 2 * sizeof(T)); - KMP_ASSERT(arg_align > 0); - // The low-order bit is the "untied" flag - if (!(gomp_flags & 1)) { - input_flags->tiedness = 1; - } - // The second low-order bit is the "final" flag - if (gomp_flags & 2) { - input_flags->final = 1; - } - // Negative step flag - if (!up) { - // If step is flagged as negative, but isn't properly sign extended - // Then manually sign extend it. Could be a short, int, char embedded - // in a long. So cannot assume any cast. 
-    if (step > 0) {
-      for (int i = sizeof(T) * CHAR_BIT - 1; i >= 0L; --i) {
-        // break at the first 1 bit
-        if (step & ((T)1 << i))
-          break;
-        step |= ((T)1 << i);
-      }
-    }
-  }
-  input_flags->native = 1;
-  // Figure out if none/grainsize/num_tasks clause specified
-  if (num_tasks > 0) {
-    if (gomp_flags & (1u << 9))
-      sched = 1; // grainsize specified
-    else
-      sched = 2; // num_tasks specified
-  } else {
-    sched = 0; // neither grainsize nor num_tasks specified
-  }
-
-  // __kmp_task_alloc() sets up all other flags
-  kmp_task_t *task =
-      __kmp_task_alloc(&loc, gtid, input_flags, sizeof(kmp_task_t),
-                       arg_size + arg_align - 1, (kmp_routine_entry_t)func);
-  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
-  taskdata->td_copy_func = copy_func;
-  taskdata->td_size_loop_bounds = sizeof(T);
-
-  // re-align shareds if needed and setup firstprivate copy constructors
-  // through the task_dup mechanism
-  task->shareds = (void *)((((size_t)task->shareds) + arg_align - 1) /
-                           arg_align * arg_align);
-  if (copy_func) {
-    task_dup = __kmp_gomp_task_dup;
-  }
-  KMP_MEMCPY(task->shareds, data, arg_size);
-
-  loop_bounds = (T *)task->shareds;
-  loop_bounds[0] = start;
-  loop_bounds[1] = end + (up ? -1 : 1);
-  __kmpc_taskloop(&loc, gtid, task, if_val, (kmp_uint64 *)&(loop_bounds[0]),
-                  (kmp_uint64 *)&(loop_bounds[1]), (kmp_int64)step, nogroup,
-                  sched, (kmp_uint64)num_tasks, (void *)task_dup);
-}
-
-// 4-byte version of GOMP_doacross_post
-// This version needs to create a temporary array which converts 4-byte
-// integers into 8-byte integers
-template <typename T, bool need_conversion = (sizeof(long) == 4)>
-void __kmp_GOMP_doacross_post(T *count);
-
-template <> void __kmp_GOMP_doacross_post<long, true>(long *count) {
-  int gtid = __kmp_entry_gtid();
-  kmp_info_t *th = __kmp_threads[gtid];
-  MKLOC(loc, "GOMP_doacross_post");
-  kmp_int64 num_dims = th->th.th_dispatch->th_doacross_info[0];
-  kmp_int64 *vec =
-      (kmp_int64 *)__kmp_thread_malloc(th, sizeof(kmp_int64) * num_dims);
-  for (kmp_int64 i = 0; i < num_dims; ++i) {
-    vec[i] = (kmp_int64)count[i];
-  }
-  __kmpc_doacross_post(&loc, gtid, vec);
-  __kmp_thread_free(th, vec);
-}
-
-// 8-byte version of GOMP_doacross_post
-// This version can just pass in the count array directly instead of creating
-// a temporary array
-template <> void __kmp_GOMP_doacross_post<long, false>(long *count) {
-  int gtid = __kmp_entry_gtid();
-  MKLOC(loc, "GOMP_doacross_post");
-  __kmpc_doacross_post(&loc, gtid, RCAST(kmp_int64 *, count));
-}
-
-template <typename T> void __kmp_GOMP_doacross_wait(T first, va_list args) {
-  int gtid = __kmp_entry_gtid();
-  kmp_info_t *th = __kmp_threads[gtid];
-  MKLOC(loc, "GOMP_doacross_wait");
-  kmp_int64 num_dims = th->th.th_dispatch->th_doacross_info[0];
-  kmp_int64 *vec =
-      (kmp_int64 *)__kmp_thread_malloc(th, sizeof(kmp_int64) * num_dims);
-  vec[0] = (kmp_int64)first;
-  for (kmp_int64 i = 1; i < num_dims; ++i) {
-    T item = va_arg(args, T);
-    vec[i] = (kmp_int64)item;
-  }
-  __kmpc_doacross_wait(&loc, gtid, vec);
-  __kmp_thread_free(th, vec);
-  return;
-}
-
-#ifdef __cplusplus
-extern "C" {
-#endif // __cplusplus
-
-void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKLOOP)(
-    void (*func)(void *), void *data, void (*copy_func)(void *, void *),
-    long arg_size, long arg_align, unsigned gomp_flags, unsigned long num_tasks,
-    int priority, long start, long end, long step) {
-  __GOMP_taskloop(func, data, copy_func, arg_size, arg_align, gomp_flags,
-                  num_tasks, priority, start, end, step);
-}
-
-void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKLOOP_ULL)(
-    void (*func)(void *), void *data, void (*copy_func)(void *, void *),
-    long arg_size, long
arg_align, unsigned gomp_flags, unsigned long num_tasks, - int priority, unsigned long long start, unsigned long long end, - unsigned long long step) { - __GOMP_taskloop(func, data, copy_func, arg_size, - arg_align, gomp_flags, num_tasks, - priority, start, end, step); -} - -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_POST)(long *count) { - __kmp_GOMP_doacross_post(count); -} - -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_WAIT)(long first, ...) { - va_list args; - va_start(args, first); - __kmp_GOMP_doacross_wait(first, args); - va_end(args); -} - -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_ULL_POST)( - unsigned long long *count) { - int gtid = __kmp_entry_gtid(); - MKLOC(loc, "GOMP_doacross_ull_post"); - __kmpc_doacross_post(&loc, gtid, RCAST(kmp_int64 *, count)); -} - -void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_ULL_WAIT)( - unsigned long long first, ...) { - va_list args; - va_start(args, first); - __kmp_GOMP_doacross_wait(first, args); - va_end(args); -} - -#endif // OMP_45_ENABLED - -/* The following sections of code create aliases for the GOMP_* functions, then - create versioned symbols using the assembler directive .symver. This is only - pertinent for ELF .so library. The KMP_VERSION_SYMBOL macro is defined in - kmp_os.h */ - -#ifdef KMP_USE_VERSION_SYMBOLS -// GOMP_1.0 versioned symbols -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ATOMIC_END, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ATOMIC_START, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_BARRIER, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CRITICAL_END, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CRITICAL_NAME_END, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CRITICAL_NAME_START, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CRITICAL_START, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_END, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_END_NOWAIT, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_GUIDED_START, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START, 10, - "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START, 10, - "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_RUNTIME_START, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_STATIC_START, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ORDERED_END, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ORDERED_START, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_END, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START, 10, - "GOMP_1.0"); 
-KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START, 10, - "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START, 10, - "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START, 10, - "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_START, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_END, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_NEXT, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_START, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SINGLE_COPY_END, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SINGLE_COPY_START, 10, "GOMP_1.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SINGLE_START, 10, "GOMP_1.0"); - -// GOMP_2.0 versioned symbols -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASK, 20, "GOMP_2.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKWAIT, 20, "GOMP_2.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT, 20, "GOMP_2.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START, 20, "GOMP_2.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT, 20, "GOMP_2.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START, 20, "GOMP_2.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT, 20, - "GOMP_2.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START, 20, - "GOMP_2.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT, 20, - "GOMP_2.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START, 20, - "GOMP_2.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT, 20, - "GOMP_2.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START, 20, - "GOMP_2.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT, 20, - "GOMP_2.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START, 20, - "GOMP_2.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT, 20, "GOMP_2.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START, 20, "GOMP_2.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT, 20, "GOMP_2.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START, 20, "GOMP_2.0"); - -// GOMP_3.0 versioned symbols -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKYIELD, 30, "GOMP_3.0"); - -// GOMP_4.0 versioned symbols -#if OMP_40_ENABLED -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL, 40, "GOMP_4.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_SECTIONS, 40, "GOMP_4.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC, 40, "GOMP_4.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED, 40, "GOMP_4.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME, 40, "GOMP_4.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC, 40, "GOMP_4.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKGROUP_START, 40, "GOMP_4.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKGROUP_END, 40, "GOMP_4.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_BARRIER_CANCEL, 40, "GOMP_4.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CANCEL, 40, "GOMP_4.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CANCELLATION_POINT, 40, "GOMP_4.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_END_CANCEL, 40, "GOMP_4.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_END_CANCEL, 40, "GOMP_4.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TARGET, 40, 
"GOMP_4.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TARGET_DATA, 40, "GOMP_4.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TARGET_END_DATA, 40, "GOMP_4.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TARGET_UPDATE, 40, "GOMP_4.0"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TEAMS, 40, "GOMP_4.0"); -#endif - -// GOMP_4.5 versioned symbols -#if OMP_45_ENABLED -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKLOOP, 45, "GOMP_4.5"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKLOOP_ULL, 45, "GOMP_4.5"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_DOACROSS_POST, 45, "GOMP_4.5"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_DOACROSS_WAIT, 45, "GOMP_4.5"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_STATIC_START, 45, - "GOMP_4.5"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_DYNAMIC_START, 45, - "GOMP_4.5"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_GUIDED_START, 45, - "GOMP_4.5"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_RUNTIME_START, 45, - "GOMP_4.5"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_DOACROSS_ULL_POST, 45, "GOMP_4.5"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_DOACROSS_ULL_WAIT, 45, "GOMP_4.5"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_STATIC_START, 45, - "GOMP_4.5"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_DYNAMIC_START, 45, - "GOMP_4.5"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_GUIDED_START, 45, - "GOMP_4.5"); -KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START, 45, - "GOMP_4.5"); -#endif - -#endif // KMP_USE_VERSION_SYMBOLS - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_gsupport.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/z_Linux_asm.S =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/z_Linux_asm.S (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/z_Linux_asm.S (nonexistent) @@ -1,1730 +0,0 @@ -// z_Linux_asm.S: - microtasking routines specifically -// written for Intel platforms running Linux* OS - -// -////===----------------------------------------------------------------------===// -//// -//// The LLVM Compiler Infrastructure -//// -//// This file is dual licensed under the MIT and the University of Illinois Open -//// Source Licenses. See LICENSE.txt for details. -//// -////===----------------------------------------------------------------------===// -// - -// ----------------------------------------------------------------------- -// macros -// ----------------------------------------------------------------------- - -#include "kmp_config.h" - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - -# if KMP_MIC -// the 'delay r16/r32/r64' should be used instead of the 'pause'. -// The delay operation has the effect of removing the current thread from -// the round-robin HT mechanism, and therefore speeds up the issue rate of -// the other threads on the same core. -// -// A value of 0 works fine for <= 2 threads per core, but causes the EPCC -// barrier time to increase greatly for 3 or more threads per core. 
-// -// A value of 100 works pretty well for up to 4 threads per core, but isn't -// quite as fast as 0 for 2 threads per core. -// -// We need to check what happens for oversubscription / > 4 threads per core. -// It is possible that we need to pass the delay value in as a parameter -// that the caller determines based on the total # threads / # cores. -// -//.macro pause_op -// mov $100, %rax -// delay %rax -//.endm -# else -# define pause_op .byte 0xf3,0x90 -# endif // KMP_MIC - -# if KMP_OS_DARWIN -# define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols -# define KMP_LABEL(x) L_##x // form the name of label -.macro KMP_CFI_DEF_OFFSET -.endmacro -.macro KMP_CFI_OFFSET -.endmacro -.macro KMP_CFI_REGISTER -.endmacro -.macro KMP_CFI_DEF -.endmacro -.macro ALIGN - .align $0 -.endmacro -.macro DEBUG_INFO -/* Not sure what .size does in icc, not sure if we need to do something - similar for OS X*. -*/ -.endmacro -.macro PROC - ALIGN 4 - .globl KMP_PREFIX_UNDERSCORE($0) -KMP_PREFIX_UNDERSCORE($0): -.endmacro -# else // KMP_OS_DARWIN -# define KMP_PREFIX_UNDERSCORE(x) x //no extra underscore for Linux* OS symbols -// Format labels so that they don't override function names in gdb's backtraces -// MIC assembler doesn't accept .L syntax, the L works fine there (as well as -// on OS X*) -# if KMP_MIC -# define KMP_LABEL(x) L_##x // local label -# else -# define KMP_LABEL(x) .L_##x // local label hidden from backtraces -# endif // KMP_MIC -.macro ALIGN size - .align 1<<(\size) -.endm -.macro DEBUG_INFO proc - .cfi_endproc -// Not sure why we need .type and .size for the functions - .align 16 - .type \proc,@function - .size \proc,.-\proc -.endm -.macro PROC proc - ALIGN 4 - .globl KMP_PREFIX_UNDERSCORE(\proc) -KMP_PREFIX_UNDERSCORE(\proc): - .cfi_startproc -.endm -.macro KMP_CFI_DEF_OFFSET sz - .cfi_def_cfa_offset \sz -.endm -.macro KMP_CFI_OFFSET reg, sz - .cfi_offset \reg,\sz -.endm -.macro KMP_CFI_REGISTER reg - .cfi_def_cfa_register \reg -.endm -.macro KMP_CFI_DEF reg, sz - .cfi_def_cfa \reg,\sz -.endm -# endif // KMP_OS_DARWIN -#endif // KMP_ARCH_X86 || KMP_ARCH_x86_64 - -#if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 - -# if KMP_OS_DARWIN -# define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols -# define KMP_LABEL(x) L_##x // form the name of label - -.macro ALIGN - .align $0 -.endmacro - -.macro DEBUG_INFO -/* Not sure what .size does in icc, not sure if we need to do something - similar for OS X*. -*/ -.endmacro - -.macro PROC - ALIGN 4 - .globl KMP_PREFIX_UNDERSCORE($0) -KMP_PREFIX_UNDERSCORE($0): -.endmacro -# else // KMP_OS_DARWIN -# define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Linux* OS symbols -// Format labels so that they don't override function names in gdb's backtraces -# define KMP_LABEL(x) .L_##x // local label hidden from backtraces - -.macro ALIGN size - .align 1<<(\size) -.endm - -.macro DEBUG_INFO proc - .cfi_endproc -// Not sure why we need .type and .size for the functions - ALIGN 2 - .type \proc,@function - .size \proc,.-\proc -.endm - -.macro PROC proc - ALIGN 2 - .globl KMP_PREFIX_UNDERSCORE(\proc) -KMP_PREFIX_UNDERSCORE(\proc): - .cfi_startproc -.endm -# endif // KMP_OS_DARWIN - -#endif // (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 - -// ----------------------------------------------------------------------- -// data -// ----------------------------------------------------------------------- - -#ifdef KMP_GOMP_COMPAT - -// Support for unnamed common blocks. 
-// -// Because the symbol ".gomp_critical_user_" contains a ".", we have to -// put this stuff in assembly. - -# if KMP_ARCH_X86 -# if KMP_OS_DARWIN - .data - .comm .gomp_critical_user_,32 - .data - .globl ___kmp_unnamed_critical_addr -___kmp_unnamed_critical_addr: - .long .gomp_critical_user_ -# else /* Linux* OS */ - .data - .comm .gomp_critical_user_,32,8 - .data - ALIGN 4 - .global __kmp_unnamed_critical_addr -__kmp_unnamed_critical_addr: - .4byte .gomp_critical_user_ - .type __kmp_unnamed_critical_addr,@object - .size __kmp_unnamed_critical_addr,4 -# endif /* KMP_OS_DARWIN */ -# endif /* KMP_ARCH_X86 */ - -# if KMP_ARCH_X86_64 -# if KMP_OS_DARWIN - .data - .comm .gomp_critical_user_,32 - .data - .globl ___kmp_unnamed_critical_addr -___kmp_unnamed_critical_addr: - .quad .gomp_critical_user_ -# else /* Linux* OS */ - .data - .comm .gomp_critical_user_,32,8 - .data - ALIGN 8 - .global __kmp_unnamed_critical_addr -__kmp_unnamed_critical_addr: - .8byte .gomp_critical_user_ - .type __kmp_unnamed_critical_addr,@object - .size __kmp_unnamed_critical_addr,8 -# endif /* KMP_OS_DARWIN */ -# endif /* KMP_ARCH_X86_64 */ - -#endif /* KMP_GOMP_COMPAT */ - - -#if KMP_ARCH_X86 && !KMP_ARCH_PPC64 - -// ----------------------------------------------------------------------- -// microtasking routines specifically written for IA-32 architecture -// running Linux* OS -// ----------------------------------------------------------------------- - - .ident "Intel Corporation" - .data - ALIGN 4 -// void -// __kmp_x86_pause( void ); - - .text - PROC __kmp_x86_pause - - pause_op - ret - - DEBUG_INFO __kmp_x86_pause - -// void -// __kmp_x86_cpuid( int mode, int mode2, void *cpuid_buffer ); - - PROC __kmp_x86_cpuid - - pushl %ebp - movl %esp,%ebp - pushl %edi - pushl %ebx - pushl %ecx - pushl %edx - - movl 8(%ebp), %eax - movl 12(%ebp), %ecx - cpuid // Query the CPUID for the current processor - - movl 16(%ebp), %edi - movl %eax, 0(%edi) - movl %ebx, 4(%edi) - movl %ecx, 8(%edi) - movl %edx, 12(%edi) - - popl %edx - popl %ecx - popl %ebx - popl %edi - movl %ebp, %esp - popl %ebp - ret - - DEBUG_INFO __kmp_x86_cpuid - - -# if !KMP_ASM_INTRINS - -//------------------------------------------------------------------------ -// kmp_int32 -// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d ); - - PROC __kmp_test_then_add32 - - movl 4(%esp), %ecx - movl 8(%esp), %eax - lock - xaddl %eax,(%ecx) - ret - - DEBUG_INFO __kmp_test_then_add32 - -//------------------------------------------------------------------------ -// FUNCTION __kmp_xchg_fixed8 -// -// kmp_int32 -// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d ); -// -// parameters: -// p: 4(%esp) -// d: 8(%esp) -// -// return: %al - PROC __kmp_xchg_fixed8 - - movl 4(%esp), %ecx // "p" - movb 8(%esp), %al // "d" - - lock - xchgb %al,(%ecx) - ret - - DEBUG_INFO __kmp_xchg_fixed8 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_xchg_fixed16 -// -// kmp_int16 -// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d ); -// -// parameters: -// p: 4(%esp) -// d: 8(%esp) -// return: %ax - PROC __kmp_xchg_fixed16 - - movl 4(%esp), %ecx // "p" - movw 8(%esp), %ax // "d" - - lock - xchgw %ax,(%ecx) - ret - - DEBUG_INFO __kmp_xchg_fixed16 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_xchg_fixed32 -// -// kmp_int32 -// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d ); -// -// parameters: -// p: 4(%esp) -// d: 8(%esp) -// -// return: %eax - PROC 
__kmp_xchg_fixed32 - - movl 4(%esp), %ecx // "p" - movl 8(%esp), %eax // "d" - - lock - xchgl %eax,(%ecx) - ret - - DEBUG_INFO __kmp_xchg_fixed32 - - -// kmp_int8 -// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); - PROC __kmp_compare_and_store8 - - movl 4(%esp), %ecx - movb 8(%esp), %al - movb 12(%esp), %dl - lock - cmpxchgb %dl,(%ecx) - sete %al // if %al == (%ecx) set %al = 1 else set %al = 0 - and $1, %eax // sign extend previous instruction - ret - - DEBUG_INFO __kmp_compare_and_store8 - -// kmp_int16 -// __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv); - PROC __kmp_compare_and_store16 - - movl 4(%esp), %ecx - movw 8(%esp), %ax - movw 12(%esp), %dx - lock - cmpxchgw %dx,(%ecx) - sete %al // if %ax == (%ecx) set %al = 1 else set %al = 0 - and $1, %eax // sign extend previous instruction - ret - - DEBUG_INFO __kmp_compare_and_store16 - -// kmp_int32 -// __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv); - PROC __kmp_compare_and_store32 - - movl 4(%esp), %ecx - movl 8(%esp), %eax - movl 12(%esp), %edx - lock - cmpxchgl %edx,(%ecx) - sete %al // if %eax == (%ecx) set %al = 1 else set %al = 0 - and $1, %eax // sign extend previous instruction - ret - - DEBUG_INFO __kmp_compare_and_store32 - -// kmp_int32 -// __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 s ); - PROC __kmp_compare_and_store64 - - pushl %ebp - movl %esp, %ebp - pushl %ebx - pushl %edi - movl 8(%ebp), %edi - movl 12(%ebp), %eax // "cv" low order word - movl 16(%ebp), %edx // "cv" high order word - movl 20(%ebp), %ebx // "sv" low order word - movl 24(%ebp), %ecx // "sv" high order word - lock - cmpxchg8b (%edi) - sete %al // if %edx:eax == (%edi) set %al = 1 else set %al = 0 - and $1, %eax // sign extend previous instruction - popl %edi - popl %ebx - movl %ebp, %esp - popl %ebp - ret - - DEBUG_INFO __kmp_compare_and_store64 - -// kmp_int8 -// __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv); - PROC __kmp_compare_and_store_ret8 - - movl 4(%esp), %ecx - movb 8(%esp), %al - movb 12(%esp), %dl - lock - cmpxchgb %dl,(%ecx) - ret - - DEBUG_INFO __kmp_compare_and_store_ret8 - -// kmp_int16 -// __kmp_compare_and_store_ret16(volatile kmp_int16 *p, kmp_int16 cv, -// kmp_int16 sv); - PROC __kmp_compare_and_store_ret16 - - movl 4(%esp), %ecx - movw 8(%esp), %ax - movw 12(%esp), %dx - lock - cmpxchgw %dx,(%ecx) - ret - - DEBUG_INFO __kmp_compare_and_store_ret16 - -// kmp_int32 -// __kmp_compare_and_store_ret32(volatile kmp_int32 *p, kmp_int32 cv, -// kmp_int32 sv); - PROC __kmp_compare_and_store_ret32 - - movl 4(%esp), %ecx - movl 8(%esp), %eax - movl 12(%esp), %edx - lock - cmpxchgl %edx,(%ecx) - ret - - DEBUG_INFO __kmp_compare_and_store_ret32 - -// kmp_int64 -// __kmp_compare_and_store_ret64(volatile kmp_int64 *p, kmp_int64 cv, -// kmp_int64 sv); - PROC __kmp_compare_and_store_ret64 - - pushl %ebp - movl %esp, %ebp - pushl %ebx - pushl %edi - movl 8(%ebp), %edi - movl 12(%ebp), %eax // "cv" low order word - movl 16(%ebp), %edx // "cv" high order word - movl 20(%ebp), %ebx // "sv" low order word - movl 24(%ebp), %ecx // "sv" high order word - lock - cmpxchg8b (%edi) - popl %edi - popl %ebx - movl %ebp, %esp - popl %ebp - ret - - DEBUG_INFO __kmp_compare_and_store_ret64 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_xchg_real32 -// -// kmp_real32 -// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data ); -// -// parameters: -// addr: 4(%esp) 
-// data: 8(%esp) -// -// return: %eax - PROC __kmp_xchg_real32 - - pushl %ebp - movl %esp, %ebp - subl $4, %esp - pushl %esi - - movl 4(%ebp), %esi - flds (%esi) - // load - fsts -4(%ebp) - // store old value - - movl 8(%ebp), %eax - - lock - xchgl %eax, (%esi) - - flds -4(%ebp) - // return old value - - popl %esi - movl %ebp, %esp - popl %ebp - ret - - DEBUG_INFO __kmp_xchg_real32 - -# endif /* !KMP_ASM_INTRINS */ - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_load_x87_fpu_control_word -// -// void -// __kmp_load_x87_fpu_control_word( kmp_int16 *p ); -// -// parameters: -// p: 4(%esp) - PROC __kmp_load_x87_fpu_control_word - - movl 4(%esp), %eax - fldcw (%eax) - ret - - DEBUG_INFO __kmp_load_x87_fpu_control_word - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_store_x87_fpu_control_word -// -// void -// __kmp_store_x87_fpu_control_word( kmp_int16 *p ); -// -// parameters: -// p: 4(%esp) - PROC __kmp_store_x87_fpu_control_word - - movl 4(%esp), %eax - fstcw (%eax) - ret - - DEBUG_INFO __kmp_store_x87_fpu_control_word - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_clear_x87_fpu_status_word -// -// void -// __kmp_clear_x87_fpu_status_word(); - PROC __kmp_clear_x87_fpu_status_word - - fnclex - ret - - DEBUG_INFO __kmp_clear_x87_fpu_status_word - - -//------------------------------------------------------------------------ -// typedef void (*microtask_t)( int *gtid, int *tid, ... ); -// -// int -// __kmp_invoke_microtask( microtask_t pkfn, int gtid, int tid, -// int argc, void *p_argv[] ) { -// (*pkfn)( & gtid, & gtid, argv[0], ... ); -// return 1; -// } - -// -- Begin __kmp_invoke_microtask -// mark_begin; - PROC __kmp_invoke_microtask - - pushl %ebp - KMP_CFI_DEF_OFFSET 8 - KMP_CFI_OFFSET ebp,-8 - movl %esp,%ebp // establish the base pointer for this routine. - KMP_CFI_REGISTER ebp - subl $8,%esp // allocate space for two local variables. - // These varibales are: - // argv: -4(%ebp) - // temp: -8(%ebp) - // - pushl %ebx // save %ebx to use during this routine - // -#if OMPT_SUPPORT - movl 28(%ebp),%ebx // get exit_frame address - movl %ebp,(%ebx) // save exit_frame -#endif - - movl 20(%ebp),%ebx // Stack alignment - # args - addl $2,%ebx // #args +2 Always pass at least 2 args (gtid and tid) - shll $2,%ebx // Number of bytes used on stack: (#args+2)*4 - movl %esp,%eax // - subl %ebx,%eax // %esp-((#args+2)*4) -> %eax -- without mods, stack ptr would be this - movl %eax,%ebx // Save to %ebx - andl $0xFFFFFF80,%eax // mask off 7 bits - subl %eax,%ebx // Amount to subtract from %esp - subl %ebx,%esp // Prepare the stack ptr -- - // now it will be aligned on 128-byte boundary at the call - - movl 24(%ebp),%eax // copy from p_argv[] - movl %eax,-4(%ebp) // into the local variable *argv. - - movl 20(%ebp),%ebx // argc is 20(%ebp) - shll $2,%ebx - -KMP_LABEL(invoke_2): - cmpl $0,%ebx - jg KMP_LABEL(invoke_4) - jmp KMP_LABEL(invoke_3) - ALIGN 2 -KMP_LABEL(invoke_4): - movl -4(%ebp),%eax - subl $4,%ebx // decrement argc. - addl %ebx,%eax // index into argv. 
- movl (%eax),%edx - pushl %edx - - jmp KMP_LABEL(invoke_2) - ALIGN 2 -KMP_LABEL(invoke_3): - leal 16(%ebp),%eax // push & tid - pushl %eax - - leal 12(%ebp),%eax // push & gtid - pushl %eax - - movl 8(%ebp),%ebx - call *%ebx // call (*pkfn)(); - - movl $1,%eax // return 1; - - movl -12(%ebp),%ebx // restore %ebx - leave - KMP_CFI_DEF esp,4 - ret - - DEBUG_INFO __kmp_invoke_microtask -// -- End __kmp_invoke_microtask - - -// kmp_uint64 -// __kmp_hardware_timestamp(void) - PROC __kmp_hardware_timestamp - rdtsc - ret - - DEBUG_INFO __kmp_hardware_timestamp -// -- End __kmp_hardware_timestamp - -#endif /* KMP_ARCH_X86 */ - - -#if KMP_ARCH_X86_64 - -// ----------------------------------------------------------------------- -// microtasking routines specifically written for IA-32 architecture and -// Intel(R) 64 running Linux* OS -// ----------------------------------------------------------------------- - -// -- Machine type P -// mark_description "Intel Corporation"; - .ident "Intel Corporation" -// -- .file "z_Linux_asm.S" - .data - ALIGN 4 - -// To prevent getting our code into .data section .text added to every routine -// definition for x86_64. -//------------------------------------------------------------------------ -// FUNCTION __kmp_x86_cpuid -// -// void -// __kmp_x86_cpuid( int mode, int mode2, void *cpuid_buffer ); -// -// parameters: -// mode: %edi -// mode2: %esi -// cpuid_buffer: %rdx - .text - PROC __kmp_x86_cpuid - - pushq %rbp - movq %rsp,%rbp - pushq %rbx // callee-save register - - movl %esi, %ecx // "mode2" - movl %edi, %eax // "mode" - movq %rdx, %rsi // cpuid_buffer - cpuid // Query the CPUID for the current processor - - movl %eax, 0(%rsi) // store results into buffer - movl %ebx, 4(%rsi) - movl %ecx, 8(%rsi) - movl %edx, 12(%rsi) - - popq %rbx // callee-save register - movq %rbp, %rsp - popq %rbp - ret - - DEBUG_INFO __kmp_x86_cpuid - - - -# if !KMP_ASM_INTRINS - -//------------------------------------------------------------------------ -// FUNCTION __kmp_test_then_add32 -// -// kmp_int32 -// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d ); -// -// parameters: -// p: %rdi -// d: %esi -// -// return: %eax - .text - PROC __kmp_test_then_add32 - - movl %esi, %eax // "d" - lock - xaddl %eax,(%rdi) - ret - - DEBUG_INFO __kmp_test_then_add32 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_test_then_add64 -// -// kmp_int64 -// __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d ); -// -// parameters: -// p: %rdi -// d: %rsi -// return: %rax - .text - PROC __kmp_test_then_add64 - - movq %rsi, %rax // "d" - lock - xaddq %rax,(%rdi) - ret - - DEBUG_INFO __kmp_test_then_add64 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_xchg_fixed8 -// -// kmp_int32 -// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d ); -// -// parameters: -// p: %rdi -// d: %sil -// -// return: %al - .text - PROC __kmp_xchg_fixed8 - - movb %sil, %al // "d" - - lock - xchgb %al,(%rdi) - ret - - DEBUG_INFO __kmp_xchg_fixed8 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_xchg_fixed16 -// -// kmp_int16 -// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d ); -// -// parameters: -// p: %rdi -// d: %si -// return: %ax - .text - PROC __kmp_xchg_fixed16 - - movw %si, %ax // "d" - - lock - xchgw %ax,(%rdi) - ret - - DEBUG_INFO __kmp_xchg_fixed16 - - -//------------------------------------------------------------------------ -// FUNCTION 
__kmp_xchg_fixed32 -// -// kmp_int32 -// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d ); -// -// parameters: -// p: %rdi -// d: %esi -// -// return: %eax - .text - PROC __kmp_xchg_fixed32 - - movl %esi, %eax // "d" - - lock - xchgl %eax,(%rdi) - ret - - DEBUG_INFO __kmp_xchg_fixed32 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_xchg_fixed64 -// -// kmp_int64 -// __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d ); -// -// parameters: -// p: %rdi -// d: %rsi -// return: %rax - .text - PROC __kmp_xchg_fixed64 - - movq %rsi, %rax // "d" - - lock - xchgq %rax,(%rdi) - ret - - DEBUG_INFO __kmp_xchg_fixed64 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_compare_and_store8 -// -// kmp_int8 -// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); -// -// parameters: -// p: %rdi -// cv: %esi -// sv: %edx -// -// return: %eax - .text - PROC __kmp_compare_and_store8 - - movb %sil, %al // "cv" - lock - cmpxchgb %dl,(%rdi) - sete %al // if %al == (%rdi) set %al = 1 else set %al = 0 - andq $1, %rax // sign extend previous instruction for return value - ret - - DEBUG_INFO __kmp_compare_and_store8 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_compare_and_store16 -// -// kmp_int16 -// __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); -// -// parameters: -// p: %rdi -// cv: %si -// sv: %dx -// -// return: %eax - .text - PROC __kmp_compare_and_store16 - - movw %si, %ax // "cv" - lock - cmpxchgw %dx,(%rdi) - sete %al // if %ax == (%rdi) set %al = 1 else set %al = 0 - andq $1, %rax // sign extend previous instruction for return value - ret - - DEBUG_INFO __kmp_compare_and_store16 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_compare_and_store32 -// -// kmp_int32 -// __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); -// -// parameters: -// p: %rdi -// cv: %esi -// sv: %edx -// -// return: %eax - .text - PROC __kmp_compare_and_store32 - - movl %esi, %eax // "cv" - lock - cmpxchgl %edx,(%rdi) - sete %al // if %eax == (%rdi) set %al = 1 else set %al = 0 - andq $1, %rax // sign extend previous instruction for return value - ret - - DEBUG_INFO __kmp_compare_and_store32 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_compare_and_store64 -// -// kmp_int32 -// __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); -// -// parameters: -// p: %rdi -// cv: %rsi -// sv: %rdx -// return: %eax - .text - PROC __kmp_compare_and_store64 - - movq %rsi, %rax // "cv" - lock - cmpxchgq %rdx,(%rdi) - sete %al // if %rax == (%rdi) set %al = 1 else set %al = 0 - andq $1, %rax // sign extend previous instruction for return value - ret - - DEBUG_INFO __kmp_compare_and_store64 - -//------------------------------------------------------------------------ -// FUNCTION __kmp_compare_and_store_ret8 -// -// kmp_int8 -// __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); -// -// parameters: -// p: %rdi -// cv: %esi -// sv: %edx -// -// return: %eax - .text - PROC __kmp_compare_and_store_ret8 - - movb %sil, %al // "cv" - lock - cmpxchgb %dl,(%rdi) - ret - - DEBUG_INFO __kmp_compare_and_store_ret8 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_compare_and_store_ret16 -// -// kmp_int16 -// 
__kmp_compare_and_store16_ret( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); -// -// parameters: -// p: %rdi -// cv: %si -// sv: %dx -// -// return: %eax - .text - PROC __kmp_compare_and_store_ret16 - - movw %si, %ax // "cv" - lock - cmpxchgw %dx,(%rdi) - ret - - DEBUG_INFO __kmp_compare_and_store_ret16 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_compare_and_store_ret32 -// -// kmp_int32 -// __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); -// -// parameters: -// p: %rdi -// cv: %esi -// sv: %edx -// -// return: %eax - .text - PROC __kmp_compare_and_store_ret32 - - movl %esi, %eax // "cv" - lock - cmpxchgl %edx,(%rdi) - ret - - DEBUG_INFO __kmp_compare_and_store_ret32 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_compare_and_store_ret64 -// -// kmp_int64 -// __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); -// -// parameters: -// p: %rdi -// cv: %rsi -// sv: %rdx -// return: %eax - .text - PROC __kmp_compare_and_store_ret64 - - movq %rsi, %rax // "cv" - lock - cmpxchgq %rdx,(%rdi) - ret - - DEBUG_INFO __kmp_compare_and_store_ret64 - -# endif /* !KMP_ASM_INTRINS */ - - -# if !KMP_MIC - -# if !KMP_ASM_INTRINS - -//------------------------------------------------------------------------ -// FUNCTION __kmp_xchg_real32 -// -// kmp_real32 -// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data ); -// -// parameters: -// addr: %rdi -// data: %xmm0 (lower 4 bytes) -// -// return: %xmm0 (lower 4 bytes) - .text - PROC __kmp_xchg_real32 - - movd %xmm0, %eax // load "data" to eax - - lock - xchgl %eax, (%rdi) - - movd %eax, %xmm0 // load old value into return register - - ret - - DEBUG_INFO __kmp_xchg_real32 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_xchg_real64 -// -// kmp_real64 -// __kmp_xchg_real64( volatile kmp_real64 *addr, kmp_real64 data ); -// -// parameters: -// addr: %rdi -// data: %xmm0 (lower 8 bytes) -// return: %xmm0 (lower 8 bytes) - .text - PROC __kmp_xchg_real64 - - movd %xmm0, %rax // load "data" to rax - - lock - xchgq %rax, (%rdi) - - movd %rax, %xmm0 // load old value into return register - ret - - DEBUG_INFO __kmp_xchg_real64 - - -# endif /* !KMP_MIC */ - -# endif /* !KMP_ASM_INTRINS */ - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_load_x87_fpu_control_word -// -// void -// __kmp_load_x87_fpu_control_word( kmp_int16 *p ); -// -// parameters: -// p: %rdi - .text - PROC __kmp_load_x87_fpu_control_word - - fldcw (%rdi) - ret - - DEBUG_INFO __kmp_load_x87_fpu_control_word - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_store_x87_fpu_control_word -// -// void -// __kmp_store_x87_fpu_control_word( kmp_int16 *p ); -// -// parameters: -// p: %rdi - .text - PROC __kmp_store_x87_fpu_control_word - - fstcw (%rdi) - ret - - DEBUG_INFO __kmp_store_x87_fpu_control_word - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_clear_x87_fpu_status_word -// -// void -// __kmp_clear_x87_fpu_status_word(); - .text - PROC __kmp_clear_x87_fpu_status_word - -#if KMP_MIC -// TODO: remove the workaround for problem with fnclex instruction (no CQ known) - fstenv -32(%rsp) // store FP env - andw $~0x80ff, 4-32(%rsp) // clear 0-7,15 bits of FP SW - fldenv -32(%rsp) // load FP env back - ret -#else - fnclex - ret 
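// Editor's note: fnclex clears the x87 status word (exception flags and the
// busy bit) without first waiting on pending unmasked exceptions. The
// KMP_MIC branch above avoids fnclex entirely and instead rewrites the
// status word through the fstenv/fldenv environment round trip, clearing
// bits 0-7 and 15, per the TODO about the fnclex problem on that target.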
-#endif - - DEBUG_INFO __kmp_clear_x87_fpu_status_word - - -//------------------------------------------------------------------------ -// typedef void (*microtask_t)( int *gtid, int *tid, ... ); -// -// int -// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), -// int gtid, int tid, -// int argc, void *p_argv[] ) { -// (*pkfn)( & gtid, & tid, argv[0], ... ); -// return 1; -// } -// -// note: at call to pkfn must have %rsp 128-byte aligned for compiler -// -// parameters: -// %rdi: pkfn -// %esi: gtid -// %edx: tid -// %ecx: argc -// %r8: p_argv -// %r9: &exit_frame -// -// locals: -// __gtid: gtid parm pushed on stack so can pass >id to pkfn -// __tid: tid parm pushed on stack so can pass &tid to pkfn -// -// reg temps: -// %rax: used all over the place -// %rdx: used in stack pointer alignment calculation -// %r11: used to traverse p_argv array -// %rsi: used as temporary for stack parameters -// used as temporary for number of pkfn parms to push -// %rbx: used to hold pkfn address, and zero constant, callee-save -// -// return: %eax (always 1/TRUE) -__gtid = -16 -__tid = -24 - -// -- Begin __kmp_invoke_microtask -// mark_begin; - .text - PROC __kmp_invoke_microtask - - pushq %rbp // save base pointer - KMP_CFI_DEF_OFFSET 16 - KMP_CFI_OFFSET rbp,-16 - movq %rsp,%rbp // establish the base pointer for this routine. - KMP_CFI_REGISTER rbp - -#if OMPT_SUPPORT - movq %rbp, (%r9) // save exit_frame -#endif - - pushq %rbx // %rbx is callee-saved register - pushq %rsi // Put gtid on stack so can pass &tgid to pkfn - pushq %rdx // Put tid on stack so can pass &tid to pkfn - - movq %rcx, %rax // Stack alignment calculation begins; argc -> %rax - movq $0, %rbx // constant for cmovs later - subq $4, %rax // subtract four args passed in registers to pkfn -#if KMP_MIC - js KMP_LABEL(kmp_0) // jump to movq - jmp KMP_LABEL(kmp_0_exit) // jump ahead -KMP_LABEL(kmp_0): - movq %rbx, %rax // zero negative value in %rax <- max(0, argc-4) -KMP_LABEL(kmp_0_exit): -#else - cmovsq %rbx, %rax // zero negative value in %rax <- max(0, argc-4) -#endif // KMP_MIC - - movq %rax, %rsi // save max(0, argc-4) -> %rsi for later - shlq $3, %rax // Number of bytes used on stack: max(0, argc-4)*8 - - movq %rsp, %rdx // - subq %rax, %rdx // %rsp-(max(0,argc-4)*8) -> %rdx -- - // without align, stack ptr would be this - movq %rdx, %rax // Save to %rax - - andq $0xFFFFFFFFFFFFFF80, %rax // mask off lower 7 bits (128 bytes align) - subq %rax, %rdx // Amount to subtract from %rsp - subq %rdx, %rsp // Prepare the stack ptr -- - // now %rsp will align to 128-byte boundary at call site - - // setup pkfn parameter reg and stack - movq %rcx, %rax // argc -> %rax - cmpq $0, %rsi - je KMP_LABEL(kmp_invoke_pass_parms) // jump ahead if no parms to push - shlq $3, %rcx // argc*8 -> %rcx - movq %r8, %rdx // p_argv -> %rdx - addq %rcx, %rdx // &p_argv[argc] -> %rdx - - movq %rsi, %rcx // max (0, argc-4) -> %rcx - -KMP_LABEL(kmp_invoke_push_parms): - // push nth - 7th parms to pkfn on stack - subq $8, %rdx // decrement p_argv pointer to previous parm - movq (%rdx), %rsi // p_argv[%rcx-1] -> %rsi - pushq %rsi // push p_argv[%rcx-1] onto stack (reverse order) - subl $1, %ecx - -// C69570: "X86_64_RELOC_BRANCH not supported" error at linking on mac_32e -// if the name of the label that is an operand of this jecxz starts with a dot ("."); -// Apple's linker does not support 1-byte length relocation; -// Resolution: replace all .labelX entries with L_labelX. 
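// Editor's note: jecxz branches iff %ecx is zero and neither reads nor
// writes EFLAGS, which is why it cleanly terminates this loop; %ecx holds
// the number of stack parameters still to push. The parameters are pushed
// in reverse (last to first) so that p_argv[4] ends up at the lowest
// address, matching the left-to-right stack-argument layout the callee
// expects once the first six arguments have gone into registers.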
- - jecxz KMP_LABEL(kmp_invoke_pass_parms) // stop when four p_argv[] parms left - jmp KMP_LABEL(kmp_invoke_push_parms) - ALIGN 3 -KMP_LABEL(kmp_invoke_pass_parms): // put 1st - 6th parms to pkfn in registers. - // order here is important to avoid trashing - // registers used for both input and output parms! - movq %rdi, %rbx // pkfn -> %rbx - leaq __gtid(%rbp), %rdi // >id -> %rdi (store 1st parm to pkfn) - leaq __tid(%rbp), %rsi // &tid -> %rsi (store 2nd parm to pkfn) - - movq %r8, %r11 // p_argv -> %r11 - -#if KMP_MIC - cmpq $4, %rax // argc >= 4? - jns KMP_LABEL(kmp_4) // jump to movq - jmp KMP_LABEL(kmp_4_exit) // jump ahead -KMP_LABEL(kmp_4): - movq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn) -KMP_LABEL(kmp_4_exit): - - cmpq $3, %rax // argc >= 3? - jns KMP_LABEL(kmp_3) // jump to movq - jmp KMP_LABEL(kmp_3_exit) // jump ahead -KMP_LABEL(kmp_3): - movq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn) -KMP_LABEL(kmp_3_exit): - - cmpq $2, %rax // argc >= 2? - jns KMP_LABEL(kmp_2) // jump to movq - jmp KMP_LABEL(kmp_2_exit) // jump ahead -KMP_LABEL(kmp_2): - movq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn) -KMP_LABEL(kmp_2_exit): - - cmpq $1, %rax // argc >= 1? - jns KMP_LABEL(kmp_1) // jump to movq - jmp KMP_LABEL(kmp_1_exit) // jump ahead -KMP_LABEL(kmp_1): - movq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn) -KMP_LABEL(kmp_1_exit): -#else - cmpq $4, %rax // argc >= 4? - cmovnsq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn) - - cmpq $3, %rax // argc >= 3? - cmovnsq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn) - - cmpq $2, %rax // argc >= 2? - cmovnsq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn) - - cmpq $1, %rax // argc >= 1? - cmovnsq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn) -#endif // KMP_MIC - - call *%rbx // call (*pkfn)(); - movq $1, %rax // move 1 into return register; - - movq -8(%rbp), %rbx // restore %rbx using %rbp since %rsp was modified - movq %rbp, %rsp // restore stack pointer - popq %rbp // restore frame pointer - KMP_CFI_DEF rsp,8 - ret - - DEBUG_INFO __kmp_invoke_microtask -// -- End __kmp_invoke_microtask - -// kmp_uint64 -// __kmp_hardware_timestamp(void) - .text - PROC __kmp_hardware_timestamp - rdtsc - shlq $32, %rdx - orq %rdx, %rax - ret - - DEBUG_INFO __kmp_hardware_timestamp -// -- End __kmp_hardware_timestamp - -//------------------------------------------------------------------------ -// FUNCTION __kmp_bsr32 -// -// int -// __kmp_bsr32( int ); - .text - PROC __kmp_bsr32 - - bsr %edi,%eax - ret - - DEBUG_INFO __kmp_bsr32 - - -// ----------------------------------------------------------------------- -#endif /* KMP_ARCH_X86_64 */ - -// ' -#if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 - -//------------------------------------------------------------------------ -// -// typedef void (*microtask_t)( int *gtid, int *tid, ... ); -// -// int -// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), -// int gtid, int tid, -// int argc, void *p_argv[] ) { -// (*pkfn)( & gtid, & tid, argv[0], ... 
); -// return 1; -// } -// -// parameters: -// x0: pkfn -// w1: gtid -// w2: tid -// w3: argc -// x4: p_argv -// x5: &exit_frame -// -// locals: -// __gtid: gtid parm pushed on stack so can pass >id to pkfn -// __tid: tid parm pushed on stack so can pass &tid to pkfn -// -// reg temps: -// x8: used to hold pkfn address -// w9: used as temporary for number of pkfn parms -// x10: used to traverse p_argv array -// x11: used as temporary for stack placement calculation -// x12: used as temporary for stack parameters -// x19: used to preserve exit_frame_ptr, callee-save -// -// return: w0 (always 1/TRUE) -// - -__gtid = 4 -__tid = 8 - -// -- Begin __kmp_invoke_microtask -// mark_begin; - .text - PROC __kmp_invoke_microtask - - stp x29, x30, [sp, #-16]! -# if OMPT_SUPPORT - stp x19, x20, [sp, #-16]! -# endif - mov x29, sp - - orr w9, wzr, #1 - add w9, w9, w3, lsr #1 - sub sp, sp, w9, lsl #4 - mov x11, sp - - mov x8, x0 - str w1, [x29, #-__gtid] - str w2, [x29, #-__tid] - mov w9, w3 - mov x10, x4 -# if OMPT_SUPPORT - mov x19, x5 - str x29, [x19] -# endif - - sub x0, x29, #__gtid - sub x1, x29, #__tid - - cbz w9, KMP_LABEL(kmp_1) - ldr x2, [x10] - - sub w9, w9, #1 - cbz w9, KMP_LABEL(kmp_1) - ldr x3, [x10, #8]! - - sub w9, w9, #1 - cbz w9, KMP_LABEL(kmp_1) - ldr x4, [x10, #8]! - - sub w9, w9, #1 - cbz w9, KMP_LABEL(kmp_1) - ldr x5, [x10, #8]! - - sub w9, w9, #1 - cbz w9, KMP_LABEL(kmp_1) - ldr x6, [x10, #8]! - - sub w9, w9, #1 - cbz w9, KMP_LABEL(kmp_1) - ldr x7, [x10, #8]! - -KMP_LABEL(kmp_0): - sub w9, w9, #1 - cbz w9, KMP_LABEL(kmp_1) - ldr x12, [x10, #8]! - str x12, [x11], #8 - b KMP_LABEL(kmp_0) -KMP_LABEL(kmp_1): - blr x8 - orr w0, wzr, #1 - mov sp, x29 -# if OMPT_SUPPORT - str xzr, [x19] - ldp x19, x20, [sp], #16 -# endif - ldp x29, x30, [sp], #16 - ret - - DEBUG_INFO __kmp_invoke_microtask -// -- End __kmp_invoke_microtask - -#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 */ - -#if KMP_ARCH_PPC64 - -//------------------------------------------------------------------------ -// -// typedef void (*microtask_t)( int *gtid, int *tid, ... ); -// -// int -// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), -// int gtid, int tid, -// int argc, void *p_argv[] ) { -// (*pkfn)( & gtid, & tid, argv[0], ... ); -// return 1; -// } -// -// parameters: -// r3: pkfn -// r4: gtid -// r5: tid -// r6: argc -// r7: p_argv -// r8: &exit_frame -// -// return: r3 (always 1/TRUE) -// - .text -# if KMP_ARCH_PPC64_LE - .abiversion 2 -# endif - .globl __kmp_invoke_microtask - -# if KMP_ARCH_PPC64_LE - .p2align 4 -# else - .p2align 2 -# endif - - .type __kmp_invoke_microtask,@function - -# if KMP_ARCH_PPC64_LE -__kmp_invoke_microtask: -.Lfunc_begin0: -.Lfunc_gep0: - addis 2, 12, .TOC.-.Lfunc_gep0@ha - addi 2, 2, .TOC.-.Lfunc_gep0@l -.Lfunc_lep0: - .localentry __kmp_invoke_microtask, .Lfunc_lep0-.Lfunc_gep0 -# else - .section .opd,"aw",@progbits -__kmp_invoke_microtask: - .p2align 3 - .quad .Lfunc_begin0 - .quad .TOC.@tocbase - .quad 0 - .text -.Lfunc_begin0: -# endif - -// -- Begin __kmp_invoke_microtask -// mark_begin; - -// We need to allocate a stack frame large enough to hold all of the parameters -// on the stack for the microtask plus what this function needs. That's 48 -// bytes under the ELFv1 ABI (32 bytes under ELFv2), plus 8*(2 + argc) for the -// parameters to the microtask, plus 8 bytes to store the values of r4 and r5, -// and 8 bytes to store r31. With OMP-T support, we need an additional 8 bytes -// to save r30 to hold a copy of r8. 
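// Editor's worked example: the constants loaded below fold the fixed pieces
// together, giving frame size = 88 + 8*argc under ELFv1 and 72 + 8*argc
// under ELFv2 (48 or 32 bytes of linkage area, plus 8*2 doublewords for the
// gtid/tid parameter slots, plus 8 bytes for the stored gtid/tid words,
// plus 8 for r31, plus the 8-byte r30/OMP-T slot, which is reserved
// unconditionally). For argc == 3 on ELFv1 that is 88 + 24 = 112 bytes,
// already a multiple of 16, so the alignment mask leaves it unchanged.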
- - .cfi_startproc - mflr 0 - std 31, -8(1) - std 0, 16(1) - -// This is unusual because normally we'd set r31 equal to r1 after the stack -// frame is established. In this case, however, we need to dynamically compute -// the stack frame size, and so we keep a direct copy of r1 to access our -// register save areas and restore the r1 value before returning. - mr 31, 1 - .cfi_def_cfa_register r31 - .cfi_offset r31, -8 - .cfi_offset lr, 16 - -// Compute the size necessary for the local stack frame. -# if KMP_ARCH_PPC64_LE - li 12, 72 -# else - li 12, 88 -# endif - sldi 0, 6, 3 - add 12, 0, 12 - neg 12, 12 - -// We need to make sure that the stack frame stays aligned (to 16 bytes, except -// under the BG/Q CNK, where it must be to 32 bytes). -# if KMP_OS_CNK - li 0, -32 -# else - li 0, -16 -# endif - and 12, 0, 12 - -// Establish the local stack frame. - stdux 1, 1, 12 - -# if OMPT_SUPPORT - .cfi_offset r30, -16 - std 30, -16(31) - std 1, 0(8) - mr 30, 8 -# endif - -// Store gtid and tid to the stack because they're passed by reference to the microtask. - stw 4, -20(31) - stw 5, -24(31) - - mr 12, 6 - mr 4, 7 - - cmpwi 0, 12, 1 - blt 0, .Lcall - - ld 5, 0(4) - - cmpwi 0, 12, 2 - blt 0, .Lcall - - ld 6, 8(4) - - cmpwi 0, 12, 3 - blt 0, .Lcall - - ld 7, 16(4) - - cmpwi 0, 12, 4 - blt 0, .Lcall - - ld 8, 24(4) - - cmpwi 0, 12, 5 - blt 0, .Lcall - - ld 9, 32(4) - - cmpwi 0, 12, 6 - blt 0, .Lcall - - ld 10, 40(4) - - cmpwi 0, 12, 7 - blt 0, .Lcall - -// There are more than 6 microtask parameters, so we need to store the -// remainder to the stack. - addi 12, 12, -6 - mtctr 12 - -// These are set to 8 bytes before the first desired store address (we're using -// pre-increment loads and stores in the loop below). The parameter save area -// for the microtask begins 48 + 8*8 == 112 bytes above r1 for ELFv1 and -// 32 + 8*8 == 96 bytes above r1 for ELFv2. - addi 4, 4, 40 -# if KMP_ARCH_PPC64_LE - addi 12, 1, 88 -# else - addi 12, 1, 104 -# endif - -.Lnext: - ldu 0, 8(4) - stdu 0, 8(12) - bdnz .Lnext - -.Lcall: -# if KMP_ARCH_PPC64_LE - std 2, 24(1) - mr 12, 3 -#else - std 2, 40(1) -// For ELFv1, we need to load the actual function address from the function descriptor. 
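-// (Editorial note, not part of the original source.) Under ELFv1 a function
-// "pointer" such as pkfn in r3 is the address of a three-doubleword function
-// descriptor { entry point, TOC pointer, environment pointer }; the three
-// loads below unpack it into r12, r2, and r11 before the indirect call
-// through CTR.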
- ld 12, 0(3)
- ld 2, 8(3)
- ld 11, 16(3)
-#endif
-
- addi 3, 31, -20
- addi 4, 31, -24
-
- mtctr 12
- bctrl
-# if KMP_ARCH_PPC64_LE
- ld 2, 24(1)
-# else
- ld 2, 40(1)
-# endif
-
-# if OMPT_SUPPORT
- li 3, 0
- std 3, 0(30)
-# endif
-
- li 3, 1
-
-# if OMPT_SUPPORT
- ld 30, -16(31)
-# endif
-
- mr 1, 31
- ld 0, 16(1)
- ld 31, -8(1)
- mtlr 0
- blr
-
- .long 0
- .quad 0
-.Lfunc_end0:
- .size __kmp_invoke_microtask, .Lfunc_end0-.Lfunc_begin0
- .cfi_endproc
-
-// -- End __kmp_invoke_microtask
-
-#endif /* KMP_ARCH_PPC64 */
-
-#if KMP_ARCH_ARM || KMP_ARCH_MIPS
- .data
- .comm .gomp_critical_user_,32,8
- .data
- .align 4
- .global __kmp_unnamed_critical_addr
-__kmp_unnamed_critical_addr:
- .4byte .gomp_critical_user_
- .size __kmp_unnamed_critical_addr,4
-#endif /* KMP_ARCH_ARM || KMP_ARCH_MIPS */
-
-#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
- .data
- .comm .gomp_critical_user_,32,8
- .data
- .align 8
- .global __kmp_unnamed_critical_addr
-__kmp_unnamed_critical_addr:
- .8byte .gomp_critical_user_
- .size __kmp_unnamed_critical_addr,8
-#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 */
-
-#if KMP_OS_LINUX
-# if KMP_ARCH_ARM
-.section .note.GNU-stack,"",%progbits
-# else
-.section .note.GNU-stack,"",@progbits
-# endif
-#endif

Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/z_Linux_asm.S
___________________________________________________________________
Deleted: svn:eol-style
## -1 +0,0 ##
-native
\ No newline at end of property
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Deleted: svn:mime-type
## -1 +0,0 ##
-text/plain
\ No newline at end of property
Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_utility.cpp
===================================================================
--- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_utility.cpp (revision 348960)
+++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_utility.cpp (nonexistent)
@@ -1,410 +0,0 @@
-/*
- * kmp_utility.cpp -- Utility routines for the OpenMP support library.
- */
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "kmp.h"
-#include "kmp_i18n.h"
-#include "kmp_str.h"
-#include "kmp_wrapper_getpid.h"
-#include <float.h>
-
-static const char *unknown = "unknown";
-
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-
-/* NOTE: If called before serial_initialize (i.e. from runtime_initialize), then
- the debugging package has not been initialized yet, and only "0" will print
- debugging output since the environment variables have not been read. */
-
-#ifdef KMP_DEBUG
-static int trace_level = 5;
-#endif
-
-/* LOG_ID_BITS = ( 1 + floor( log_2( max( log_per_phy - 1, 1 ))))
- * APIC_ID = (PHY_ID << LOG_ID_BITS) | LOG_ID
- * PHY_ID = APIC_ID >> LOG_ID_BITS
- */
-int __kmp_get_physical_id(int log_per_phy, int apic_id) {
- int index_lsb, index_msb, temp;
-
- if (log_per_phy > 1) {
- index_lsb = 0;
- index_msb = 31;
-
- temp = log_per_phy;
- while ((temp & 1) == 0) {
- temp >>= 1;
- index_lsb++;
- }
-
- temp = log_per_phy;
- while ((temp & 0x80000000) == 0) {
- temp <<= 1;
- index_msb--;
- }
-
- /* If >1 bits were set in log_per_phy, choose next higher power of 2 */
- if (index_lsb != index_msb)
- index_msb++;
-
- return ((int)(apic_id >> index_msb));
- }
-
- return apic_id;
-}
-
-/*
- * LOG_ID_BITS = ( 1 + floor( log_2( max( log_per_phy - 1, 1 ))))
- * APIC_ID = (PHY_ID << LOG_ID_BITS) | LOG_ID
- * LOG_ID = APIC_ID & (( 1 << LOG_ID_BITS ) - 1 )
- */
-int __kmp_get_logical_id(int log_per_phy, int apic_id) {
- unsigned current_bit;
- int bits_seen;
-
- if (log_per_phy <= 1)
- return (0);
-
- bits_seen = 0;
-
- for (current_bit = 1; log_per_phy != 0; current_bit <<= 1) {
- if (log_per_phy & current_bit) {
- log_per_phy &= ~current_bit;
- bits_seen++;
- }
- }
-
- /* If exactly 1 bit was set in log_per_phy, choose next lower power of 2 */
- if (bits_seen == 1) {
- current_bit >>= 1;
- }
-
- return ((int)((current_bit - 1) & apic_id));
-}
-
-static kmp_uint64 __kmp_parse_frequency( // R: Frequency in Hz.
-    char const *frequency // I: Float number and unit: MHz, GHz, or THz.
-    ) {
-
- double value = 0.0;
- char *unit = NULL;
- kmp_uint64 result = 0; /* Zero is a better unknown value than all ones. */
-
- if (frequency == NULL) {
- return result;
- }
- value = strtod(frequency, &unit);
- if (0 < value &&
-     value <= DBL_MAX) { // Good value (not overflow, underflow, etc).
- if (strcmp(unit, "MHz") == 0) {
- value = value * 1.0E+6;
- } else if (strcmp(unit, "GHz") == 0) {
- value = value * 1.0E+9;
- } else if (strcmp(unit, "THz") == 0) {
- value = value * 1.0E+12;
- } else { // Wrong unit.
- return result;
- }
- result = value;
- }
- return result;
-
-} // func __kmp_parse_frequency
-
-void __kmp_query_cpuid(kmp_cpuinfo_t *p) {
- struct kmp_cpuid buf;
- int max_arg;
- int log_per_phy;
-#ifdef KMP_DEBUG
- int cflush_size;
-#endif
-
- p->initialized = 1;
-
- p->sse2 = 1; // Assume SSE2 by default.
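- // (Editorial note, not part of the original source.) CPUID leaf 0 returns
- // the highest supported standard leaf in EAX and the vendor string in
- // EBX/EDX/ECX; max_arg captures that ceiling and gates every later query.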
-
- __kmp_x86_cpuid(0, 0, &buf);
-
- KA_TRACE(trace_level,
-          ("INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n", 0,
-           buf.eax, buf.ebx, buf.ecx, buf.edx));
-
- max_arg = buf.eax;
-
- p->apic_id = -1;
-
- if (max_arg >= 1) {
- int i;
- kmp_uint32 t, data[4];
-
- __kmp_x86_cpuid(1, 0, &buf);
- KA_TRACE(trace_level,
-          ("INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n",
-           1, buf.eax, buf.ebx, buf.ecx, buf.edx));
-
- {
-#define get_value(reg, lo, mask) (((reg) >> (lo)) & (mask))
-
- p->signature = buf.eax;
- p->family = get_value(buf.eax, 20, 0xff) + get_value(buf.eax, 8, 0x0f);
- p->model =
-     (get_value(buf.eax, 16, 0x0f) << 4) + get_value(buf.eax, 4, 0x0f);
- p->stepping = get_value(buf.eax, 0, 0x0f);
-
-#undef get_value
-
- KA_TRACE(trace_level, (" family = %d, model = %d, stepping = %d\n",
-                        p->family, p->model, p->stepping));
- }
-
- for (t = buf.ebx, i = 0; i < 4; t >>= 8, ++i) {
- data[i] = (t & 0xff);
- }
-
- p->sse2 = (buf.edx >> 26) & 1;
-
-#ifdef KMP_DEBUG
-
- if ((buf.edx >> 4) & 1) {
- /* TSC - Timestamp Counter Available */
- KA_TRACE(trace_level, (" TSC"));
- }
- if ((buf.edx >> 8) & 1) {
- /* CX8 - CMPXCHG8B Instruction Available */
- KA_TRACE(trace_level, (" CX8"));
- }
- if ((buf.edx >> 9) & 1) {
- /* APIC - Local APIC Present (multi-processor operation support) */
- KA_TRACE(trace_level, (" APIC"));
- }
- if ((buf.edx >> 15) & 1) {
- /* CMOV - Conditional MOVe Instruction Available */
- KA_TRACE(trace_level, (" CMOV"));
- }
- if ((buf.edx >> 18) & 1) {
- /* PSN - Processor Serial Number Available */
- KA_TRACE(trace_level, (" PSN"));
- }
- if ((buf.edx >> 19) & 1) {
- /* CLFLUSH - Cache Flush Instruction Available */
- cflush_size =
-     data[1] * 8; /* Bits 15-08: CLFLUSH line size = 8 (64 bytes) */
- KA_TRACE(trace_level, (" CLFLUSH(%db)", cflush_size));
- }
- if ((buf.edx >> 21) & 1) {
- /* DTES - Debug Trace & EMON Store */
- KA_TRACE(trace_level, (" DTES"));
- }
- if ((buf.edx >> 22) & 1) {
- /* ACPI - ACPI Support Available */
- KA_TRACE(trace_level, (" ACPI"));
- }
- if ((buf.edx >> 23) & 1) {
- /* MMX - Multimedia Extensions */
- KA_TRACE(trace_level, (" MMX"));
- }
- if ((buf.edx >> 25) & 1) {
- /* SSE - SSE Instructions */
- KA_TRACE(trace_level, (" SSE"));
- }
- if ((buf.edx >> 26) & 1) {
- /* SSE2 - SSE2 Instructions */
- KA_TRACE(trace_level, (" SSE2"));
- }
- if ((buf.edx >> 27) & 1) {
- /* SLFSNP - Self-Snooping Cache */
- KA_TRACE(trace_level, (" SLFSNP"));
- }
-#endif /* KMP_DEBUG */
-
- if ((buf.edx >> 28) & 1) {
- /* Bits 23-16: Logical Processors per Physical Processor (1 for P4) */
- log_per_phy = data[2];
- p->apic_id = data[3]; /* Bits 31-24: Processor Initial APIC ID (X) */
- KA_TRACE(trace_level, (" HT(%d TPUs)", log_per_phy));
-
- if (log_per_phy > 1) {
-/* default to 1k for HT-enabled processors (4k on OS X*) */
-#if KMP_OS_DARWIN
- p->cpu_stackoffset = 4 * 1024;
-#else
- p->cpu_stackoffset = 1 * 1024;
-#endif
- }
-
- p->physical_id = __kmp_get_physical_id(log_per_phy, p->apic_id);
- p->logical_id = __kmp_get_logical_id(log_per_phy, p->apic_id);
- }
-#ifdef KMP_DEBUG
- if ((buf.edx >> 29) & 1) {
- /* ATHROTL - Automatic Throttle Control */
- KA_TRACE(trace_level, (" ATHROTL"));
- }
- KA_TRACE(trace_level, (" ]\n"));
-
- for (i = 2; i <= max_arg; ++i) {
- __kmp_x86_cpuid(i, 0, &buf);
- KA_TRACE(trace_level,
-          ("INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n",
-           i, buf.eax, buf.ebx, buf.ecx, buf.edx));
- }
-#endif
-#if KMP_USE_ADAPTIVE_LOCKS
- p->rtm = 0;
- if (max_arg > 7) {
- /* RTM bit CPUID.07:EBX, bit 11 */
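- // (Editorial note, not part of the original source.) The guard above is
- // max_arg > 7, so leaf 7 is only queried when the CPU reports at least
- // leaf 8; a >= 7 comparison would be the more permissive check.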
- __kmp_x86_cpuid(7, 0, &buf); - p->rtm = (buf.ebx >> 11) & 1; - KA_TRACE(trace_level, (" RTM")); - } -#endif - } - - { // Parse CPU brand string for frequency, saving the string for later. - int i; - kmp_cpuid_t *base = (kmp_cpuid_t *)&p->name[0]; - - // Get CPU brand string. - for (i = 0; i < 3; ++i) { - __kmp_x86_cpuid(0x80000002 + i, 0, base + i); - } - p->name[sizeof(p->name) - 1] = 0; // Just in case. ;-) - KA_TRACE(trace_level, ("cpu brand string: \"%s\"\n", &p->name[0])); - - // Parse frequency. - p->frequency = __kmp_parse_frequency(strrchr(&p->name[0], ' ')); - KA_TRACE(trace_level, - ("cpu frequency from brand string: %" KMP_UINT64_SPEC "\n", - p->frequency)); - } -} - -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -void __kmp_expand_host_name(char *buffer, size_t size) { - KMP_DEBUG_ASSERT(size >= sizeof(unknown)); -#if KMP_OS_WINDOWS - { - DWORD s = size; - - if (!GetComputerNameA(buffer, &s)) - KMP_STRCPY_S(buffer, size, unknown); - } -#else - buffer[size - 2] = 0; - if (gethostname(buffer, size) || buffer[size - 2] != 0) - KMP_STRCPY_S(buffer, size, unknown); -#endif -} - -/* Expand the meta characters in the filename: - * Currently defined characters are: - * %H the hostname - * %P the number of threads used. - * %I the unique identifier for this run. - */ - -void __kmp_expand_file_name(char *result, size_t rlen, char *pattern) { - char *pos = result, *end = result + rlen - 1; - char buffer[256]; - int default_cpu_width = 1; - int snp_result; - - KMP_DEBUG_ASSERT(rlen > 0); - *end = 0; - { - int i; - for (i = __kmp_xproc; i >= 10; i /= 10, ++default_cpu_width) - ; - } - - if (pattern != NULL) { - while (*pattern != '\0' && pos < end) { - if (*pattern != '%') { - *pos++ = *pattern++; - } else { - char *old_pattern = pattern; - int width = 1; - int cpu_width = default_cpu_width; - - ++pattern; - - if (*pattern >= '0' && *pattern <= '9') { - width = 0; - do { - width = (width * 10) + *pattern++ - '0'; - } while (*pattern >= '0' && *pattern <= '9'); - if (width < 0 || width > 1024) - width = 1; - - cpu_width = width; - } - - switch (*pattern) { - case 'H': - case 'h': { - __kmp_expand_host_name(buffer, sizeof(buffer)); - KMP_STRNCPY(pos, buffer, end - pos + 1); - if (*end == 0) { - while (*pos) - ++pos; - ++pattern; - } else - pos = end; - } break; - case 'P': - case 'p': { - snp_result = KMP_SNPRINTF(pos, end - pos + 1, "%0*d", cpu_width, - __kmp_dflt_team_nth); - if (snp_result >= 0 && snp_result <= end - pos) { - while (*pos) - ++pos; - ++pattern; - } else - pos = end; - } break; - case 'I': - case 'i': { - pid_t id = getpid(); -#if KMP_ARCH_X86_64 && defined(__MINGW32__) - snp_result = KMP_SNPRINTF(pos, end - pos + 1, "%0*lld", width, id); -#else - snp_result = KMP_SNPRINTF(pos, end - pos + 1, "%0*d", width, id); -#endif - if (snp_result >= 0 && snp_result <= end - pos) { - while (*pos) - ++pos; - ++pattern; - } else - pos = end; - break; - } - case '%': { - *pos++ = '%'; - ++pattern; - break; - } - default: { - *pos++ = '%'; - pattern = old_pattern + 1; - break; - } - } - } - } - /* TODO: How do we get rid of this? 
*/ - if (*pattern != '\0') - KMP_FATAL(FileNameTooLong); - } - - *pos = '\0'; -} Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_utility.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_error.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_error.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_error.cpp (nonexistent) @@ -1,462 +0,0 @@ -/* - * kmp_error.cpp -- KPTS functions for error checking at runtime - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#include "kmp.h" -#include "kmp_error.h" -#include "kmp_i18n.h" -#include "kmp_str.h" - -/* ------------------------------------------------------------------------ */ - -#define MIN_STACK 100 - -static char const *cons_text_c[] = { - "(none)", "\"parallel\"", "work-sharing", /* this is not called "for" - because of lowering of - "sections" pragmas */ - "\"ordered\" work-sharing", /* this is not called "for ordered" because of - lowering of "sections" pragmas */ - "\"sections\"", - "work-sharing", /* this is not called "single" because of lowering of - "sections" pragmas */ - "\"taskq\"", "\"taskq\"", "\"taskq ordered\"", "\"critical\"", - "\"ordered\"", /* in PARALLEL */ - "\"ordered\"", /* in PDO */ - "\"ordered\"", /* in TASKQ */ - "\"master\"", "\"reduce\"", "\"barrier\""}; - -#define get_src(ident) ((ident) == NULL ? NULL : (ident)->psource) - -#define PUSH_MSG(ct, ident) \ - "\tpushing on stack: %s (%s)\n", cons_text_c[(ct)], get_src((ident)) -#define POP_MSG(p) \ - "\tpopping off stack: %s (%s)\n", cons_text_c[(p)->stack_data[tos].type], \ - get_src((p)->stack_data[tos].ident) - -static int const cons_text_c_num = sizeof(cons_text_c) / sizeof(char const *); - -/* --------------- START OF STATIC LOCAL ROUTINES ------------------------- */ - -static void __kmp_check_null_func(void) { /* nothing to do */ -} - -static void __kmp_expand_cons_stack(int gtid, struct cons_header *p) { - int i; - struct cons_data *d; - - /* TODO for monitor perhaps? */ - if (gtid < 0) - __kmp_check_null_func(); - - KE_TRACE(10, ("expand cons_stack (%d %d)\n", gtid, __kmp_get_gtid())); - - d = p->stack_data; - - p->stack_size = (p->stack_size * 2) + 100; - - /* TODO free the old data */ - p->stack_data = (struct cons_data *)__kmp_allocate(sizeof(struct cons_data) * - (p->stack_size + 1)); - - for (i = p->stack_top; i >= 0; --i) - p->stack_data[i] = d[i]; - - /* NOTE: we do not free the old stack_data */ -} - -// NOTE: Function returns allocated memory, caller must free it! -static char *__kmp_pragma(int ct, ident_t const *ident) { - char const *cons = NULL; // Construct name. - char *file = NULL; // File name. - char *func = NULL; // Function (routine) name. - char *line = NULL; // Line number. 
- kmp_str_buf_t buffer; - kmp_msg_t prgm; - __kmp_str_buf_init(&buffer); - if (0 < ct && ct < cons_text_c_num) { - cons = cons_text_c[ct]; - } else { - KMP_DEBUG_ASSERT(0); - } - if (ident != NULL && ident->psource != NULL) { - char *tail = NULL; - __kmp_str_buf_print(&buffer, "%s", - ident->psource); // Copy source to buffer. - // Split string in buffer to file, func, and line. - tail = buffer.str; - __kmp_str_split(tail, ';', NULL, &tail); - __kmp_str_split(tail, ';', &file, &tail); - __kmp_str_split(tail, ';', &func, &tail); - __kmp_str_split(tail, ';', &line, &tail); - } - prgm = __kmp_msg_format(kmp_i18n_fmt_Pragma, cons, file, func, line); - __kmp_str_buf_free(&buffer); - return prgm.str; -} // __kmp_pragma - -/* ----------------- END OF STATIC LOCAL ROUTINES ------------------------- */ - -void __kmp_error_construct(kmp_i18n_id_t id, // Message identifier. - enum cons_type ct, // Construct type. - ident_t const *ident // Construct ident. - ) { - char *construct = __kmp_pragma(ct, ident); - __kmp_fatal(__kmp_msg_format(id, construct), __kmp_msg_null); - KMP_INTERNAL_FREE(construct); -} - -void __kmp_error_construct2(kmp_i18n_id_t id, // Message identifier. - enum cons_type ct, // First construct type. - ident_t const *ident, // First construct ident. - struct cons_data const *cons // Second construct. - ) { - char *construct1 = __kmp_pragma(ct, ident); - char *construct2 = __kmp_pragma(cons->type, cons->ident); - __kmp_fatal(__kmp_msg_format(id, construct1, construct2), __kmp_msg_null); - KMP_INTERNAL_FREE(construct1); - KMP_INTERNAL_FREE(construct2); -} - -struct cons_header *__kmp_allocate_cons_stack(int gtid) { - struct cons_header *p; - - /* TODO for monitor perhaps? */ - if (gtid < 0) { - __kmp_check_null_func(); - } - KE_TRACE(10, ("allocate cons_stack (%d)\n", gtid)); - p = (struct cons_header *)__kmp_allocate(sizeof(struct cons_header)); - p->p_top = p->w_top = p->s_top = 0; - p->stack_data = (struct cons_data *)__kmp_allocate(sizeof(struct cons_data) * - (MIN_STACK + 1)); - p->stack_size = MIN_STACK; - p->stack_top = 0; - p->stack_data[0].type = ct_none; - p->stack_data[0].prev = 0; - p->stack_data[0].ident = NULL; - return p; -} - -void __kmp_free_cons_stack(void *ptr) { - struct cons_header *p = (struct cons_header *)ptr; - if (p != NULL) { - if (p->stack_data != NULL) { - __kmp_free(p->stack_data); - p->stack_data = NULL; - } - __kmp_free(p); - } -} - -#if KMP_DEBUG -static void dump_cons_stack(int gtid, struct cons_header *p) { - int i; - int tos = p->stack_top; - kmp_str_buf_t buffer; - __kmp_str_buf_init(&buffer); - __kmp_str_buf_print( - &buffer, - "+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n"); - __kmp_str_buf_print(&buffer, - "Begin construct stack with %d items for thread %d\n", - tos, gtid); - __kmp_str_buf_print(&buffer, " stack_top=%d { P=%d, W=%d, S=%d }\n", tos, - p->p_top, p->w_top, p->s_top); - for (i = tos; i > 0; i--) { - struct cons_data *c = &(p->stack_data[i]); - __kmp_str_buf_print( - &buffer, " stack_data[%2d] = { %s (%s) %d %p }\n", i, - cons_text_c[c->type], get_src(c->ident), c->prev, c->name); - } - __kmp_str_buf_print(&buffer, "End construct stack for thread %d\n", gtid); - __kmp_str_buf_print( - &buffer, - "+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n"); - __kmp_debug_printf("%s", buffer.str); - __kmp_str_buf_free(&buffer); -} -#endif - -void __kmp_push_parallel(int gtid, ident_t const *ident) { - int tos; - struct cons_header *p = __kmp_threads[gtid]->th.th_cons; - - 
KMP_DEBUG_ASSERT(__kmp_threads[gtid]->th.th_cons); - KE_TRACE(10, ("__kmp_push_parallel (%d %d)\n", gtid, __kmp_get_gtid())); - KE_TRACE(100, (PUSH_MSG(ct_parallel, ident))); - if (p->stack_top >= p->stack_size) { - __kmp_expand_cons_stack(gtid, p); - } - tos = ++p->stack_top; - p->stack_data[tos].type = ct_parallel; - p->stack_data[tos].prev = p->p_top; - p->stack_data[tos].ident = ident; - p->stack_data[tos].name = NULL; - p->p_top = tos; - KE_DUMP(1000, dump_cons_stack(gtid, p)); -} - -void __kmp_check_workshare(int gtid, enum cons_type ct, ident_t const *ident) { - struct cons_header *p = __kmp_threads[gtid]->th.th_cons; - - KMP_DEBUG_ASSERT(__kmp_threads[gtid]->th.th_cons); - KE_TRACE(10, ("__kmp_check_workshare (%d %d)\n", gtid, __kmp_get_gtid())); - - if (p->stack_top >= p->stack_size) { - __kmp_expand_cons_stack(gtid, p); - } - if (p->w_top > p->p_top && - !(IS_CONS_TYPE_TASKQ(p->stack_data[p->w_top].type) && - IS_CONS_TYPE_TASKQ(ct))) { - // We are already in a WORKSHARE construct for this PARALLEL region. - __kmp_error_construct2(kmp_i18n_msg_CnsInvalidNesting, ct, ident, - &p->stack_data[p->w_top]); - } - if (p->s_top > p->p_top) { - // We are already in a SYNC construct for this PARALLEL region. - __kmp_error_construct2(kmp_i18n_msg_CnsInvalidNesting, ct, ident, - &p->stack_data[p->s_top]); - } -} - -void __kmp_push_workshare(int gtid, enum cons_type ct, ident_t const *ident) { - int tos; - struct cons_header *p = __kmp_threads[gtid]->th.th_cons; - KE_TRACE(10, ("__kmp_push_workshare (%d %d)\n", gtid, __kmp_get_gtid())); - __kmp_check_workshare(gtid, ct, ident); - KE_TRACE(100, (PUSH_MSG(ct, ident))); - tos = ++p->stack_top; - p->stack_data[tos].type = ct; - p->stack_data[tos].prev = p->w_top; - p->stack_data[tos].ident = ident; - p->stack_data[tos].name = NULL; - p->w_top = tos; - KE_DUMP(1000, dump_cons_stack(gtid, p)); -} - -void -#if KMP_USE_DYNAMIC_LOCK -__kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck, kmp_uint32 seq ) -#else -__kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck ) -#endif -{ - struct cons_header *p = __kmp_threads[gtid]->th.th_cons; - - KE_TRACE(10, ("__kmp_check_sync (gtid=%d)\n", __kmp_get_gtid())); - - if (p->stack_top >= p->stack_size) - __kmp_expand_cons_stack(gtid, p); - - if (ct == ct_ordered_in_parallel || ct == ct_ordered_in_pdo || - ct == ct_ordered_in_taskq) { - if (p->w_top <= p->p_top) { -/* we are not in a worksharing construct */ -#ifdef BUILD_PARALLEL_ORDERED - /* do not report error messages for PARALLEL ORDERED */ - KMP_ASSERT(ct == ct_ordered_in_parallel); -#else - __kmp_error_construct(kmp_i18n_msg_CnsBoundToWorksharing, ct, ident); -#endif /* BUILD_PARALLEL_ORDERED */ - } else { - /* inside a WORKSHARING construct for this PARALLEL region */ - if (!IS_CONS_TYPE_ORDERED(p->stack_data[p->w_top].type)) { - if (p->stack_data[p->w_top].type == ct_taskq) { - __kmp_error_construct2(kmp_i18n_msg_CnsNotInTaskConstruct, ct, ident, - &p->stack_data[p->w_top]); - } else { - __kmp_error_construct2(kmp_i18n_msg_CnsNoOrderedClause, ct, ident, - &p->stack_data[p->w_top]); - } - } - } - if (p->s_top > p->p_top && p->s_top > p->w_top) { - /* inside a sync construct which is inside a worksharing construct */ - int index = p->s_top; - enum cons_type stack_type; - - stack_type = p->stack_data[index].type; - - if (stack_type == ct_critical || - ((stack_type == ct_ordered_in_parallel || - stack_type == ct_ordered_in_pdo || - stack_type == - ct_ordered_in_taskq) && /* C doesn't 
allow named ordered;
-                                      ordered in ordered gets error */
-         p->stack_data[index].ident != NULL &&
-         (p->stack_data[index].ident->flags & KMP_IDENT_KMPC))) {
- /* we are in ORDERED which is inside an ORDERED or CRITICAL construct */
- __kmp_error_construct2(kmp_i18n_msg_CnsInvalidNesting, ct, ident,
-                        &p->stack_data[index]);
- }
- }
- } else if (ct == ct_critical) {
-#if KMP_USE_DYNAMIC_LOCK
- if (lck != NULL &&
-     __kmp_get_user_lock_owner(lck, seq) ==
-         gtid) { /* this thread already has lock for this critical section */
-#else
- if (lck != NULL &&
-     __kmp_get_user_lock_owner(lck) ==
-         gtid) { /* this thread already has lock for this critical section */
-#endif
- int index = p->s_top;
- struct cons_data cons = {NULL, ct_critical, 0, NULL};
- /* walk up construct stack and try to find critical with matching name */
- while (index != 0 && p->stack_data[index].name != lck) {
- index = p->stack_data[index].prev;
- }
- if (index != 0) {
- /* found match on the stack (may not always because of interleaved
-  * critical for Fortran) */
- cons = p->stack_data[index];
- }
- /* we are in CRITICAL which is inside a CRITICAL construct of same name */
- __kmp_error_construct2(kmp_i18n_msg_CnsNestingSameName, ct, ident, &cons);
- }
- } else if (ct == ct_master || ct == ct_reduce) {
- if (p->w_top > p->p_top) {
- /* inside a WORKSHARING construct for this PARALLEL region */
- __kmp_error_construct2(kmp_i18n_msg_CnsInvalidNesting, ct, ident,
-                        &p->stack_data[p->w_top]);
- }
- if (ct == ct_reduce && p->s_top > p->p_top) {
- /* inside another SYNC construct for this PARALLEL region */
- __kmp_error_construct2(kmp_i18n_msg_CnsInvalidNesting, ct, ident,
-                        &p->stack_data[p->s_top]);
- }
- }
-}
-
-void
-#if KMP_USE_DYNAMIC_LOCK
-__kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck, kmp_uint32 seq )
-#else
-__kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck )
-#endif
-{
- int tos;
- struct cons_header *p = __kmp_threads[gtid]->th.th_cons;
-
- KMP_ASSERT(gtid == __kmp_get_gtid());
- KE_TRACE(10, ("__kmp_push_sync (gtid=%d)\n", gtid));
-#if KMP_USE_DYNAMIC_LOCK
- __kmp_check_sync(gtid, ct, ident, lck, seq);
-#else
- __kmp_check_sync(gtid, ct, ident, lck);
-#endif
- KE_TRACE(100, (PUSH_MSG(ct, ident)));
- tos = ++p->stack_top;
- p->stack_data[tos].type = ct;
- p->stack_data[tos].prev = p->s_top;
- p->stack_data[tos].ident = ident;
- p->stack_data[tos].name = lck;
- p->s_top = tos;
- KE_DUMP(1000, dump_cons_stack(gtid, p));
-}
-
-/* ------------------------------------------------------------------------ */
-
-void __kmp_pop_parallel(int gtid, ident_t const *ident) {
- int tos;
- struct cons_header *p = __kmp_threads[gtid]->th.th_cons;
- tos = p->stack_top;
- KE_TRACE(10, ("__kmp_pop_parallel (%d %d)\n", gtid, __kmp_get_gtid()));
- if (tos == 0 || p->p_top == 0) {
- __kmp_error_construct(kmp_i18n_msg_CnsDetectedEnd, ct_parallel, ident);
- }
- if (tos != p->p_top || p->stack_data[tos].type != ct_parallel) {
- __kmp_error_construct2(kmp_i18n_msg_CnsExpectedEnd, ct_parallel, ident,
-                        &p->stack_data[tos]);
- }
- KE_TRACE(100, (POP_MSG(p)));
- p->p_top = p->stack_data[tos].prev;
- p->stack_data[tos].type = ct_none;
- p->stack_data[tos].ident = NULL;
- p->stack_top = tos - 1;
- KE_DUMP(1000, dump_cons_stack(gtid, p));
-}
-
-enum cons_type __kmp_pop_workshare(int gtid, enum cons_type ct,
-                                   ident_t const *ident) {
- int tos;
- struct cons_header *p = __kmp_threads[gtid]->th.th_cons;
-
- tos = p->stack_top;
- KE_TRACE(10, ("__kmp_pop_workshare (%d
%d)\n", gtid, __kmp_get_gtid())); - if (tos == 0 || p->w_top == 0) { - __kmp_error_construct(kmp_i18n_msg_CnsDetectedEnd, ct, ident); - } - - if (tos != p->w_top || - (p->stack_data[tos].type != ct && - // below are two exceptions to the rule that construct types must match - !(p->stack_data[tos].type == ct_pdo_ordered && ct == ct_pdo) && - !(p->stack_data[tos].type == ct_task_ordered && ct == ct_task))) { - __kmp_check_null_func(); - __kmp_error_construct2(kmp_i18n_msg_CnsExpectedEnd, ct, ident, - &p->stack_data[tos]); - } - KE_TRACE(100, (POP_MSG(p))); - p->w_top = p->stack_data[tos].prev; - p->stack_data[tos].type = ct_none; - p->stack_data[tos].ident = NULL; - p->stack_top = tos - 1; - KE_DUMP(1000, dump_cons_stack(gtid, p)); - return p->stack_data[p->w_top].type; -} - -void __kmp_pop_sync(int gtid, enum cons_type ct, ident_t const *ident) { - int tos; - struct cons_header *p = __kmp_threads[gtid]->th.th_cons; - tos = p->stack_top; - KE_TRACE(10, ("__kmp_pop_sync (%d %d)\n", gtid, __kmp_get_gtid())); - if (tos == 0 || p->s_top == 0) { - __kmp_error_construct(kmp_i18n_msg_CnsDetectedEnd, ct, ident); - } - if (tos != p->s_top || p->stack_data[tos].type != ct) { - __kmp_check_null_func(); - __kmp_error_construct2(kmp_i18n_msg_CnsExpectedEnd, ct, ident, - &p->stack_data[tos]); - } - if (gtid < 0) { - __kmp_check_null_func(); - } - KE_TRACE(100, (POP_MSG(p))); - p->s_top = p->stack_data[tos].prev; - p->stack_data[tos].type = ct_none; - p->stack_data[tos].ident = NULL; - p->stack_top = tos - 1; - KE_DUMP(1000, dump_cons_stack(gtid, p)); -} - -/* ------------------------------------------------------------------------ */ - -void __kmp_check_barrier(int gtid, enum cons_type ct, ident_t const *ident) { - struct cons_header *p = __kmp_threads[gtid]->th.th_cons; - KE_TRACE(10, ("__kmp_check_barrier (loc: %p, gtid: %d %d)\n", ident, gtid, - __kmp_get_gtid())); - if (ident != 0) { - __kmp_check_null_func(); - } - if (p->w_top > p->p_top) { - /* we are already in a WORKSHARING construct for this PARALLEL region */ - __kmp_error_construct2(kmp_i18n_msg_CnsInvalidNesting, ct, ident, - &p->stack_data[p->w_top]); - } - if (p->s_top > p->p_top) { - /* we are already in a SYNC construct for this PARALLEL region */ - __kmp_error_construct2(kmp_i18n_msg_CnsInvalidNesting, ct, ident, - &p->stack_data[p->s_top]); - } -} Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_error.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/30/omp_lib.f.var =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/30/omp_lib.f.var (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/30/omp_lib.f.var (nonexistent) @@ -1,644 +0,0 @@ -! include/30/omp_lib.f.var - -! -!//===----------------------------------------------------------------------===// -!// -!// The LLVM Compiler Infrastructure -!// -!// This file is dual licensed under the MIT and the University of Illinois Open -!// Source Licenses. See LICENSE.txt for details. -!// -!//===----------------------------------------------------------------------===// -! 
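-! (Editorial note, not part of the original source.) The @LIBOMP_...@ tokens
-! in this .var template are build-time placeholders; the libomp build
-! substitutes them when generating the omp_lib.f that is actually installed.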
- -!*** -!*** Some of the directives for the following routine extend past column 72, -!*** so process this file in 132-column mode. -!*** - -!dec$ fixedformlinesize:132 - - module omp_lib_kinds - - integer, parameter :: omp_integer_kind = 4 - integer, parameter :: omp_logical_kind = 4 - integer, parameter :: omp_real_kind = 4 - integer, parameter :: omp_lock_kind = int_ptr_kind() - integer, parameter :: omp_nest_lock_kind = int_ptr_kind() - integer, parameter :: omp_sched_kind = omp_integer_kind - integer, parameter :: kmp_pointer_kind = int_ptr_kind() - integer, parameter :: kmp_size_t_kind = int_ptr_kind() - integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind() - - end module omp_lib_kinds - - module omp_lib - - use omp_lib_kinds - - integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@ - character(*), parameter :: kmp_build_date = '@LIBOMP_BUILD_DATE@' - integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@ - - integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1 - integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2 - integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3 - integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4 - - interface - -! *** -! *** omp_* entry points -! *** - - subroutine omp_set_num_threads(nthreads) - use omp_lib_kinds - integer (kind=omp_integer_kind) nthreads - end subroutine omp_set_num_threads - - subroutine omp_set_dynamic(enable) - use omp_lib_kinds - logical (kind=omp_logical_kind) enable - end subroutine omp_set_dynamic - - subroutine omp_set_nested(enable) - use omp_lib_kinds - logical (kind=omp_logical_kind) enable - end subroutine omp_set_nested - - function omp_get_num_threads() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_threads - end function omp_get_num_threads - - function omp_get_max_threads() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_threads - end function omp_get_max_threads - - function omp_get_thread_num() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_thread_num - end function omp_get_thread_num - - function omp_get_num_procs() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_procs - end function omp_get_num_procs - - function omp_in_parallel() - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_in_parallel - end function omp_in_parallel - - function omp_get_dynamic() - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_get_dynamic - end function omp_get_dynamic - - function omp_get_nested() - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_get_nested - end function omp_get_nested - - function omp_get_thread_limit() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_thread_limit - end function omp_get_thread_limit - - subroutine omp_set_max_active_levels(max_levels) - use omp_lib_kinds - integer (kind=omp_integer_kind) max_levels - end subroutine omp_set_max_active_levels - - function omp_get_max_active_levels() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_active_levels - end function omp_get_max_active_levels - - function omp_get_level() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_level - end function omp_get_level - - function omp_get_active_level() - use omp_lib_kinds - integer 
(kind=omp_integer_kind) omp_get_active_level - end function omp_get_active_level - - function omp_get_ancestor_thread_num(level) - use omp_lib_kinds - integer (kind=omp_integer_kind) level - integer (kind=omp_integer_kind) omp_get_ancestor_thread_num - end function omp_get_ancestor_thread_num - - function omp_get_team_size(level) - use omp_lib_kinds - integer (kind=omp_integer_kind) level - integer (kind=omp_integer_kind) omp_get_team_size - end function omp_get_team_size - - subroutine omp_set_schedule(kind, modifier) - use omp_lib_kinds - integer (kind=omp_sched_kind) kind - integer (kind=omp_integer_kind) modifier - end subroutine omp_set_schedule - - subroutine omp_get_schedule(kind, modifier) - use omp_lib_kinds - integer (kind=omp_sched_kind) kind - integer (kind=omp_integer_kind) modifier - end subroutine omp_get_schedule - - function omp_get_wtime() - double precision omp_get_wtime - end function omp_get_wtime - - function omp_get_wtick () - double precision omp_get_wtick - end function omp_get_wtick - - subroutine omp_init_lock(lockvar) - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_init_lock - - subroutine omp_destroy_lock(lockvar) - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_destroy_lock - - subroutine omp_set_lock(lockvar) - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_set_lock - - subroutine omp_unset_lock(lockvar) - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_unset_lock - - function omp_test_lock(lockvar) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_test_lock - integer (kind=omp_lock_kind) lockvar - end function omp_test_lock - - subroutine omp_init_nest_lock(lockvar) - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_init_nest_lock - - subroutine omp_destroy_nest_lock(lockvar) - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_destroy_nest_lock - - subroutine omp_set_nest_lock(lockvar) - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_set_nest_lock - - subroutine omp_unset_nest_lock(lockvar) - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_unset_nest_lock - - function omp_test_nest_lock(lockvar) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_test_nest_lock - integer (kind=omp_nest_lock_kind) lockvar - end function omp_test_nest_lock - -! *** -! *** kmp_* entry points -! 
*** - - subroutine kmp_set_stacksize(size) - use omp_lib_kinds - integer (kind=omp_integer_kind) size - end subroutine kmp_set_stacksize - - subroutine kmp_set_stacksize_s(size) - use omp_lib_kinds - integer (kind=kmp_size_t_kind) size - end subroutine kmp_set_stacksize_s - - subroutine kmp_set_blocktime(msec) - use omp_lib_kinds - integer (kind=omp_integer_kind) msec - end subroutine kmp_set_blocktime - - subroutine kmp_set_library_serial() - end subroutine kmp_set_library_serial - - subroutine kmp_set_library_turnaround() - end subroutine kmp_set_library_turnaround - - subroutine kmp_set_library_throughput() - end subroutine kmp_set_library_throughput - - subroutine kmp_set_library(libnum) - use omp_lib_kinds - integer (kind=omp_integer_kind) libnum - end subroutine kmp_set_library - - subroutine kmp_set_defaults(string) - character*(*) string - end subroutine kmp_set_defaults - - function kmp_get_stacksize() - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_stacksize - end function kmp_get_stacksize - - function kmp_get_stacksize_s() - use omp_lib_kinds - integer (kind=kmp_size_t_kind) kmp_get_stacksize_s - end function kmp_get_stacksize_s - - function kmp_get_blocktime() - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_blocktime - end function kmp_get_blocktime - - function kmp_get_library() - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_library - end function kmp_get_library - - function kmp_set_affinity(mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_set_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity - - function kmp_get_affinity(mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity - - function kmp_get_affinity_max_proc() - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity_max_proc - end function kmp_get_affinity_max_proc - - subroutine kmp_create_affinity_mask(mask) - use omp_lib_kinds - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_create_affinity_mask - - subroutine kmp_destroy_affinity_mask(mask) - use omp_lib_kinds - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_destroy_affinity_mask - - function kmp_set_affinity_mask_proc(proc, mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc - integer (kind=omp_integer_kind) proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity_mask_proc - - function kmp_unset_affinity_mask_proc(proc, mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc - integer (kind=omp_integer_kind) proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_unset_affinity_mask_proc - - function kmp_get_affinity_mask_proc(proc, mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc - integer (kind=omp_integer_kind) proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity_mask_proc - - function kmp_malloc(size) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_malloc - integer (kind=kmp_size_t_kind) size - end function kmp_malloc - - function kmp_aligned_malloc(size, alignment) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_aligned_malloc - integer (kind=kmp_size_t_kind) size - integer (kind=kmp_size_t_kind) alignment - end function kmp_aligned_malloc - - function kmp_calloc(nelem, elsize) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) 
kmp_calloc - integer (kind=kmp_size_t_kind) nelem - integer (kind=kmp_size_t_kind) elsize - end function kmp_calloc - - function kmp_realloc(ptr, size) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_realloc - integer (kind=kmp_pointer_kind) ptr - integer (kind=kmp_size_t_kind) size - end function kmp_realloc - - subroutine kmp_free(ptr) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) ptr - end subroutine kmp_free - - subroutine kmp_set_warnings_on() - end subroutine kmp_set_warnings_on - - subroutine kmp_set_warnings_off() - end subroutine kmp_set_warnings_off - - end interface - -!dec$ if defined(_WIN32) -!dec$ if defined(_WIN64) .or. defined(_M_AMD64) - -!*** -!*** The Fortran entry points must be in uppercase, even if the /Qlowercase -!*** option is specified. The alias attribute ensures that the specified -!*** string is used as the entry point. -!*** -!*** On the Windows* OS IA-32 architecture, the Fortran entry points have an -!*** underscore prepended. On the Windows* OS Intel(R) 64 -!*** architecture, no underscore is prepended. -!*** - -!dec$ attributes alias:'OMP_SET_NUM_THREADS' :: omp_set_num_threads -!dec$ attributes alias:'OMP_SET_DYNAMIC' :: omp_set_dynamic -!dec$ attributes alias:'OMP_SET_NESTED' :: omp_set_nested -!dec$ attributes alias:'OMP_GET_NUM_THREADS' :: omp_get_num_threads -!dec$ attributes alias:'OMP_GET_MAX_THREADS' :: omp_get_max_threads -!dec$ attributes alias:'OMP_GET_THREAD_NUM' :: omp_get_thread_num -!dec$ attributes alias:'OMP_GET_NUM_PROCS' :: omp_get_num_procs -!dec$ attributes alias:'OMP_IN_PARALLEL' :: omp_in_parallel -!dec$ attributes alias:'OMP_GET_DYNAMIC' :: omp_get_dynamic -!dec$ attributes alias:'OMP_GET_NESTED' :: omp_get_nested -!dec$ attributes alias:'OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit -!dec$ attributes alias:'OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels -!dec$ attributes alias:'OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels -!dec$ attributes alias:'OMP_GET_LEVEL' :: omp_get_level -!dec$ attributes alias:'OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level -!dec$ attributes alias:'OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num -!dec$ attributes alias:'OMP_GET_TEAM_SIZE' :: omp_get_team_size -!dec$ attributes alias:'OMP_SET_SCHEDULE' :: omp_set_schedule -!dec$ attributes alias:'OMP_GET_SCHEDULE' :: omp_get_schedule -!dec$ attributes alias:'OMP_GET_WTIME' :: omp_get_wtime -!dec$ attributes alias:'OMP_GET_WTICK' :: omp_get_wtick - -!dec$ attributes alias:'omp_init_lock' :: omp_init_lock -!dec$ attributes alias:'omp_destroy_lock' :: omp_destroy_lock -!dec$ attributes alias:'omp_set_lock' :: omp_set_lock -!dec$ attributes alias:'omp_unset_lock' :: omp_unset_lock -!dec$ attributes alias:'omp_test_lock' :: omp_test_lock -!dec$ attributes alias:'omp_init_nest_lock' :: omp_init_nest_lock -!dec$ attributes alias:'omp_destroy_nest_lock' :: omp_destroy_nest_lock -!dec$ attributes alias:'omp_set_nest_lock' :: omp_set_nest_lock -!dec$ attributes alias:'omp_unset_nest_lock' :: omp_unset_nest_lock -!dec$ attributes alias:'omp_test_nest_lock' :: omp_test_nest_lock - -!dec$ attributes alias:'KMP_SET_STACKSIZE'::kmp_set_stacksize -!dec$ attributes alias:'KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s -!dec$ attributes alias:'KMP_SET_BLOCKTIME'::kmp_set_blocktime -!dec$ attributes alias:'KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial -!dec$ attributes alias:'KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround -!dec$ attributes alias:'KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput -!dec$ attributes 
alias:'KMP_SET_LIBRARY'::kmp_set_library -!dec$ attributes alias:'KMP_GET_STACKSIZE'::kmp_get_stacksize -!dec$ attributes alias:'KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s -!dec$ attributes alias:'KMP_GET_BLOCKTIME'::kmp_get_blocktime -!dec$ attributes alias:'KMP_GET_LIBRARY'::kmp_get_library -!dec$ attributes alias:'KMP_SET_AFFINITY'::kmp_set_affinity -!dec$ attributes alias:'KMP_GET_AFFINITY'::kmp_get_affinity -!dec$ attributes alias:'KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc -!dec$ attributes alias:'KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask -!dec$ attributes alias:'KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask -!dec$ attributes alias:'KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'KMP_MALLOC'::kmp_malloc -!dec$ attributes alias:'KMP_ALIGNED_MALLOC'::kmp_aligned_malloc -!dec$ attributes alias:'KMP_CALLOC'::kmp_calloc -!dec$ attributes alias:'KMP_REALLOC'::kmp_realloc -!dec$ attributes alias:'KMP_FREE'::kmp_free - -!dec$ attributes alias:'KMP_SET_WARNINGS_ON'::kmp_set_warnings_on -!dec$ attributes alias:'KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off - -!dec$ else - -!*** -!*** On Windows* OS IA-32 architecture, the Fortran entry points have an underscore prepended. -!*** - -!dec$ attributes alias:'_OMP_SET_NUM_THREADS' :: omp_set_num_threads -!dec$ attributes alias:'_OMP_SET_DYNAMIC' :: omp_set_dynamic -!dec$ attributes alias:'_OMP_SET_NESTED' :: omp_set_nested -!dec$ attributes alias:'_OMP_GET_NUM_THREADS' :: omp_get_num_threads -!dec$ attributes alias:'_OMP_GET_MAX_THREADS' :: omp_get_max_threads -!dec$ attributes alias:'_OMP_GET_THREAD_NUM' :: omp_get_thread_num -!dec$ attributes alias:'_OMP_GET_NUM_PROCS' :: omp_get_num_procs -!dec$ attributes alias:'_OMP_IN_PARALLEL' :: omp_in_parallel -!dec$ attributes alias:'_OMP_GET_DYNAMIC' :: omp_get_dynamic -!dec$ attributes alias:'_OMP_GET_NESTED' :: omp_get_nested -!dec$ attributes alias:'_OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit -!dec$ attributes alias:'_OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels -!dec$ attributes alias:'_OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels -!dec$ attributes alias:'_OMP_GET_LEVEL' :: omp_get_level -!dec$ attributes alias:'_OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level -!dec$ attributes alias:'_OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num -!dec$ attributes alias:'_OMP_GET_TEAM_SIZE' :: omp_get_team_size -!dec$ attributes alias:'_OMP_SET_SCHEDULE' :: omp_set_schedule -!dec$ attributes alias:'_OMP_GET_SCHEDULE' :: omp_get_schedule -!dec$ attributes alias:'_OMP_GET_WTIME' :: omp_get_wtime -!dec$ attributes alias:'_OMP_GET_WTICK' :: omp_get_wtick - -!dec$ attributes alias:'_omp_init_lock' :: omp_init_lock -!dec$ attributes alias:'_omp_destroy_lock' :: omp_destroy_lock -!dec$ attributes alias:'_omp_set_lock' :: omp_set_lock -!dec$ attributes alias:'_omp_unset_lock' :: omp_unset_lock -!dec$ attributes alias:'_omp_test_lock' :: omp_test_lock -!dec$ attributes alias:'_omp_init_nest_lock' :: omp_init_nest_lock -!dec$ attributes alias:'_omp_destroy_nest_lock' :: omp_destroy_nest_lock -!dec$ attributes alias:'_omp_set_nest_lock' :: omp_set_nest_lock -!dec$ attributes alias:'_omp_unset_nest_lock' :: omp_unset_nest_lock -!dec$ attributes alias:'_omp_test_nest_lock' :: omp_test_nest_lock - -!dec$ attributes alias:'_KMP_SET_STACKSIZE'::kmp_set_stacksize -!dec$ 
attributes alias:'_KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s -!dec$ attributes alias:'_KMP_SET_BLOCKTIME'::kmp_set_blocktime -!dec$ attributes alias:'_KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial -!dec$ attributes alias:'_KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround -!dec$ attributes alias:'_KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput -!dec$ attributes alias:'_KMP_SET_LIBRARY'::kmp_set_library -!dec$ attributes alias:'_KMP_GET_STACKSIZE'::kmp_get_stacksize -!dec$ attributes alias:'_KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s -!dec$ attributes alias:'_KMP_GET_BLOCKTIME'::kmp_get_blocktime -!dec$ attributes alias:'_KMP_GET_LIBRARY'::kmp_get_library -!dec$ attributes alias:'_KMP_SET_AFFINITY'::kmp_set_affinity -!dec$ attributes alias:'_KMP_GET_AFFINITY'::kmp_get_affinity -!dec$ attributes alias:'_KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc -!dec$ attributes alias:'_KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask -!dec$ attributes alias:'_KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask -!dec$ attributes alias:'_KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'_KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'_KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'_KMP_MALLOC'::kmp_malloc -!dec$ attributes alias:'_KMP_ALIGNED_MALLOC'::kmp_aligned_malloc -!dec$ attributes alias:'_KMP_CALLOC'::kmp_calloc -!dec$ attributes alias:'_KMP_REALLOC'::kmp_realloc -!dec$ attributes alias:'_KMP_FREE'::kmp_free - -!dec$ attributes alias:'_KMP_SET_WARNINGS_ON'::kmp_set_warnings_on -!dec$ attributes alias:'_KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off - -!dec$ endif -!dec$ endif - -!dec$ if defined(__linux) - -!*** -!*** The Linux* OS entry points are in lowercase, with an underscore appended. 
-!*** - -!dec$ attributes alias:'omp_set_num_threads_'::omp_set_num_threads -!dec$ attributes alias:'omp_set_dynamic_'::omp_set_dynamic -!dec$ attributes alias:'omp_set_nested_'::omp_set_nested -!dec$ attributes alias:'omp_get_num_threads_'::omp_get_num_threads -!dec$ attributes alias:'omp_get_max_threads_'::omp_get_max_threads -!dec$ attributes alias:'omp_get_thread_num_'::omp_get_thread_num -!dec$ attributes alias:'omp_get_num_procs_'::omp_get_num_procs -!dec$ attributes alias:'omp_in_parallel_'::omp_in_parallel -!dec$ attributes alias:'omp_get_dynamic_'::omp_get_dynamic -!dec$ attributes alias:'omp_get_nested_'::omp_get_nested -!dec$ attributes alias:'omp_get_thread_limit_'::omp_get_thread_limit -!dec$ attributes alias:'omp_set_max_active_levels_'::omp_set_max_active_levels -!dec$ attributes alias:'omp_get_max_active_levels_'::omp_get_max_active_levels -!dec$ attributes alias:'omp_get_level_'::omp_get_level -!dec$ attributes alias:'omp_get_active_level_'::omp_get_active_level -!dec$ attributes alias:'omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num -!dec$ attributes alias:'omp_get_team_size_'::omp_get_team_size -!dec$ attributes alias:'omp_set_schedule_'::omp_set_schedule -!dec$ attributes alias:'omp_get_schedule_'::omp_get_schedule -!dec$ attributes alias:'omp_get_wtime_'::omp_get_wtime -!dec$ attributes alias:'omp_get_wtick_'::omp_get_wtick - -!dec$ attributes alias:'omp_init_lock_'::omp_init_lock -!dec$ attributes alias:'omp_destroy_lock_'::omp_destroy_lock -!dec$ attributes alias:'omp_set_lock_'::omp_set_lock -!dec$ attributes alias:'omp_unset_lock_'::omp_unset_lock -!dec$ attributes alias:'omp_test_lock_'::omp_test_lock -!dec$ attributes alias:'omp_init_nest_lock_'::omp_init_nest_lock -!dec$ attributes alias:'omp_destroy_nest_lock_'::omp_destroy_nest_lock -!dec$ attributes alias:'omp_set_nest_lock_'::omp_set_nest_lock -!dec$ attributes alias:'omp_unset_nest_lock_'::omp_unset_nest_lock -!dec$ attributes alias:'omp_test_nest_lock_'::omp_test_nest_lock - -!dec$ attributes alias:'kmp_set_stacksize_'::kmp_set_stacksize -!dec$ attributes alias:'kmp_set_stacksize_s_'::kmp_set_stacksize_s -!dec$ attributes alias:'kmp_set_blocktime_'::kmp_set_blocktime -!dec$ attributes alias:'kmp_set_library_serial_'::kmp_set_library_serial -!dec$ attributes alias:'kmp_set_library_turnaround_'::kmp_set_library_turnaround -!dec$ attributes alias:'kmp_set_library_throughput_'::kmp_set_library_throughput -!dec$ attributes alias:'kmp_set_library_'::kmp_set_library -!dec$ attributes alias:'kmp_get_stacksize_'::kmp_get_stacksize -!dec$ attributes alias:'kmp_get_stacksize_s_'::kmp_get_stacksize_s -!dec$ attributes alias:'kmp_get_blocktime_'::kmp_get_blocktime -!dec$ attributes alias:'kmp_get_library_'::kmp_get_library -!dec$ attributes alias:'kmp_set_affinity_'::kmp_set_affinity -!dec$ attributes alias:'kmp_get_affinity_'::kmp_get_affinity -!dec$ attributes alias:'kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc -!dec$ attributes alias:'kmp_create_affinity_mask_'::kmp_create_affinity_mask -!dec$ attributes alias:'kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask -!dec$ attributes alias:'kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'kmp_malloc_'::kmp_malloc -!dec$ attributes alias:'kmp_aligned_malloc_'::kmp_aligned_malloc -!dec$ attributes alias:'kmp_calloc_'::kmp_calloc -!dec$ attributes 
alias:'kmp_realloc_'::kmp_realloc
-!dec$ attributes alias:'kmp_free_'::kmp_free
-
-!dec$ attributes alias:'kmp_set_warnings_on_'::kmp_set_warnings_on
-!dec$ attributes alias:'kmp_set_warnings_off_'::kmp_set_warnings_off
-
-!dec$ endif
-
-!dec$ if defined(__APPLE__)
-
-!***
-!*** The Mac entry points are in lowercase, with both an underscore
-!*** appended and an underscore prepended.
-!***
-
-!dec$ attributes alias:'_omp_set_num_threads_'::omp_set_num_threads
-!dec$ attributes alias:'_omp_set_dynamic_'::omp_set_dynamic
-!dec$ attributes alias:'_omp_set_nested_'::omp_set_nested
-!dec$ attributes alias:'_omp_get_num_threads_'::omp_get_num_threads
-!dec$ attributes alias:'_omp_get_max_threads_'::omp_get_max_threads
-!dec$ attributes alias:'_omp_get_thread_num_'::omp_get_thread_num
-!dec$ attributes alias:'_omp_get_num_procs_'::omp_get_num_procs
-!dec$ attributes alias:'_omp_in_parallel_'::omp_in_parallel
-!dec$ attributes alias:'_omp_get_dynamic_'::omp_get_dynamic
-!dec$ attributes alias:'_omp_get_nested_'::omp_get_nested
-!dec$ attributes alias:'_omp_get_thread_limit_'::omp_get_thread_limit
-!dec$ attributes alias:'_omp_set_max_active_levels_'::omp_set_max_active_levels
-!dec$ attributes alias:'_omp_get_max_active_levels_'::omp_get_max_active_levels
-!dec$ attributes alias:'_omp_get_level_'::omp_get_level
-!dec$ attributes alias:'_omp_get_active_level_'::omp_get_active_level
-!dec$ attributes alias:'_omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num
-!dec$ attributes alias:'_omp_get_team_size_'::omp_get_team_size
-!dec$ attributes alias:'_omp_set_schedule_'::omp_set_schedule
-!dec$ attributes alias:'_omp_get_schedule_'::omp_get_schedule
-!dec$ attributes alias:'_omp_get_wtime_'::omp_get_wtime
-!dec$ attributes alias:'_omp_get_wtick_'::omp_get_wtick
-
-!dec$ attributes alias:'_omp_init_lock_'::omp_init_lock
-!dec$ attributes alias:'_omp_destroy_lock_'::omp_destroy_lock
-!dec$ attributes alias:'_omp_set_lock_'::omp_set_lock
-!dec$ attributes alias:'_omp_unset_lock_'::omp_unset_lock
-!dec$ attributes alias:'_omp_test_lock_'::omp_test_lock
-!dec$ attributes alias:'_omp_init_nest_lock_'::omp_init_nest_lock
-!dec$ attributes alias:'_omp_destroy_nest_lock_'::omp_destroy_nest_lock
-!dec$ attributes alias:'_omp_set_nest_lock_'::omp_set_nest_lock
-!dec$ attributes alias:'_omp_unset_nest_lock_'::omp_unset_nest_lock
-!dec$ attributes alias:'_omp_test_nest_lock_'::omp_test_nest_lock
-
-!dec$ attributes alias:'_kmp_set_stacksize_'::kmp_set_stacksize
-!dec$ attributes alias:'_kmp_set_stacksize_s_'::kmp_set_stacksize_s
-!dec$ attributes alias:'_kmp_set_blocktime_'::kmp_set_blocktime
-!dec$ attributes alias:'_kmp_set_library_serial_'::kmp_set_library_serial
-!dec$ attributes alias:'_kmp_set_library_turnaround_'::kmp_set_library_turnaround
-!dec$ attributes alias:'_kmp_set_library_throughput_'::kmp_set_library_throughput
-!dec$ attributes alias:'_kmp_set_library_'::kmp_set_library
-!dec$ attributes alias:'_kmp_get_stacksize_'::kmp_get_stacksize
-!dec$ attributes alias:'_kmp_get_stacksize_s_'::kmp_get_stacksize_s
-!dec$ attributes alias:'_kmp_get_blocktime_'::kmp_get_blocktime
-!dec$ attributes alias:'_kmp_get_library_'::kmp_get_library
-!dec$ attributes alias:'_kmp_set_affinity_'::kmp_set_affinity
-!dec$ attributes alias:'_kmp_get_affinity_'::kmp_get_affinity
-!dec$ attributes alias:'_kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc
-!dec$ attributes alias:'_kmp_create_affinity_mask_'::kmp_create_affinity_mask
-!dec$ attributes alias:'_kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask
-!dec$ attributes alias:'_kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'_kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'_kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'_kmp_malloc_'::kmp_malloc -!dec$ attributes alias:'_kmp_aligned_malloc_'::kmp_aligned_malloc -!dec$ attributes alias:'_kmp_calloc_'::kmp_calloc -!dec$ attributes alias:'_kmp_realloc_'::kmp_realloc -!dec$ attributes alias:'_kmp_free_'::kmp_free - -!dec$ attributes alias:'_kmp_set_warnings_on_'::kmp_set_warnings_on -!dec$ attributes alias:'_kmp_set_warnings_off_'::kmp_set_warnings_off - -!dec$ endif - - end module omp_lib - Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/30/omp_lib.h.var =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/30/omp_lib.h.var (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/30/omp_lib.h.var (nonexistent) @@ -1,649 +0,0 @@ -! include/30/omp_lib.h.var - -! -!//===----------------------------------------------------------------------===// -!// -!// The LLVM Compiler Infrastructure -!// -!// This file is dual licensed under the MIT and the University of Illinois Open -!// Source Licenses. See LICENSE.txt for details. -!// -!//===----------------------------------------------------------------------===// -! - -!*** -!*** Some of the directives for the following routine extend past column 72, -!*** so process this file in 132-column mode. -!*** - -!dec$ fixedformlinesize:132 - - integer, parameter :: omp_integer_kind = 4 - integer, parameter :: omp_logical_kind = 4 - integer, parameter :: omp_real_kind = 4 - integer, parameter :: omp_lock_kind = int_ptr_kind() - integer, parameter :: omp_nest_lock_kind = int_ptr_kind() - integer, parameter :: omp_sched_kind = omp_integer_kind - integer, parameter :: kmp_pointer_kind = int_ptr_kind() - integer, parameter :: kmp_size_t_kind = int_ptr_kind() - integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind() - - integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1 - integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2 - integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3 - integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4 - - integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@ - character(*) kmp_build_date - parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' ) - integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@ - - interface - -! *** -! *** omp_* entry points -! 
*** - - subroutine omp_set_num_threads(nthreads) - import - integer (kind=omp_integer_kind) nthreads - end subroutine omp_set_num_threads - - subroutine omp_set_dynamic(enable) - import - logical (kind=omp_logical_kind) enable - end subroutine omp_set_dynamic - - subroutine omp_set_nested(enable) - import - logical (kind=omp_logical_kind) enable - end subroutine omp_set_nested - - function omp_get_num_threads() - import - integer (kind=omp_integer_kind) omp_get_num_threads - end function omp_get_num_threads - - function omp_get_max_threads() - import - integer (kind=omp_integer_kind) omp_get_max_threads - end function omp_get_max_threads - - function omp_get_thread_num() - import - integer (kind=omp_integer_kind) omp_get_thread_num - end function omp_get_thread_num - - function omp_get_num_procs() - import - integer (kind=omp_integer_kind) omp_get_num_procs - end function omp_get_num_procs - - function omp_in_parallel() - import - logical (kind=omp_logical_kind) omp_in_parallel - end function omp_in_parallel - - function omp_in_final() - import - logical (kind=omp_logical_kind) omp_in_final - end function omp_in_final - - function omp_get_dynamic() - import - logical (kind=omp_logical_kind) omp_get_dynamic - end function omp_get_dynamic - - function omp_get_nested() - import - logical (kind=omp_logical_kind) omp_get_nested - end function omp_get_nested - - function omp_get_thread_limit() - import - integer (kind=omp_integer_kind) omp_get_thread_limit - end function omp_get_thread_limit - - subroutine omp_set_max_active_levels(max_levels) - import - integer (kind=omp_integer_kind) max_levels - end subroutine omp_set_max_active_levels - - function omp_get_max_active_levels() - import - integer (kind=omp_integer_kind) omp_get_max_active_levels - end function omp_get_max_active_levels - - function omp_get_level() - import - integer (kind=omp_integer_kind) omp_get_level - end function omp_get_level - - function omp_get_active_level() - import - integer (kind=omp_integer_kind) omp_get_active_level - end function omp_get_active_level - - function omp_get_ancestor_thread_num(level) - import - integer (kind=omp_integer_kind) level - integer (kind=omp_integer_kind) omp_get_ancestor_thread_num - end function omp_get_ancestor_thread_num - - function omp_get_team_size(level) - import - integer (kind=omp_integer_kind) level - integer (kind=omp_integer_kind) omp_get_team_size - end function omp_get_team_size - - subroutine omp_set_schedule(kind, modifier) - import - integer (kind=omp_sched_kind) kind - integer (kind=omp_integer_kind) modifier - end subroutine omp_set_schedule - - subroutine omp_get_schedule(kind, modifier) - import - integer (kind=omp_sched_kind) kind - integer (kind=omp_integer_kind) modifier - end subroutine omp_get_schedule - - function omp_get_wtime() - double precision omp_get_wtime - end function omp_get_wtime - - function omp_get_wtick () - double precision omp_get_wtick - end function omp_get_wtick - - subroutine omp_init_lock(lockvar) - import - integer (kind=omp_lock_kind) lockvar - end subroutine omp_init_lock - - subroutine omp_destroy_lock(lockvar) - import - integer (kind=omp_lock_kind) lockvar - end subroutine omp_destroy_lock - - subroutine omp_set_lock(lockvar) - import - integer (kind=omp_lock_kind) lockvar - end subroutine omp_set_lock - - subroutine omp_unset_lock(lockvar) - import - integer (kind=omp_lock_kind) lockvar - end subroutine omp_unset_lock - - function omp_test_lock(lockvar) - import - logical (kind=omp_logical_kind) omp_test_lock - integer 
(kind=omp_lock_kind) lockvar - end function omp_test_lock - - subroutine omp_init_nest_lock(lockvar) - import - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_init_nest_lock - - subroutine omp_destroy_nest_lock(lockvar) - import - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_destroy_nest_lock - - subroutine omp_set_nest_lock(lockvar) - import - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_set_nest_lock - - subroutine omp_unset_nest_lock(lockvar) - import - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_unset_nest_lock - - function omp_test_nest_lock(lockvar) - import - integer (kind=omp_integer_kind) omp_test_nest_lock - integer (kind=omp_nest_lock_kind) lockvar - end function omp_test_nest_lock - -! *** -! *** kmp_* entry points -! *** - - subroutine kmp_set_stacksize(size) - import - integer (kind=omp_integer_kind) size - end subroutine kmp_set_stacksize - - subroutine kmp_set_stacksize_s(size) - import - integer (kind=kmp_size_t_kind) size - end subroutine kmp_set_stacksize_s - - subroutine kmp_set_blocktime(msec) - import - integer (kind=omp_integer_kind) msec - end subroutine kmp_set_blocktime - - subroutine kmp_set_library_serial() - end subroutine kmp_set_library_serial - - subroutine kmp_set_library_turnaround() - end subroutine kmp_set_library_turnaround - - subroutine kmp_set_library_throughput() - end subroutine kmp_set_library_throughput - - subroutine kmp_set_library(libnum) - import - integer (kind=omp_integer_kind) libnum - end subroutine kmp_set_library - - subroutine kmp_set_defaults(string) - character*(*) string - end subroutine kmp_set_defaults - - function kmp_get_stacksize() - import - integer (kind=omp_integer_kind) kmp_get_stacksize - end function kmp_get_stacksize - - function kmp_get_stacksize_s() - import - integer (kind=kmp_size_t_kind) kmp_get_stacksize_s - end function kmp_get_stacksize_s - - function kmp_get_blocktime() - import - integer (kind=omp_integer_kind) kmp_get_blocktime - end function kmp_get_blocktime - - function kmp_get_library() - import - integer (kind=omp_integer_kind) kmp_get_library - end function kmp_get_library - - function kmp_set_affinity(mask) - import - integer (kind=omp_integer_kind) kmp_set_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity - - function kmp_get_affinity(mask) - import - integer (kind=omp_integer_kind) kmp_get_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity - - function kmp_get_affinity_max_proc() - import - integer (kind=omp_integer_kind) kmp_get_affinity_max_proc - end function kmp_get_affinity_max_proc - - subroutine kmp_create_affinity_mask(mask) - import - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_create_affinity_mask - - subroutine kmp_destroy_affinity_mask(mask) - import - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_destroy_affinity_mask - - function kmp_set_affinity_mask_proc(proc, mask) - import - integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc - integer (kind=omp_integer_kind) proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity_mask_proc - - function kmp_unset_affinity_mask_proc(proc, mask) - import - integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc - integer (kind=omp_integer_kind) proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_unset_affinity_mask_proc - - function kmp_get_affinity_mask_proc(proc, mask) - import - integer (kind=omp_integer_kind) 
kmp_get_affinity_mask_proc - integer (kind=omp_integer_kind) proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity_mask_proc - - function kmp_malloc(size) - import - integer (kind=kmp_pointer_kind) kmp_malloc - integer (kind=kmp_size_t_kind) size - end function kmp_malloc - - function kmp_aligned_malloc(size, alignment) - import - integer (kind=kmp_pointer_kind) kmp_aligned_malloc - integer (kind=kmp_size_t_kind) size - integer (kind=kmp_size_t_kind) alignment - end function kmp_aligned_malloc - - function kmp_calloc(nelem, elsize) - import - integer (kind=kmp_pointer_kind) kmp_calloc - integer (kind=kmp_size_t_kind) nelem - integer (kind=kmp_size_t_kind) elsize - end function kmp_calloc - - function kmp_realloc(ptr, size) - import - integer (kind=kmp_pointer_kind) kmp_realloc - integer (kind=kmp_pointer_kind) ptr - integer (kind=kmp_size_t_kind) size - end function kmp_realloc - - subroutine kmp_free(ptr) - import - integer (kind=kmp_pointer_kind) ptr - end subroutine kmp_free - - subroutine kmp_set_warnings_on() - end subroutine kmp_set_warnings_on - - subroutine kmp_set_warnings_off() - end subroutine kmp_set_warnings_off - - end interface - -!dec$ if defined(_WIN32) -!dec$ if defined(_WIN64) .or. defined(_M_AMD64) - -!*** -!*** The Fortran entry points must be in uppercase, even if the /Qlowercase -!*** option is specified. The alias attribute ensures that the specified -!*** string is used as the entry point. -!*** -!*** On the Windows* OS IA-32 architecture, the Fortran entry points have an -!*** underscore prepended. On the Windows* OS Intel(R) 64 -!*** architecture, no underscore is prepended. -!*** - -!dec$ attributes alias:'OMP_SET_NUM_THREADS'::omp_set_num_threads -!dec$ attributes alias:'OMP_SET_DYNAMIC'::omp_set_dynamic -!dec$ attributes alias:'OMP_SET_NESTED'::omp_set_nested -!dec$ attributes alias:'OMP_GET_NUM_THREADS'::omp_get_num_threads -!dec$ attributes alias:'OMP_GET_MAX_THREADS'::omp_get_max_threads -!dec$ attributes alias:'OMP_GET_THREAD_NUM'::omp_get_thread_num -!dec$ attributes alias:'OMP_GET_NUM_PROCS'::omp_get_num_procs -!dec$ attributes alias:'OMP_IN_PARALLEL'::omp_in_parallel -!dec$ attributes alias:'OMP_IN_FINAL'::omp_in_final -!dec$ attributes alias:'OMP_GET_DYNAMIC'::omp_get_dynamic -!dec$ attributes alias:'OMP_GET_NESTED'::omp_get_nested -!dec$ attributes alias:'OMP_GET_THREAD_LIMIT'::omp_get_thread_limit -!dec$ attributes alias:'OMP_SET_MAX_ACTIVE_LEVELS'::omp_set_max_active_levels -!dec$ attributes alias:'OMP_GET_MAX_ACTIVE_LEVELS'::omp_get_max_active_levels -!dec$ attributes alias:'OMP_GET_LEVEL'::omp_get_level -!dec$ attributes alias:'OMP_GET_ACTIVE_LEVEL'::omp_get_active_level -!dec$ attributes alias:'OMP_GET_ANCESTOR_THREAD_NUM'::omp_get_ancestor_thread_num -!dec$ attributes alias:'OMP_GET_TEAM_SIZE'::omp_get_team_size -!dec$ attributes alias:'OMP_SET_SCHEDULE'::omp_set_schedule -!dec$ attributes alias:'OMP_GET_SCHEDULE'::omp_get_schedule -!dec$ attributes alias:'OMP_GET_WTIME'::omp_get_wtime -!dec$ attributes alias:'OMP_GET_WTICK'::omp_get_wtick - -!dec$ attributes alias:'omp_init_lock'::omp_init_lock -!dec$ attributes alias:'omp_destroy_lock'::omp_destroy_lock -!dec$ attributes alias:'omp_set_lock'::omp_set_lock -!dec$ attributes alias:'omp_unset_lock'::omp_unset_lock -!dec$ attributes alias:'omp_test_lock'::omp_test_lock -!dec$ attributes alias:'omp_init_nest_lock'::omp_init_nest_lock -!dec$ attributes alias:'omp_destroy_nest_lock'::omp_destroy_nest_lock -!dec$ attributes alias:'omp_set_nest_lock'::omp_set_nest_lock 
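As a concrete illustration of what the alias directives above arrange: on Windows the runtime's export table is expected to carry these Fortran-style names alongside the C entry points, so the uppercase symbols can also be resolved dynamically. A minimal C sketch, assuming a Windows build in which the runtime is loaded as libomp.dll; both that DLL name and the presence of the uppercase export are assumptions to verify against the runtime's dllexports list:

#include <windows.h>
#include <stdio.h>

/* Signature of omp_get_thread_num as declared in the interface block above. */
typedef int(__cdecl *omp_get_thread_num_t)(void);

int main(void) {
  HMODULE rt = LoadLibraryA("libomp.dll"); /* assumed DLL name */
  if (rt == NULL) {
    fprintf(stderr, "OpenMP runtime DLL not found\n");
    return 1;
  }
  /* The uppercase spelling is the one the alias directives bind to. */
  omp_get_thread_num_t fn =
      (omp_get_thread_num_t)GetProcAddress(rt, "OMP_GET_THREAD_NUM");
  printf(fn ? "OMP_GET_THREAD_NUM is exported\n"
            : "OMP_GET_THREAD_NUM is not exported\n");
  FreeLibrary(rt);
  return 0;
}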
-!dec$ attributes alias:'omp_unset_nest_lock'::omp_unset_nest_lock -!dec$ attributes alias:'omp_test_nest_lock'::omp_test_nest_lock - -!dec$ attributes alias:'KMP_SET_STACKSIZE'::kmp_set_stacksize -!dec$ attributes alias:'KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s -!dec$ attributes alias:'KMP_SET_BLOCKTIME'::kmp_set_blocktime -!dec$ attributes alias:'KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial -!dec$ attributes alias:'KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround -!dec$ attributes alias:'KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput -!dec$ attributes alias:'KMP_SET_LIBRARY'::kmp_set_library -!dec$ attributes alias:'KMP_SET_DEFAULTS'::kmp_set_defaults -!dec$ attributes alias:'KMP_GET_STACKSIZE'::kmp_get_stacksize -!dec$ attributes alias:'KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s -!dec$ attributes alias:'KMP_GET_BLOCKTIME'::kmp_get_blocktime -!dec$ attributes alias:'KMP_GET_LIBRARY'::kmp_get_library -!dec$ attributes alias:'KMP_SET_AFFINITY'::kmp_set_affinity -!dec$ attributes alias:'KMP_GET_AFFINITY'::kmp_get_affinity -!dec$ attributes alias:'KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc -!dec$ attributes alias:'KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask -!dec$ attributes alias:'KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask -!dec$ attributes alias:'KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'KMP_MALLOC'::kmp_malloc -!dec$ attributes alias:'KMP_ALIGNED_MALLOC'::kmp_aligned_malloc -!dec$ attributes alias:'KMP_CALLOC'::kmp_calloc -!dec$ attributes alias:'KMP_REALLOC'::kmp_realloc -!dec$ attributes alias:'KMP_FREE'::kmp_free - -!dec$ attributes alias:'KMP_SET_WARNINGS_ON'::kmp_set_warnings_on -!dec$ attributes alias:'KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off - -!dec$ else - -!*** -!*** On Windows* OS IA-32 architecture, the Fortran entry points have an underscore prepended. 
-!*** - -!dec$ attributes alias:'_OMP_SET_NUM_THREADS'::omp_set_num_threads -!dec$ attributes alias:'_OMP_SET_DYNAMIC'::omp_set_dynamic -!dec$ attributes alias:'_OMP_SET_NESTED'::omp_set_nested -!dec$ attributes alias:'_OMP_GET_NUM_THREADS'::omp_get_num_threads -!dec$ attributes alias:'_OMP_GET_MAX_THREADS'::omp_get_max_threads -!dec$ attributes alias:'_OMP_GET_THREAD_NUM'::omp_get_thread_num -!dec$ attributes alias:'_OMP_GET_NUM_PROCS'::omp_get_num_procs -!dec$ attributes alias:'_OMP_IN_PARALLEL'::omp_in_parallel -!dec$ attributes alias:'_OMP_IN_FINAL'::omp_in_final -!dec$ attributes alias:'_OMP_GET_DYNAMIC'::omp_get_dynamic -!dec$ attributes alias:'_OMP_GET_NESTED'::omp_get_nested -!dec$ attributes alias:'_OMP_GET_THREAD_LIMIT'::omp_get_thread_limit -!dec$ attributes alias:'_OMP_SET_MAX_ACTIVE_LEVELS'::omp_set_max_active_levels -!dec$ attributes alias:'_OMP_GET_MAX_ACTIVE_LEVELS'::omp_get_max_active_levels -!dec$ attributes alias:'_OMP_GET_LEVEL'::omp_get_level -!dec$ attributes alias:'_OMP_GET_ACTIVE_LEVEL'::omp_get_active_level -!dec$ attributes alias:'_OMP_GET_ANCESTOR_THREAD_NUM'::omp_get_ancestor_thread_num -!dec$ attributes alias:'_OMP_GET_TEAM_SIZE'::omp_get_team_size -!dec$ attributes alias:'_OMP_SET_SCHEDULE'::omp_set_schedule -!dec$ attributes alias:'_OMP_GET_SCHEDULE'::omp_get_schedule -!dec$ attributes alias:'_OMP_GET_WTIME'::omp_get_wtime -!dec$ attributes alias:'_OMP_GET_WTICK'::omp_get_wtick - -!dec$ attributes alias:'_omp_init_lock'::omp_init_lock -!dec$ attributes alias:'_omp_destroy_lock'::omp_destroy_lock -!dec$ attributes alias:'_omp_set_lock'::omp_set_lock -!dec$ attributes alias:'_omp_unset_lock'::omp_unset_lock -!dec$ attributes alias:'_omp_test_lock'::omp_test_lock -!dec$ attributes alias:'_omp_init_nest_lock'::omp_init_nest_lock -!dec$ attributes alias:'_omp_destroy_nest_lock'::omp_destroy_nest_lock -!dec$ attributes alias:'_omp_set_nest_lock'::omp_set_nest_lock -!dec$ attributes alias:'_omp_unset_nest_lock'::omp_unset_nest_lock -!dec$ attributes alias:'_omp_test_nest_lock'::omp_test_nest_lock - -!dec$ attributes alias:'_KMP_SET_STACKSIZE'::kmp_set_stacksize -!dec$ attributes alias:'_KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s -!dec$ attributes alias:'_KMP_SET_BLOCKTIME'::kmp_set_blocktime -!dec$ attributes alias:'_KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial -!dec$ attributes alias:'_KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround -!dec$ attributes alias:'_KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput -!dec$ attributes alias:'_KMP_SET_LIBRARY'::kmp_set_library -!dec$ attributes alias:'_KMP_SET_DEFAULTS'::kmp_set_defaults -!dec$ attributes alias:'_KMP_GET_STACKSIZE'::kmp_get_stacksize -!dec$ attributes alias:'_KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s -!dec$ attributes alias:'_KMP_GET_BLOCKTIME'::kmp_get_blocktime -!dec$ attributes alias:'_KMP_GET_LIBRARY'::kmp_get_library -!dec$ attributes alias:'_KMP_SET_AFFINITY'::kmp_set_affinity -!dec$ attributes alias:'_KMP_GET_AFFINITY'::kmp_get_affinity -!dec$ attributes alias:'_KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc -!dec$ attributes alias:'_KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask -!dec$ attributes alias:'_KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask -!dec$ attributes alias:'_KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'_KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'_KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'_KMP_MALLOC'::kmp_malloc -!dec$ attributes 
alias:'_KMP_ALIGNED_MALLOC'::kmp_aligned_malloc -!dec$ attributes alias:'_KMP_CALLOC'::kmp_calloc -!dec$ attributes alias:'_KMP_REALLOC'::kmp_realloc -!dec$ attributes alias:'_KMP_FREE'::kmp_free - -!dec$ attributes alias:'_KMP_SET_WARNINGS_ON'::kmp_set_warnings_on -!dec$ attributes alias:'_KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off - -!dec$ endif -!dec$ endif - -!dec$ if defined(__linux) - -!*** -!*** The Linux* OS entry points are in lowercase, with an underscore appended. -!*** - -!dec$ attributes alias:'omp_set_num_threads_'::omp_set_num_threads -!dec$ attributes alias:'omp_set_dynamic_'::omp_set_dynamic -!dec$ attributes alias:'omp_set_nested_'::omp_set_nested -!dec$ attributes alias:'omp_get_num_threads_'::omp_get_num_threads -!dec$ attributes alias:'omp_get_max_threads_'::omp_get_max_threads -!dec$ attributes alias:'omp_get_thread_num_'::omp_get_thread_num -!dec$ attributes alias:'omp_get_num_procs_'::omp_get_num_procs -!dec$ attributes alias:'omp_in_parallel_'::omp_in_parallel -!dec$ attributes alias:'omp_in_final_'::omp_in_final -!dec$ attributes alias:'omp_get_dynamic_'::omp_get_dynamic -!dec$ attributes alias:'omp_get_nested_'::omp_get_nested -!dec$ attributes alias:'omp_get_thread_limit_'::omp_get_thread_limit -!dec$ attributes alias:'omp_set_max_active_levels_'::omp_set_max_active_levels -!dec$ attributes alias:'omp_get_max_active_levels_'::omp_get_max_active_levels -!dec$ attributes alias:'omp_get_level_'::omp_get_level -!dec$ attributes alias:'omp_get_active_level_'::omp_get_active_level -!dec$ attributes alias:'omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num -!dec$ attributes alias:'omp_get_team_size_'::omp_get_team_size -!dec$ attributes alias:'omp_set_schedule_'::omp_set_schedule -!dec$ attributes alias:'omp_get_schedule_'::omp_get_schedule -!dec$ attributes alias:'omp_get_wtime_'::omp_get_wtime -!dec$ attributes alias:'omp_get_wtick_'::omp_get_wtick - -!dec$ attributes alias:'omp_init_lock_'::omp_init_lock -!dec$ attributes alias:'omp_destroy_lock_'::omp_destroy_lock -!dec$ attributes alias:'omp_set_lock_'::omp_set_lock -!dec$ attributes alias:'omp_unset_lock_'::omp_unset_lock -!dec$ attributes alias:'omp_test_lock_'::omp_test_lock -!dec$ attributes alias:'omp_init_nest_lock_'::omp_init_nest_lock -!dec$ attributes alias:'omp_destroy_nest_lock_'::omp_destroy_nest_lock -!dec$ attributes alias:'omp_set_nest_lock_'::omp_set_nest_lock -!dec$ attributes alias:'omp_unset_nest_lock_'::omp_unset_nest_lock -!dec$ attributes alias:'omp_test_nest_lock_'::omp_test_nest_lock - -!dec$ attributes alias:'kmp_set_stacksize_'::kmp_set_stacksize -!dec$ attributes alias:'kmp_set_stacksize_s_'::kmp_set_stacksize_s -!dec$ attributes alias:'kmp_set_blocktime_'::kmp_set_blocktime -!dec$ attributes alias:'kmp_set_library_serial_'::kmp_set_library_serial -!dec$ attributes alias:'kmp_set_library_turnaround_'::kmp_set_library_turnaround -!dec$ attributes alias:'kmp_set_library_throughput_'::kmp_set_library_throughput -!dec$ attributes alias:'kmp_set_library_'::kmp_set_library -!dec$ attributes alias:'kmp_set_defaults_'::kmp_set_defaults -!dec$ attributes alias:'kmp_get_stacksize_'::kmp_get_stacksize -!dec$ attributes alias:'kmp_get_stacksize_s_'::kmp_get_stacksize_s -!dec$ attributes alias:'kmp_get_blocktime_'::kmp_get_blocktime -!dec$ attributes alias:'kmp_get_library_'::kmp_get_library -!dec$ attributes alias:'kmp_set_affinity_'::kmp_set_affinity -!dec$ attributes alias:'kmp_get_affinity_'::kmp_get_affinity -!dec$ attributes alias:'kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc 
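The Linux convention just stated, lowercase with a trailing underscore, means the Fortran entry points can also be located at runtime with dlsym(). A minimal sketch, assuming the process is linked against the runtime (for example, built with -fopenmp) and linked with -ldl; on glibc, _GNU_SOURCE is needed for RTLD_DEFAULT:

#define _GNU_SOURCE /* for RTLD_DEFAULT on glibc */
#include <dlfcn.h>
#include <stdio.h>

typedef int (*omp_get_thread_num_f)(void);

int main(void) {
  /* "omp_get_thread_num_" is the mangled Fortran entry point named in
     the alias directives above. */
  omp_get_thread_num_f fn =
      (omp_get_thread_num_f)dlsym(RTLD_DEFAULT, "omp_get_thread_num_");
  if (fn)
    printf("Fortran entry point found; thread num = %d\n", fn());
  else
    printf("omp_get_thread_num_ is not visible in this process\n");
  return 0;
}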
-!dec$ attributes alias:'kmp_create_affinity_mask_'::kmp_create_affinity_mask -!dec$ attributes alias:'kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask -!dec$ attributes alias:'kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'kmp_malloc_'::kmp_malloc -!dec$ attributes alias:'kmp_aligned_malloc_'::kmp_aligned_malloc -!dec$ attributes alias:'kmp_calloc_'::kmp_calloc -!dec$ attributes alias:'kmp_realloc_'::kmp_realloc -!dec$ attributes alias:'kmp_free_'::kmp_free - -!dec$ attributes alias:'kmp_set_warnings_on_'::kmp_set_warnings_on -!dec$ attributes alias:'kmp_set_warnings_off_'::kmp_set_warnings_off - -!dec$ endif - -!dec$ if defined(__APPLE__) - -!*** -!*** The Mac entry points are in lowercase, with both an underscore -!*** appended and an underscore prepended. -!*** - -!dec$ attributes alias:'_omp_set_num_threads_'::omp_set_num_threads -!dec$ attributes alias:'_omp_set_dynamic_'::omp_set_dynamic -!dec$ attributes alias:'_omp_set_nested_'::omp_set_nested -!dec$ attributes alias:'_omp_get_num_threads_'::omp_get_num_threads -!dec$ attributes alias:'_omp_get_max_threads_'::omp_get_max_threads -!dec$ attributes alias:'_omp_get_thread_num_'::omp_get_thread_num -!dec$ attributes alias:'_omp_get_num_procs_'::omp_get_num_procs -!dec$ attributes alias:'_omp_in_parallel_'::omp_in_parallel -!dec$ attributes alias:'_omp_in_final_'::omp_in_final -!dec$ attributes alias:'_omp_get_dynamic_'::omp_get_dynamic -!dec$ attributes alias:'_omp_get_nested_'::omp_get_nested -!dec$ attributes alias:'_omp_get_thread_limit_'::omp_get_thread_limit -!dec$ attributes alias:'_omp_set_max_active_levels_'::omp_set_max_active_levels -!dec$ attributes alias:'_omp_get_max_active_levels_'::omp_get_max_active_levels -!dec$ attributes alias:'_omp_get_level_'::omp_get_level -!dec$ attributes alias:'_omp_get_active_level_'::omp_get_active_level -!dec$ attributes alias:'_omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num -!dec$ attributes alias:'_omp_get_team_size_'::omp_get_team_size -!dec$ attributes alias:'_omp_set_schedule_'::omp_set_schedule -!dec$ attributes alias:'_omp_get_schedule_'::omp_get_schedule -!dec$ attributes alias:'_omp_get_wtime_'::omp_get_wtime -!dec$ attributes alias:'_omp_get_wtick_'::omp_get_wtick - -!dec$ attributes alias:'_omp_init_lock_'::omp_init_lock -!dec$ attributes alias:'_omp_destroy_lock_'::omp_destroy_lock -!dec$ attributes alias:'_omp_set_lock_'::omp_set_lock -!dec$ attributes alias:'_omp_unset_lock_'::omp_unset_lock -!dec$ attributes alias:'_omp_test_lock_'::omp_test_lock -!dec$ attributes alias:'_omp_init_nest_lock_'::omp_init_nest_lock -!dec$ attributes alias:'_omp_destroy_nest_lock_'::omp_destroy_nest_lock -!dec$ attributes alias:'_omp_set_nest_lock_'::omp_set_nest_lock -!dec$ attributes alias:'_omp_unset_nest_lock_'::omp_unset_nest_lock -!dec$ attributes alias:'_omp_test_nest_lock_'::omp_test_nest_lock - -!dec$ attributes alias:'_kmp_set_stacksize_'::kmp_set_stacksize -!dec$ attributes alias:'_kmp_set_stacksize_s_'::kmp_set_stacksize_s -!dec$ attributes alias:'_kmp_set_blocktime_'::kmp_set_blocktime -!dec$ attributes alias:'_kmp_set_library_serial_'::kmp_set_library_serial -!dec$ attributes alias:'_kmp_set_library_turnaround_'::kmp_set_library_turnaround -!dec$ attributes alias:'_kmp_set_library_throughput_'::kmp_set_library_throughput -!dec$ attributes 
alias:'_kmp_set_library_'::kmp_set_library -!dec$ attributes alias:'_kmp_set_defaults_'::kmp_set_defaults -!dec$ attributes alias:'_kmp_get_stacksize_'::kmp_get_stacksize -!dec$ attributes alias:'_kmp_get_stacksize_s_'::kmp_get_stacksize_s -!dec$ attributes alias:'_kmp_get_blocktime_'::kmp_get_blocktime -!dec$ attributes alias:'_kmp_get_library_'::kmp_get_library -!dec$ attributes alias:'_kmp_set_affinity_'::kmp_set_affinity -!dec$ attributes alias:'_kmp_get_affinity_'::kmp_get_affinity -!dec$ attributes alias:'_kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc -!dec$ attributes alias:'_kmp_create_affinity_mask_'::kmp_create_affinity_mask -!dec$ attributes alias:'_kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask -!dec$ attributes alias:'_kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'_kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'_kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'_kmp_malloc_'::kmp_malloc -!dec$ attributes alias:'_kmp_aligned_malloc_'::kmp_aligned_malloc -!dec$ attributes alias:'_kmp_calloc_'::kmp_calloc -!dec$ attributes alias:'_kmp_realloc_'::kmp_realloc -!dec$ attributes alias:'_kmp_free_'::kmp_free - -!dec$ attributes alias:'_kmp_set_warnings_on_'::kmp_set_warnings_on -!dec$ attributes alias:'_kmp_set_warnings_off_'::kmp_set_warnings_off - -!dec$ endif - - Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/30/omp_lib.f90.var =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/30/omp_lib.f90.var (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/30/omp_lib.f90.var (nonexistent) @@ -1,365 +0,0 @@ -! include/30/omp_lib.f90.var - -! -!//===----------------------------------------------------------------------===// -!// -!// The LLVM Compiler Infrastructure -!// -!// This file is dual licensed under the MIT and the University of Illinois Open -!// Source Licenses. See LICENSE.txt for details. -!// -!//===----------------------------------------------------------------------===// -! 
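Unlike the fixed-form omp_lib.h above, the omp_lib.f90 variant that follows declares its interfaces bind(c), with kinds drawn from iso_c_binding, so each interface maps one-to-one onto a C prototype: dummy arguments marked value arrive by value, and everything else is passed by reference. A hand-written C view of a few of the interfaces below, offered as an illustrative sketch rather than the shipped omp.h, assuming the kind mappings given in the module (omp_integer_kind = c_int, kmp_size_t_kind = c_size_t, kmp_pointer_kind = c_intptr_t) and a program linked against the runtime:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* value dummies become by-value C arguments ... */
extern void omp_set_num_threads(int nthreads);
extern int omp_get_thread_num(void);
extern void omp_set_schedule(int kind, int modifier);
/* ... while non-value dummies are passed by reference */
extern void omp_get_schedule(int *kind, int *modifier);
/* kmp_malloc returns integer(c_intptr_t), i.e. intptr_t on the C side */
extern intptr_t kmp_malloc(size_t size);
extern void kmp_free(intptr_t ptr); /* value dummy, so by value */

int main(void) {
  int kind, chunk;
  omp_set_num_threads(4);
  omp_get_schedule(&kind, &chunk);
  printf("schedule kind=%d, chunk=%d\n", kind, chunk);
  return 0;
}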
- - module omp_lib_kinds - - use, intrinsic :: iso_c_binding - - integer, parameter :: omp_integer_kind = c_int - integer, parameter :: omp_logical_kind = 4 - integer, parameter :: omp_real_kind = c_float - integer, parameter :: kmp_double_kind = c_double - integer, parameter :: omp_lock_kind = c_intptr_t - integer, parameter :: omp_nest_lock_kind = c_intptr_t - integer, parameter :: omp_sched_kind = omp_integer_kind - integer, parameter :: kmp_pointer_kind = c_intptr_t - integer, parameter :: kmp_size_t_kind = c_size_t - integer, parameter :: kmp_affinity_mask_kind = c_intptr_t - - end module omp_lib_kinds - - module omp_lib - - use omp_lib_kinds - - integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@ - integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@ - character(*) kmp_build_date - parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' ) - - integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1 - integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2 - integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3 - integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4 - - interface - -! *** -! *** omp_* entry points -! *** - - subroutine omp_set_num_threads(nthreads) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: nthreads - end subroutine omp_set_num_threads - - subroutine omp_set_dynamic(enable) bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind), value :: enable - end subroutine omp_set_dynamic - - subroutine omp_set_nested(enable) bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind), value :: enable - end subroutine omp_set_nested - - function omp_get_num_threads() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_threads - end function omp_get_num_threads - - function omp_get_max_threads() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_threads - end function omp_get_max_threads - - function omp_get_thread_num() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_thread_num - end function omp_get_thread_num - - function omp_get_num_procs() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_procs - end function omp_get_num_procs - - function omp_in_parallel() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_in_parallel - end function omp_in_parallel - - function omp_in_final() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_in_final - end function omp_in_final - - function omp_get_dynamic() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_get_dynamic - end function omp_get_dynamic - - function omp_get_nested() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_get_nested - end function omp_get_nested - - function omp_get_thread_limit() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_thread_limit - end function omp_get_thread_limit - - subroutine omp_set_max_active_levels(max_levels) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: max_levels - end subroutine omp_set_max_active_levels - - function omp_get_max_active_levels() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_active_levels - end function omp_get_max_active_levels - - function 
omp_get_level() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) :: omp_get_level - end function omp_get_level - - function omp_get_active_level() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) :: omp_get_active_level - end function omp_get_active_level - - function omp_get_ancestor_thread_num(level) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_ancestor_thread_num - integer (kind=omp_integer_kind), value :: level - end function omp_get_ancestor_thread_num - - function omp_get_team_size(level) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_team_size - integer (kind=omp_integer_kind), value :: level - end function omp_get_team_size - - subroutine omp_set_schedule(kind, modifier) bind(c) - use omp_lib_kinds - integer (kind=omp_sched_kind), value :: kind - integer (kind=omp_integer_kind), value :: modifier - end subroutine omp_set_schedule - - subroutine omp_get_schedule(kind, modifier) bind(c) - use omp_lib_kinds - integer (kind=omp_sched_kind) :: kind - integer (kind=omp_integer_kind) :: modifier - end subroutine omp_get_schedule - - function omp_get_wtime() bind(c) - use omp_lib_kinds - real (kind=kmp_double_kind) omp_get_wtime - end function omp_get_wtime - - function omp_get_wtick() bind(c) - use omp_lib_kinds - real (kind=kmp_double_kind) omp_get_wtick - end function omp_get_wtick - - subroutine omp_init_lock(lockvar) bind(c) - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_init_lock - - subroutine omp_destroy_lock(lockvar) bind(c) - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_destroy_lock - - subroutine omp_set_lock(lockvar) bind(c) - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_set_lock - - subroutine omp_unset_lock(lockvar) bind(c) - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_unset_lock - - function omp_test_lock(lockvar) bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_test_lock - integer (kind=omp_lock_kind) lockvar - end function omp_test_lock - - subroutine omp_init_nest_lock(lockvar) bind(c) - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_init_nest_lock - - subroutine omp_destroy_nest_lock(lockvar) bind(c) - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_destroy_nest_lock - - subroutine omp_set_nest_lock(lockvar) bind(c) - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_set_nest_lock - - subroutine omp_unset_nest_lock(lockvar) bind(c) - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_unset_nest_lock - - function omp_test_nest_lock(lockvar) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_test_nest_lock - integer (kind=omp_nest_lock_kind) lockvar - end function omp_test_nest_lock - -! *** -! *** kmp_* entry points -! 
*** - - subroutine kmp_set_stacksize(size) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: size - end subroutine kmp_set_stacksize - - subroutine kmp_set_stacksize_s(size) bind(c) - use omp_lib_kinds - integer (kind=kmp_size_t_kind), value :: size - end subroutine kmp_set_stacksize_s - - subroutine kmp_set_blocktime(msec) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: msec - end subroutine kmp_set_blocktime - - subroutine kmp_set_library_serial() bind(c) - end subroutine kmp_set_library_serial - - subroutine kmp_set_library_turnaround() bind(c) - end subroutine kmp_set_library_turnaround - - subroutine kmp_set_library_throughput() bind(c) - end subroutine kmp_set_library_throughput - - subroutine kmp_set_library(libnum) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: libnum - end subroutine kmp_set_library - - subroutine kmp_set_defaults(string) bind(c) - use, intrinsic :: iso_c_binding - character (kind=c_char) :: string(*) - end subroutine kmp_set_defaults - - function kmp_get_stacksize() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_stacksize - end function kmp_get_stacksize - - function kmp_get_stacksize_s() bind(c) - use omp_lib_kinds - integer (kind=kmp_size_t_kind) kmp_get_stacksize_s - end function kmp_get_stacksize_s - - function kmp_get_blocktime() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_blocktime - end function kmp_get_blocktime - - function kmp_get_library() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_library - end function kmp_get_library - - function kmp_set_affinity(mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_set_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity - - function kmp_get_affinity(mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity - - function kmp_get_affinity_max_proc() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity_max_proc - end function kmp_get_affinity_max_proc - - subroutine kmp_create_affinity_mask(mask) bind(c) - use omp_lib_kinds - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_create_affinity_mask - - subroutine kmp_destroy_affinity_mask(mask) bind(c) - use omp_lib_kinds - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_destroy_affinity_mask - - function kmp_set_affinity_mask_proc(proc, mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity_mask_proc - - function kmp_unset_affinity_mask_proc(proc, mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_unset_affinity_mask_proc - - function kmp_get_affinity_mask_proc(proc, mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity_mask_proc - - function kmp_malloc(size) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_malloc - integer (kind=kmp_size_t_kind), value :: size - end function kmp_malloc - - function 
kmp_aligned_malloc(size, alignment) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_aligned_malloc - integer (kind=kmp_size_t_kind), value :: size - integer (kind=kmp_size_t_kind), value :: alignment - end function kmp_aligned_malloc - - function kmp_calloc(nelem, elsize) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_calloc - integer (kind=kmp_size_t_kind), value :: nelem - integer (kind=kmp_size_t_kind), value :: elsize - end function kmp_calloc - - function kmp_realloc(ptr, size) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_realloc - integer (kind=kmp_pointer_kind), value :: ptr - integer (kind=kmp_size_t_kind), value :: size - end function kmp_realloc - - subroutine kmp_free(ptr) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind), value :: ptr - end subroutine kmp_free - - subroutine kmp_set_warnings_on() bind(c) - end subroutine kmp_set_warnings_on - - subroutine kmp_set_warnings_off() bind(c) - end subroutine kmp_set_warnings_off - - end interface - - end module omp_lib Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/30/omp.h.var =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/30/omp.h.var (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/30/omp.h.var (nonexistent) @@ -1,165 +0,0 @@ -/* - * include/30/omp.h.var - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#ifndef __OMP_H -# define __OMP_H - -# define KMP_VERSION_MAJOR @LIBOMP_VERSION_MAJOR@ -# define KMP_VERSION_MINOR @LIBOMP_VERSION_MINOR@ -# define KMP_VERSION_BUILD @LIBOMP_VERSION_BUILD@ -# define KMP_BUILD_DATE "@LIBOMP_BUILD_DATE@" - -# ifdef __cplusplus - extern "C" { -# endif - -# define omp_set_num_threads ompc_set_num_threads -# define omp_set_dynamic ompc_set_dynamic -# define omp_set_nested ompc_set_nested -# define omp_set_max_active_levels ompc_set_max_active_levels -# define omp_set_schedule ompc_set_schedule -# define omp_get_ancestor_thread_num ompc_get_ancestor_thread_num -# define omp_get_team_size ompc_get_team_size - - -# define kmp_set_stacksize kmpc_set_stacksize -# define kmp_set_stacksize_s kmpc_set_stacksize_s -# define kmp_set_blocktime kmpc_set_blocktime -# define kmp_set_library kmpc_set_library -# define kmp_set_defaults kmpc_set_defaults -# define kmp_set_affinity_mask_proc kmpc_set_affinity_mask_proc -# define kmp_unset_affinity_mask_proc kmpc_unset_affinity_mask_proc -# define kmp_get_affinity_mask_proc kmpc_get_affinity_mask_proc - -# define kmp_malloc kmpc_malloc -# define kmp_calloc kmpc_calloc -# define kmp_realloc kmpc_realloc -# define kmp_free kmpc_free - - -# if defined(_WIN32) -# define __KAI_KMPC_CONVENTION __cdecl -# else -# define __KAI_KMPC_CONVENTION -# endif - - /* schedule kind constants */ - typedef enum omp_sched_t { - omp_sched_static = 1, - omp_sched_dynamic = 2, - omp_sched_guided = 3, - omp_sched_auto = 4 - } omp_sched_t; - - /* set API functions */ - extern void __KAI_KMPC_CONVENTION omp_set_num_threads (int); - extern void __KAI_KMPC_CONVENTION omp_set_dynamic (int); - extern void __KAI_KMPC_CONVENTION omp_set_nested (int); - extern void __KAI_KMPC_CONVENTION 
omp_set_max_active_levels (int); - extern void __KAI_KMPC_CONVENTION omp_set_schedule (omp_sched_t, int); - - /* query API functions */ - extern int __KAI_KMPC_CONVENTION omp_get_num_threads (void); - extern int __KAI_KMPC_CONVENTION omp_get_dynamic (void); - extern int __KAI_KMPC_CONVENTION omp_get_nested (void); - extern int __KAI_KMPC_CONVENTION omp_get_max_threads (void); - extern int __KAI_KMPC_CONVENTION omp_get_thread_num (void); - extern int __KAI_KMPC_CONVENTION omp_get_num_procs (void); - extern int __KAI_KMPC_CONVENTION omp_in_parallel (void); - extern int __KAI_KMPC_CONVENTION omp_in_final (void); - extern int __KAI_KMPC_CONVENTION omp_get_active_level (void); - extern int __KAI_KMPC_CONVENTION omp_get_level (void); - extern int __KAI_KMPC_CONVENTION omp_get_ancestor_thread_num (int); - extern int __KAI_KMPC_CONVENTION omp_get_team_size (int); - extern int __KAI_KMPC_CONVENTION omp_get_thread_limit (void); - extern int __KAI_KMPC_CONVENTION omp_get_max_active_levels (void); - extern void __KAI_KMPC_CONVENTION omp_get_schedule (omp_sched_t *, int *); - - /* lock API functions */ - typedef struct omp_lock_t { - void * _lk; - } omp_lock_t; - - extern void __KAI_KMPC_CONVENTION omp_init_lock (omp_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_set_lock (omp_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_unset_lock (omp_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_destroy_lock (omp_lock_t *); - extern int __KAI_KMPC_CONVENTION omp_test_lock (omp_lock_t *); - - /* nested lock API functions */ - typedef struct omp_nest_lock_t { - void * _lk; - } omp_nest_lock_t; - - extern void __KAI_KMPC_CONVENTION omp_init_nest_lock (omp_nest_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_set_nest_lock (omp_nest_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_unset_nest_lock (omp_nest_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_destroy_nest_lock (omp_nest_lock_t *); - extern int __KAI_KMPC_CONVENTION omp_test_nest_lock (omp_nest_lock_t *); - - /* time API functions */ - extern double __KAI_KMPC_CONVENTION omp_get_wtime (void); - extern double __KAI_KMPC_CONVENTION omp_get_wtick (void); - -# include <stdlib.h> - /* kmp API functions */ - extern int __KAI_KMPC_CONVENTION kmp_get_stacksize (void); - extern void __KAI_KMPC_CONVENTION kmp_set_stacksize (int); - extern size_t __KAI_KMPC_CONVENTION kmp_get_stacksize_s (void); - extern void __KAI_KMPC_CONVENTION kmp_set_stacksize_s (size_t); - extern int __KAI_KMPC_CONVENTION kmp_get_blocktime (void); - extern int __KAI_KMPC_CONVENTION kmp_get_library (void); - extern void __KAI_KMPC_CONVENTION kmp_set_blocktime (int); - extern void __KAI_KMPC_CONVENTION kmp_set_library (int); - extern void __KAI_KMPC_CONVENTION kmp_set_library_serial (void); - extern void __KAI_KMPC_CONVENTION kmp_set_library_turnaround (void); - extern void __KAI_KMPC_CONVENTION kmp_set_library_throughput (void); - extern void __KAI_KMPC_CONVENTION kmp_set_defaults (char const *); - - /* affinity API functions */ - typedef void * kmp_affinity_mask_t; - - extern int __KAI_KMPC_CONVENTION kmp_set_affinity (kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_get_affinity (kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_get_affinity_max_proc (void); - extern void __KAI_KMPC_CONVENTION kmp_create_affinity_mask (kmp_affinity_mask_t *); - extern void __KAI_KMPC_CONVENTION kmp_destroy_affinity_mask (kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_set_affinity_mask_proc (int, kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION 
kmp_unset_affinity_mask_proc (int, kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_get_affinity_mask_proc (int, kmp_affinity_mask_t *); - - extern void * __KAI_KMPC_CONVENTION kmp_malloc (size_t); - extern void * __KAI_KMPC_CONVENTION kmp_aligned_malloc (size_t, size_t); - extern void * __KAI_KMPC_CONVENTION kmp_calloc (size_t, size_t); - extern void * __KAI_KMPC_CONVENTION kmp_realloc (void *, size_t); - extern void __KAI_KMPC_CONVENTION kmp_free (void *); - - extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void); - extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void); - -# undef __KAI_KMPC_CONVENTION - - /* Warning: - The following typedefs are not standard, deprecated and will be removed in a future release. - */ - typedef int omp_int_t; - typedef double omp_wtime_t; - -# ifdef __cplusplus - } -# endif - -#endif /* __OMP_H */ - Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/40/omp_lib.f.var =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/40/omp_lib.f.var (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/40/omp_lib.f.var (nonexistent) @@ -1,774 +0,0 @@ -! include/40/omp_lib.f.var - -! -!//===----------------------------------------------------------------------===// -!// -!// The LLVM Compiler Infrastructure -!// -!// This file is dual licensed under the MIT and the University of Illinois Open -!// Source Licenses. See LICENSE.txt for details. -!// -!//===----------------------------------------------------------------------===// -! - -!*** -!*** Some of the directives for the following routine extend past column 72, -!*** so process this file in 132-column mode. -!*** - -!dec$ fixedformlinesize:132 - - module omp_lib_kinds - - integer, parameter :: omp_integer_kind = 4 - integer, parameter :: omp_logical_kind = 4 - integer, parameter :: omp_real_kind = 4 - integer, parameter :: omp_lock_kind = int_ptr_kind() - integer, parameter :: omp_nest_lock_kind = int_ptr_kind() - integer, parameter :: omp_sched_kind = omp_integer_kind - integer, parameter :: omp_proc_bind_kind = omp_integer_kind - integer, parameter :: kmp_pointer_kind = int_ptr_kind() - integer, parameter :: kmp_size_t_kind = int_ptr_kind() - integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind() - integer, parameter :: kmp_cancel_kind = omp_integer_kind - - end module omp_lib_kinds - - module omp_lib - - use omp_lib_kinds - - integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@ - character(*), parameter :: kmp_build_date = '@LIBOMP_BUILD_DATE@' - integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@ - - integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1 - integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2 - integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3 - integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4 - - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3 - integer (kind=omp_proc_bind_kind), parameter :: 
omp_proc_bind_spread = 4 - - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_parallel = 1 - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_loop = 2 - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3 - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4 - - interface - -! *** -! *** omp_* entry points -! *** - - subroutine omp_set_num_threads(num_threads) - use omp_lib_kinds - integer (kind=omp_integer_kind) num_threads - end subroutine omp_set_num_threads - - subroutine omp_set_dynamic(dynamic_threads) - use omp_lib_kinds - logical (kind=omp_logical_kind) dynamic_threads - end subroutine omp_set_dynamic - - subroutine omp_set_nested(nested) - use omp_lib_kinds - logical (kind=omp_logical_kind) nested - end subroutine omp_set_nested - - function omp_get_num_threads() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_threads - end function omp_get_num_threads - - function omp_get_max_threads() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_threads - end function omp_get_max_threads - - function omp_get_thread_num() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_thread_num - end function omp_get_thread_num - - function omp_get_num_procs() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_procs - end function omp_get_num_procs - - function omp_in_parallel() - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_in_parallel - end function omp_in_parallel - - function omp_in_final() - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_in_final - end function omp_in_final - - function omp_get_dynamic() - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_get_dynamic - end function omp_get_dynamic - - function omp_get_nested() - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_get_nested - end function omp_get_nested - - function omp_get_thread_limit() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_thread_limit - end function omp_get_thread_limit - - subroutine omp_set_max_active_levels(max_levels) - use omp_lib_kinds - integer (kind=omp_integer_kind) max_levels - end subroutine omp_set_max_active_levels - - function omp_get_max_active_levels() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_active_levels - end function omp_get_max_active_levels - - function omp_get_level() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_level - end function omp_get_level - - function omp_get_active_level() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_active_level - end function omp_get_active_level - - function omp_get_ancestor_thread_num(level) - use omp_lib_kinds - integer (kind=omp_integer_kind) level - integer (kind=omp_integer_kind) omp_get_ancestor_thread_num - end function omp_get_ancestor_thread_num - - function omp_get_team_size(level) - use omp_lib_kinds - integer (kind=omp_integer_kind) level - integer (kind=omp_integer_kind) omp_get_team_size - end function omp_get_team_size - - subroutine omp_set_schedule(kind, chunk_size) - use omp_lib_kinds - integer (kind=omp_sched_kind) kind - integer (kind=omp_integer_kind) chunk_size - end subroutine omp_set_schedule - - subroutine omp_get_schedule(kind, chunk_size) - use omp_lib_kinds - integer (kind=omp_sched_kind) kind - integer (kind=omp_integer_kind) chunk_size - end subroutine omp_get_schedule - - function omp_get_proc_bind() - use omp_lib_kinds - integer (kind=omp_proc_bind_kind) omp_get_proc_bind - end function omp_get_proc_bind - - function 
omp_get_wtime() - double precision omp_get_wtime - end function omp_get_wtime - - function omp_get_wtick () - double precision omp_get_wtick - end function omp_get_wtick - - function omp_get_default_device() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_default_device - end function omp_get_default_device - - subroutine omp_set_default_device(device_num) - use omp_lib_kinds - integer (kind=omp_integer_kind) device_num - end subroutine omp_set_default_device - - function omp_get_num_devices() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_devices - end function omp_get_num_devices - - function omp_get_num_teams() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_teams - end function omp_get_num_teams - - function omp_get_team_num() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_team_num - end function omp_get_team_num - - function omp_get_cancellation() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_cancellation - end function omp_get_cancellation - - function omp_is_initial_device() - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_is_initial_device - end function omp_is_initial_device - - subroutine omp_init_lock(svar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_init_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) svar - end subroutine omp_init_lock - - subroutine omp_destroy_lock(svar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_destroy_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) svar - end subroutine omp_destroy_lock - - subroutine omp_set_lock(svar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_set_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) svar - end subroutine omp_set_lock - - subroutine omp_unset_lock(svar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_unset_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) svar - end subroutine omp_unset_lock - - function omp_test_lock(svar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_test_lock -!DIR$ ENDIF - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_test_lock - integer (kind=omp_lock_kind) svar - end function omp_test_lock - - subroutine omp_init_nest_lock(nvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_init_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_init_nest_lock - - subroutine omp_destroy_nest_lock(nvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_destroy_nest_lock - - subroutine omp_set_nest_lock(nvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_set_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_set_nest_lock - - subroutine omp_unset_nest_lock(nvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_unset_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_unset_nest_lock - - function omp_test_nest_lock(nvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_test_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_test_nest_lock - 
integer (kind=omp_nest_lock_kind) nvar - end function omp_test_nest_lock - -! *** -! *** kmp_* entry points -! *** - - subroutine kmp_set_stacksize(size) - use omp_lib_kinds - integer (kind=omp_integer_kind) size - end subroutine kmp_set_stacksize - - subroutine kmp_set_stacksize_s(size) - use omp_lib_kinds - integer (kind=kmp_size_t_kind) size - end subroutine kmp_set_stacksize_s - - subroutine kmp_set_blocktime(msec) - use omp_lib_kinds - integer (kind=omp_integer_kind) msec - end subroutine kmp_set_blocktime - - subroutine kmp_set_library_serial() - end subroutine kmp_set_library_serial - - subroutine kmp_set_library_turnaround() - end subroutine kmp_set_library_turnaround - - subroutine kmp_set_library_throughput() - end subroutine kmp_set_library_throughput - - subroutine kmp_set_library(libnum) - use omp_lib_kinds - integer (kind=omp_integer_kind) libnum - end subroutine kmp_set_library - - subroutine kmp_set_defaults(string) - character*(*) string - end subroutine kmp_set_defaults - - function kmp_get_stacksize() - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_stacksize - end function kmp_get_stacksize - - function kmp_get_stacksize_s() - use omp_lib_kinds - integer (kind=kmp_size_t_kind) kmp_get_stacksize_s - end function kmp_get_stacksize_s - - function kmp_get_blocktime() - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_blocktime - end function kmp_get_blocktime - - function kmp_get_library() - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_library - end function kmp_get_library - - function kmp_set_affinity(mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_set_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity - - function kmp_get_affinity(mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity - - function kmp_get_affinity_max_proc() - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity_max_proc - end function kmp_get_affinity_max_proc - - subroutine kmp_create_affinity_mask(mask) - use omp_lib_kinds - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_create_affinity_mask - - subroutine kmp_destroy_affinity_mask(mask) - use omp_lib_kinds - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_destroy_affinity_mask - - function kmp_set_affinity_mask_proc(proc, mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc - integer (kind=omp_integer_kind) proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity_mask_proc - - function kmp_unset_affinity_mask_proc(proc, mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc - integer (kind=omp_integer_kind) proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_unset_affinity_mask_proc - - function kmp_get_affinity_mask_proc(proc, mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc - integer (kind=omp_integer_kind) proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity_mask_proc - - function kmp_malloc(size) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_malloc - integer (kind=kmp_size_t_kind) size - end function kmp_malloc - - function kmp_aligned_malloc(size, alignment) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_aligned_malloc - integer (kind=kmp_size_t_kind) size - integer (kind=kmp_size_t_kind) alignment - end function 
kmp_aligned_malloc - - function kmp_calloc(nelem, elsize) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_calloc - integer (kind=kmp_size_t_kind) nelem - integer (kind=kmp_size_t_kind) elsize - end function kmp_calloc - - function kmp_realloc(ptr, size) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_realloc - integer (kind=kmp_pointer_kind) ptr - integer (kind=kmp_size_t_kind) size - end function kmp_realloc - - subroutine kmp_free(ptr) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) ptr - end subroutine kmp_free - - subroutine kmp_set_warnings_on() - end subroutine kmp_set_warnings_on - - subroutine kmp_set_warnings_off() - end subroutine kmp_set_warnings_off - - function kmp_get_cancellation_status(cancelkind) - use omp_lib_kinds - integer (kind=kmp_cancel_kind) cancelkind - logical (kind=omp_logical_kind) kmp_get_cancellation_status - end function kmp_get_cancellation_status - - end interface - -!dec$ if defined(_WIN32) -!dec$ if defined(_WIN64) .or. defined(_M_AMD64) - -!*** -!*** The Fortran entry points must be in uppercase, even if the /Qlowercase -!*** option is specified. The alias attribute ensures that the specified -!*** string is used as the entry point. -!*** -!*** On the Windows* OS IA-32 architecture, the Fortran entry points have an -!*** underscore prepended. On the Windows* OS Intel(R) 64 -!*** architecture, no underscore is prepended. -!*** - -!dec$ attributes alias:'OMP_SET_NUM_THREADS' :: omp_set_num_threads -!dec$ attributes alias:'OMP_SET_DYNAMIC' :: omp_set_dynamic -!dec$ attributes alias:'OMP_SET_NESTED' :: omp_set_nested -!dec$ attributes alias:'OMP_GET_NUM_THREADS' :: omp_get_num_threads -!dec$ attributes alias:'OMP_GET_MAX_THREADS' :: omp_get_max_threads -!dec$ attributes alias:'OMP_GET_THREAD_NUM' :: omp_get_thread_num -!dec$ attributes alias:'OMP_GET_NUM_PROCS' :: omp_get_num_procs -!dec$ attributes alias:'OMP_IN_PARALLEL' :: omp_in_parallel -!dec$ attributes alias:'OMP_GET_DYNAMIC' :: omp_get_dynamic -!dec$ attributes alias:'OMP_GET_NESTED' :: omp_get_nested -!dec$ attributes alias:'OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit -!dec$ attributes alias:'OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels -!dec$ attributes alias:'OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels -!dec$ attributes alias:'OMP_GET_LEVEL' :: omp_get_level -!dec$ attributes alias:'OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level -!dec$ attributes alias:'OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num -!dec$ attributes alias:'OMP_GET_TEAM_SIZE' :: omp_get_team_size -!dec$ attributes alias:'OMP_SET_SCHEDULE' :: omp_set_schedule -!dec$ attributes alias:'OMP_GET_SCHEDULE' :: omp_get_schedule -!dec$ attributes alias:'OMP_GET_PROC_BIND' :: omp_get_proc_bind -!dec$ attributes alias:'OMP_GET_WTIME' :: omp_get_wtime -!dec$ attributes alias:'OMP_GET_WTICK' :: omp_get_wtick -!dec$ attributes alias:'OMP_GET_DEFAULT_DEVICE' :: omp_get_default_device -!dec$ attributes alias:'OMP_SET_DEFAULT_DEVICE' :: omp_set_default_device -!dec$ attributes alias:'OMP_GET_NUM_DEVICES' :: omp_get_num_devices -!dec$ attributes alias:'OMP_GET_NUM_TEAMS' :: omp_get_num_teams -!dec$ attributes alias:'OMP_GET_TEAM_NUM' :: omp_get_team_num -!dec$ attributes alias:'OMP_GET_CANCELLATION' :: omp_get_cancellation -!dec$ attributes alias:'OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device - -!dec$ attributes alias:'omp_init_lock' :: omp_init_lock -!dec$ attributes alias:'omp_destroy_lock' :: omp_destroy_lock -!dec$ attributes alias:'omp_set_lock' :: omp_set_lock -!dec$ attributes 
alias:'omp_unset_lock' :: omp_unset_lock -!dec$ attributes alias:'omp_test_lock' :: omp_test_lock -!dec$ attributes alias:'omp_init_nest_lock' :: omp_init_nest_lock -!dec$ attributes alias:'omp_destroy_nest_lock' :: omp_destroy_nest_lock -!dec$ attributes alias:'omp_set_nest_lock' :: omp_set_nest_lock -!dec$ attributes alias:'omp_unset_nest_lock' :: omp_unset_nest_lock -!dec$ attributes alias:'omp_test_nest_lock' :: omp_test_nest_lock - -!dec$ attributes alias:'KMP_SET_STACKSIZE'::kmp_set_stacksize -!dec$ attributes alias:'KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s -!dec$ attributes alias:'KMP_SET_BLOCKTIME'::kmp_set_blocktime -!dec$ attributes alias:'KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial -!dec$ attributes alias:'KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround -!dec$ attributes alias:'KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput -!dec$ attributes alias:'KMP_SET_LIBRARY'::kmp_set_library -!dec$ attributes alias:'KMP_GET_STACKSIZE'::kmp_get_stacksize -!dec$ attributes alias:'KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s -!dec$ attributes alias:'KMP_GET_BLOCKTIME'::kmp_get_blocktime -!dec$ attributes alias:'KMP_GET_LIBRARY'::kmp_get_library -!dec$ attributes alias:'KMP_SET_AFFINITY'::kmp_set_affinity -!dec$ attributes alias:'KMP_GET_AFFINITY'::kmp_get_affinity -!dec$ attributes alias:'KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc -!dec$ attributes alias:'KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask -!dec$ attributes alias:'KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask -!dec$ attributes alias:'KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'KMP_MALLOC'::kmp_malloc -!dec$ attributes alias:'KMP_ALIGNED_MALLOC'::kmp_aligned_malloc -!dec$ attributes alias:'KMP_CALLOC'::kmp_calloc -!dec$ attributes alias:'KMP_REALLOC'::kmp_realloc -!dec$ attributes alias:'KMP_FREE'::kmp_free - -!dec$ attributes alias:'KMP_SET_WARNINGS_ON'::kmp_set_warnings_on -!dec$ attributes alias:'KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off - -!dec$ attributes alias:'KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status - -!dec$ else - -!*** -!*** On Windows* OS IA-32 architecture, the Fortran entry points have an underscore prepended. 
-!*** - -!dec$ attributes alias:'_OMP_SET_NUM_THREADS' :: omp_set_num_threads -!dec$ attributes alias:'_OMP_SET_DYNAMIC' :: omp_set_dynamic -!dec$ attributes alias:'_OMP_SET_NESTED' :: omp_set_nested -!dec$ attributes alias:'_OMP_GET_NUM_THREADS' :: omp_get_num_threads -!dec$ attributes alias:'_OMP_GET_MAX_THREADS' :: omp_get_max_threads -!dec$ attributes alias:'_OMP_GET_THREAD_NUM' :: omp_get_thread_num -!dec$ attributes alias:'_OMP_GET_NUM_PROCS' :: omp_get_num_procs -!dec$ attributes alias:'_OMP_IN_PARALLEL' :: omp_in_parallel -!dec$ attributes alias:'_OMP_GET_DYNAMIC' :: omp_get_dynamic -!dec$ attributes alias:'_OMP_GET_NESTED' :: omp_get_nested -!dec$ attributes alias:'_OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit -!dec$ attributes alias:'_OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels -!dec$ attributes alias:'_OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels -!dec$ attributes alias:'_OMP_GET_LEVEL' :: omp_get_level -!dec$ attributes alias:'_OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level -!dec$ attributes alias:'_OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num -!dec$ attributes alias:'_OMP_GET_TEAM_SIZE' :: omp_get_team_size -!dec$ attributes alias:'_OMP_SET_SCHEDULE' :: omp_set_schedule -!dec$ attributes alias:'_OMP_GET_SCHEDULE' :: omp_get_schedule -!dec$ attributes alias:'_OMP_GET_PROC_BIND' :: omp_get_proc_bind -!dec$ attributes alias:'_OMP_GET_WTIME' :: omp_get_wtime -!dec$ attributes alias:'_OMP_GET_WTICK' :: omp_get_wtick -!dec$ attributes alias:'_OMP_GET_DEFAULT_DEVICE' :: omp_get_default_device -!dec$ attributes alias:'_OMP_SET_DEFAULT_DEVICE' :: omp_set_default_device -!dec$ attributes alias:'_OMP_GET_NUM_DEVICES' :: omp_get_num_devices -!dec$ attributes alias:'_OMP_GET_NUM_TEAMS' :: omp_get_num_teams -!dec$ attributes alias:'_OMP_GET_TEAM_NUM' :: omp_get_team_num -!dec$ attributes alias:'_OMP_GET_CANCELLATION' :: omp_get_cancellation -!dec$ attributes alias:'_OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device - -!dec$ attributes alias:'_omp_init_lock' :: omp_init_lock -!dec$ attributes alias:'_omp_destroy_lock' :: omp_destroy_lock -!dec$ attributes alias:'_omp_set_lock' :: omp_set_lock -!dec$ attributes alias:'_omp_unset_lock' :: omp_unset_lock -!dec$ attributes alias:'_omp_test_lock' :: omp_test_lock -!dec$ attributes alias:'_omp_init_nest_lock' :: omp_init_nest_lock -!dec$ attributes alias:'_omp_destroy_nest_lock' :: omp_destroy_nest_lock -!dec$ attributes alias:'_omp_set_nest_lock' :: omp_set_nest_lock -!dec$ attributes alias:'_omp_unset_nest_lock' :: omp_unset_nest_lock -!dec$ attributes alias:'_omp_test_nest_lock' :: omp_test_nest_lock - -!dec$ attributes alias:'_KMP_SET_STACKSIZE'::kmp_set_stacksize -!dec$ attributes alias:'_KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s -!dec$ attributes alias:'_KMP_SET_BLOCKTIME'::kmp_set_blocktime -!dec$ attributes alias:'_KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial -!dec$ attributes alias:'_KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround -!dec$ attributes alias:'_KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput -!dec$ attributes alias:'_KMP_SET_LIBRARY'::kmp_set_library -!dec$ attributes alias:'_KMP_GET_STACKSIZE'::kmp_get_stacksize -!dec$ attributes alias:'_KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s -!dec$ attributes alias:'_KMP_GET_BLOCKTIME'::kmp_get_blocktime -!dec$ attributes alias:'_KMP_GET_LIBRARY'::kmp_get_library -!dec$ attributes alias:'_KMP_SET_AFFINITY'::kmp_set_affinity -!dec$ attributes alias:'_KMP_GET_AFFINITY'::kmp_get_affinity -!dec$ attributes 
alias:'_KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc -!dec$ attributes alias:'_KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask -!dec$ attributes alias:'_KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask -!dec$ attributes alias:'_KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'_KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'_KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'_KMP_MALLOC'::kmp_malloc -!dec$ attributes alias:'_KMP_ALIGNED_MALLOC'::kmp_aligned_malloc -!dec$ attributes alias:'_KMP_CALLOC'::kmp_calloc -!dec$ attributes alias:'_KMP_REALLOC'::kmp_realloc -!dec$ attributes alias:'_KMP_FREE'::kmp_free - -!dec$ attributes alias:'_KMP_SET_WARNINGS_ON'::kmp_set_warnings_on -!dec$ attributes alias:'_KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off - -!dec$ attributes alias:'_KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status - -!dec$ endif -!dec$ endif - -!dec$ if defined(__linux) - -!*** -!*** The Linux* OS entry points are in lowercase, with an underscore appended. -!*** - -!dec$ attributes alias:'omp_set_num_threads_'::omp_set_num_threads -!dec$ attributes alias:'omp_set_dynamic_'::omp_set_dynamic -!dec$ attributes alias:'omp_set_nested_'::omp_set_nested -!dec$ attributes alias:'omp_get_num_threads_'::omp_get_num_threads -!dec$ attributes alias:'omp_get_max_threads_'::omp_get_max_threads -!dec$ attributes alias:'omp_get_thread_num_'::omp_get_thread_num -!dec$ attributes alias:'omp_get_num_procs_'::omp_get_num_procs -!dec$ attributes alias:'omp_in_parallel_'::omp_in_parallel -!dec$ attributes alias:'omp_get_dynamic_'::omp_get_dynamic -!dec$ attributes alias:'omp_get_nested_'::omp_get_nested -!dec$ attributes alias:'omp_get_thread_limit_'::omp_get_thread_limit -!dec$ attributes alias:'omp_set_max_active_levels_'::omp_set_max_active_levels -!dec$ attributes alias:'omp_get_max_active_levels_'::omp_get_max_active_levels -!dec$ attributes alias:'omp_get_level_'::omp_get_level -!dec$ attributes alias:'omp_get_active_level_'::omp_get_active_level -!dec$ attributes alias:'omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num -!dec$ attributes alias:'omp_get_team_size_'::omp_get_team_size -!dec$ attributes alias:'omp_set_schedule_'::omp_set_schedule -!dec$ attributes alias:'omp_get_schedule_'::omp_get_schedule -!dec$ attributes alias:'omp_get_proc_bind_' :: omp_get_proc_bind -!dec$ attributes alias:'omp_get_wtime_'::omp_get_wtime -!dec$ attributes alias:'omp_get_wtick_'::omp_get_wtick -!dec$ attributes alias:'omp_get_default_device_'::omp_get_default_device -!dec$ attributes alias:'omp_set_default_device_'::omp_set_default_device -!dec$ attributes alias:'omp_get_num_devices_'::omp_get_num_devices -!dec$ attributes alias:'omp_get_num_teams_'::omp_get_num_teams -!dec$ attributes alias:'omp_get_team_num_'::omp_get_team_num -!dec$ attributes alias:'omp_get_cancellation_'::omp_get_cancellation -!dec$ attributes alias:'omp_is_initial_device_'::omp_is_initial_device - -!dec$ attributes alias:'omp_init_lock_'::omp_init_lock -!dec$ attributes alias:'omp_destroy_lock_'::omp_destroy_lock -!dec$ attributes alias:'omp_set_lock_'::omp_set_lock -!dec$ attributes alias:'omp_unset_lock_'::omp_unset_lock -!dec$ attributes alias:'omp_test_lock_'::omp_test_lock -!dec$ attributes alias:'omp_init_nest_lock_'::omp_init_nest_lock -!dec$ attributes alias:'omp_destroy_nest_lock_'::omp_destroy_nest_lock -!dec$ attributes alias:'omp_set_nest_lock_'::omp_set_nest_lock -!dec$ attributes 
alias:'omp_unset_nest_lock_'::omp_unset_nest_lock -!dec$ attributes alias:'omp_test_nest_lock_'::omp_test_nest_lock - -!dec$ attributes alias:'kmp_set_stacksize_'::kmp_set_stacksize -!dec$ attributes alias:'kmp_set_stacksize_s_'::kmp_set_stacksize_s -!dec$ attributes alias:'kmp_set_blocktime_'::kmp_set_blocktime -!dec$ attributes alias:'kmp_set_library_serial_'::kmp_set_library_serial -!dec$ attributes alias:'kmp_set_library_turnaround_'::kmp_set_library_turnaround -!dec$ attributes alias:'kmp_set_library_throughput_'::kmp_set_library_throughput -!dec$ attributes alias:'kmp_set_library_'::kmp_set_library -!dec$ attributes alias:'kmp_get_stacksize_'::kmp_get_stacksize -!dec$ attributes alias:'kmp_get_stacksize_s_'::kmp_get_stacksize_s -!dec$ attributes alias:'kmp_get_blocktime_'::kmp_get_blocktime -!dec$ attributes alias:'kmp_get_library_'::kmp_get_library -!dec$ attributes alias:'kmp_set_affinity_'::kmp_set_affinity -!dec$ attributes alias:'kmp_get_affinity_'::kmp_get_affinity -!dec$ attributes alias:'kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc -!dec$ attributes alias:'kmp_create_affinity_mask_'::kmp_create_affinity_mask -!dec$ attributes alias:'kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask -!dec$ attributes alias:'kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'kmp_malloc_'::kmp_malloc -!dec$ attributes alias:'kmp_aligned_malloc_'::kmp_aligned_malloc -!dec$ attributes alias:'kmp_calloc_'::kmp_calloc -!dec$ attributes alias:'kmp_realloc_'::kmp_realloc -!dec$ attributes alias:'kmp_free_'::kmp_free - -!dec$ attributes alias:'kmp_set_warnings_on_'::kmp_set_warnings_on -!dec$ attributes alias:'kmp_set_warnings_off_'::kmp_set_warnings_off -!dec$ attributes alias:'kmp_get_cancellation_status_'::kmp_get_cancellation_status - -!dec$ endif - -!dec$ if defined(__APPLE__) - -!*** -!*** The Mac entry points are in lowercase, with both an underscore -!*** appended and an underscore prepended.
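Taken together, the four !dec$ blocks in this file spell out the external-name matrix for one and the same Fortran entry point: uppercase on Windows* OS Intel(R) 64 (OMP_GET_WTIME), uppercase with a prepended underscore on Windows* OS IA-32 (_OMP_GET_WTIME), lowercase with an appended underscore on Linux* OS (omp_get_wtime_), and lowercase with both on the Mac (_omp_get_wtime_). A minimal sketch of that scaffolding for a single hypothetical entry point, my_entry, reusing the same conditional !dec$ directives (illustrative only, not part of the file):

!dec$ if defined(_WIN32)
!dec$ if defined(_WIN64) .or. defined(_M_AMD64)
!dec$ attributes alias:'MY_ENTRY' :: my_entry
!dec$ else
!dec$ attributes alias:'_MY_ENTRY' :: my_entry
!dec$ endif
!dec$ endif
!dec$ if defined(__linux)
!dec$ attributes alias:'my_entry_' :: my_entry
!dec$ endif
!dec$ if defined(__APPLE__)
!dec$ attributes alias:'_my_entry_' :: my_entry
!dec$ endif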
-!*** - -!dec$ attributes alias:'_omp_set_num_threads_'::omp_set_num_threads -!dec$ attributes alias:'_omp_set_dynamic_'::omp_set_dynamic -!dec$ attributes alias:'_omp_set_nested_'::omp_set_nested -!dec$ attributes alias:'_omp_get_num_threads_'::omp_get_num_threads -!dec$ attributes alias:'_omp_get_max_threads_'::omp_get_max_threads -!dec$ attributes alias:'_omp_get_thread_num_'::omp_get_thread_num -!dec$ attributes alias:'_omp_get_num_procs_'::omp_get_num_procs -!dec$ attributes alias:'_omp_in_parallel_'::omp_in_parallel -!dec$ attributes alias:'_omp_get_dynamic_'::omp_get_dynamic -!dec$ attributes alias:'_omp_get_nested_'::omp_get_nested -!dec$ attributes alias:'_omp_get_thread_limit_'::omp_get_thread_limit -!dec$ attributes alias:'_omp_set_max_active_levels_'::omp_set_max_active_levels -!dec$ attributes alias:'_omp_get_max_active_levels_'::omp_get_max_active_levels -!dec$ attributes alias:'_omp_get_level_'::omp_get_level -!dec$ attributes alias:'_omp_get_active_level_'::omp_get_active_level -!dec$ attributes alias:'_omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num -!dec$ attributes alias:'_omp_get_team_size_'::omp_get_team_size -!dec$ attributes alias:'_omp_set_schedule_'::omp_set_schedule -!dec$ attributes alias:'_omp_get_schedule_'::omp_get_schedule -!dec$ attributes alias:'_omp_get_proc_bind_' :: omp_get_proc_bind -!dec$ attributes alias:'_omp_get_wtime_'::omp_get_wtime -!dec$ attributes alias:'_omp_get_wtick_'::omp_get_wtick -!dec$ attributes alias:'_omp_get_num_teams_'::omp_get_num_teams -!dec$ attributes alias:'_omp_get_team_num_'::omp_get_team_num -!dec$ attributes alias:'_omp_get_cancellation_'::omp_get_cancellation -!dec$ attributes alias:'_omp_is_initial_device_'::omp_is_initial_device - -!dec$ attributes alias:'_omp_init_lock_'::omp_init_lock -!dec$ attributes alias:'_omp_destroy_lock_'::omp_destroy_lock -!dec$ attributes alias:'_omp_set_lock_'::omp_set_lock -!dec$ attributes alias:'_omp_unset_lock_'::omp_unset_lock -!dec$ attributes alias:'_omp_test_lock_'::omp_test_lock -!dec$ attributes alias:'_omp_init_nest_lock_'::omp_init_nest_lock -!dec$ attributes alias:'_omp_destroy_nest_lock_'::omp_destroy_nest_lock -!dec$ attributes alias:'_omp_set_nest_lock_'::omp_set_nest_lock -!dec$ attributes alias:'_omp_unset_nest_lock_'::omp_unset_nest_lock -!dec$ attributes alias:'_omp_test_nest_lock_'::omp_test_nest_lock - -!dec$ attributes alias:'_kmp_set_stacksize_'::kmp_set_stacksize -!dec$ attributes alias:'_kmp_set_stacksize_s_'::kmp_set_stacksize_s -!dec$ attributes alias:'_kmp_set_blocktime_'::kmp_set_blocktime -!dec$ attributes alias:'_kmp_set_library_serial_'::kmp_set_library_serial -!dec$ attributes alias:'_kmp_set_library_turnaround_'::kmp_set_library_turnaround -!dec$ attributes alias:'_kmp_set_library_throughput_'::kmp_set_library_throughput -!dec$ attributes alias:'_kmp_set_library_'::kmp_set_library -!dec$ attributes alias:'_kmp_get_stacksize_'::kmp_get_stacksize -!dec$ attributes alias:'_kmp_get_stacksize_s_'::kmp_get_stacksize_s -!dec$ attributes alias:'_kmp_get_blocktime_'::kmp_get_blocktime -!dec$ attributes alias:'_kmp_get_library_'::kmp_get_library -!dec$ attributes alias:'_kmp_set_affinity_'::kmp_set_affinity -!dec$ attributes alias:'_kmp_get_affinity_'::kmp_get_affinity -!dec$ attributes alias:'_kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc -!dec$ attributes alias:'_kmp_create_affinity_mask_'::kmp_create_affinity_mask -!dec$ attributes alias:'_kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask -!dec$ attributes 
alias:'_kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'_kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'_kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'_kmp_malloc_'::kmp_malloc -!dec$ attributes alias:'_kmp_aligned_malloc_'::kmp_aligned_malloc -!dec$ attributes alias:'_kmp_calloc_'::kmp_calloc -!dec$ attributes alias:'_kmp_realloc_'::kmp_realloc -!dec$ attributes alias:'_kmp_free_'::kmp_free - -!dec$ attributes alias:'_kmp_set_warnings_on_'::kmp_set_warnings_on -!dec$ attributes alias:'_kmp_set_warnings_off_'::kmp_set_warnings_off - -!dec$ attributes alias:'_kmp_get_cancellation_status_'::kmp_get_cancellation_status - -!dec$ endif - - end module omp_lib - Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/40/omp_lib.h.var =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/40/omp_lib.h.var (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/40/omp_lib.h.var (nonexistent) @@ -1,567 +0,0 @@ -! include/40/omp_lib.h.var - -! -!//===----------------------------------------------------------------------===// -!// -!// The LLVM Compiler Infrastructure -!// -!// This file is dual licensed under the MIT and the University of Illinois Open -!// Source Licenses. See LICENSE.txt for details. -!// -!//===----------------------------------------------------------------------===// -! - -!*** -!*** Some of the directives for the following routine extend past column 72, -!*** so process this file in 132-column mode. -!*** - -!DIR$ fixedformlinesize:132 - - integer, parameter :: omp_integer_kind = 4 - integer, parameter :: omp_logical_kind = 4 - integer, parameter :: omp_real_kind = 4 - integer, parameter :: omp_lock_kind = int_ptr_kind() - integer, parameter :: omp_nest_lock_kind = int_ptr_kind() - integer, parameter :: omp_sched_kind = omp_integer_kind - integer, parameter :: omp_proc_bind_kind = omp_integer_kind - integer, parameter :: kmp_pointer_kind = int_ptr_kind() - integer, parameter :: kmp_size_t_kind = int_ptr_kind() - integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind() - - integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@ - integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@ - character(*) kmp_build_date - parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' ) - - integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1 - integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2 - integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3 - integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4 - - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4 - - interface - -! *** -! *** omp_* entry points -! 
*** - - subroutine omp_set_num_threads(num_threads) bind(c) - import - integer (kind=omp_integer_kind), value :: num_threads - end subroutine omp_set_num_threads - - subroutine omp_set_dynamic(dynamic_threads) bind(c) - import - logical (kind=omp_logical_kind), value :: dynamic_threads - end subroutine omp_set_dynamic - - subroutine omp_set_nested(nested) bind(c) - import - logical (kind=omp_logical_kind), value :: nested - end subroutine omp_set_nested - - function omp_get_num_threads() bind(c) - import - integer (kind=omp_integer_kind) omp_get_num_threads - end function omp_get_num_threads - - function omp_get_max_threads() bind(c) - import - integer (kind=omp_integer_kind) omp_get_max_threads - end function omp_get_max_threads - - function omp_get_thread_num() bind(c) - import - integer (kind=omp_integer_kind) omp_get_thread_num - end function omp_get_thread_num - - function omp_get_num_procs() bind(c) - import - integer (kind=omp_integer_kind) omp_get_num_procs - end function omp_get_num_procs - - function omp_in_parallel() bind(c) - import - logical (kind=omp_logical_kind) omp_in_parallel - end function omp_in_parallel - - function omp_in_final() bind(c) - import - logical (kind=omp_logical_kind) omp_in_final - end function omp_in_final - - function omp_get_dynamic() bind(c) - import - logical (kind=omp_logical_kind) omp_get_dynamic - end function omp_get_dynamic - - function omp_get_nested() bind(c) - import - logical (kind=omp_logical_kind) omp_get_nested - end function omp_get_nested - - function omp_get_thread_limit() bind(c) - import - integer (kind=omp_integer_kind) omp_get_thread_limit - end function omp_get_thread_limit - - subroutine omp_set_max_active_levels(max_levels) bind(c) - import - integer (kind=omp_integer_kind), value :: max_levels - end subroutine omp_set_max_active_levels - - function omp_get_max_active_levels() bind(c) - import - integer (kind=omp_integer_kind) omp_get_max_active_levels - end function omp_get_max_active_levels - - function omp_get_level() bind(c) - import - integer (kind=omp_integer_kind) omp_get_level - end function omp_get_level - - function omp_get_active_level() bind(c) - import - integer (kind=omp_integer_kind) omp_get_active_level - end function omp_get_active_level - - function omp_get_ancestor_thread_num(level) bind(c) - import - integer (kind=omp_integer_kind) omp_get_ancestor_thread_num - integer (kind=omp_integer_kind), value :: level - end function omp_get_ancestor_thread_num - - function omp_get_team_size(level) bind(c) - import - integer (kind=omp_integer_kind) omp_get_team_size - integer (kind=omp_integer_kind), value :: level - end function omp_get_team_size - - subroutine omp_set_schedule(kind, chunk_size) bind(c) - import - integer (kind=omp_sched_kind), value :: kind - integer (kind=omp_integer_kind), value :: chunk_size - end subroutine omp_set_schedule - - subroutine omp_get_schedule(kind, chunk_size) bind(c) - import - integer (kind=omp_sched_kind) kind - integer (kind=omp_integer_kind) chunk_size - end subroutine omp_get_schedule - - function omp_get_proc_bind() bind(c) - import - integer (kind=omp_proc_bind_kind) omp_get_proc_bind - end function omp_get_proc_bind - - function omp_get_wtime() bind(c) - double precision omp_get_wtime - end function omp_get_wtime - - function omp_get_wtick() bind(c) - double precision omp_get_wtick - end function omp_get_wtick - - function omp_get_default_device() bind(c) - import - integer (kind=omp_integer_kind) omp_get_default_device - end function omp_get_default_device - - subroutine 
omp_set_default_device(device_num) bind(c) - import - integer (kind=omp_integer_kind), value :: device_num - end subroutine omp_set_default_device - - function omp_get_num_devices() bind(c) - import - integer (kind=omp_integer_kind) omp_get_num_devices - end function omp_get_num_devices - - function omp_get_num_teams() bind(c) - import - integer (kind=omp_integer_kind) omp_get_num_teams - end function omp_get_num_teams - - function omp_get_team_num() bind(c) - import - integer (kind=omp_integer_kind) omp_get_team_num - end function omp_get_team_num - - function omp_is_initial_device() bind(c) - import - logical (kind=omp_logical_kind) omp_is_initial_device - end function omp_is_initial_device - - subroutine omp_init_lock(svar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_init_lock -!DIR$ ENDIF - import - integer (kind=omp_lock_kind) svar - end subroutine omp_init_lock - - subroutine omp_destroy_lock(svar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_destroy_lock -!DIR$ ENDIF - import - integer (kind=omp_lock_kind) svar - end subroutine omp_destroy_lock - - subroutine omp_set_lock(svar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_set_lock -!DIR$ ENDIF - import - integer (kind=omp_lock_kind) svar - end subroutine omp_set_lock - - subroutine omp_unset_lock(svar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_unset_lock -!DIR$ ENDIF - import - integer (kind=omp_lock_kind) svar - end subroutine omp_unset_lock - - function omp_test_lock(svar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_test_lock -!DIR$ ENDIF - import - logical (kind=omp_logical_kind) omp_test_lock - integer (kind=omp_lock_kind) svar - end function omp_test_lock - - subroutine omp_init_nest_lock(nvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_init_nest_lock -!DIR$ ENDIF - import - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_init_nest_lock - - subroutine omp_destroy_nest_lock(nvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock -!DIR$ ENDIF - import - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_destroy_nest_lock - - subroutine omp_set_nest_lock(nvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_set_nest_lock -!DIR$ ENDIF - import - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_set_nest_lock - - subroutine omp_unset_nest_lock(nvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_unset_nest_lock -!DIR$ ENDIF - import - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_unset_nest_lock - - function omp_test_nest_lock(nvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_test_nest_lock -!DIR$ ENDIF - import - integer (kind=omp_integer_kind) omp_test_nest_lock - integer (kind=omp_nest_lock_kind) nvar - end function omp_test_nest_lock - -! *** -! *** kmp_* entry points -! 
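The omp_* lock routines above operate on an opaque handle of kind omp_lock_kind, declared earlier in this header as int_ptr_kind(), i.e. pointer-sized. A hedged sketch of the usual init/set/unset/destroy life cycle (hits is an illustrative shared counter, not part of the header):

      integer (kind=omp_lock_kind) lck
      integer (kind=omp_integer_kind) hits
      hits = 0
      call omp_init_lock(lck)
!$omp parallel shared(lck, hits)
      call omp_set_lock(lck)        ! blocks until the lock is acquired
      hits = hits + 1               ! update guarded by the lock
      call omp_unset_lock(lck)
!$omp end parallel
      call omp_destroy_lock(lck)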
*** - - subroutine kmp_set_stacksize(size) bind(c) - import - integer (kind=omp_integer_kind), value :: size - end subroutine kmp_set_stacksize - - subroutine kmp_set_stacksize_s(size) bind(c) - import - integer (kind=kmp_size_t_kind), value :: size - end subroutine kmp_set_stacksize_s - - subroutine kmp_set_blocktime(msec) bind(c) - import - integer (kind=omp_integer_kind), value :: msec - end subroutine kmp_set_blocktime - - subroutine kmp_set_library_serial() bind(c) - end subroutine kmp_set_library_serial - - subroutine kmp_set_library_turnaround() bind(c) - end subroutine kmp_set_library_turnaround - - subroutine kmp_set_library_throughput() bind(c) - end subroutine kmp_set_library_throughput - - subroutine kmp_set_library(libnum) bind(c) - import - integer (kind=omp_integer_kind), value :: libnum - end subroutine kmp_set_library - - subroutine kmp_set_defaults(string) bind(c) - character string(*) - end subroutine kmp_set_defaults - - function kmp_get_stacksize() bind(c) - import - integer (kind=omp_integer_kind) kmp_get_stacksize - end function kmp_get_stacksize - - function kmp_get_stacksize_s() bind(c) - import - integer (kind=kmp_size_t_kind) kmp_get_stacksize_s - end function kmp_get_stacksize_s - - function kmp_get_blocktime() bind(c) - import - integer (kind=omp_integer_kind) kmp_get_blocktime - end function kmp_get_blocktime - - function kmp_get_library() bind(c) - import - integer (kind=omp_integer_kind) kmp_get_library - end function kmp_get_library - - function kmp_set_affinity(mask) bind(c) - import - integer (kind=omp_integer_kind) kmp_set_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity - - function kmp_get_affinity(mask) bind(c) - import - integer (kind=omp_integer_kind) kmp_get_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity - - function kmp_get_affinity_max_proc() bind(c) - import - integer (kind=omp_integer_kind) kmp_get_affinity_max_proc - end function kmp_get_affinity_max_proc - - subroutine kmp_create_affinity_mask(mask) bind(c) - import - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_create_affinity_mask - - subroutine kmp_destroy_affinity_mask(mask) bind(c) - import - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_destroy_affinity_mask - - function kmp_set_affinity_mask_proc(proc, mask) bind(c) - import - integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity_mask_proc - - function kmp_unset_affinity_mask_proc(proc, mask) bind(c) - import - integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_unset_affinity_mask_proc - - function kmp_get_affinity_mask_proc(proc, mask) bind(c) - import - integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity_mask_proc - - function kmp_malloc(size) bind(c) - import - integer (kind=kmp_pointer_kind) kmp_malloc - integer (kind=kmp_size_t_kind), value :: size - end function kmp_malloc - - function kmp_aligned_malloc(size, alignment) bind(c) - import - integer (kind=kmp_pointer_kind) kmp_aligned_malloc - integer (kind=kmp_size_t_kind), value :: size - integer (kind=kmp_size_t_kind), value :: alignment - end function kmp_aligned_malloc - - function 
kmp_calloc(nelem, elsize) bind(c) - import - integer (kind=kmp_pointer_kind) kmp_calloc - integer (kind=kmp_size_t_kind), value :: nelem - integer (kind=kmp_size_t_kind), value :: elsize - end function kmp_calloc - - function kmp_realloc(ptr, size) bind(c) - import - integer (kind=kmp_pointer_kind) kmp_realloc - integer (kind=kmp_pointer_kind), value :: ptr - integer (kind=kmp_size_t_kind), value :: size - end function kmp_realloc - - subroutine kmp_free(ptr) bind(c) - import - integer (kind=kmp_pointer_kind), value :: ptr - end subroutine kmp_free - - subroutine kmp_set_warnings_on() bind(c) - end subroutine kmp_set_warnings_on - - subroutine kmp_set_warnings_off() bind(c) - end subroutine kmp_set_warnings_off - - end interface - -!DIR$ IF DEFINED (__INTEL_OFFLOAD) -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_num_threads -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_dynamic -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_nested -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_threads -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_max_threads -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_thread_num -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_procs -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_in_parallel -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_in_final -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_dynamic -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_nested -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_thread_limit -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_max_active_levels -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_max_active_levels -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_level -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_active_level -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_ancestor_thread_num -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_team_size -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_schedule -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_schedule -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_proc_bind -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_wtime -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_wtick -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_default_device -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_default_device -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_is_initial_device -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_devices -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_teams -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_team_num -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_destroy_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_unset_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_test_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_nest_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_destroy_nest_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_nest_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_unset_nest_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_test_nest_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_stacksize -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_stacksize_s -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_blocktime -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_serial -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_turnaround -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_throughput -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_defaults -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_stacksize -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_stacksize_s -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_blocktime -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_library -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_affinity -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity -!DIR$ ATTRIBUTES OFFLOAD:MIC :: 
kmp_get_affinity_max_proc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_create_affinity_mask -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_destroy_affinity_mask -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_affinity_mask_proc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_unset_affinity_mask_proc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity_mask_proc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_malloc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_aligned_malloc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_calloc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_realloc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_free -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_warnings_on -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_warnings_off - -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!$omp declare target(omp_set_num_threads ) -!$omp declare target(omp_set_dynamic ) -!$omp declare target(omp_set_nested ) -!$omp declare target(omp_get_num_threads ) -!$omp declare target(omp_get_max_threads ) -!$omp declare target(omp_get_thread_num ) -!$omp declare target(omp_get_num_procs ) -!$omp declare target(omp_in_parallel ) -!$omp declare target(omp_in_final ) -!$omp declare target(omp_get_dynamic ) -!$omp declare target(omp_get_nested ) -!$omp declare target(omp_get_thread_limit ) -!$omp declare target(omp_set_max_active_levels ) -!$omp declare target(omp_get_max_active_levels ) -!$omp declare target(omp_get_level ) -!$omp declare target(omp_get_active_level ) -!$omp declare target(omp_get_ancestor_thread_num ) -!$omp declare target(omp_get_team_size ) -!$omp declare target(omp_set_schedule ) -!$omp declare target(omp_get_schedule ) -!$omp declare target(omp_get_proc_bind ) -!$omp declare target(omp_get_wtime ) -!$omp declare target(omp_get_wtick ) -!$omp declare target(omp_get_default_device ) -!$omp declare target(omp_set_default_device ) -!$omp declare target(omp_is_initial_device ) -!$omp declare target(omp_get_num_devices ) -!$omp declare target(omp_get_num_teams ) -!$omp declare target(omp_get_team_num ) -!$omp declare target(omp_init_lock ) -!$omp declare target(omp_destroy_lock ) -!$omp declare target(omp_set_lock ) -!$omp declare target(omp_unset_lock ) -!$omp declare target(omp_test_lock ) -!$omp declare target(omp_init_nest_lock ) -!$omp declare target(omp_destroy_nest_lock ) -!$omp declare target(omp_set_nest_lock ) -!$omp declare target(omp_unset_nest_lock ) -!$omp declare target(omp_test_nest_lock ) -!$omp declare target(kmp_set_stacksize ) -!$omp declare target(kmp_set_stacksize_s ) -!$omp declare target(kmp_set_blocktime ) -!$omp declare target(kmp_set_library_serial ) -!$omp declare target(kmp_set_library_turnaround ) -!$omp declare target(kmp_set_library_throughput ) -!$omp declare target(kmp_set_library ) -!$omp declare target(kmp_set_defaults ) -!$omp declare target(kmp_get_stacksize ) -!$omp declare target(kmp_get_stacksize_s ) -!$omp declare target(kmp_get_blocktime ) -!$omp declare target(kmp_get_library ) -!$omp declare target(kmp_set_affinity ) -!$omp declare target(kmp_get_affinity ) -!$omp declare target(kmp_get_affinity_max_proc ) -!$omp declare target(kmp_create_affinity_mask ) -!$omp declare target(kmp_destroy_affinity_mask ) -!$omp declare target(kmp_set_affinity_mask_proc ) -!$omp declare target(kmp_unset_affinity_mask_proc ) -!$omp declare target(kmp_get_affinity_mask_proc ) -!$omp declare target(kmp_malloc ) -!$omp declare target(kmp_aligned_malloc ) -!$omp declare target(kmp_calloc ) -!$omp declare target(kmp_realloc ) -!$omp declare target(kmp_free ) -!$omp declare target(kmp_set_warnings_on ) -!$omp declare target(kmp_set_warnings_off ) -!DIR$ ENDIF -!DIR$ 
ENDIF - Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/40/omp_lib.f90.var =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/40/omp_lib.f90.var (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/40/omp_lib.f90.var (nonexistent) @@ -1,455 +0,0 @@ -! include/40/omp_lib.f90.var - -! -!//===----------------------------------------------------------------------===// -!// -!// The LLVM Compiler Infrastructure -!// -!// This file is dual licensed under the MIT and the University of Illinois Open -!// Source Licenses. See LICENSE.txt for details. -!// -!//===----------------------------------------------------------------------===// -! - - module omp_lib_kinds - - use, intrinsic :: iso_c_binding - - integer, parameter :: omp_integer_kind = c_int - integer, parameter :: omp_logical_kind = 4 - integer, parameter :: omp_real_kind = c_float - integer, parameter :: kmp_double_kind = c_double - integer, parameter :: omp_lock_kind = c_intptr_t - integer, parameter :: omp_nest_lock_kind = c_intptr_t - integer, parameter :: omp_sched_kind = omp_integer_kind - integer, parameter :: omp_proc_bind_kind = omp_integer_kind - integer, parameter :: kmp_pointer_kind = c_intptr_t - integer, parameter :: kmp_size_t_kind = c_size_t - integer, parameter :: kmp_affinity_mask_kind = c_intptr_t - integer, parameter :: kmp_cancel_kind = omp_integer_kind - - end module omp_lib_kinds - - module omp_lib - - use omp_lib_kinds - - integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@ - integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@ - character(*) kmp_build_date - parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' ) - - integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1 - integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2 - integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3 - integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4 - - - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4 - - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_parallel = 1 - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_loop = 2 - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3 - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4 - - interface - -! *** -! *** omp_* entry points -! 
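Unlike the omp_lib.h variant above, which must fall back on int_ptr_kind() because it cannot assume a module environment, omp_lib_kinds here derives every kind from iso_c_binding, so each Fortran declaration is ABI-matched to its C counterpart. A short declaration sketch of the correspondences (illustrative only):

      use omp_lib_kinds
      integer (kind=omp_integer_kind)  :: n    ! interoperates with C int
      integer (kind=kmp_size_t_kind)   :: sz   ! interoperates with C size_t
      integer (kind=omp_lock_kind)     :: lck  ! pointer-sized handle (c_intptr_t)
      real    (kind=kmp_double_kind)   :: t    ! interoperates with C double

Combined with bind(c) and the value attribute in the interfaces that follow, each entry point carries exactly the C prototype the runtime exports; for example, num_threads is passed by value as a C int.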
*** - - subroutine omp_set_num_threads(num_threads) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: num_threads - end subroutine omp_set_num_threads - - subroutine omp_set_dynamic(dynamic_threads) bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind), value :: dynamic_threads - end subroutine omp_set_dynamic - - subroutine omp_set_nested(nested) bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind), value :: nested - end subroutine omp_set_nested - - function omp_get_num_threads() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_threads - end function omp_get_num_threads - - function omp_get_max_threads() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_threads - end function omp_get_max_threads - - function omp_get_thread_num() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_thread_num - end function omp_get_thread_num - - function omp_get_num_procs() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_procs - end function omp_get_num_procs - - function omp_in_parallel() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_in_parallel - end function omp_in_parallel - - function omp_in_final() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_in_final - end function omp_in_final - - function omp_get_dynamic() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_get_dynamic - end function omp_get_dynamic - - function omp_get_nested() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_get_nested - end function omp_get_nested - - function omp_get_thread_limit() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_thread_limit - end function omp_get_thread_limit - - subroutine omp_set_max_active_levels(max_levels) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: max_levels - end subroutine omp_set_max_active_levels - - function omp_get_max_active_levels() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_active_levels - end function omp_get_max_active_levels - - function omp_get_level() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_level - end function omp_get_level - - function omp_get_active_level() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_active_level - end function omp_get_active_level - - function omp_get_ancestor_thread_num(level) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_ancestor_thread_num - integer (kind=omp_integer_kind), value :: level - end function omp_get_ancestor_thread_num - - function omp_get_team_size(level) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_team_size - integer (kind=omp_integer_kind), value :: level - end function omp_get_team_size - - subroutine omp_set_schedule(kind, chunk_size) bind(c) - use omp_lib_kinds - integer (kind=omp_sched_kind), value :: kind - integer (kind=omp_integer_kind), value :: chunk_size - end subroutine omp_set_schedule - - subroutine omp_get_schedule(kind, chunk_size) bind(c) - use omp_lib_kinds - integer (kind=omp_sched_kind) kind - integer (kind=omp_integer_kind) chunk_size - end subroutine omp_get_schedule - - function omp_get_proc_bind() bind(c) - use omp_lib_kinds - integer (kind=omp_proc_bind_kind) omp_get_proc_bind - end function omp_get_proc_bind - - function omp_get_wtime() bind(c) - use omp_lib_kinds - real (kind=kmp_double_kind) omp_get_wtime - end function omp_get_wtime - - 
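Note that omp_get_wtime here (and omp_get_wtick below) returns real (kind=kmp_double_kind), i.e. a C double, where the fixed-form header above declares a bare double precision. A small illustrative program that times a parallel region with these interfaces (the work inside the region is a placeholder):

program time_region
  use omp_lib
  real (kind=kmp_double_kind) :: t0, t1
  call omp_set_num_threads(4)
  t0 = omp_get_wtime()
!$omp parallel
  ! ... work to be timed ...
!$omp end parallel
  t1 = omp_get_wtime()
  ! omp_get_wtick() reports the timer resolution in seconds
  print *, 'elapsed (s):', t1 - t0, ' tick (s):', omp_get_wtick()
end program time_region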
function omp_get_wtick() bind(c) - use omp_lib_kinds - real (kind=kmp_double_kind) omp_get_wtick - end function omp_get_wtick - - function omp_get_default_device() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_default_device - end function omp_get_default_device - - subroutine omp_set_default_device(device_num) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: device_num - end subroutine omp_set_default_device - - function omp_get_num_devices() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_devices - end function omp_get_num_devices - - function omp_get_num_teams() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_teams - end function omp_get_num_teams - - function omp_get_team_num() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_team_num - end function omp_get_team_num - - function omp_get_cancellation() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_cancellation - end function omp_get_cancellation - - function omp_is_initial_device() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_is_initial_device - end function omp_is_initial_device - - subroutine omp_init_lock(svar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_init_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) svar - end subroutine omp_init_lock - - subroutine omp_destroy_lock(svar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_destroy_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) svar - end subroutine omp_destroy_lock - - subroutine omp_set_lock(svar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_set_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) svar - end subroutine omp_set_lock - - subroutine omp_unset_lock(svar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_unset_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) svar - end subroutine omp_unset_lock - - function omp_test_lock(svar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_test_lock -!DIR$ ENDIF - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_test_lock - integer (kind=omp_lock_kind) svar - end function omp_test_lock - - subroutine omp_init_nest_lock(nvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_init_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_init_nest_lock - - subroutine omp_destroy_nest_lock(nvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_destroy_nest_lock - - subroutine omp_set_nest_lock(nvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_set_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_set_nest_lock - - subroutine omp_unset_nest_lock(nvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_unset_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_unset_nest_lock - - function omp_test_nest_lock(nvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: 
omp_test_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_test_nest_lock - integer (kind=omp_nest_lock_kind) nvar - end function omp_test_nest_lock - -! *** -! *** kmp_* entry points -! *** - - subroutine kmp_set_stacksize(size) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: size - end subroutine kmp_set_stacksize - - subroutine kmp_set_stacksize_s(size) bind(c) - use omp_lib_kinds - integer (kind=kmp_size_t_kind), value :: size - end subroutine kmp_set_stacksize_s - - subroutine kmp_set_blocktime(msec) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: msec - end subroutine kmp_set_blocktime - - subroutine kmp_set_library_serial() bind(c) - end subroutine kmp_set_library_serial - - subroutine kmp_set_library_turnaround() bind(c) - end subroutine kmp_set_library_turnaround - - subroutine kmp_set_library_throughput() bind(c) - end subroutine kmp_set_library_throughput - - subroutine kmp_set_library(libnum) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: libnum - end subroutine kmp_set_library - - subroutine kmp_set_defaults(string) bind(c) - use, intrinsic :: iso_c_binding - character (kind=c_char) :: string(*) - end subroutine kmp_set_defaults - - function kmp_get_stacksize() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_stacksize - end function kmp_get_stacksize - - function kmp_get_stacksize_s() bind(c) - use omp_lib_kinds - integer (kind=kmp_size_t_kind) kmp_get_stacksize_s - end function kmp_get_stacksize_s - - function kmp_get_blocktime() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_blocktime - end function kmp_get_blocktime - - function kmp_get_library() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_library - end function kmp_get_library - - function kmp_set_affinity(mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_set_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity - - function kmp_get_affinity(mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity - - function kmp_get_affinity_max_proc() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity_max_proc - end function kmp_get_affinity_max_proc - - subroutine kmp_create_affinity_mask(mask) bind(c) - use omp_lib_kinds - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_create_affinity_mask - - subroutine kmp_destroy_affinity_mask(mask) bind(c) - use omp_lib_kinds - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_destroy_affinity_mask - - function kmp_set_affinity_mask_proc(proc, mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity_mask_proc - - function kmp_unset_affinity_mask_proc(proc, mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_unset_affinity_mask_proc - - function kmp_get_affinity_mask_proc(proc, mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function 
kmp_get_affinity_mask_proc - - function kmp_malloc(size) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_malloc - integer (kind=kmp_size_t_kind), value :: size - end function kmp_malloc - - function kmp_aligned_malloc(size, alignment) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_aligned_malloc - integer (kind=kmp_size_t_kind), value :: size - integer (kind=kmp_size_t_kind), value :: alignment - end function kmp_aligned_malloc - - function kmp_calloc(nelem, elsize) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_calloc - integer (kind=kmp_size_t_kind), value :: nelem - integer (kind=kmp_size_t_kind), value :: elsize - end function kmp_calloc - - function kmp_realloc(ptr, size) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_realloc - integer (kind=kmp_pointer_kind), value :: ptr - integer (kind=kmp_size_t_kind), value :: size - end function kmp_realloc - - subroutine kmp_free(ptr) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind), value :: ptr - end subroutine kmp_free - - subroutine kmp_set_warnings_on() bind(c) - end subroutine kmp_set_warnings_on - - subroutine kmp_set_warnings_off() bind(c) - end subroutine kmp_set_warnings_off - - function kmp_get_cancellation_status(cancelkind) bind(c) - use omp_lib_kinds - integer (kind=kmp_cancel_kind), value :: cancelkind - logical (kind=omp_logical_kind) kmp_get_cancellation_status - end function kmp_get_cancellation_status - - end interface - - end module omp_lib Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/40/omp.h.var =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/40/omp.h.var (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/40/omp.h.var (nonexistent) @@ -1,161 +0,0 @@ -/* - * include/40/omp.h.var - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - - -#ifndef __OMP_H -# define __OMP_H - -# define KMP_VERSION_MAJOR @LIBOMP_VERSION_MAJOR@ -# define KMP_VERSION_MINOR @LIBOMP_VERSION_MINOR@ -# define KMP_VERSION_BUILD @LIBOMP_VERSION_BUILD@ -# define KMP_BUILD_DATE "@LIBOMP_BUILD_DATE@" - -# ifdef __cplusplus - extern "C" { -# endif - -# if defined(_WIN32) -# define __KAI_KMPC_CONVENTION __cdecl -# else -# define __KAI_KMPC_CONVENTION -# endif - - /* schedule kind constants */ - typedef enum omp_sched_t { - omp_sched_static = 1, - omp_sched_dynamic = 2, - omp_sched_guided = 3, - omp_sched_auto = 4 - } omp_sched_t; - - /* set API functions */ - extern void __KAI_KMPC_CONVENTION omp_set_num_threads (int); - extern void __KAI_KMPC_CONVENTION omp_set_dynamic (int); - extern void __KAI_KMPC_CONVENTION omp_set_nested (int); - extern void __KAI_KMPC_CONVENTION omp_set_max_active_levels (int); - extern void __KAI_KMPC_CONVENTION omp_set_schedule (omp_sched_t, int); - - /* query API functions */ - extern int __KAI_KMPC_CONVENTION omp_get_num_threads (void); - extern int __KAI_KMPC_CONVENTION omp_get_dynamic (void); - extern int __KAI_KMPC_CONVENTION omp_get_nested (void); - extern int __KAI_KMPC_CONVENTION omp_get_max_threads (void); - extern int __KAI_KMPC_CONVENTION omp_get_thread_num (void); - extern int __KAI_KMPC_CONVENTION omp_get_num_procs (void); - extern int __KAI_KMPC_CONVENTION omp_in_parallel (void); - extern int __KAI_KMPC_CONVENTION omp_in_final (void); - extern int __KAI_KMPC_CONVENTION omp_get_active_level (void); - extern int __KAI_KMPC_CONVENTION omp_get_level (void); - extern int __KAI_KMPC_CONVENTION omp_get_ancestor_thread_num (int); - extern int __KAI_KMPC_CONVENTION omp_get_team_size (int); - extern int __KAI_KMPC_CONVENTION omp_get_thread_limit (void); - extern int __KAI_KMPC_CONVENTION omp_get_max_active_levels (void); - extern void __KAI_KMPC_CONVENTION omp_get_schedule (omp_sched_t *, int *); - - /* lock API functions */ - typedef struct omp_lock_t { - void * _lk; - } omp_lock_t; - - extern void __KAI_KMPC_CONVENTION omp_init_lock (omp_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_set_lock (omp_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_unset_lock (omp_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_destroy_lock (omp_lock_t *); - extern int __KAI_KMPC_CONVENTION omp_test_lock (omp_lock_t *); - - /* nested lock API functions */ - typedef struct omp_nest_lock_t { - void * _lk; - } omp_nest_lock_t; - - extern void __KAI_KMPC_CONVENTION omp_init_nest_lock (omp_nest_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_set_nest_lock (omp_nest_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_unset_nest_lock (omp_nest_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_destroy_nest_lock (omp_nest_lock_t *); - extern int __KAI_KMPC_CONVENTION omp_test_nest_lock (omp_nest_lock_t *); - - /* time API functions */ - extern double __KAI_KMPC_CONVENTION omp_get_wtime (void); - extern double __KAI_KMPC_CONVENTION omp_get_wtick (void); - - /* OpenMP 4.0 */ - extern int __KAI_KMPC_CONVENTION omp_get_default_device (void); - extern void __KAI_KMPC_CONVENTION omp_set_default_device (int); - extern int __KAI_KMPC_CONVENTION omp_is_initial_device (void); - extern int __KAI_KMPC_CONVENTION omp_get_num_devices (void); - extern int __KAI_KMPC_CONVENTION omp_get_num_teams (void); - extern int __KAI_KMPC_CONVENTION omp_get_team_num (void); - extern int __KAI_KMPC_CONVENTION omp_get_cancellation (void); - -# include - /* kmp API functions */ - 
extern int __KAI_KMPC_CONVENTION kmp_get_stacksize (void); - extern void __KAI_KMPC_CONVENTION kmp_set_stacksize (int); - extern size_t __KAI_KMPC_CONVENTION kmp_get_stacksize_s (void); - extern void __KAI_KMPC_CONVENTION kmp_set_stacksize_s (size_t); - extern int __KAI_KMPC_CONVENTION kmp_get_blocktime (void); - extern int __KAI_KMPC_CONVENTION kmp_get_library (void); - extern void __KAI_KMPC_CONVENTION kmp_set_blocktime (int); - extern void __KAI_KMPC_CONVENTION kmp_set_library (int); - extern void __KAI_KMPC_CONVENTION kmp_set_library_serial (void); - extern void __KAI_KMPC_CONVENTION kmp_set_library_turnaround (void); - extern void __KAI_KMPC_CONVENTION kmp_set_library_throughput (void); - extern void __KAI_KMPC_CONVENTION kmp_set_defaults (char const *); - - /* Intel affinity API */ - typedef void * kmp_affinity_mask_t; - - extern int __KAI_KMPC_CONVENTION kmp_set_affinity (kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_get_affinity (kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_get_affinity_max_proc (void); - extern void __KAI_KMPC_CONVENTION kmp_create_affinity_mask (kmp_affinity_mask_t *); - extern void __KAI_KMPC_CONVENTION kmp_destroy_affinity_mask (kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_set_affinity_mask_proc (int, kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_unset_affinity_mask_proc (int, kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_get_affinity_mask_proc (int, kmp_affinity_mask_t *); - - /* OpenMP 4.0 affinity API */ - typedef enum omp_proc_bind_t { - omp_proc_bind_false = 0, - omp_proc_bind_true = 1, - omp_proc_bind_master = 2, - omp_proc_bind_close = 3, - omp_proc_bind_spread = 4 - } omp_proc_bind_t; - - extern omp_proc_bind_t __KAI_KMPC_CONVENTION omp_get_proc_bind (void); - - extern void * __KAI_KMPC_CONVENTION kmp_malloc (size_t); - extern void * __KAI_KMPC_CONVENTION kmp_aligned_malloc (size_t, size_t); - extern void * __KAI_KMPC_CONVENTION kmp_calloc (size_t, size_t); - extern void * __KAI_KMPC_CONVENTION kmp_realloc (void *, size_t); - extern void __KAI_KMPC_CONVENTION kmp_free (void *); - - extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void); - extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void); - -# undef __KAI_KMPC_CONVENTION - - /* Warning: - The following typedefs are not standard, deprecated and will be removed in a future release. - */ - typedef int omp_int_t; - typedef double omp_wtime_t; - -# ifdef __cplusplus - } -# endif - -#endif /* __OMP_H */ - Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/50/omp-tools.h.var =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/50/omp-tools.h.var (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/50/omp-tools.h.var (nonexistent) @@ -1,1083 +0,0 @@ -/* - * include/50/omp-tools.h.var - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
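Before the tools header: a short usage sketch for the omp.h lock API declared above. This is standard OpenMP usage, nothing libomp-specific:

    #include <omp.h>
    #include <stdio.h>

    int main(void) {
        omp_lock_t lock;
        int counter = 0;
        omp_init_lock(&lock);
    #pragma omp parallel num_threads(4)
        {
            omp_set_lock(&lock);    /* blocks until the lock is acquired */
            counter++;              /* one thread at a time */
            omp_unset_lock(&lock);
        }
        omp_destroy_lock(&lock);
        printf("counter = %d\n", counter);  /* prints counter = 4 */
        return 0;
    }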
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef __OMPT__
-#define __OMPT__
-
-/*****************************************************************************
- * system include files
- *****************************************************************************/
-
-#include <stdint.h>
-#include <stddef.h>
-
-/*****************************************************************************
- * iteration macros
- *****************************************************************************/
-
-#define FOREACH_OMPT_INQUIRY_FN(macro) \
- macro (ompt_enumerate_states) \
- macro (ompt_enumerate_mutex_impls) \
- \
- macro (ompt_set_callback) \
- macro (ompt_get_callback) \
- \
- macro (ompt_get_state) \
- \
- macro (ompt_get_parallel_info) \
- macro (ompt_get_task_info) \
- macro (ompt_get_task_memory) \
- macro (ompt_get_thread_data) \
- macro (ompt_get_unique_id) \
- macro (ompt_finalize_tool) \
- \
- macro(ompt_get_num_procs) \
- macro(ompt_get_num_places) \
- macro(ompt_get_place_proc_ids) \
- macro(ompt_get_place_num) \
- macro(ompt_get_partition_place_nums) \
- macro(ompt_get_proc_id) \
- \
- macro(ompt_get_target_info) \
- macro(ompt_get_num_devices)
-
-#define FOREACH_OMPT_STATE(macro) \
- \
- /* first available state */ \
- macro (ompt_state_undefined, 0x102) /* undefined thread state */ \
- \
- /* work states (0..15) */ \
- macro (ompt_state_work_serial, 0x000) /* working outside parallel */ \
- macro (ompt_state_work_parallel, 0x001) /* working within parallel */ \
- macro (ompt_state_work_reduction, 0x002) /* performing a reduction */ \
- \
- /* barrier wait states (16..31) */ \
- macro (ompt_state_wait_barrier, 0x010) /* waiting at a barrier */ \
- macro (ompt_state_wait_barrier_implicit_parallel, 0x011) \
- /* implicit barrier at the end of parallel region */\
- macro (ompt_state_wait_barrier_implicit_workshare, 0x012) \
- /* implicit barrier at the end of worksharing */ \
- macro (ompt_state_wait_barrier_implicit, 0x013) /* implicit barrier */ \
- macro (ompt_state_wait_barrier_explicit, 0x014) /* explicit barrier */ \
- \
- /* task wait states (32..63) */ \
- macro (ompt_state_wait_taskwait, 0x020) /* waiting at a taskwait */ \
- macro (ompt_state_wait_taskgroup, 0x021) /* waiting at a taskgroup */ \
- \
- /* mutex wait states (64..127) */ \
- macro (ompt_state_wait_mutex, 0x040) \
- macro (ompt_state_wait_lock, 0x041) /* waiting for lock */ \
- macro (ompt_state_wait_critical, 0x042) /* waiting for critical */ \
- macro (ompt_state_wait_atomic, 0x043) /* waiting for atomic */ \
- macro (ompt_state_wait_ordered, 0x044) /* waiting for ordered */ \
- \
- /* target wait states (128..255) */ \
- macro (ompt_state_wait_target, 0x080) /* waiting for target region */ \
- macro (ompt_state_wait_target_map, 0x081) /* waiting for target data mapping operation */ \
- macro (ompt_state_wait_target_update, 0x082) /* waiting for target update operation */ \
- \
- /* misc (256..511) */ \
- macro (ompt_state_idle, 0x100) /* waiting for work */ \
- macro (ompt_state_overhead, 0x101) /* overhead excluding wait states */ \
- \
- /* implementation-specific states (512..)
*/ - - -#define FOREACH_KMP_MUTEX_IMPL(macro) \ - macro (kmp_mutex_impl_none, 0) /* unknown implementation */ \ - macro (kmp_mutex_impl_spin, 1) /* based on spin */ \ - macro (kmp_mutex_impl_queuing, 2) /* based on some fair policy */ \ - macro (kmp_mutex_impl_speculative, 3) /* based on HW-supported speculation */ - -#define FOREACH_OMPT_EVENT(macro) \ - \ - /*--- Mandatory Events ---*/ \ - macro (ompt_callback_thread_begin, ompt_callback_thread_begin_t, 1) /* thread begin */ \ - macro (ompt_callback_thread_end, ompt_callback_thread_end_t, 2) /* thread end */ \ - \ - macro (ompt_callback_parallel_begin, ompt_callback_parallel_begin_t, 3) /* parallel begin */ \ - macro (ompt_callback_parallel_end, ompt_callback_parallel_end_t, 4) /* parallel end */ \ - \ - macro (ompt_callback_task_create, ompt_callback_task_create_t, 5) /* task begin */ \ - macro (ompt_callback_task_schedule, ompt_callback_task_schedule_t, 6) /* task schedule */ \ - macro (ompt_callback_implicit_task, ompt_callback_implicit_task_t, 7) /* implicit task */ \ - \ - macro (ompt_callback_target, ompt_callback_target_t, 8) /* target */ \ - macro (ompt_callback_target_data_op, ompt_callback_target_data_op_t, 9) /* target data op */ \ - macro (ompt_callback_target_submit, ompt_callback_target_submit_t, 10) /* target submit */ \ - \ - macro (ompt_callback_control_tool, ompt_callback_control_tool_t, 11) /* control tool */ \ - \ - macro (ompt_callback_device_initialize, ompt_callback_device_initialize_t, 12) /* device initialize */ \ - macro (ompt_callback_device_finalize, ompt_callback_device_finalize_t, 13) /* device finalize */ \ - \ - macro (ompt_callback_device_load, ompt_callback_device_load_t, 14) /* device load */ \ - macro (ompt_callback_device_unload, ompt_callback_device_unload_t, 15) /* device unload */ \ - \ - /* Optional Events */ \ - macro (ompt_callback_sync_region_wait, ompt_callback_sync_region_t, 16) /* sync region wait begin or end */ \ - \ - macro (ompt_callback_mutex_released, ompt_callback_mutex_t, 17) /* mutex released */ \ - \ - macro (ompt_callback_dependences, ompt_callback_dependences_t, 18) /* report task dependences */ \ - macro (ompt_callback_task_dependence, ompt_callback_task_dependence_t, 19) /* report task dependence */ \ - \ - macro (ompt_callback_work, ompt_callback_work_t, 20) /* task at work begin or end */ \ - \ - macro (ompt_callback_master, ompt_callback_master_t, 21) /* task at master begin or end */ \ - \ - macro (ompt_callback_target_map, ompt_callback_target_map_t, 22) /* target map */ \ - \ - macro (ompt_callback_sync_region, ompt_callback_sync_region_t, 23) /* sync region begin or end */ \ - \ - macro (ompt_callback_lock_init, ompt_callback_mutex_acquire_t, 24) /* lock init */ \ - macro (ompt_callback_lock_destroy, ompt_callback_mutex_t, 25) /* lock destroy */ \ - \ - macro (ompt_callback_mutex_acquire, ompt_callback_mutex_acquire_t, 26) /* mutex acquire */ \ - macro (ompt_callback_mutex_acquired, ompt_callback_mutex_t, 27) /* mutex acquired */ \ - \ - macro (ompt_callback_nest_lock, ompt_callback_nest_lock_t, 28) /* nest lock */ \ - \ - macro (ompt_callback_flush, ompt_callback_flush_t, 29) /* after executing flush */ \ - \ - macro (ompt_callback_cancel, ompt_callback_cancel_t, 30) /* cancel innermost binding region */ \ - \ - macro (ompt_callback_reduction, ompt_callback_sync_region_t, 31) /* reduction */ \ - \ - macro (ompt_callback_dispatch, ompt_callback_dispatch_t, 32) /* dispatch of work */ - -/***************************************************************************** - * 
implementation specific types - *****************************************************************************/ - -typedef enum kmp_mutex_impl_t { -#define kmp_mutex_impl_macro(impl, code) impl = code, - FOREACH_KMP_MUTEX_IMPL(kmp_mutex_impl_macro) -#undef kmp_mutex_impl_macro -} kmp_mutex_impl_t; - -/***************************************************************************** - * definitions generated from spec - *****************************************************************************/ - -typedef enum ompt_callbacks_t { - ompt_callback_thread_begin = 1, - ompt_callback_thread_end = 2, - ompt_callback_parallel_begin = 3, - ompt_callback_parallel_end = 4, - ompt_callback_task_create = 5, - ompt_callback_task_schedule = 6, - ompt_callback_implicit_task = 7, - ompt_callback_target = 8, - ompt_callback_target_data_op = 9, - ompt_callback_target_submit = 10, - ompt_callback_control_tool = 11, - ompt_callback_device_initialize = 12, - ompt_callback_device_finalize = 13, - ompt_callback_device_load = 14, - ompt_callback_device_unload = 15, - ompt_callback_sync_region_wait = 16, - ompt_callback_mutex_released = 17, - ompt_callback_dependences = 18, - ompt_callback_task_dependence = 19, - ompt_callback_work = 20, - ompt_callback_master = 21, - ompt_callback_target_map = 22, - ompt_callback_sync_region = 23, - ompt_callback_lock_init = 24, - ompt_callback_lock_destroy = 25, - ompt_callback_mutex_acquire = 26, - ompt_callback_mutex_acquired = 27, - ompt_callback_nest_lock = 28, - ompt_callback_flush = 29, - ompt_callback_cancel = 30, - ompt_callback_reduction = 31, - ompt_callback_dispatch = 32 -} ompt_callbacks_t; - -typedef enum ompt_record_t { - ompt_record_ompt = 1, - ompt_record_native = 2, - ompt_record_invalid = 3 -} ompt_record_t; - -typedef enum ompt_record_native_t { - ompt_record_native_info = 1, - ompt_record_native_event = 2 -} ompt_record_native_t; - -typedef enum ompt_set_result_t { - ompt_set_error = 0, - ompt_set_never = 1, - ompt_set_impossible = 2, - ompt_set_sometimes = 3, - ompt_set_sometimes_paired = 4, - ompt_set_always = 5 -} ompt_set_result_t; - -typedef uint64_t ompt_id_t; - -typedef uint64_t ompt_device_time_t; - -typedef uint64_t ompt_buffer_cursor_t; - -typedef enum ompt_thread_t { - ompt_thread_initial = 1, - ompt_thread_worker = 2, - ompt_thread_other = 3, - ompt_thread_unknown = 4 -} ompt_thread_t; - -typedef enum ompt_scope_endpoint_t { - ompt_scope_begin = 1, - ompt_scope_end = 2 -} ompt_scope_endpoint_t; - -typedef enum ompt_dispatch_t { - ompt_dispatch_iteration = 1, - ompt_dispatch_section = 2 -} ompt_dispatch_t; - -typedef enum ompt_sync_region_t { - ompt_sync_region_barrier = 1, - ompt_sync_region_barrier_implicit = 2, - ompt_sync_region_barrier_explicit = 3, - ompt_sync_region_barrier_implementation = 4, - ompt_sync_region_taskwait = 5, - ompt_sync_region_taskgroup = 6, - ompt_sync_region_reduction = 7 -} ompt_sync_region_t; - -typedef enum ompt_target_data_op_t { - ompt_target_data_alloc = 1, - ompt_target_data_transfer_to_device = 2, - ompt_target_data_transfer_from_device = 3, - ompt_target_data_delete = 4, - ompt_target_data_associate = 5, - ompt_target_data_disassociate = 6 -} ompt_target_data_op_t; - -typedef enum ompt_work_t { - ompt_work_loop = 1, - ompt_work_sections = 2, - ompt_work_single_executor = 3, - ompt_work_single_other = 4, - ompt_work_workshare = 5, - ompt_work_distribute = 6, - ompt_work_taskloop = 7 -} ompt_work_t; - -typedef enum ompt_mutex_t { - ompt_mutex_lock = 1, - ompt_mutex_test_lock = 2, - ompt_mutex_nest_lock = 3, - 
ompt_mutex_test_nest_lock = 4, - ompt_mutex_critical = 5, - ompt_mutex_atomic = 6, - ompt_mutex_ordered = 7 -} ompt_mutex_t; - -typedef enum ompt_native_mon_flag_t { - ompt_native_data_motion_explicit = 0x01, - ompt_native_data_motion_implicit = 0x02, - ompt_native_kernel_invocation = 0x04, - ompt_native_kernel_execution = 0x08, - ompt_native_driver = 0x10, - ompt_native_runtime = 0x20, - ompt_native_overhead = 0x40, - ompt_native_idleness = 0x80 -} ompt_native_mon_flag_t; - -typedef enum ompt_task_flag_t { - ompt_task_initial = 0x00000001, - ompt_task_implicit = 0x00000002, - ompt_task_explicit = 0x00000004, - ompt_task_target = 0x00000008, - ompt_task_undeferred = 0x08000000, - ompt_task_untied = 0x10000000, - ompt_task_final = 0x20000000, - ompt_task_mergeable = 0x40000000, - ompt_task_merged = 0x80000000 -} ompt_task_flag_t; - -typedef enum ompt_task_status_t { - ompt_task_complete = 1, - ompt_task_yield = 2, - ompt_task_cancel = 3, - ompt_task_detach = 4, - ompt_task_early_fulfill = 5, - ompt_task_late_fulfill = 6, - ompt_task_switch = 7 -} ompt_task_status_t; - -typedef enum ompt_target_t { - ompt_target = 1, - ompt_target_enter_data = 2, - ompt_target_exit_data = 3, - ompt_target_update = 4 -} ompt_target_t; - -typedef enum ompt_parallel_flag_t { - ompt_parallel_invoker_program = 0x00000001, - ompt_parallel_invoker_runtime = 0x00000002, - ompt_parallel_league = 0x40000000, - ompt_parallel_team = 0x80000000 -} ompt_parallel_flag_t; - -typedef enum ompt_target_map_flag_t { - ompt_target_map_flag_to = 0x01, - ompt_target_map_flag_from = 0x02, - ompt_target_map_flag_alloc = 0x04, - ompt_target_map_flag_release = 0x08, - ompt_target_map_flag_delete = 0x10, - ompt_target_map_flag_implicit = 0x20 -} ompt_target_map_flag_t; - -typedef enum ompt_dependence_type_t { - ompt_dependence_type_in = 1, - ompt_dependence_type_out = 2, - ompt_dependence_type_inout = 3, - ompt_dependence_type_mutexinoutset = 4, - ompt_dependence_type_source = 5, - ompt_dependence_type_sink = 6 -} ompt_dependence_type_t; - -typedef enum ompt_cancel_flag_t { - ompt_cancel_parallel = 0x01, - ompt_cancel_sections = 0x02, - ompt_cancel_loop = 0x04, - ompt_cancel_taskgroup = 0x08, - ompt_cancel_activated = 0x10, - ompt_cancel_detected = 0x20, - ompt_cancel_discarded_task = 0x40 -} ompt_cancel_flag_t; - -typedef uint64_t ompt_hwid_t; - -typedef uint64_t ompt_wait_id_t; - -typedef enum ompt_frame_flag_t { - ompt_frame_runtime = 0x00, - ompt_frame_application = 0x01, - ompt_frame_cfa = 0x10, - ompt_frame_framepointer = 0x20, - ompt_frame_stackaddress = 0x30 -} ompt_frame_flag_t; - -typedef enum ompt_state_t { - ompt_state_work_serial = 0x000, - ompt_state_work_parallel = 0x001, - ompt_state_work_reduction = 0x002, - - ompt_state_wait_barrier = 0x010, - ompt_state_wait_barrier_implicit_parallel = 0x011, - ompt_state_wait_barrier_implicit_workshare = 0x012, - ompt_state_wait_barrier_implicit = 0x013, - ompt_state_wait_barrier_explicit = 0x014, - - ompt_state_wait_taskwait = 0x020, - ompt_state_wait_taskgroup = 0x021, - - ompt_state_wait_mutex = 0x040, - ompt_state_wait_lock = 0x041, - ompt_state_wait_critical = 0x042, - ompt_state_wait_atomic = 0x043, - ompt_state_wait_ordered = 0x044, - - ompt_state_wait_target = 0x080, - ompt_state_wait_target_map = 0x081, - ompt_state_wait_target_update = 0x082, - - ompt_state_idle = 0x100, - ompt_state_overhead = 0x101, - ompt_state_undefined = 0x102 -} ompt_state_t; - -typedef uint64_t (*ompt_get_unique_id_t) (void); - -typedef uint64_t ompd_size_t; - -typedef uint64_t ompd_wait_id_t; - 
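Taken together, these definitions are what a tool uses at startup. A minimal sketch of the flow, assuming libomp's OMPT support: ompt_start_tool returns an initialize/finalize pair (the function-pointer typedefs appear just below), and the initialize callback obtains ompt_set_callback through its lookup argument to register a handler whose signature matches ompt_callback_parallel_begin_t:

    #include <omp-tools.h>
    #include <stdio.h>

    /* Handler matching ompt_callback_parallel_begin_t (declared below). */
    static void on_parallel_begin(ompt_data_t *encountering_task_data,
                                  const ompt_frame_t *encountering_task_frame,
                                  ompt_data_t *parallel_data,
                                  unsigned int requested_parallelism,
                                  int flags, const void *codeptr_ra) {
        (void)encountering_task_data; (void)encountering_task_frame;
        (void)parallel_data; (void)flags; (void)codeptr_ra;
        fprintf(stderr, "parallel region, %u threads requested\n",
                requested_parallelism);
    }

    static int my_init(ompt_function_lookup_t lookup, int initial_device_num,
                       ompt_data_t *tool_data) {
        (void)initial_device_num; (void)tool_data;
        ompt_set_callback_t set_callback =
            (ompt_set_callback_t)lookup("ompt_set_callback");
        if (!set_callback)
            return 0;                       /* decline; tool stays inactive */
        /* Return value is an ompt_set_result_t describing how reliably
           the event will be dispatched (ompt_set_always, ...). */
        set_callback(ompt_callback_parallel_begin,
                     (ompt_callback_t)&on_parallel_begin);
        return 1;                           /* nonzero keeps the tool active */
    }

    static void my_fini(ompt_data_t *tool_data) { (void)tool_data; }

    /* The symbol the runtime resolves at startup (weak symbol or dlsym). */
    ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
                                              const char *runtime_version) {
        (void)omp_version; (void)runtime_version;
        static ompt_start_tool_result_t result = {&my_init, &my_fini, {0}};
        return &result;  /* returning NULL would decline to register a tool */
    }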
-typedef uint64_t ompd_addr_t; -typedef int64_t ompd_word_t; -typedef uint64_t ompd_seg_t; - -typedef uint64_t ompd_device_t; - -typedef uint64_t ompd_thread_id_t; - -typedef enum ompd_scope_t { - ompd_scope_global = 1, - ompd_scope_address_space = 2, - ompd_scope_thread = 3, - ompd_scope_parallel = 4, - ompd_scope_implicit_task = 5, - ompd_scope_task = 6 -} ompd_scope_t; - -typedef uint64_t ompd_icv_id_t; - -typedef enum ompd_rc_t { - ompd_rc_ok = 0, - ompd_rc_unavailable = 1, - ompd_rc_stale_handle = 2, - ompd_rc_bad_input = 3, - ompd_rc_error = 4, - ompd_rc_unsupported = 5, - ompd_rc_needs_state_tracking = 6, - ompd_rc_incompatible = 7, - ompd_rc_device_read_error = 8, - ompd_rc_device_write_error = 9, - ompd_rc_nomem = 10, -} ompd_rc_t; - -typedef void (*ompt_interface_fn_t) (void); - -typedef ompt_interface_fn_t (*ompt_function_lookup_t) ( - const char *interface_function_name -); - -typedef union ompt_data_t { - uint64_t value; - void *ptr; -} ompt_data_t; - -typedef struct ompt_frame_t { - ompt_data_t exit_frame; - ompt_data_t enter_frame; - int exit_frame_flags; - int enter_frame_flags; -} ompt_frame_t; - -typedef void (*ompt_callback_t) (void); - -typedef void ompt_device_t; - -typedef void ompt_buffer_t; - -typedef void (*ompt_callback_buffer_request_t) ( - int device_num, - ompt_buffer_t **buffer, - size_t *bytes -); - -typedef void (*ompt_callback_buffer_complete_t) ( - int device_num, - ompt_buffer_t *buffer, - size_t bytes, - ompt_buffer_cursor_t begin, - int buffer_owned -); - -typedef void (*ompt_finalize_t) ( - ompt_data_t *tool_data -); - -typedef int (*ompt_initialize_t) ( - ompt_function_lookup_t lookup, - int initial_device_num, - ompt_data_t *tool_data -); - -typedef struct ompt_start_tool_result_t { - ompt_initialize_t initialize; - ompt_finalize_t finalize; - ompt_data_t tool_data; -} ompt_start_tool_result_t; - -typedef struct ompt_record_abstract_t { - ompt_record_native_t rclass; - const char *type; - ompt_device_time_t start_time; - ompt_device_time_t end_time; - ompt_hwid_t hwid; -} ompt_record_abstract_t; - -typedef struct ompt_dependence_t { - ompt_data_t variable; - ompt_dependence_type_t dependence_type; -} ompt_dependence_t; - -typedef int (*ompt_enumerate_states_t) ( - int current_state, - int *next_state, - const char **next_state_name -); - -typedef int (*ompt_enumerate_mutex_impls_t) ( - int current_impl, - int *next_impl, - const char **next_impl_name -); - -typedef ompt_set_result_t (*ompt_set_callback_t) ( - ompt_callbacks_t event, - ompt_callback_t callback -); - -typedef int (*ompt_get_callback_t) ( - ompt_callbacks_t event, - ompt_callback_t *callback -); - -typedef ompt_data_t *(*ompt_get_thread_data_t) (void); - -typedef int (*ompt_get_num_procs_t) (void); - -typedef int (*ompt_get_num_places_t) (void); - -typedef int (*ompt_get_place_proc_ids_t) ( - int place_num, - int ids_size, - int *ids -); - -typedef int (*ompt_get_place_num_t) (void); - -typedef int (*ompt_get_partition_place_nums_t) ( - int place_nums_size, - int *place_nums -); - -typedef int (*ompt_get_proc_id_t) (void); - -typedef int (*ompt_get_state_t) ( - ompt_wait_id_t *wait_id -); - -typedef int (*ompt_get_parallel_info_t) ( - int ancestor_level, - ompt_data_t **parallel_data, - int *team_size -); - -typedef int (*ompt_get_task_info_t) ( - int ancestor_level, - int *flags, - ompt_data_t **task_data, - ompt_frame_t **task_frame, - ompt_data_t **parallel_data, - int *thread_num -); - -typedef int (*ompt_get_task_memory_t)( - void **addr, - size_t *size, - int block -); - -typedef 
int (*ompt_get_target_info_t) ( - uint64_t *device_num, - ompt_id_t *target_id, - ompt_id_t *host_op_id -); - -typedef int (*ompt_get_num_devices_t) (void); - -typedef void (*ompt_finalize_tool_t) (void); - -typedef int (*ompt_get_device_num_procs_t) ( - ompt_device_t *device -); - -typedef ompt_device_time_t (*ompt_get_device_time_t) ( - ompt_device_t *device -); - -typedef double (*ompt_translate_time_t) ( - ompt_device_t *device, - ompt_device_time_t time -); - -typedef ompt_set_result_t (*ompt_set_trace_ompt_t) ( - ompt_device_t *device, - unsigned int enable, - unsigned int etype -); - -typedef ompt_set_result_t (*ompt_set_trace_native_t) ( - ompt_device_t *device, - int enable, - int flags -); - -typedef int (*ompt_start_trace_t) ( - ompt_device_t *device, - ompt_callback_buffer_request_t request, - ompt_callback_buffer_complete_t complete -); - -typedef int (*ompt_pause_trace_t) ( - ompt_device_t *device, - int begin_pause -); - -typedef int (*ompt_flush_trace_t) ( - ompt_device_t *device -); - -typedef int (*ompt_stop_trace_t) ( - ompt_device_t *device -); - -typedef int (*ompt_advance_buffer_cursor_t) ( - ompt_device_t *device, - ompt_buffer_t *buffer, - size_t size, - ompt_buffer_cursor_t current, - ompt_buffer_cursor_t *next -); - -typedef ompt_record_t (*ompt_get_record_type_t) ( - ompt_buffer_t *buffer, - ompt_buffer_cursor_t current -); - -typedef void *(*ompt_get_record_native_t) ( - ompt_buffer_t *buffer, - ompt_buffer_cursor_t current, - ompt_id_t *host_op_id -); - -typedef ompt_record_abstract_t * -(*ompt_get_record_abstract_t) ( - void *native_record -); - -typedef void (*ompt_callback_thread_begin_t) ( - ompt_thread_t thread_type, - ompt_data_t *thread_data -); - -typedef struct ompt_record_thread_begin_t { - ompt_thread_t thread_type; -} ompt_record_thread_begin_t; - -typedef void (*ompt_callback_thread_end_t) ( - ompt_data_t *thread_data -); - -typedef void (*ompt_callback_parallel_begin_t) ( - ompt_data_t *encountering_task_data, - const ompt_frame_t *encountering_task_frame, - ompt_data_t *parallel_data, - unsigned int requested_parallelism, - int flags, - const void *codeptr_ra -); - -typedef struct ompt_record_parallel_begin_t { - ompt_id_t encountering_task_id; - ompt_id_t parallel_id; - unsigned int requested_parallelism; - int flags; - const void *codeptr_ra; -} ompt_record_parallel_begin_t; - -typedef void (*ompt_callback_parallel_end_t) ( - ompt_data_t *parallel_data, - ompt_data_t *encountering_task_data, - int flags, - const void *codeptr_ra -); - -typedef struct ompt_record_parallel_end_t { - ompt_id_t parallel_id; - ompt_id_t encountering_task_id; - int flags; - const void *codeptr_ra; -} ompt_record_parallel_end_t; - -typedef void (*ompt_callback_work_t) ( - ompt_work_t wstype, - ompt_scope_endpoint_t endpoint, - ompt_data_t *parallel_data, - ompt_data_t *task_data, - uint64_t count, - const void *codeptr_ra -); - -typedef struct ompt_record_work_t { - ompt_work_t wstype; - ompt_scope_endpoint_t endpoint; - ompt_id_t parallel_id; - ompt_id_t task_id; - uint64_t count; - const void *codeptr_ra; -} ompt_record_work_t; - -typedef void (*ompt_callback_dispatch_t) ( - ompt_data_t *parallel_data, - ompt_data_t *task_data, - ompt_dispatch_t kind, - ompt_data_t instance -); - -typedef struct ompt_record_dispatch_t { - ompt_id_t parallel_id; - ompt_id_t task_id; - ompt_dispatch_t kind; - ompt_data_t instance; -} ompt_record_dispatch_t; - -typedef void (*ompt_callback_task_create_t) ( - ompt_data_t *encountering_task_data, - const ompt_frame_t 
*encountering_task_frame, - ompt_data_t *new_task_data, - int flags, - int has_dependences, - const void *codeptr_ra -); - -typedef struct ompt_record_task_create_t { - ompt_id_t encountering_task_id; - ompt_id_t new_task_id; - int flags; - int has_dependences; - const void *codeptr_ra; -} ompt_record_task_create_t; - -typedef void (*ompt_callback_dependences_t) ( - ompt_data_t *task_data, - const ompt_dependence_t *deps, - int ndeps -); - -typedef struct ompt_record_dependences_t { - ompt_id_t task_id; - ompt_dependence_t dep; - int ndeps; -} ompt_record_dependences_t; - -typedef void (*ompt_callback_task_dependence_t) ( - ompt_data_t *src_task_data, - ompt_data_t *sink_task_data -); - -typedef struct ompt_record_task_dependence_t { - ompt_id_t src_task_id; - ompt_id_t sink_task_id; -} ompt_record_task_dependence_t; - -typedef void (*ompt_callback_task_schedule_t) ( - ompt_data_t *prior_task_data, - ompt_task_status_t prior_task_status, - ompt_data_t *next_task_data -); - -typedef struct ompt_record_task_schedule_t { - ompt_id_t prior_task_id; - ompt_task_status_t prior_task_status; - ompt_id_t next_task_id; -} ompt_record_task_schedule_t; - -typedef void (*ompt_callback_implicit_task_t) ( - ompt_scope_endpoint_t endpoint, - ompt_data_t *parallel_data, - ompt_data_t *task_data, - unsigned int actual_parallelism, - unsigned int index, - int flags -); - -typedef struct ompt_record_implicit_task_t { - ompt_scope_endpoint_t endpoint; - ompt_id_t parallel_id; - ompt_id_t task_id; - unsigned int actual_parallelism; - unsigned int index; - int flags; -} ompt_record_implicit_task_t; - -typedef void (*ompt_callback_master_t) ( - ompt_scope_endpoint_t endpoint, - ompt_data_t *parallel_data, - ompt_data_t *task_data, - const void *codeptr_ra -); - -typedef struct ompt_record_master_t { - ompt_scope_endpoint_t endpoint; - ompt_id_t parallel_id; - ompt_id_t task_id; - const void *codeptr_ra; -} ompt_record_master_t; - -typedef void (*ompt_callback_sync_region_t) ( - ompt_sync_region_t kind, - ompt_scope_endpoint_t endpoint, - ompt_data_t *parallel_data, - ompt_data_t *task_data, - const void *codeptr_ra -); - -typedef struct ompt_record_sync_region_t { - ompt_sync_region_t kind; - ompt_scope_endpoint_t endpoint; - ompt_id_t parallel_id; - ompt_id_t task_id; - const void *codeptr_ra; -} ompt_record_sync_region_t; - -typedef void (*ompt_callback_mutex_acquire_t) ( - ompt_mutex_t kind, - unsigned int hint, - unsigned int impl, - ompt_wait_id_t wait_id, - const void *codeptr_ra -); - -typedef struct ompt_record_mutex_acquire_t { - ompt_mutex_t kind; - unsigned int hint; - unsigned int impl; - ompt_wait_id_t wait_id; - const void *codeptr_ra; -} ompt_record_mutex_acquire_t; - -typedef void (*ompt_callback_mutex_t) ( - ompt_mutex_t kind, - ompt_wait_id_t wait_id, - const void *codeptr_ra -); - -typedef struct ompt_record_mutex_t { - ompt_mutex_t kind; - ompt_wait_id_t wait_id; - const void *codeptr_ra; -} ompt_record_mutex_t; - -typedef void (*ompt_callback_nest_lock_t) ( - ompt_scope_endpoint_t endpoint, - ompt_wait_id_t wait_id, - const void *codeptr_ra -); - -typedef struct ompt_record_nest_lock_t { - ompt_scope_endpoint_t endpoint; - ompt_wait_id_t wait_id; - const void *codeptr_ra; -} ompt_record_nest_lock_t; - -typedef void (*ompt_callback_flush_t) ( - ompt_data_t *thread_data, - const void *codeptr_ra -); - -typedef struct ompt_record_flush_t { - const void *codeptr_ra; -} ompt_record_flush_t; - -typedef void (*ompt_callback_cancel_t) ( - ompt_data_t *task_data, - int flags, - const void *codeptr_ra 
-); - -typedef struct ompt_record_cancel_t { - ompt_id_t task_id; - int flags; - const void *codeptr_ra; -} ompt_record_cancel_t; - -typedef void (*ompt_callback_device_initialize_t) ( - int device_num, - const char *type, - ompt_device_t *device, - ompt_function_lookup_t lookup, - const char *documentation -); - -typedef void (*ompt_callback_device_finalize_t) ( - int device_num -); - -typedef void (*ompt_callback_device_load_t) ( - int device_num, - const char *filename, - int64_t offset_in_file, - void *vma_in_file, - size_t bytes, - void *host_addr, - void *device_addr, - uint64_t module_id -); - -typedef void (*ompt_callback_device_unload_t) ( - int device_num, - uint64_t module_id -); - -typedef void (*ompt_callback_target_data_op_t) ( - ompt_id_t target_id, - ompt_id_t host_op_id, - ompt_target_data_op_t optype, - void *src_addr, - int src_device_num, - void *dest_addr, - int dest_device_num, - size_t bytes, - const void *codeptr_ra -); - -typedef struct ompt_record_target_data_op_t { - ompt_id_t host_op_id; - ompt_target_data_op_t optype; - void *src_addr; - int src_device_num; - void *dest_addr; - int dest_device_num; - size_t bytes; - ompt_device_time_t end_time; - const void *codeptr_ra; -} ompt_record_target_data_op_t; - -typedef void (*ompt_callback_target_t) ( - ompt_target_t kind, - ompt_scope_endpoint_t endpoint, - int device_num, - ompt_data_t *task_data, - ompt_id_t target_id, - const void *codeptr_ra -); - -typedef struct ompt_record_target_t { - ompt_target_t kind; - ompt_scope_endpoint_t endpoint; - int device_num; - ompt_id_t task_id; - ompt_id_t target_id; - const void *codeptr_ra; -} ompt_record_target_t; - -typedef void (*ompt_callback_target_map_t) ( - ompt_id_t target_id, - unsigned int nitems, - void **host_addr, - void **device_addr, - size_t *bytes, - unsigned int *mapping_flags, - const void *codeptr_ra -); - -typedef struct ompt_record_target_map_t { - ompt_id_t target_id; - unsigned int nitems; - void **host_addr; - void **device_addr; - size_t *bytes; - unsigned int *mapping_flags; - const void *codeptr_ra; -} ompt_record_target_map_t; - -typedef void (*ompt_callback_target_submit_t) ( - ompt_id_t target_id, - ompt_id_t host_op_id, - unsigned int requested_num_teams -); - -typedef struct ompt_record_target_kernel_t { - ompt_id_t host_op_id; - unsigned int requested_num_teams; - unsigned int granted_num_teams; - ompt_device_time_t end_time; -} ompt_record_target_kernel_t; - -typedef int (*ompt_callback_control_tool_t) ( - uint64_t command, - uint64_t modifier, - void *arg, - const void *codeptr_ra -); - -typedef struct ompt_record_control_tool_t { - uint64_t command; - uint64_t modifier; - const void *codeptr_ra; -} ompt_record_control_tool_t; - -typedef struct ompd_address_t { - ompd_seg_t segment; - ompd_addr_t address; -} ompd_address_t; - -typedef struct ompd_frame_info_t { - ompd_address_t frame_address; - ompd_word_t frame_flag; -} ompd_frame_info_t; - -typedef struct _ompd_aspace_handle ompd_address_space_handle_t; -typedef struct _ompd_thread_handle ompd_thread_handle_t; -typedef struct _ompd_parallel_handle ompd_parallel_handle_t; -typedef struct _ompd_task_handle ompd_task_handle_t; - -typedef struct _ompd_aspace_cont ompd_address_space_context_t; -typedef struct _ompd_thread_cont ompd_thread_context_t; - -typedef struct ompd_device_type_sizes_t { - uint8_t sizeof_char; - uint8_t sizeof_short; - uint8_t sizeof_int; - uint8_t sizeof_long; - uint8_t sizeof_long_long; - uint8_t sizeof_pointer; -} ompd_device_type_sizes_t; - -typedef struct 
ompt_record_ompt_t { - ompt_callbacks_t type; - ompt_device_time_t time; - ompt_id_t thread_id; - ompt_id_t target_id; - union { - ompt_record_thread_begin_t thread_begin; - ompt_record_parallel_begin_t parallel_begin; - ompt_record_parallel_end_t parallel_end; - ompt_record_work_t work; - ompt_record_dispatch_t dispatch; - ompt_record_task_create_t task_create; - ompt_record_dependences_t dependences; - ompt_record_task_dependence_t task_dependence; - ompt_record_task_schedule_t task_schedule; - ompt_record_implicit_task_t implicit_task; - ompt_record_master_t master; - ompt_record_sync_region_t sync_region; - ompt_record_mutex_acquire_t mutex_acquire; - ompt_record_mutex_t mutex; - ompt_record_nest_lock_t nest_lock; - ompt_record_flush_t flush; - ompt_record_cancel_t cancel; - ompt_record_target_t target; - ompt_record_target_data_op_t target_data_op; - ompt_record_target_map_t target_map; - ompt_record_target_kernel_t target_kernel; - ompt_record_control_tool_t control_tool; - } record; -} ompt_record_ompt_t; - -typedef ompt_record_ompt_t *(*ompt_get_record_ompt_t) ( - ompt_buffer_t *buffer, - ompt_buffer_cursor_t current -); - -#define ompt_id_none 0 -#define ompt_data_none {0} -#define ompt_time_none 0 -#define ompt_hwid_none 0 -#define ompt_addr_none ~0 -#define ompt_mutex_impl_none 0 -#define ompt_wait_id_none 0 - -#define ompd_segment_none 0 - -#endif /* __OMPT__ */ Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/50/omp_lib.f.var =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/50/omp_lib.f.var (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/50/omp_lib.f.var (nonexistent) @@ -1,940 +0,0 @@ -! include/50/omp_lib.f.var - -! -!//===----------------------------------------------------------------------===// -!// -!// The LLVM Compiler Infrastructure -!// -!// This file is dual licensed under the MIT and the University of Illinois Open -!// Source Licenses. See LICENSE.txt for details. -!// -!//===----------------------------------------------------------------------===// -! - -!*** -!*** Some of the directives for the following routine extend past column 72, -!*** so process this file in 132-column mode. 
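A brief aside on the FOREACH_OMPT_STATE and FOREACH_KMP_MUTEX_IMPL lists that opened omp-tools.h above: they are X-macros, so a single list can generate an enum, a matching name table, or any other per-entry expansion. A self-contained sketch of the pattern, using a hypothetical two-entry list:

    #include <stdio.h>

    #define FOREACH_DEMO_STATE(macro) \
        macro(demo_state_idle, 0x100) \
        macro(demo_state_overhead, 0x101)

    /* Expansion 1: an enum. */
    typedef enum demo_state_t {
    #define demo_enum(state, code) state = code,
        FOREACH_DEMO_STATE(demo_enum)
    #undef demo_enum
    } demo_state_t;

    /* Expansion 2: a parallel name table, kept in sync automatically. */
    static const struct {
        const char *name;
        demo_state_t id;
    } demo_state_info[] = {
    #define demo_entry(state, code) {#state, state},
        FOREACH_DEMO_STATE(demo_entry)
    #undef demo_entry
    };

    int main(void) {
        printf("%s = 0x%x\n", demo_state_info[0].name, demo_state_info[0].id);
        return 0;
    }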
-!*** - -!dec$ fixedformlinesize:132 - - module omp_lib_kinds - - integer, parameter :: omp_integer_kind = 4 - integer, parameter :: omp_logical_kind = 4 - integer, parameter :: omp_real_kind = 4 - integer, parameter :: omp_lock_kind = int_ptr_kind() - integer, parameter :: omp_nest_lock_kind = int_ptr_kind() - integer, parameter :: omp_sched_kind = omp_integer_kind - integer, parameter :: omp_proc_bind_kind = omp_integer_kind - integer, parameter :: kmp_pointer_kind = int_ptr_kind() - integer, parameter :: kmp_size_t_kind = int_ptr_kind() - integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind() - integer, parameter :: kmp_cancel_kind = omp_integer_kind - integer, parameter :: omp_lock_hint_kind = omp_integer_kind - integer, parameter :: omp_control_tool_kind = omp_integer_kind - integer, parameter :: omp_control_tool_result_kind = omp_integer_kind - integer, parameter :: omp_allocator_kind = int_ptr_kind() - - end module omp_lib_kinds - - module omp_lib - - use omp_lib_kinds - - integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@ - character(*), parameter :: kmp_build_date = '@LIBOMP_BUILD_DATE@' - integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@ - - integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1 - integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2 - integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3 - integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4 - - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4 - - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_parallel = 1 - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_loop = 2 - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3 - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4 - - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_none = 0 - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_uncontended = 1 - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_contended = 2 - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_nonspeculative = 4 - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_speculative = 8 - integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_hle = 65536 - integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_rtm = 131072 - integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 262144 - - integer (kind=omp_allocator_kind), parameter :: omp_null_allocator = 0 - integer (kind=omp_allocator_kind), parameter :: omp_default_mem_alloc = 1 - integer (kind=omp_allocator_kind), parameter :: omp_large_cap_mem_alloc = 2 - integer (kind=omp_allocator_kind), parameter :: omp_const_mem_alloc = 3 - integer (kind=omp_allocator_kind), parameter :: omp_high_bw_mem_alloc = 4 - integer (kind=omp_allocator_kind), parameter :: omp_low_lat_mem_alloc = 5 - integer (kind=omp_allocator_kind), parameter :: omp_cgroup_mem_alloc = 6 - integer (kind=omp_allocator_kind), 
parameter :: omp_pteam_mem_alloc = 7 - integer (kind=omp_allocator_kind), parameter :: omp_thread_mem_alloc = 8 - - interface - -! *** -! *** omp_* entry points -! *** - - subroutine omp_set_num_threads(num_threads) - use omp_lib_kinds - integer (kind=omp_integer_kind) num_threads - end subroutine omp_set_num_threads - - subroutine omp_set_dynamic(dynamic_threads) - use omp_lib_kinds - logical (kind=omp_logical_kind) dynamic_threads - end subroutine omp_set_dynamic - - subroutine omp_set_nested(nested) - use omp_lib_kinds - logical (kind=omp_logical_kind) nested - end subroutine omp_set_nested - - function omp_get_num_threads() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_threads - end function omp_get_num_threads - - function omp_get_max_threads() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_threads - end function omp_get_max_threads - - function omp_get_thread_num() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_thread_num - end function omp_get_thread_num - - function omp_get_num_procs() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_procs - end function omp_get_num_procs - - function omp_in_parallel() - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_in_parallel - end function omp_in_parallel - - function omp_in_final() - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_in_final - end function omp_in_final - - function omp_get_dynamic() - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_get_dynamic - end function omp_get_dynamic - - function omp_get_nested() - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_get_nested - end function omp_get_nested - - function omp_get_thread_limit() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_thread_limit - end function omp_get_thread_limit - - subroutine omp_set_max_active_levels(max_levels) - use omp_lib_kinds - integer (kind=omp_integer_kind) max_levels - end subroutine omp_set_max_active_levels - - function omp_get_max_active_levels() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_active_levels - end function omp_get_max_active_levels - - function omp_get_level() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_level - end function omp_get_level - - function omp_get_active_level() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_active_level - end function omp_get_active_level - - function omp_get_ancestor_thread_num(level) - use omp_lib_kinds - integer (kind=omp_integer_kind) level - integer (kind=omp_integer_kind) omp_get_ancestor_thread_num - end function omp_get_ancestor_thread_num - - function omp_get_team_size(level) - use omp_lib_kinds - integer (kind=omp_integer_kind) level - integer (kind=omp_integer_kind) omp_get_team_size - end function omp_get_team_size - - subroutine omp_set_schedule(kind, chunk_size) - use omp_lib_kinds - integer (kind=omp_sched_kind) kind - integer (kind=omp_integer_kind) chunk_size - end subroutine omp_set_schedule - - subroutine omp_get_schedule(kind, chunk_size) - use omp_lib_kinds - integer (kind=omp_sched_kind) kind - integer (kind=omp_integer_kind) chunk_size - end subroutine omp_get_schedule - - function omp_get_proc_bind() - use omp_lib_kinds - integer (kind=omp_proc_bind_kind) omp_get_proc_bind - end function omp_get_proc_bind - - function omp_get_num_places() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_places - end function omp_get_num_places - - function omp_get_place_num_procs(place_num) - use omp_lib_kinds - integer 
(kind=omp_integer_kind) place_num - integer (kind=omp_integer_kind) omp_get_place_num_procs - end function omp_get_place_num_procs - - subroutine omp_get_place_proc_ids(place_num, ids) - use omp_lib_kinds - integer (kind=omp_integer_kind) place_num - integer (kind=omp_integer_kind) ids(*) - end subroutine omp_get_place_proc_ids - - function omp_get_place_num() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_place_num - end function omp_get_place_num - - function omp_get_partition_num_places() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_partition_num_places - end function omp_get_partition_num_places - - subroutine omp_get_partition_place_nums(place_nums) - use omp_lib_kinds - integer (kind=omp_integer_kind) place_nums(*) - end subroutine omp_get_partition_place_nums - - function omp_get_wtime() - double precision omp_get_wtime - end function omp_get_wtime - - function omp_get_wtick () - double precision omp_get_wtick - end function omp_get_wtick - - function omp_get_default_device() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_default_device - end function omp_get_default_device - - subroutine omp_set_default_device(device_num) - use omp_lib_kinds - integer (kind=omp_integer_kind) device_num - end subroutine omp_set_default_device - - function omp_get_num_devices() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_devices - end function omp_get_num_devices - - function omp_get_num_teams() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_teams - end function omp_get_num_teams - - function omp_get_team_num() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_team_num - end function omp_get_team_num - - function omp_get_cancellation() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_cancellation - end function omp_get_cancellation - - function omp_is_initial_device() - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_is_initial_device - end function omp_is_initial_device - - function omp_get_initial_device() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_initial_device - end function omp_get_initial_device - - function omp_get_device_num() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_device_num - end function omp_get_device_num - - subroutine omp_init_lock(svar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_init_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) svar - end subroutine omp_init_lock - - subroutine omp_destroy_lock(svar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_destroy_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) svar - end subroutine omp_destroy_lock - - subroutine omp_set_lock(svar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_set_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) svar - end subroutine omp_set_lock - - subroutine omp_unset_lock(svar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_unset_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) svar - end subroutine omp_unset_lock - - function omp_test_lock(svar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_test_lock -!DIR$ ENDIF - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_test_lock - integer (kind=omp_lock_kind) svar - end function omp_test_lock - - subroutine omp_init_nest_lock(nvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ 
attributes known_intrinsic :: omp_init_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_init_nest_lock - - subroutine omp_destroy_nest_lock(nvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_destroy_nest_lock - - subroutine omp_set_nest_lock(nvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_set_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_set_nest_lock - - subroutine omp_unset_nest_lock(nvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_unset_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_unset_nest_lock - - function omp_test_nest_lock(nvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_test_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_test_nest_lock - integer (kind=omp_nest_lock_kind) nvar - end function omp_test_nest_lock - - function omp_get_max_task_priority() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_task_priority - end function omp_get_max_task_priority - - subroutine omp_set_default_allocator(svar) - use omp_lib_kinds - integer (kind=omp_allocator_kind) svar - end subroutine omp_set_default_allocator - - function omp_get_default_allocator() - use omp_lib_kinds - integer (kind=omp_allocator_kind) omp_get_default_allocator - end function omp_get_default_allocator - - subroutine omp_set_affinity_format(format) - character (len=*) format - end subroutine omp_set_affinity_format - - function omp_get_affinity_format(buffer) - use omp_lib_kinds - character (len=*) buffer - integer (kind=kmp_size_t_kind) omp_get_affinity_format - end function omp_get_affinity_format - - subroutine omp_display_affinity(format) - character (len=*) format - end subroutine omp_display_affinity - - function omp_capture_affinity(buffer, format) - use omp_lib_kinds - character (len=*) format - character (len=*) buffer - integer (kind=kmp_size_t_kind) omp_capture_affinity - end function omp_capture_affinity - -! *** -! *** kmp_* entry points -! 
*** - - subroutine kmp_set_stacksize(size) - use omp_lib_kinds - integer (kind=omp_integer_kind) size - end subroutine kmp_set_stacksize - - subroutine kmp_set_stacksize_s(size) - use omp_lib_kinds - integer (kind=kmp_size_t_kind) size - end subroutine kmp_set_stacksize_s - - subroutine kmp_set_blocktime(msec) - use omp_lib_kinds - integer (kind=omp_integer_kind) msec - end subroutine kmp_set_blocktime - - subroutine kmp_set_library_serial() - end subroutine kmp_set_library_serial - - subroutine kmp_set_library_turnaround() - end subroutine kmp_set_library_turnaround - - subroutine kmp_set_library_throughput() - end subroutine kmp_set_library_throughput - - subroutine kmp_set_library(libnum) - use omp_lib_kinds - integer (kind=omp_integer_kind) libnum - end subroutine kmp_set_library - - subroutine kmp_set_defaults(string) - character*(*) string - end subroutine kmp_set_defaults - - function kmp_get_stacksize() - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_stacksize - end function kmp_get_stacksize - - function kmp_get_stacksize_s() - use omp_lib_kinds - integer (kind=kmp_size_t_kind) kmp_get_stacksize_s - end function kmp_get_stacksize_s - - function kmp_get_blocktime() - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_blocktime - end function kmp_get_blocktime - - function kmp_get_library() - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_library - end function kmp_get_library - - subroutine kmp_set_disp_num_buffers(num) - use omp_lib_kinds - integer (kind=omp_integer_kind) num - end subroutine kmp_set_disp_num_buffers - - function kmp_set_affinity(mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_set_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity - - function kmp_get_affinity(mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity - - function kmp_get_affinity_max_proc() - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity_max_proc - end function kmp_get_affinity_max_proc - - subroutine kmp_create_affinity_mask(mask) - use omp_lib_kinds - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_create_affinity_mask - - subroutine kmp_destroy_affinity_mask(mask) - use omp_lib_kinds - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_destroy_affinity_mask - - function kmp_set_affinity_mask_proc(proc, mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc - integer (kind=omp_integer_kind) proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity_mask_proc - - function kmp_unset_affinity_mask_proc(proc, mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc - integer (kind=omp_integer_kind) proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_unset_affinity_mask_proc - - function kmp_get_affinity_mask_proc(proc, mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc - integer (kind=omp_integer_kind) proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity_mask_proc - - function kmp_malloc(size) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_malloc - integer (kind=kmp_size_t_kind) size - end function kmp_malloc - - function kmp_aligned_malloc(size, alignment) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_aligned_malloc - integer (kind=kmp_size_t_kind) size - integer 
(kind=kmp_size_t_kind) alignment - end function kmp_aligned_malloc - - function kmp_calloc(nelem, elsize) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_calloc - integer (kind=kmp_size_t_kind) nelem - integer (kind=kmp_size_t_kind) elsize - end function kmp_calloc - - function kmp_realloc(ptr, size) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_realloc - integer (kind=kmp_pointer_kind) ptr - integer (kind=kmp_size_t_kind) size - end function kmp_realloc - - subroutine kmp_free(ptr) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) ptr - end subroutine kmp_free - - subroutine kmp_set_warnings_on() - end subroutine kmp_set_warnings_on - - subroutine kmp_set_warnings_off() - end subroutine kmp_set_warnings_off - - function kmp_get_cancellation_status(cancelkind) - use omp_lib_kinds - integer (kind=kmp_cancel_kind) cancelkind - logical (kind=omp_logical_kind) kmp_get_cancellation_status - end function kmp_get_cancellation_status - - subroutine omp_init_lock_with_hint(svar, hint) - use omp_lib_kinds - integer (kind=omp_lock_kind) svar - integer (kind=omp_lock_hint_kind) hint - end subroutine omp_init_lock_with_hint - - subroutine omp_init_nest_lock_with_hint(nvar, hint) - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) nvar - integer (kind=omp_lock_hint_kind) hint - end subroutine omp_init_nest_lock_with_hint - - function omp_control_tool(command, modifier) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_control_tool - integer (kind=omp_control_tool_kind) command - integer (kind=omp_control_tool_kind) modifier - end function omp_control_tool - - end interface - -!dec$ if defined(_WIN32) -!dec$ if defined(_WIN64) .or. defined(_M_AMD64) - -!*** -!*** The Fortran entry points must be in uppercase, even if the /Qlowercase -!*** option is specified. The alias attribute ensures that the specified -!*** string is used as the entry point. -!*** -!*** On the Windows* OS IA-32 architecture, the Fortran entry points have an -!*** underscore prepended. On the Windows* OS Intel(R) 64 -!*** architecture, no underscore is prepended. 
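The omp_init_lock_with_hint interface declared just above also exists on the C side (omp.h, OpenMP 4.5/5.0). A small sketch: the hint is advisory, and the runtime may map it onto one of the kmp_mutex_impl_* implementations listed earlier.

    #include <omp.h>

    int main(void) {
        omp_lock_t l;
        /* Hint that the lock will be contended; the runtime may choose a
           queuing implementation. Semantics are unchanged either way. */
        omp_init_lock_with_hint(&l, omp_lock_hint_contended);
        omp_set_lock(&l);
        omp_unset_lock(&l);
        omp_destroy_lock(&l);
        return 0;
    }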
-!*** - -!dec$ attributes alias:'OMP_SET_NUM_THREADS' :: omp_set_num_threads -!dec$ attributes alias:'OMP_SET_DYNAMIC' :: omp_set_dynamic -!dec$ attributes alias:'OMP_SET_NESTED' :: omp_set_nested -!dec$ attributes alias:'OMP_GET_NUM_THREADS' :: omp_get_num_threads -!dec$ attributes alias:'OMP_GET_MAX_THREADS' :: omp_get_max_threads -!dec$ attributes alias:'OMP_GET_THREAD_NUM' :: omp_get_thread_num -!dec$ attributes alias:'OMP_GET_NUM_PROCS' :: omp_get_num_procs -!dec$ attributes alias:'OMP_IN_PARALLEL' :: omp_in_parallel -!dec$ attributes alias:'OMP_GET_DYNAMIC' :: omp_get_dynamic -!dec$ attributes alias:'OMP_GET_NESTED' :: omp_get_nested -!dec$ attributes alias:'OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit -!dec$ attributes alias:'OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels -!dec$ attributes alias:'OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels -!dec$ attributes alias:'OMP_GET_LEVEL' :: omp_get_level -!dec$ attributes alias:'OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level -!dec$ attributes alias:'OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num -!dec$ attributes alias:'OMP_GET_TEAM_SIZE' :: omp_get_team_size -!dec$ attributes alias:'OMP_SET_SCHEDULE' :: omp_set_schedule -!dec$ attributes alias:'OMP_GET_SCHEDULE' :: omp_get_schedule -!dec$ attributes alias:'OMP_GET_PROC_BIND' :: omp_get_proc_bind -!dec$ attributes alias:'OMP_GET_WTIME' :: omp_get_wtime -!dec$ attributes alias:'OMP_GET_WTICK' :: omp_get_wtick -!dec$ attributes alias:'OMP_GET_DEFAULT_DEVICE' :: omp_get_default_device -!dec$ attributes alias:'OMP_SET_DEFAULT_DEVICE' :: omp_set_default_device -!dec$ attributes alias:'OMP_GET_NUM_DEVICES' :: omp_get_num_devices -!dec$ attributes alias:'OMP_GET_NUM_TEAMS' :: omp_get_num_teams -!dec$ attributes alias:'OMP_GET_TEAM_NUM' :: omp_get_team_num -!dec$ attributes alias:'OMP_GET_CANCELLATION' :: omp_get_cancellation -!dec$ attributes alias:'OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device -!dec$ attributes alias:'OMP_GET_INITIAL_DEVICE' :: omp_get_initial_device -!dec$ attributes alias:'OMP_GET_MAX_TASK_PRIORITY' :: omp_get_max_task_priority -!dec$ attributes alias:'OMP_GET_DEVICE_NUM' :: omp_get_device_num -!dec$ attributes alias:'OMP_CONTROL_TOOL' :: omp_control_tool -!dec$ attributes alias:'OMP_SET_AFFINITY_FORMAT' :: omp_set_affinity_format -!dec$ attributes alias:'OMP_GET_AFFINITY_FORMAT' :: omp_get_affinity_format -!dec$ attributes alias:'OMP_DISPLAY_AFFINITY' :: omp_display_affinity -!dec$ attributes alias:'OMP_CAPTURE_AFFINITY' :: omp_capture_affinity - -!dec$ attributes alias:'omp_init_lock' :: omp_init_lock -!dec$ attributes alias:'omp_init_lock_with_hint' :: omp_init_lock_with_hint -!dec$ attributes alias:'omp_destroy_lock' :: omp_destroy_lock -!dec$ attributes alias:'omp_set_lock' :: omp_set_lock -!dec$ attributes alias:'omp_unset_lock' :: omp_unset_lock -!dec$ attributes alias:'omp_test_lock' :: omp_test_lock -!dec$ attributes alias:'omp_init_nest_lock' :: omp_init_nest_lock -!dec$ attributes alias:'omp_init_nest_lock_with_hint' :: omp_init_nest_lock_with_hint -!dec$ attributes alias:'omp_destroy_nest_lock' :: omp_destroy_nest_lock -!dec$ attributes alias:'omp_set_nest_lock' :: omp_set_nest_lock -!dec$ attributes alias:'omp_unset_nest_lock' :: omp_unset_nest_lock -!dec$ attributes alias:'omp_test_nest_lock' :: omp_test_nest_lock - -!dec$ attributes alias:'KMP_SET_STACKSIZE'::kmp_set_stacksize -!dec$ attributes alias:'KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s -!dec$ attributes alias:'KMP_SET_BLOCKTIME'::kmp_set_blocktime -!dec$ attributes 
alias:'KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial -!dec$ attributes alias:'KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround -!dec$ attributes alias:'KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput -!dec$ attributes alias:'KMP_SET_LIBRARY'::kmp_set_library -!dec$ attributes alias:'KMP_GET_STACKSIZE'::kmp_get_stacksize -!dec$ attributes alias:'KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s -!dec$ attributes alias:'KMP_GET_BLOCKTIME'::kmp_get_blocktime -!dec$ attributes alias:'KMP_GET_LIBRARY'::kmp_get_library -!dec$ attributes alias:'KMP_SET_AFFINITY'::kmp_set_affinity -!dec$ attributes alias:'KMP_GET_AFFINITY'::kmp_get_affinity -!dec$ attributes alias:'KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc -!dec$ attributes alias:'KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask -!dec$ attributes alias:'KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask -!dec$ attributes alias:'KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'KMP_MALLOC'::kmp_malloc -!dec$ attributes alias:'KMP_ALIGNED_MALLOC'::kmp_aligned_malloc -!dec$ attributes alias:'KMP_CALLOC'::kmp_calloc -!dec$ attributes alias:'KMP_REALLOC'::kmp_realloc -!dec$ attributes alias:'KMP_FREE'::kmp_free - -!dec$ attributes alias:'KMP_SET_WARNINGS_ON'::kmp_set_warnings_on -!dec$ attributes alias:'KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off - -!dec$ attributes alias:'KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status - -!dec$ else - -!*** -!*** On Windows* OS IA-32 architecture, the Fortran entry points have an underscore prepended. -!*** - -!dec$ attributes alias:'_OMP_SET_NUM_THREADS' :: omp_set_num_threads -!dec$ attributes alias:'_OMP_SET_DYNAMIC' :: omp_set_dynamic -!dec$ attributes alias:'_OMP_SET_NESTED' :: omp_set_nested -!dec$ attributes alias:'_OMP_GET_NUM_THREADS' :: omp_get_num_threads -!dec$ attributes alias:'_OMP_GET_MAX_THREADS' :: omp_get_max_threads -!dec$ attributes alias:'_OMP_GET_THREAD_NUM' :: omp_get_thread_num -!dec$ attributes alias:'_OMP_GET_NUM_PROCS' :: omp_get_num_procs -!dec$ attributes alias:'_OMP_IN_PARALLEL' :: omp_in_parallel -!dec$ attributes alias:'_OMP_GET_DYNAMIC' :: omp_get_dynamic -!dec$ attributes alias:'_OMP_GET_NESTED' :: omp_get_nested -!dec$ attributes alias:'_OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit -!dec$ attributes alias:'_OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels -!dec$ attributes alias:'_OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels -!dec$ attributes alias:'_OMP_GET_LEVEL' :: omp_get_level -!dec$ attributes alias:'_OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level -!dec$ attributes alias:'_OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num -!dec$ attributes alias:'_OMP_GET_TEAM_SIZE' :: omp_get_team_size -!dec$ attributes alias:'_OMP_SET_SCHEDULE' :: omp_set_schedule -!dec$ attributes alias:'_OMP_GET_SCHEDULE' :: omp_get_schedule -!dec$ attributes alias:'_OMP_GET_PROC_BIND' :: omp_get_proc_bind -!dec$ attributes alias:'_OMP_GET_WTIME' :: omp_get_wtime -!dec$ attributes alias:'_OMP_GET_WTICK' :: omp_get_wtick -!dec$ attributes alias:'_OMP_GET_DEFAULT_DEVICE' :: omp_get_default_device -!dec$ attributes alias:'_OMP_SET_DEFAULT_DEVICE' :: omp_set_default_device -!dec$ attributes alias:'_OMP_GET_NUM_DEVICES' :: omp_get_num_devices -!dec$ attributes alias:'_OMP_GET_NUM_TEAMS' :: omp_get_num_teams -!dec$ attributes alias:'_OMP_GET_TEAM_NUM' :: 
omp_get_team_num
-!dec$ attributes alias:'_OMP_GET_CANCELLATION' :: omp_get_cancellation
-!dec$ attributes alias:'_OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device
-!dec$ attributes alias:'_OMP_GET_INITIAL_DEVICE' :: omp_get_initial_device
-!dec$ attributes alias:'_OMP_GET_MAX_TASK_PRIORITY' :: omp_get_max_task_priority
-!dec$ attributes alias:'_OMP_GET_DEVICE_NUM' :: omp_get_device_num
-!dec$ attributes alias:'_OMP_CONTROL_TOOL' :: omp_control_tool
-!dec$ attributes alias:'_OMP_SET_AFFINITY_FORMAT' :: omp_set_affinity_format
-!dec$ attributes alias:'_OMP_GET_AFFINITY_FORMAT' :: omp_get_affinity_format
-!dec$ attributes alias:'_OMP_DISPLAY_AFFINITY' :: omp_display_affinity
-!dec$ attributes alias:'_OMP_CAPTURE_AFFINITY' :: omp_capture_affinity
-
-!dec$ attributes alias:'_omp_init_lock' :: omp_init_lock
-!dec$ attributes alias:'_omp_init_lock_with_hint' :: omp_init_lock_with_hint
-!dec$ attributes alias:'_omp_destroy_lock' :: omp_destroy_lock
-!dec$ attributes alias:'_omp_set_lock' :: omp_set_lock
-!dec$ attributes alias:'_omp_unset_lock' :: omp_unset_lock
-!dec$ attributes alias:'_omp_test_lock' :: omp_test_lock
-!dec$ attributes alias:'_omp_init_nest_lock' :: omp_init_nest_lock
-!dec$ attributes alias:'_omp_init_nest_lock_with_hint' :: omp_init_nest_lock_with_hint
-!dec$ attributes alias:'_omp_destroy_nest_lock' :: omp_destroy_nest_lock
-!dec$ attributes alias:'_omp_set_nest_lock' :: omp_set_nest_lock
-!dec$ attributes alias:'_omp_unset_nest_lock' :: omp_unset_nest_lock
-!dec$ attributes alias:'_omp_test_nest_lock' :: omp_test_nest_lock
-
-!dec$ attributes alias:'_KMP_SET_STACKSIZE'::kmp_set_stacksize
-!dec$ attributes alias:'_KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s
-!dec$ attributes alias:'_KMP_SET_BLOCKTIME'::kmp_set_blocktime
-!dec$ attributes alias:'_KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial
-!dec$ attributes alias:'_KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround
-!dec$ attributes alias:'_KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput
-!dec$ attributes alias:'_KMP_SET_LIBRARY'::kmp_set_library
-!dec$ attributes alias:'_KMP_GET_STACKSIZE'::kmp_get_stacksize
-!dec$ attributes alias:'_KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s
-!dec$ attributes alias:'_KMP_GET_BLOCKTIME'::kmp_get_blocktime
-!dec$ attributes alias:'_KMP_GET_LIBRARY'::kmp_get_library
-!dec$ attributes alias:'_KMP_SET_AFFINITY'::kmp_set_affinity
-!dec$ attributes alias:'_KMP_GET_AFFINITY'::kmp_get_affinity
-!dec$ attributes alias:'_KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc
-!dec$ attributes alias:'_KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask
-!dec$ attributes alias:'_KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask
-!dec$ attributes alias:'_KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc
-!dec$ attributes alias:'_KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc
-!dec$ attributes alias:'_KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc
-!dec$ attributes alias:'_KMP_MALLOC'::kmp_malloc
-!dec$ attributes alias:'_KMP_ALIGNED_MALLOC'::kmp_aligned_malloc
-!dec$ attributes alias:'_KMP_CALLOC'::kmp_calloc
-!dec$ attributes alias:'_KMP_REALLOC'::kmp_realloc
-!dec$ attributes alias:'_KMP_FREE'::kmp_free
-
-!dec$ attributes alias:'_KMP_SET_WARNINGS_ON'::kmp_set_warnings_on
-!dec$ attributes alias:'_KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off
-
-!dec$ attributes alias:'_KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status
-
-!dec$ endif
-!dec$ endif
-
-!dec$ if defined(__linux)
-
-!***
-!*** The Linux* OS entry points are in lowercase, with an underscore
appended. -!*** - -!dec$ attributes alias:'omp_set_num_threads_'::omp_set_num_threads -!dec$ attributes alias:'omp_set_dynamic_'::omp_set_dynamic -!dec$ attributes alias:'omp_set_nested_'::omp_set_nested -!dec$ attributes alias:'omp_get_num_threads_'::omp_get_num_threads -!dec$ attributes alias:'omp_get_max_threads_'::omp_get_max_threads -!dec$ attributes alias:'omp_get_thread_num_'::omp_get_thread_num -!dec$ attributes alias:'omp_get_num_procs_'::omp_get_num_procs -!dec$ attributes alias:'omp_in_parallel_'::omp_in_parallel -!dec$ attributes alias:'omp_get_dynamic_'::omp_get_dynamic -!dec$ attributes alias:'omp_get_nested_'::omp_get_nested -!dec$ attributes alias:'omp_get_thread_limit_'::omp_get_thread_limit -!dec$ attributes alias:'omp_set_max_active_levels_'::omp_set_max_active_levels -!dec$ attributes alias:'omp_get_max_active_levels_'::omp_get_max_active_levels -!dec$ attributes alias:'omp_get_level_'::omp_get_level -!dec$ attributes alias:'omp_get_active_level_'::omp_get_active_level -!dec$ attributes alias:'omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num -!dec$ attributes alias:'omp_get_team_size_'::omp_get_team_size -!dec$ attributes alias:'omp_set_schedule_'::omp_set_schedule -!dec$ attributes alias:'omp_get_schedule_'::omp_get_schedule -!dec$ attributes alias:'omp_get_proc_bind_' :: omp_get_proc_bind -!dec$ attributes alias:'omp_get_wtime_'::omp_get_wtime -!dec$ attributes alias:'omp_get_wtick_'::omp_get_wtick -!dec$ attributes alias:'omp_get_default_device_'::omp_get_default_device -!dec$ attributes alias:'omp_set_default_device_'::omp_set_default_device -!dec$ attributes alias:'omp_get_num_devices_'::omp_get_num_devices -!dec$ attributes alias:'omp_get_num_teams_'::omp_get_num_teams -!dec$ attributes alias:'omp_get_team_num_'::omp_get_team_num -!dec$ attributes alias:'omp_get_cancellation_'::omp_get_cancellation -!dec$ attributes alias:'omp_is_initial_device_'::omp_is_initial_device -!dec$ attributes alias:'omp_get_initial_device_'::omp_get_initial_device -!dec$ attributes alias:'omp_get_max_task_priority_'::omp_get_max_task_priority -!dec$ attributes alias:'omp_get_device_num_'::omp_get_device_num -!dec$ attributes alias:'omp_set_affinity_format_' :: omp_set_affinity_format -!dec$ attributes alias:'omp_get_affinity_format_' :: omp_get_affinity_format -!dec$ attributes alias:'omp_display_affinity_' :: omp_display_affinity -!dec$ attributes alias:'omp_capture_affinity_' :: omp_capture_affinity - -!dec$ attributes alias:'omp_init_lock_'::omp_init_lock -!dec$ attributes alias:'omp_init_lock_with_hint_'::omp_init_lock_with_hint -!dec$ attributes alias:'omp_destroy_lock_'::omp_destroy_lock -!dec$ attributes alias:'omp_set_lock_'::omp_set_lock -!dec$ attributes alias:'omp_unset_lock_'::omp_unset_lock -!dec$ attributes alias:'omp_test_lock_'::omp_test_lock -!dec$ attributes alias:'omp_init_nest_lock_'::omp_init_nest_lock -!dec$ attributes alias:'omp_init_nest_lock_with_hint_'::omp_init_nest_lock_with_hint -!dec$ attributes alias:'omp_destroy_nest_lock_'::omp_destroy_nest_lock -!dec$ attributes alias:'omp_set_nest_lock_'::omp_set_nest_lock -!dec$ attributes alias:'omp_unset_nest_lock_'::omp_unset_nest_lock -!dec$ attributes alias:'omp_test_nest_lock_'::omp_test_nest_lock -!dec$ attributes alias:'omp_control_tool_'::omp_control_tool - -!dec$ attributes alias:'kmp_set_stacksize_'::kmp_set_stacksize -!dec$ attributes alias:'kmp_set_stacksize_s_'::kmp_set_stacksize_s -!dec$ attributes alias:'kmp_set_blocktime_'::kmp_set_blocktime -!dec$ attributes 
alias:'kmp_set_library_serial_'::kmp_set_library_serial
-!dec$ attributes alias:'kmp_set_library_turnaround_'::kmp_set_library_turnaround
-!dec$ attributes alias:'kmp_set_library_throughput_'::kmp_set_library_throughput
-!dec$ attributes alias:'kmp_set_library_'::kmp_set_library
-!dec$ attributes alias:'kmp_get_stacksize_'::kmp_get_stacksize
-!dec$ attributes alias:'kmp_get_stacksize_s_'::kmp_get_stacksize_s
-!dec$ attributes alias:'kmp_get_blocktime_'::kmp_get_blocktime
-!dec$ attributes alias:'kmp_get_library_'::kmp_get_library
-!dec$ attributes alias:'kmp_set_affinity_'::kmp_set_affinity
-!dec$ attributes alias:'kmp_get_affinity_'::kmp_get_affinity
-!dec$ attributes alias:'kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc
-!dec$ attributes alias:'kmp_create_affinity_mask_'::kmp_create_affinity_mask
-!dec$ attributes alias:'kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask
-!dec$ attributes alias:'kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc
-!dec$ attributes alias:'kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc
-!dec$ attributes alias:'kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc
-!dec$ attributes alias:'kmp_malloc_'::kmp_malloc
-!dec$ attributes alias:'kmp_aligned_malloc_'::kmp_aligned_malloc
-!dec$ attributes alias:'kmp_calloc_'::kmp_calloc
-!dec$ attributes alias:'kmp_realloc_'::kmp_realloc
-!dec$ attributes alias:'kmp_free_'::kmp_free
-
-!dec$ attributes alias:'kmp_set_warnings_on_'::kmp_set_warnings_on
-!dec$ attributes alias:'kmp_set_warnings_off_'::kmp_set_warnings_off
-!dec$ attributes alias:'kmp_get_cancellation_status_'::kmp_get_cancellation_status
-
-!dec$ endif
-
-!dec$ if defined(__APPLE__)
-
-!***
-!*** The Mac entry points are in lowercase, with both an underscore
-!*** appended and an underscore prepended.
-!***
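Across the three platform branches, the same Fortran-visible name binds to a differently mangled external symbol. The sketch below is an illustration drawn from these alias tables, using omp_get_thread_num as the example; every other entry point in the tables follows the same pattern, and the unprefixed uppercase form is taken from the non-IA-32 Windows branch earlier in this header:

!     Call site, identical on every platform:
!         n = omp_get_thread_num()
!     External symbol the alias directive binds it to:
!         Windows IA-32:   _OMP_GET_THREAD_NUM    (uppercase, underscore prepended)
!         Windows (other): OMP_GET_THREAD_NUM     (uppercase, no underscore)
!         Linux* OS:       omp_get_thread_num_    (lowercase, underscore appended)
!         macOS:           _omp_get_thread_num_   (lowercase, underscores on both ends;
!                                                  see the table that follows)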
-
-!dec$ attributes alias:'_omp_set_num_threads_'::omp_set_num_threads
-!dec$ attributes alias:'_omp_set_dynamic_'::omp_set_dynamic
-!dec$ attributes alias:'_omp_set_nested_'::omp_set_nested
-!dec$ attributes alias:'_omp_get_num_threads_'::omp_get_num_threads
-!dec$ attributes alias:'_omp_get_max_threads_'::omp_get_max_threads
-!dec$ attributes alias:'_omp_get_thread_num_'::omp_get_thread_num
-!dec$ attributes alias:'_omp_get_num_procs_'::omp_get_num_procs
-!dec$ attributes alias:'_omp_in_parallel_'::omp_in_parallel
-!dec$ attributes alias:'_omp_get_dynamic_'::omp_get_dynamic
-!dec$ attributes alias:'_omp_get_nested_'::omp_get_nested
-!dec$ attributes alias:'_omp_get_thread_limit_'::omp_get_thread_limit
-!dec$ attributes alias:'_omp_set_max_active_levels_'::omp_set_max_active_levels
-!dec$ attributes alias:'_omp_get_max_active_levels_'::omp_get_max_active_levels
-!dec$ attributes alias:'_omp_get_level_'::omp_get_level
-!dec$ attributes alias:'_omp_get_active_level_'::omp_get_active_level
-!dec$ attributes alias:'_omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num
-!dec$ attributes alias:'_omp_get_team_size_'::omp_get_team_size
-!dec$ attributes alias:'_omp_set_schedule_'::omp_set_schedule
-!dec$ attributes alias:'_omp_get_schedule_'::omp_get_schedule
-!dec$ attributes alias:'_omp_get_proc_bind_' :: omp_get_proc_bind
-!dec$ attributes alias:'_omp_get_wtime_'::omp_get_wtime
-!dec$ attributes alias:'_omp_get_wtick_'::omp_get_wtick
-!dec$ attributes alias:'_omp_get_default_device_'::omp_get_default_device
-!dec$ attributes alias:'_omp_set_default_device_'::omp_set_default_device
-!dec$ attributes alias:'_omp_get_num_devices_'::omp_get_num_devices
-!dec$ attributes alias:'_omp_get_num_teams_'::omp_get_num_teams
-!dec$ attributes alias:'_omp_get_team_num_'::omp_get_team_num
-!dec$ attributes alias:'_omp_get_cancellation_'::omp_get_cancellation
-!dec$ attributes alias:'_omp_is_initial_device_'::omp_is_initial_device
-!dec$ attributes alias:'_omp_get_initial_device_'::omp_get_initial_device
-!dec$ attributes alias:'_omp_get_max_task_priority_'::omp_get_max_task_priority
-!dec$ attributes alias:'_omp_get_device_num_'::omp_get_device_num
-!dec$ attributes alias:'_omp_init_lock_'::omp_init_lock
-!dec$ attributes alias:'_omp_init_lock_with_hint_'::omp_init_lock_with_hint
-!dec$ attributes alias:'_omp_destroy_lock_'::omp_destroy_lock
-!dec$ attributes alias:'_omp_set_lock_'::omp_set_lock
-!dec$ attributes alias:'_omp_unset_lock_'::omp_unset_lock
-!dec$ attributes alias:'_omp_test_lock_'::omp_test_lock
-!dec$ attributes alias:'_omp_init_nest_lock_'::omp_init_nest_lock
-!dec$ attributes alias:'_omp_init_nest_lock_with_hint_'::omp_init_nest_lock_with_hint
-!dec$ attributes alias:'_omp_destroy_nest_lock_'::omp_destroy_nest_lock
-!dec$ attributes alias:'_omp_set_nest_lock_'::omp_set_nest_lock
-!dec$ attributes alias:'_omp_unset_nest_lock_'::omp_unset_nest_lock
-!dec$ attributes alias:'_omp_test_nest_lock_'::omp_test_nest_lock
-!dec$ attributes alias:'_omp_control_tool_'::omp_control_tool
-!dec$ attributes alias:'_omp_set_affinity_format_' :: omp_set_affinity_format
-!dec$ attributes alias:'_omp_get_affinity_format_' :: omp_get_affinity_format
-!dec$ attributes alias:'_omp_display_affinity_' :: omp_display_affinity
-!dec$ attributes alias:'_omp_capture_affinity_' :: omp_capture_affinity
-
-!dec$ attributes alias:'_kmp_set_stacksize_'::kmp_set_stacksize
-!dec$ attributes alias:'_kmp_set_stacksize_s_'::kmp_set_stacksize_s
-!dec$ attributes alias:'_kmp_set_blocktime_'::kmp_set_blocktime
-!dec$
attributes alias:'_kmp_set_library_serial_'::kmp_set_library_serial -!dec$ attributes alias:'_kmp_set_library_turnaround_'::kmp_set_library_turnaround -!dec$ attributes alias:'_kmp_set_library_throughput_'::kmp_set_library_throughput -!dec$ attributes alias:'_kmp_set_library_'::kmp_set_library -!dec$ attributes alias:'_kmp_get_stacksize_'::kmp_get_stacksize -!dec$ attributes alias:'_kmp_get_stacksize_s_'::kmp_get_stacksize_s -!dec$ attributes alias:'_kmp_get_blocktime_'::kmp_get_blocktime -!dec$ attributes alias:'_kmp_get_library_'::kmp_get_library -!dec$ attributes alias:'_kmp_set_affinity_'::kmp_set_affinity -!dec$ attributes alias:'_kmp_get_affinity_'::kmp_get_affinity -!dec$ attributes alias:'_kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc -!dec$ attributes alias:'_kmp_create_affinity_mask_'::kmp_create_affinity_mask -!dec$ attributes alias:'_kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask -!dec$ attributes alias:'_kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'_kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'_kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'_kmp_malloc_'::kmp_malloc -!dec$ attributes alias:'_kmp_aligned_malloc_'::kmp_aligned_malloc -!dec$ attributes alias:'_kmp_calloc_'::kmp_calloc -!dec$ attributes alias:'_kmp_realloc_'::kmp_realloc -!dec$ attributes alias:'_kmp_free_'::kmp_free - -!dec$ attributes alias:'_kmp_set_warnings_on_'::kmp_set_warnings_on -!dec$ attributes alias:'_kmp_set_warnings_off_'::kmp_set_warnings_off - -!dec$ attributes alias:'_kmp_get_cancellation_status_'::kmp_get_cancellation_status - -!dec$ endif - - end module omp_lib Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/50/omp_lib.h.var =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/50/omp_lib.h.var (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/50/omp_lib.h.var (nonexistent) @@ -1,782 +0,0 @@ -! include/50/omp_lib.h.var - -! -!//===----------------------------------------------------------------------===// -!// -!// The LLVM Compiler Infrastructure -!// -!// This file is dual licensed under the MIT and the University of Illinois Open -!// Source Licenses. See LICENSE.txt for details. -!// -!//===----------------------------------------------------------------------===// -! 
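This header is generated from the omp_lib.h.var template at build time (the @...@ placeholders below are substituted by the build system) and is consumed by fixed-form Fortran sources through an include line rather than a use statement. A minimal hypothetical sketch, assuming the generated file is installed as omp_lib.h:

!     program query                  ! hypothetical consumer, not part of this header
!     include 'omp_lib.h'
!     print *, 'procs: ', omp_get_num_procs()
!     end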
- - integer omp_integer_kind - parameter(omp_integer_kind=4) - integer omp_logical_kind - parameter(omp_logical_kind=4) - integer omp_real_kind - parameter(omp_real_kind=4) - integer omp_lock_kind - parameter(omp_lock_kind=int_ptr_kind()) - integer omp_nest_lock_kind - parameter(omp_nest_lock_kind=int_ptr_kind()) - integer omp_sched_kind - parameter(omp_sched_kind=omp_integer_kind) - integer omp_proc_bind_kind - parameter(omp_proc_bind_kind=omp_integer_kind) - integer kmp_pointer_kind - parameter(kmp_pointer_kind=int_ptr_kind()) - integer kmp_size_t_kind - parameter(kmp_size_t_kind=int_ptr_kind()) - integer kmp_affinity_mask_kind - parameter(kmp_affinity_mask_kind=int_ptr_kind()) - integer omp_sync_hint_kind - parameter(omp_sync_hint_kind=omp_integer_kind) - integer omp_lock_hint_kind - parameter(omp_lock_hint_kind=omp_sync_hint_kind) - integer omp_control_tool_kind - parameter(omp_control_tool_kind=omp_integer_kind) - integer omp_control_tool_result_kind - parameter(omp_control_tool_result_kind=omp_integer_kind) - integer omp_allocator_kind - parameter(omp_allocator_kind=int_ptr_kind()) - - integer(kind=omp_integer_kind)openmp_version - parameter(openmp_version=@LIBOMP_OMP_YEAR_MONTH@) - integer(kind=omp_integer_kind)kmp_version_major - parameter(kmp_version_major=@LIBOMP_VERSION_MAJOR@) - integer(kind=omp_integer_kind)kmp_version_minor - parameter(kmp_version_minor=@LIBOMP_VERSION_MINOR@) - integer(kind=omp_integer_kind)kmp_version_build - parameter(kmp_version_build=@LIBOMP_VERSION_BUILD@) - character(*)kmp_build_date - parameter(kmp_build_date='@LIBOMP_BUILD_DATE@') - - integer(kind=omp_sched_kind)omp_sched_static - parameter(omp_sched_static=1) - integer(kind=omp_sched_kind)omp_sched_dynamic - parameter(omp_sched_dynamic=2) - integer(kind=omp_sched_kind)omp_sched_guided - parameter(omp_sched_guided=3) - integer(kind=omp_sched_kind)omp_sched_auto - parameter(omp_sched_auto=4) - - integer(kind=omp_proc_bind_kind)omp_proc_bind_false - parameter(omp_proc_bind_false=0) - integer(kind=omp_proc_bind_kind)omp_proc_bind_true - parameter(omp_proc_bind_true=1) - integer(kind=omp_proc_bind_kind)omp_proc_bind_master - parameter(omp_proc_bind_master=2) - integer(kind=omp_proc_bind_kind)omp_proc_bind_close - parameter(omp_proc_bind_close=3) - integer(kind=omp_proc_bind_kind)omp_proc_bind_spread - parameter(omp_proc_bind_spread=4) - - integer(kind=omp_sync_hint_kind)omp_sync_hint_none - parameter(omp_sync_hint_none=0) - integer(kind=omp_sync_hint_kind)omp_sync_hint_uncontended - parameter(omp_sync_hint_uncontended=1) - integer(kind=omp_sync_hint_kind)omp_sync_hint_contended - parameter(omp_sync_hint_contended=2) - integer(kind=omp_sync_hint_kind)omp_sync_hint_nonspeculative - parameter(omp_sync_hint_nonspeculative=4) - integer(kind=omp_sync_hint_kind)omp_sync_hint_speculative - parameter(omp_sync_hint_speculative=8) - integer(kind=omp_lock_hint_kind)omp_lock_hint_none - parameter(omp_lock_hint_none=omp_sync_hint_none) - integer(kind=omp_lock_hint_kind)omp_lock_hint_uncontended - parameter(omp_lock_hint_uncontended=omp_sync_hint_uncontended) - integer(kind=omp_lock_hint_kind)omp_lock_hint_contended - parameter(omp_lock_hint_contended=omp_sync_hint_contended) - integer(kind=omp_lock_hint_kind)omp_lock_hint_nonspeculative - parameter(omp_lock_hint_nonspeculative=4) - integer(kind=omp_lock_hint_kind)omp_lock_hint_speculative - parameter(omp_lock_hint_speculative=omp_sync_hint_speculative) - integer(kind=omp_lock_hint_kind)kmp_lock_hint_hle - parameter(kmp_lock_hint_hle=65536) - 
integer(kind=omp_lock_hint_kind)kmp_lock_hint_rtm - parameter(kmp_lock_hint_rtm=131072) - integer(kind=omp_lock_hint_kind)kmp_lock_hint_adaptive - parameter(kmp_lock_hint_adaptive=262144) - - integer(kind=omp_control_tool_kind)omp_control_tool_start - parameter(omp_control_tool_start=1) - integer(kind=omp_control_tool_kind)omp_control_tool_pause - parameter(omp_control_tool_pause=2) - integer(kind=omp_control_tool_kind)omp_control_tool_flush - parameter(omp_control_tool_flush=3) - integer(kind=omp_control_tool_kind)omp_control_tool_end - parameter(omp_control_tool_end=4) - - integer(omp_control_tool_result_kind)omp_control_tool_notool - parameter(omp_control_tool_notool=-2) - integer(omp_control_tool_result_kind)omp_control_tool_nocallback - parameter(omp_control_tool_nocallback=-1) - integer(omp_control_tool_result_kind)omp_control_tool_success - parameter(omp_control_tool_success=0) - integer(omp_control_tool_result_kind)omp_control_tool_ignored - parameter(omp_control_tool_ignored=1) - - integer(kind=omp_allocator_kind)omp_null_allocator - parameter(omp_null_allocator=0) - integer(kind=omp_allocator_kind)omp_default_mem_alloc - parameter(omp_default_mem_alloc=1) - integer(kind=omp_allocator_kind)omp_large_cap_mem_alloc - parameter(omp_large_cap_mem_alloc=2) - integer(kind=omp_allocator_kind)omp_const_mem_alloc - parameter(omp_const_mem_alloc=3) - integer(kind=omp_allocator_kind)omp_high_bw_mem_alloc - parameter(omp_high_bw_mem_alloc=4) - integer(kind=omp_allocator_kind)omp_low_lat_mem_alloc - parameter(omp_low_lat_mem_alloc=5) - integer(kind=omp_allocator_kind)omp_cgroup_mem_alloc - parameter(omp_cgroup_mem_alloc=6) - integer(kind=omp_allocator_kind)omp_pteam_mem_alloc - parameter(omp_pteam_mem_alloc=7) - integer(kind=omp_allocator_kind)omp_thread_mem_alloc - parameter(omp_thread_mem_alloc=8) - - interface - -! *** -! *** omp_* entry points -! 
*** - - subroutine omp_set_num_threads(num_threads) bind(c) - import - integer (kind=omp_integer_kind), value :: num_threads - end subroutine omp_set_num_threads - - subroutine omp_set_dynamic(dynamic_threads) bind(c) - import - logical (kind=omp_logical_kind), value :: dynamic_threads - end subroutine omp_set_dynamic - - subroutine omp_set_nested(nested) bind(c) - import - logical (kind=omp_logical_kind), value :: nested - end subroutine omp_set_nested - - function omp_get_num_threads() bind(c) - import - integer (kind=omp_integer_kind) omp_get_num_threads - end function omp_get_num_threads - - function omp_get_max_threads() bind(c) - import - integer (kind=omp_integer_kind) omp_get_max_threads - end function omp_get_max_threads - - function omp_get_thread_num() bind(c) - import - integer (kind=omp_integer_kind) omp_get_thread_num - end function omp_get_thread_num - - function omp_get_num_procs() bind(c) - import - integer (kind=omp_integer_kind) omp_get_num_procs - end function omp_get_num_procs - - function omp_in_parallel() bind(c) - import - logical (kind=omp_logical_kind) omp_in_parallel - end function omp_in_parallel - - function omp_in_final() bind(c) - import - logical (kind=omp_logical_kind) omp_in_final - end function omp_in_final - - function omp_get_dynamic() bind(c) - import - logical (kind=omp_logical_kind) omp_get_dynamic - end function omp_get_dynamic - - function omp_get_nested() bind(c) - import - logical (kind=omp_logical_kind) omp_get_nested - end function omp_get_nested - - function omp_get_thread_limit() bind(c) - import - integer (kind=omp_integer_kind) omp_get_thread_limit - end function omp_get_thread_limit - - subroutine omp_set_max_active_levels(max_levels) bind(c) - import - integer (kind=omp_integer_kind), value :: max_levels - end subroutine omp_set_max_active_levels - - function omp_get_max_active_levels() bind(c) - import - integer (kind=omp_integer_kind) omp_get_max_active_levels - end function omp_get_max_active_levels - - function omp_get_level() bind(c) - import - integer (kind=omp_integer_kind) omp_get_level - end function omp_get_level - - function omp_get_active_level() bind(c) - import - integer (kind=omp_integer_kind) omp_get_active_level - end function omp_get_active_level - - function omp_get_ancestor_thread_num(level) bind(c) - import - integer (kind=omp_integer_kind) omp_get_ancestor_thread_num - integer (kind=omp_integer_kind), value :: level - end function omp_get_ancestor_thread_num - - function omp_get_team_size(level) bind(c) - import - integer (kind=omp_integer_kind) omp_get_team_size - integer (kind=omp_integer_kind), value :: level - end function omp_get_team_size - - subroutine omp_set_schedule(kind, chunk_size) bind(c) - import - integer (kind=omp_sched_kind), value :: kind - integer (kind=omp_integer_kind), value :: chunk_size - end subroutine omp_set_schedule - - subroutine omp_get_schedule(kind, chunk_size) bind(c) - import - integer (kind=omp_sched_kind) kind - integer (kind=omp_integer_kind) chunk_size - end subroutine omp_get_schedule - - function omp_get_proc_bind() bind(c) - import - integer (kind=omp_proc_bind_kind) omp_get_proc_bind - end function omp_get_proc_bind - - function omp_get_num_places() bind(c) - import - integer (kind=omp_integer_kind) omp_get_num_places - end function omp_get_num_places - - function omp_get_place_num_procs(place_num) bind(c) - import - integer (kind=omp_integer_kind), value :: place_num - integer (kind=omp_integer_kind) omp_get_place_num_procs - end function omp_get_place_num_procs - - 
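The omp_set_schedule / omp_get_schedule pair above round-trips the runtime schedule through the constants declared earlier in this header. A minimal hedged sketch (hypothetical calling code, not part of the interface block):

!           integer (kind=omp_sched_kind) kind
!           integer (kind=omp_integer_kind) chunk
!           call omp_set_schedule(omp_sched_dynamic, 4)
!           call omp_get_schedule(kind, chunk)
!     On return, kind holds omp_sched_dynamic (2) and chunk holds 4.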
subroutine omp_get_place_proc_ids(place_num, ids) bind(c) - import - integer (kind=omp_integer_kind), value :: place_num - integer (kind=omp_integer_kind) ids(*) - end subroutine omp_get_place_proc_ids - - function omp_get_place_num() bind(c) - import - integer (kind=omp_integer_kind) omp_get_place_num - end function omp_get_place_num - - function omp_get_partition_num_places() bind(c) - import - integer (kind=omp_integer_kind) omp_get_partition_num_places - end function omp_get_partition_num_places - - subroutine omp_get_partition_place_nums(place_nums) bind(c) - import - integer (kind=omp_integer_kind) place_nums(*) - end subroutine omp_get_partition_place_nums - - function omp_get_wtime() bind(c) - double precision omp_get_wtime - end function omp_get_wtime - - function omp_get_wtick() bind(c) - double precision omp_get_wtick - end function omp_get_wtick - - function omp_get_default_device() bind(c) - import - integer (kind=omp_integer_kind) omp_get_default_device - end function omp_get_default_device - - subroutine omp_set_default_device(device_num) bind(c) - import - integer (kind=omp_integer_kind), value :: device_num - end subroutine omp_set_default_device - - function omp_get_num_devices() bind(c) - import - integer (kind=omp_integer_kind) omp_get_num_devices - end function omp_get_num_devices - - function omp_get_num_teams() bind(c) - import - integer (kind=omp_integer_kind) omp_get_num_teams - end function omp_get_num_teams - - function omp_get_team_num() bind(c) - import - integer (kind=omp_integer_kind) omp_get_team_num - end function omp_get_team_num - - function omp_is_initial_device() bind(c) - import - logical (kind=omp_logical_kind) omp_is_initial_device - end function omp_is_initial_device - - function omp_get_initial_device() bind(c) - import - integer (kind=omp_integer_kind) omp_get_initial_device - end function omp_get_initial_device - - function omp_get_device_num() bind(c) - import - integer (kind=omp_integer_kind) omp_get_device_num - end function omp_get_device_num - - subroutine omp_init_lock(svar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_init_lock -!DIR$ ENDIF - import - integer (kind=omp_lock_kind) svar - end subroutine omp_init_lock - - subroutine omp_destroy_lock(svar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_destroy_lock -!DIR$ ENDIF - import - integer (kind=omp_lock_kind) svar - end subroutine omp_destroy_lock - - subroutine omp_set_lock(svar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_set_lock -!DIR$ ENDIF - import - integer (kind=omp_lock_kind) svar - end subroutine omp_set_lock - - subroutine omp_unset_lock(svar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_unset_lock -!DIR$ ENDIF - import - integer (kind=omp_lock_kind) svar - end subroutine omp_unset_lock - - function omp_test_lock(svar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_test_lock -!DIR$ ENDIF - import - logical (kind=omp_logical_kind) omp_test_lock - integer (kind=omp_lock_kind) svar - end function omp_test_lock - - subroutine omp_init_nest_lock(nvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_init_nest_lock -!DIR$ ENDIF - import - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_init_nest_lock - - subroutine omp_destroy_nest_lock(nvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: 
omp_destroy_nest_lock -!DIR$ ENDIF - import - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_destroy_nest_lock - - subroutine omp_set_nest_lock(nvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_set_nest_lock -!DIR$ ENDIF - import - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_set_nest_lock - - subroutine omp_unset_nest_lock(nvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_unset_nest_lock -!DIR$ ENDIF - import - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_unset_nest_lock - - function omp_test_nest_lock(nvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_test_nest_lock -!DIR$ ENDIF - import - integer (kind=omp_integer_kind) omp_test_nest_lock - integer (kind=omp_nest_lock_kind) nvar - end function omp_test_nest_lock - - function omp_get_max_task_priority() bind(c) - import - integer (kind=omp_integer_kind) omp_get_max_task_priority - end function omp_get_max_task_priority - - subroutine omp_set_default_allocator(svar) bind(c) - import - integer (kind=omp_allocator_kind), value :: svar - end subroutine omp_set_default_allocator - - function omp_get_default_allocator() bind(c) - import - integer (kind=omp_allocator_kind) omp_get_default_allocator - end function omp_get_default_allocator - - subroutine omp_set_affinity_format(format) - character (len=*) :: format - end subroutine omp_set_affinity_format - - function omp_get_affinity_format(buffer) - import - character (len=*) :: buffer - integer (kind=kmp_size_t_kind) :: omp_get_affinity_format - end function omp_get_affinity_format - - subroutine omp_display_affinity(format) - character (len=*) :: format - end subroutine omp_display_affinity - - function omp_capture_affinity(buffer, format) - import - character (len=*) :: format - character (len=*) :: buffer - integer (kind=kmp_size_t_kind) :: omp_capture_affinity - end function omp_capture_affinity - -! *** -! *** kmp_* entry points -! 
*** - - subroutine kmp_set_stacksize(size) bind(c) - import - integer (kind=omp_integer_kind), value :: size - end subroutine kmp_set_stacksize - - subroutine kmp_set_stacksize_s(size) bind(c) - import - integer (kind=kmp_size_t_kind), value :: size - end subroutine kmp_set_stacksize_s - - subroutine kmp_set_blocktime(msec) bind(c) - import - integer (kind=omp_integer_kind), value :: msec - end subroutine kmp_set_blocktime - - subroutine kmp_set_library_serial() bind(c) - end subroutine kmp_set_library_serial - - subroutine kmp_set_library_turnaround() bind(c) - end subroutine kmp_set_library_turnaround - - subroutine kmp_set_library_throughput() bind(c) - end subroutine kmp_set_library_throughput - - subroutine kmp_set_library(libnum) bind(c) - import - integer (kind=omp_integer_kind), value :: libnum - end subroutine kmp_set_library - - subroutine kmp_set_defaults(string) bind(c) - character string(*) - end subroutine kmp_set_defaults - - function kmp_get_stacksize() bind(c) - import - integer (kind=omp_integer_kind) kmp_get_stacksize - end function kmp_get_stacksize - - function kmp_get_stacksize_s() bind(c) - import - integer (kind=kmp_size_t_kind) kmp_get_stacksize_s - end function kmp_get_stacksize_s - - function kmp_get_blocktime() bind(c) - import - integer (kind=omp_integer_kind) kmp_get_blocktime - end function kmp_get_blocktime - - function kmp_get_library() bind(c) - import - integer (kind=omp_integer_kind) kmp_get_library - end function kmp_get_library - - subroutine kmp_set_disp_num_buffers(num) bind(c) - import - integer (kind=omp_integer_kind), value :: num - end subroutine kmp_set_disp_num_buffers - - function kmp_set_affinity(mask) bind(c) - import - integer (kind=omp_integer_kind) kmp_set_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity - - function kmp_get_affinity(mask) bind(c) - import - integer (kind=omp_integer_kind) kmp_get_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity - - function kmp_get_affinity_max_proc() bind(c) - import - integer (kind=omp_integer_kind) kmp_get_affinity_max_proc - end function kmp_get_affinity_max_proc - - subroutine kmp_create_affinity_mask(mask) bind(c) - import - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_create_affinity_mask - - subroutine kmp_destroy_affinity_mask(mask) bind(c) - import - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_destroy_affinity_mask - - function kmp_set_affinity_mask_proc(proc, mask) bind(c) - import - integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity_mask_proc - - function kmp_unset_affinity_mask_proc(proc, mask) bind(c) - import - integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_unset_affinity_mask_proc - - function kmp_get_affinity_mask_proc(proc, mask) bind(c) - import - integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity_mask_proc - - function kmp_malloc(size) bind(c) - import - integer (kind=kmp_pointer_kind) kmp_malloc - integer (kind=kmp_size_t_kind), value :: size - end function kmp_malloc - - function kmp_aligned_malloc(size, alignment) bind(c) - import - integer (kind=kmp_pointer_kind) kmp_aligned_malloc 
- integer (kind=kmp_size_t_kind), value :: size - integer (kind=kmp_size_t_kind), value :: alignment - end function kmp_aligned_malloc - - function kmp_calloc(nelem, elsize) bind(c) - import - integer (kind=kmp_pointer_kind) kmp_calloc - integer (kind=kmp_size_t_kind), value :: nelem - integer (kind=kmp_size_t_kind), value :: elsize - end function kmp_calloc - - function kmp_realloc(ptr, size) bind(c) - import - integer (kind=kmp_pointer_kind) kmp_realloc - integer (kind=kmp_pointer_kind), value :: ptr - integer (kind=kmp_size_t_kind), value :: size - end function kmp_realloc - - subroutine kmp_free(ptr) bind(c) - import - integer (kind=kmp_pointer_kind), value :: ptr - end subroutine kmp_free - - subroutine kmp_set_warnings_on() bind(c) - end subroutine kmp_set_warnings_on - - subroutine kmp_set_warnings_off() bind(c) - end subroutine kmp_set_warnings_off - - subroutine omp_init_lock_with_hint(svar, hint) bind(c) - import - integer (kind=omp_lock_kind) svar - integer (kind=omp_lock_hint_kind), value :: hint - end subroutine omp_init_lock_with_hint - - subroutine omp_init_nest_lock_with_hint(nvar, hint) bind(c) - import - integer (kind=omp_nest_lock_kind) nvar - integer (kind=omp_lock_hint_kind), value :: hint - end subroutine omp_init_nest_lock_with_hint - - function omp_control_tool(command, modifier, arg) bind(c) - import - integer (kind=omp_integer_kind) omp_control_tool - integer (kind=omp_control_tool_kind), value :: command - integer (kind=omp_control_tool_kind), value :: modifier - integer (kind=kmp_pointer_kind), optional :: arg - end function omp_control_tool - - end interface - -!DIR$ IF DEFINED (__INTEL_OFFLOAD) - -!DIR$ IF(__INTEL_COMPILER.LT.1900) -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_num_threads -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_dynamic -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_nested -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_threads -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_max_threads -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_thread_num -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_procs -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_in_parallel -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_in_final -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_dynamic -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_nested -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_thread_limit -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_max_active_levels -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_max_active_levels -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_level -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_active_level -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_ancestor_thread_num -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_team_size -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_schedule -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_schedule -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_proc_bind -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_wtime -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_wtick -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_default_device -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_default_device -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_is_initial_device -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_initial_device -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_devices -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_device_num -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_teams -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_team_num -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_destroy_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_unset_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_test_lock -!DIR$ 
ATTRIBUTES OFFLOAD:MIC :: omp_init_nest_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_destroy_nest_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_nest_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_unset_nest_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_test_nest_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_max_task_priority -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_affinity_format -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_affinity_format -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_display_affinity -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_capture_affinity -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_stacksize -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_stacksize_s -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_blocktime -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_serial -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_turnaround -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_throughput -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_defaults -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_stacksize -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_stacksize_s -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_blocktime -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_library -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_disp_num_buffers -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_affinity -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity_max_proc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_create_affinity_mask -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_destroy_affinity_mask -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_affinity_mask_proc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_unset_affinity_mask_proc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity_mask_proc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_malloc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_aligned_malloc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_calloc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_realloc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_free -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_warnings_on -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_warnings_off -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_lock_with_hint -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_nest_lock_with_hint -!DIR$ ENDIF - -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!$omp declare target(omp_set_num_threads ) -!$omp declare target(omp_set_dynamic ) -!$omp declare target(omp_set_nested ) -!$omp declare target(omp_get_num_threads ) -!$omp declare target(omp_get_max_threads ) -!$omp declare target(omp_get_thread_num ) -!$omp declare target(omp_get_num_procs ) -!$omp declare target(omp_in_parallel ) -!$omp declare target(omp_in_final ) -!$omp declare target(omp_get_dynamic ) -!$omp declare target(omp_get_nested ) -!$omp declare target(omp_get_thread_limit ) -!$omp declare target(omp_set_max_active_levels ) -!$omp declare target(omp_get_max_active_levels ) -!$omp declare target(omp_get_level ) -!$omp declare target(omp_get_active_level ) -!$omp declare target(omp_get_ancestor_thread_num ) -!$omp declare target(omp_get_team_size ) -!$omp declare target(omp_set_schedule ) -!$omp declare target(omp_get_schedule ) -!$omp declare target(omp_get_proc_bind ) -!$omp declare target(omp_get_wtime ) -!$omp declare target(omp_get_wtick ) -!$omp declare target(omp_get_default_device ) -!$omp declare target(omp_set_default_device ) -!$omp declare target(omp_is_initial_device ) -!$omp declare target(omp_get_initial_device ) -!$omp declare target(omp_get_num_devices ) -!$omp declare target(omp_get_device_num ) -!$omp declare target(omp_get_num_teams ) -!$omp declare target(omp_get_team_num ) -!$omp declare 
target(omp_init_lock ) -!$omp declare target(omp_destroy_lock ) -!$omp declare target(omp_set_lock ) -!$omp declare target(omp_unset_lock ) -!$omp declare target(omp_test_lock ) -!$omp declare target(omp_init_nest_lock ) -!$omp declare target(omp_destroy_nest_lock ) -!$omp declare target(omp_set_nest_lock ) -!$omp declare target(omp_unset_nest_lock ) -!$omp declare target(omp_test_nest_lock ) -!$omp declare target(omp_get_max_task_priority ) -!$omp declare target(omp_set_affinity_format ) -!$omp declare target(omp_get_affinity_format ) -!$omp declare target(omp_display_affinity ) -!$omp declare target(omp_capture_affinity ) -!$omp declare target(kmp_set_stacksize ) -!$omp declare target(kmp_set_stacksize_s ) -!$omp declare target(kmp_set_blocktime ) -!$omp declare target(kmp_set_library_serial ) -!$omp declare target(kmp_set_library_turnaround ) -!$omp declare target(kmp_set_library_throughput ) -!$omp declare target(kmp_set_library ) -!$omp declare target(kmp_set_defaults ) -!$omp declare target(kmp_get_stacksize ) -!$omp declare target(kmp_get_stacksize_s ) -!$omp declare target(kmp_get_blocktime ) -!$omp declare target(kmp_get_library ) -!$omp declare target(kmp_set_disp_num_buffers ) -!$omp declare target(kmp_set_affinity ) -!$omp declare target(kmp_get_affinity ) -!$omp declare target(kmp_get_affinity_max_proc ) -!$omp declare target(kmp_create_affinity_mask ) -!$omp declare target(kmp_destroy_affinity_mask ) -!$omp declare target(kmp_set_affinity_mask_proc ) -!$omp declare target(kmp_unset_affinity_mask_proc ) -!$omp declare target(kmp_get_affinity_mask_proc ) -!$omp declare target(kmp_malloc ) -!$omp declare target(kmp_aligned_malloc ) -!$omp declare target(kmp_calloc ) -!$omp declare target(kmp_realloc ) -!$omp declare target(kmp_free ) -!$omp declare target(kmp_set_warnings_on ) -!$omp declare target(kmp_set_warnings_off ) -!$omp declare target(omp_init_lock_with_hint ) -!$omp declare target(omp_init_nest_lock_with_hint ) -!DIR$ ENDIF -!DIR$ ENDIF Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/50/omp_lib.f90.var =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/50/omp_lib.f90.var (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/50/omp_lib.f90.var (nonexistent) @@ -1,597 +0,0 @@ -! include/50/omp_lib.f90.var - -! -!//===----------------------------------------------------------------------===// -!// -!// The LLVM Compiler Infrastructure -!// -!// This file is dual licensed under the MIT and the University of Illinois Open -!// Source Licenses. See LICENSE.txt for details. -!// -!//===----------------------------------------------------------------------===// -! 
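Where the fixed-form header above is pulled in with an include line, the omp_lib.f90.var template below provides the same interface as a proper Fortran module, so free-form code brings it in with a use statement instead. A minimal hypothetical sketch:

!     program query                  ! hypothetical consumer, not part of this file
!       use omp_lib
!       implicit none
!       print *, 'max threads: ', omp_get_max_threads()
!     end program query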
- - module omp_lib_kinds - - use, intrinsic :: iso_c_binding - - integer, parameter :: omp_integer_kind = c_int - integer, parameter :: omp_logical_kind = 4 - integer, parameter :: omp_real_kind = c_float - integer, parameter :: kmp_double_kind = c_double - integer, parameter :: omp_lock_kind = c_intptr_t - integer, parameter :: omp_nest_lock_kind = c_intptr_t - integer, parameter :: omp_sched_kind = omp_integer_kind - integer, parameter :: omp_proc_bind_kind = omp_integer_kind - integer, parameter :: kmp_pointer_kind = c_intptr_t - integer, parameter :: kmp_size_t_kind = c_size_t - integer, parameter :: kmp_affinity_mask_kind = c_intptr_t - integer, parameter :: kmp_cancel_kind = omp_integer_kind - integer, parameter :: omp_sync_hint_kind = omp_integer_kind - integer, parameter :: omp_lock_hint_kind = omp_sync_hint_kind - integer, parameter :: omp_control_tool_kind = omp_integer_kind - integer, parameter :: omp_control_tool_result_kind = omp_integer_kind - integer, parameter :: omp_allocator_kind = c_intptr_t - - end module omp_lib_kinds - - module omp_lib - - use omp_lib_kinds - - integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@ - integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@ - character(*) kmp_build_date - parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' ) - - integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1 - integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2 - integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3 - integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4 - - - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4 - - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_parallel = 1 - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_loop = 2 - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3 - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4 - - integer (kind=omp_sync_hint_kind), parameter :: omp_sync_hint_none = 0 - integer (kind=omp_sync_hint_kind), parameter :: omp_sync_hint_uncontended = 1 - integer (kind=omp_sync_hint_kind), parameter :: omp_sync_hint_contended = 2 - integer (kind=omp_sync_hint_kind), parameter :: omp_sync_hint_nonspeculative = 4 - integer (kind=omp_sync_hint_kind), parameter :: omp_sync_hint_speculative = 8 - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_none = omp_sync_hint_none - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_uncontended = omp_sync_hint_uncontended - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_contended = omp_sync_hint_contended - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_nonspeculative = omp_sync_hint_nonspeculative - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_speculative = omp_sync_hint_speculative - integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_hle = 65536 - integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_rtm = 131072 - integer 
(kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 262144 - - integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_start = 1 - integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_pause = 2 - integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_flush = 3 - integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_end = 4 - - integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_notool = -2 - integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_nocallback = -1 - integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_success = 0 - integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_ignored = 1 - - integer (kind=omp_allocator_kind), parameter :: omp_null_allocator = 0 - integer (kind=omp_allocator_kind), parameter :: omp_default_mem_alloc = 1 - integer (kind=omp_allocator_kind), parameter :: omp_large_cap_mem_alloc = 2 - integer (kind=omp_allocator_kind), parameter :: omp_const_mem_alloc = 3 - integer (kind=omp_allocator_kind), parameter :: omp_high_bw_mem_alloc = 4 - integer (kind=omp_allocator_kind), parameter :: omp_low_lat_mem_alloc = 5 - integer (kind=omp_allocator_kind), parameter :: omp_cgroup_mem_alloc = 6 - integer (kind=omp_allocator_kind), parameter :: omp_pteam_mem_alloc = 7 - integer (kind=omp_allocator_kind), parameter :: omp_thread_mem_alloc = 8 - - interface - -! *** -! *** omp_* entry points -! *** - - subroutine omp_set_num_threads(num_threads) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: num_threads - end subroutine omp_set_num_threads - - subroutine omp_set_dynamic(dynamic_threads) bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind), value :: dynamic_threads - end subroutine omp_set_dynamic - - subroutine omp_set_nested(nested) bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind), value :: nested - end subroutine omp_set_nested - - function omp_get_num_threads() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_threads - end function omp_get_num_threads - - function omp_get_max_threads() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_threads - end function omp_get_max_threads - - function omp_get_thread_num() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_thread_num - end function omp_get_thread_num - - function omp_get_num_procs() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_procs - end function omp_get_num_procs - - function omp_in_parallel() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_in_parallel - end function omp_in_parallel - - function omp_in_final() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_in_final - end function omp_in_final - - function omp_get_dynamic() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_get_dynamic - end function omp_get_dynamic - - function omp_get_nested() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_get_nested - end function omp_get_nested - - function omp_get_thread_limit() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_thread_limit - end function omp_get_thread_limit - - subroutine omp_set_max_active_levels(max_levels) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: max_levels - end subroutine omp_set_max_active_levels - - function omp_get_max_active_levels() bind(c) - use omp_lib_kinds - integer 
(kind=omp_integer_kind) omp_get_max_active_levels - end function omp_get_max_active_levels - - function omp_get_level() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_level - end function omp_get_level - - function omp_get_active_level() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_active_level - end function omp_get_active_level - - function omp_get_ancestor_thread_num(level) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_ancestor_thread_num - integer (kind=omp_integer_kind), value :: level - end function omp_get_ancestor_thread_num - - function omp_get_team_size(level) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_team_size - integer (kind=omp_integer_kind), value :: level - end function omp_get_team_size - - subroutine omp_set_schedule(kind, chunk_size) bind(c) - use omp_lib_kinds - integer (kind=omp_sched_kind), value :: kind - integer (kind=omp_integer_kind), value :: chunk_size - end subroutine omp_set_schedule - - subroutine omp_get_schedule(kind, chunk_size) bind(c) - use omp_lib_kinds - integer (kind=omp_sched_kind) kind - integer (kind=omp_integer_kind) chunk_size - end subroutine omp_get_schedule - - function omp_get_proc_bind() bind(c) - use omp_lib_kinds - integer (kind=omp_proc_bind_kind) omp_get_proc_bind - end function omp_get_proc_bind - - function omp_get_num_places() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_places - end function omp_get_num_places - - function omp_get_place_num_procs(place_num) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: place_num - integer (kind=omp_integer_kind) omp_get_place_num_procs - end function omp_get_place_num_procs - - subroutine omp_get_place_proc_ids(place_num, ids) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: place_num - integer (kind=omp_integer_kind) ids(*) - end subroutine omp_get_place_proc_ids - - function omp_get_place_num() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_place_num - end function omp_get_place_num - - function omp_get_partition_num_places() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_partition_num_places - end function omp_get_partition_num_places - - subroutine omp_get_partition_place_nums(place_nums) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) place_nums(*) - end subroutine omp_get_partition_place_nums - - function omp_get_wtime() bind(c) - use omp_lib_kinds - real (kind=kmp_double_kind) omp_get_wtime - end function omp_get_wtime - - function omp_get_wtick() bind(c) - use omp_lib_kinds - real (kind=kmp_double_kind) omp_get_wtick - end function omp_get_wtick - - function omp_get_default_device() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_default_device - end function omp_get_default_device - - subroutine omp_set_default_device(device_num) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: device_num - end subroutine omp_set_default_device - - function omp_get_num_devices() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_devices - end function omp_get_num_devices - - function omp_get_num_teams() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_teams - end function omp_get_num_teams - - function omp_get_team_num() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_team_num - end function omp_get_team_num - - function omp_get_cancellation() bind(c) - use 
omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_cancellation - end function omp_get_cancellation - - function omp_is_initial_device() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_is_initial_device - end function omp_is_initial_device - - function omp_get_initial_device() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_initial_device - end function omp_get_initial_device - - function omp_get_device_num() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_device_num - end function omp_get_device_num - - subroutine omp_init_lock(svar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_init_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) svar - end subroutine omp_init_lock - - subroutine omp_destroy_lock(svar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_destroy_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) svar - end subroutine omp_destroy_lock - - subroutine omp_set_lock(svar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_set_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) svar - end subroutine omp_set_lock - - subroutine omp_unset_lock(svar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_unset_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) svar - end subroutine omp_unset_lock - - function omp_test_lock(svar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_test_lock -!DIR$ ENDIF - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_test_lock - integer (kind=omp_lock_kind) svar - end function omp_test_lock - - subroutine omp_init_nest_lock(nvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_init_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_init_nest_lock - - subroutine omp_destroy_nest_lock(nvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_destroy_nest_lock - - subroutine omp_set_nest_lock(nvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_set_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_set_nest_lock - - subroutine omp_unset_nest_lock(nvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_unset_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_unset_nest_lock - - function omp_test_nest_lock(nvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_test_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_test_nest_lock - integer (kind=omp_nest_lock_kind) nvar - end function omp_test_nest_lock - - function omp_get_max_task_priority() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_task_priority - end function omp_get_max_task_priority - - subroutine omp_set_default_allocator(svar) bind(c) - use omp_lib_kinds - integer (kind=omp_allocator_kind), value :: svar - end subroutine omp_set_default_allocator - - function omp_get_default_allocator() bind(c) - use omp_lib_kinds - integer (kind=omp_allocator_kind) omp_get_default_allocator 
- end function omp_get_default_allocator - - subroutine omp_set_affinity_format(format) - character (len=*) :: format - end subroutine omp_set_affinity_format - - function omp_get_affinity_format(buffer) - use omp_lib_kinds - character (len=*) :: buffer - integer (kind=kmp_size_t_kind) :: omp_get_affinity_format - end function omp_get_affinity_format - - subroutine omp_display_affinity(format) - character (len=*) :: format - end subroutine omp_display_affinity - - function omp_capture_affinity(buffer, format) - use omp_lib_kinds - character (len=*) :: format - character (len=*) :: buffer - integer (kind=kmp_size_t_kind) :: omp_capture_affinity - end function omp_capture_affinity - -! *** -! *** kmp_* entry points -! *** - - subroutine kmp_set_stacksize(size) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: size - end subroutine kmp_set_stacksize - - subroutine kmp_set_stacksize_s(size) bind(c) - use omp_lib_kinds - integer (kind=kmp_size_t_kind), value :: size - end subroutine kmp_set_stacksize_s - - subroutine kmp_set_blocktime(msec) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: msec - end subroutine kmp_set_blocktime - - subroutine kmp_set_library_serial() bind(c) - end subroutine kmp_set_library_serial - - subroutine kmp_set_library_turnaround() bind(c) - end subroutine kmp_set_library_turnaround - - subroutine kmp_set_library_throughput() bind(c) - end subroutine kmp_set_library_throughput - - subroutine kmp_set_library(libnum) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: libnum - end subroutine kmp_set_library - - subroutine kmp_set_defaults(string) bind(c) - use, intrinsic :: iso_c_binding - character (kind=c_char) :: string(*) - end subroutine kmp_set_defaults - - function kmp_get_stacksize() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_stacksize - end function kmp_get_stacksize - - function kmp_get_stacksize_s() bind(c) - use omp_lib_kinds - integer (kind=kmp_size_t_kind) kmp_get_stacksize_s - end function kmp_get_stacksize_s - - function kmp_get_blocktime() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_blocktime - end function kmp_get_blocktime - - function kmp_get_library() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_library - end function kmp_get_library - - subroutine kmp_set_disp_num_buffers(num) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: num - end subroutine kmp_set_disp_num_buffers - - function kmp_set_affinity(mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_set_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity - - function kmp_get_affinity(mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity - - function kmp_get_affinity_max_proc() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity_max_proc - end function kmp_get_affinity_max_proc - - subroutine kmp_create_affinity_mask(mask) bind(c) - use omp_lib_kinds - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_create_affinity_mask - - subroutine kmp_destroy_affinity_mask(mask) bind(c) - use omp_lib_kinds - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_destroy_affinity_mask - - function kmp_set_affinity_mask_proc(proc, mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc - integer 
(kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity_mask_proc - - function kmp_unset_affinity_mask_proc(proc, mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_unset_affinity_mask_proc - - function kmp_get_affinity_mask_proc(proc, mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity_mask_proc - - function kmp_malloc(size) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_malloc - integer (kind=kmp_size_t_kind), value :: size - end function kmp_malloc - - function kmp_aligned_malloc(size, alignment) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_aligned_malloc - integer (kind=kmp_size_t_kind), value :: size - integer (kind=kmp_size_t_kind), value :: alignment - end function kmp_aligned_malloc - - function kmp_calloc(nelem, elsize) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_calloc - integer (kind=kmp_size_t_kind), value :: nelem - integer (kind=kmp_size_t_kind), value :: elsize - end function kmp_calloc - - function kmp_realloc(ptr, size) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_realloc - integer (kind=kmp_pointer_kind), value :: ptr - integer (kind=kmp_size_t_kind), value :: size - end function kmp_realloc - - subroutine kmp_free(ptr) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind), value :: ptr - end subroutine kmp_free - - subroutine kmp_set_warnings_on() bind(c) - end subroutine kmp_set_warnings_on - - subroutine kmp_set_warnings_off() bind(c) - end subroutine kmp_set_warnings_off - - function kmp_get_cancellation_status(cancelkind) bind(c) - use omp_lib_kinds - integer (kind=kmp_cancel_kind), value :: cancelkind - logical (kind=omp_logical_kind) kmp_get_cancellation_status - end function kmp_get_cancellation_status - - subroutine omp_init_lock_with_hint(svar, hint) bind(c) - use omp_lib_kinds - integer (kind=omp_lock_kind) svar - integer (kind=omp_lock_hint_kind), value :: hint - end subroutine omp_init_lock_with_hint - - subroutine omp_init_nest_lock_with_hint(nvar, hint) bind(c) - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) nvar - integer (kind=omp_lock_hint_kind), value :: hint - end subroutine omp_init_nest_lock_with_hint - - function omp_control_tool(command, modifier, arg) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_control_tool - integer (kind=omp_control_tool_kind), value :: command - integer (kind=omp_control_tool_kind), value :: modifier - integer (kind=kmp_pointer_kind), optional :: arg - end function omp_control_tool - - end interface - - end module omp_lib Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/50/omp.h.var =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/50/omp.h.var (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/50/omp.h.var (nonexistent) @@ -1,265 +0,0 @@ -/* - * include/50/omp.h.var - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// 
Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#ifndef __OMP_H -# define __OMP_H - -# define KMP_VERSION_MAJOR @LIBOMP_VERSION_MAJOR@ -# define KMP_VERSION_MINOR @LIBOMP_VERSION_MINOR@ -# define KMP_VERSION_BUILD @LIBOMP_VERSION_BUILD@ -# define KMP_BUILD_DATE "@LIBOMP_BUILD_DATE@" - -# ifdef __cplusplus - extern "C" { -# endif - -# define omp_set_affinity_format ompc_set_affinity_format -# define omp_get_affinity_format ompc_get_affinity_format -# define omp_display_affinity ompc_display_affinity -# define omp_capture_affinity ompc_capture_affinity - -# if defined(_WIN32) -# define __KAI_KMPC_CONVENTION __cdecl -# ifndef __KMP_IMP -# define __KMP_IMP __declspec(dllimport) -# endif -# else -# define __KAI_KMPC_CONVENTION -# ifndef __KMP_IMP -# define __KMP_IMP -# endif -# endif - - /* schedule kind constants */ - typedef enum omp_sched_t { - omp_sched_static = 1, - omp_sched_dynamic = 2, - omp_sched_guided = 3, - omp_sched_auto = 4 - } omp_sched_t; - - /* set API functions */ - extern void __KAI_KMPC_CONVENTION omp_set_num_threads (int); - extern void __KAI_KMPC_CONVENTION omp_set_dynamic (int); - extern void __KAI_KMPC_CONVENTION omp_set_nested (int); - extern void __KAI_KMPC_CONVENTION omp_set_max_active_levels (int); - extern void __KAI_KMPC_CONVENTION omp_set_schedule (omp_sched_t, int); - - /* query API functions */ - extern int __KAI_KMPC_CONVENTION omp_get_num_threads (void); - extern int __KAI_KMPC_CONVENTION omp_get_dynamic (void); - extern int __KAI_KMPC_CONVENTION omp_get_nested (void); - extern int __KAI_KMPC_CONVENTION omp_get_max_threads (void); - extern int __KAI_KMPC_CONVENTION omp_get_thread_num (void); - extern int __KAI_KMPC_CONVENTION omp_get_num_procs (void); - extern int __KAI_KMPC_CONVENTION omp_in_parallel (void); - extern int __KAI_KMPC_CONVENTION omp_in_final (void); - extern int __KAI_KMPC_CONVENTION omp_get_active_level (void); - extern int __KAI_KMPC_CONVENTION omp_get_level (void); - extern int __KAI_KMPC_CONVENTION omp_get_ancestor_thread_num (int); - extern int __KAI_KMPC_CONVENTION omp_get_team_size (int); - extern int __KAI_KMPC_CONVENTION omp_get_thread_limit (void); - extern int __KAI_KMPC_CONVENTION omp_get_max_active_levels (void); - extern void __KAI_KMPC_CONVENTION omp_get_schedule (omp_sched_t *, int *); - extern int __KAI_KMPC_CONVENTION omp_get_max_task_priority (void); - - /* lock API functions */ - typedef struct omp_lock_t { - void * _lk; - } omp_lock_t; - - extern void __KAI_KMPC_CONVENTION omp_init_lock (omp_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_set_lock (omp_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_unset_lock (omp_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_destroy_lock (omp_lock_t *); - extern int __KAI_KMPC_CONVENTION omp_test_lock (omp_lock_t *); - - /* nested lock API functions */ - typedef struct omp_nest_lock_t { - void * _lk; - } omp_nest_lock_t; - - extern void __KAI_KMPC_CONVENTION omp_init_nest_lock (omp_nest_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_set_nest_lock (omp_nest_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_unset_nest_lock (omp_nest_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_destroy_nest_lock (omp_nest_lock_t *); - extern int __KAI_KMPC_CONVENTION omp_test_nest_lock (omp_nest_lock_t *); - - /* OpenMP 5.0 Synchronization hints*/ - typedef enum omp_sync_hint_t { - omp_sync_hint_none = 0, - omp_lock_hint_none = omp_sync_hint_none, - omp_sync_hint_uncontended = 1, - 
omp_lock_hint_uncontended = omp_sync_hint_uncontended, - omp_sync_hint_contended = (1<<1), - omp_lock_hint_contended = omp_sync_hint_contended, - omp_sync_hint_nonspeculative = (1<<2), - omp_lock_hint_nonspeculative = omp_sync_hint_nonspeculative, - omp_sync_hint_speculative = (1<<3), - omp_lock_hint_speculative = omp_sync_hint_speculative, - kmp_lock_hint_hle = (1<<16), - kmp_lock_hint_rtm = (1<<17), - kmp_lock_hint_adaptive = (1<<18) - } omp_sync_hint_t; - - /* lock hint type for dynamic user lock */ - typedef omp_sync_hint_t omp_lock_hint_t; - - /* hinted lock initializers */ - extern void __KAI_KMPC_CONVENTION omp_init_lock_with_hint(omp_lock_t *, omp_lock_hint_t); - extern void __KAI_KMPC_CONVENTION omp_init_nest_lock_with_hint(omp_nest_lock_t *, omp_lock_hint_t); - - /* time API functions */ - extern double __KAI_KMPC_CONVENTION omp_get_wtime (void); - extern double __KAI_KMPC_CONVENTION omp_get_wtick (void); - - /* OpenMP 4.0 */ - extern int __KAI_KMPC_CONVENTION omp_get_default_device (void); - extern void __KAI_KMPC_CONVENTION omp_set_default_device (int); - extern int __KAI_KMPC_CONVENTION omp_is_initial_device (void); - extern int __KAI_KMPC_CONVENTION omp_get_num_devices (void); - extern int __KAI_KMPC_CONVENTION omp_get_num_teams (void); - extern int __KAI_KMPC_CONVENTION omp_get_team_num (void); - extern int __KAI_KMPC_CONVENTION omp_get_cancellation (void); - -# include <stdlib.h> - /* OpenMP 4.5 */ - extern int __KAI_KMPC_CONVENTION omp_get_initial_device (void); - extern void* __KAI_KMPC_CONVENTION omp_target_alloc(size_t, int); - extern void __KAI_KMPC_CONVENTION omp_target_free(void *, int); - extern int __KAI_KMPC_CONVENTION omp_target_is_present(void *, int); - extern int __KAI_KMPC_CONVENTION omp_target_memcpy(void *, void *, size_t, size_t, size_t, int, int); - extern int __KAI_KMPC_CONVENTION omp_target_memcpy_rect(void *, void *, size_t, int, const size_t *, - const size_t *, const size_t *, const size_t *, const size_t *, int, int); - extern int __KAI_KMPC_CONVENTION omp_target_associate_ptr(void *, void *, size_t, size_t, int); - extern int __KAI_KMPC_CONVENTION omp_target_disassociate_ptr(void *, int); - - /* OpenMP 5.0 */ - extern int __KAI_KMPC_CONVENTION omp_get_device_num (void); - - /* kmp API functions */ - extern int __KAI_KMPC_CONVENTION kmp_get_stacksize (void); - extern void __KAI_KMPC_CONVENTION kmp_set_stacksize (int); - extern size_t __KAI_KMPC_CONVENTION kmp_get_stacksize_s (void); - extern void __KAI_KMPC_CONVENTION kmp_set_stacksize_s (size_t); - extern int __KAI_KMPC_CONVENTION kmp_get_blocktime (void); - extern int __KAI_KMPC_CONVENTION kmp_get_library (void); - extern void __KAI_KMPC_CONVENTION kmp_set_blocktime (int); - extern void __KAI_KMPC_CONVENTION kmp_set_library (int); - extern void __KAI_KMPC_CONVENTION kmp_set_library_serial (void); - extern void __KAI_KMPC_CONVENTION kmp_set_library_turnaround (void); - extern void __KAI_KMPC_CONVENTION kmp_set_library_throughput (void); - extern void __KAI_KMPC_CONVENTION kmp_set_defaults (char const *); - extern void __KAI_KMPC_CONVENTION kmp_set_disp_num_buffers (int); - - /* Intel affinity API */ - typedef void * kmp_affinity_mask_t; - - extern int __KAI_KMPC_CONVENTION kmp_set_affinity (kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_get_affinity (kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_get_affinity_max_proc (void); - extern void __KAI_KMPC_CONVENTION kmp_create_affinity_mask (kmp_affinity_mask_t *); - extern void __KAI_KMPC_CONVENTION kmp_destroy_affinity_mask
(kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_set_affinity_mask_proc (int, kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_unset_affinity_mask_proc (int, kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_get_affinity_mask_proc (int, kmp_affinity_mask_t *); - - /* OpenMP 4.0 affinity API */ - typedef enum omp_proc_bind_t { - omp_proc_bind_false = 0, - omp_proc_bind_true = 1, - omp_proc_bind_master = 2, - omp_proc_bind_close = 3, - omp_proc_bind_spread = 4 - } omp_proc_bind_t; - - extern omp_proc_bind_t __KAI_KMPC_CONVENTION omp_get_proc_bind (void); - - /* OpenMP 4.5 affinity API */ - extern int __KAI_KMPC_CONVENTION omp_get_num_places (void); - extern int __KAI_KMPC_CONVENTION omp_get_place_num_procs (int); - extern void __KAI_KMPC_CONVENTION omp_get_place_proc_ids (int, int *); - extern int __KAI_KMPC_CONVENTION omp_get_place_num (void); - extern int __KAI_KMPC_CONVENTION omp_get_partition_num_places (void); - extern void __KAI_KMPC_CONVENTION omp_get_partition_place_nums (int *); - - extern void * __KAI_KMPC_CONVENTION kmp_malloc (size_t); - extern void * __KAI_KMPC_CONVENTION kmp_aligned_malloc (size_t, size_t); - extern void * __KAI_KMPC_CONVENTION kmp_calloc (size_t, size_t); - extern void * __KAI_KMPC_CONVENTION kmp_realloc (void *, size_t); - extern void __KAI_KMPC_CONVENTION kmp_free (void *); - - extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void); - extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void); - - /* OpenMP 5.0 Tool Control */ - typedef enum omp_control_tool_result_t { - omp_control_tool_notool = -2, - omp_control_tool_nocallback = -1, - omp_control_tool_success = 0, - omp_control_tool_ignored = 1 - } omp_control_tool_result_t; - - typedef enum omp_control_tool_t { - omp_control_tool_start = 1, - omp_control_tool_pause = 2, - omp_control_tool_flush = 3, - omp_control_tool_end = 4 - } omp_control_tool_t; - - extern int __KAI_KMPC_CONVENTION omp_control_tool(int, int, void*); - - /* OpenMP 5.0 Memory Management */ - typedef void *omp_allocator_t; - extern __KMP_IMP const omp_allocator_t *OMP_NULL_ALLOCATOR; - extern __KMP_IMP const omp_allocator_t *omp_default_mem_alloc; - extern __KMP_IMP const omp_allocator_t *omp_large_cap_mem_alloc; - extern __KMP_IMP const omp_allocator_t *omp_const_mem_alloc; - extern __KMP_IMP const omp_allocator_t *omp_high_bw_mem_alloc; - extern __KMP_IMP const omp_allocator_t *omp_low_lat_mem_alloc; - extern __KMP_IMP const omp_allocator_t *omp_cgroup_mem_alloc; - extern __KMP_IMP const omp_allocator_t *omp_pteam_mem_alloc; - extern __KMP_IMP const omp_allocator_t *omp_thread_mem_alloc; - - extern void __KAI_KMPC_CONVENTION omp_set_default_allocator(const omp_allocator_t *); - extern const omp_allocator_t * __KAI_KMPC_CONVENTION omp_get_default_allocator(void); -#ifdef __cplusplus - extern void *__KAI_KMPC_CONVENTION omp_alloc(size_t size, const omp_allocator_t *allocator = OMP_NULL_ALLOCATOR); - extern void __KAI_KMPC_CONVENTION omp_free(void * ptr, const omp_allocator_t *allocator = OMP_NULL_ALLOCATOR); -#else - extern void *__KAI_KMPC_CONVENTION omp_alloc(size_t size, const omp_allocator_t *allocator); - extern void __KAI_KMPC_CONVENTION omp_free(void *ptr, const omp_allocator_t *allocator); -#endif - - /* OpenMP 5.0 Affinity Format */ - extern void __KAI_KMPC_CONVENTION omp_set_affinity_format(char const *); - extern size_t __KAI_KMPC_CONVENTION omp_get_affinity_format(char *, size_t); - extern void __KAI_KMPC_CONVENTION omp_display_affinity(char const *); - extern size_t 
__KAI_KMPC_CONVENTION omp_capture_affinity(char *, size_t, char const *); - -# undef __KAI_KMPC_CONVENTION -# undef __KMP_IMP - - /* Warning: - The following typedefs are not standard, deprecated and will be removed in a future release. - */ - typedef int omp_int_t; - typedef double omp_wtime_t; - -# ifdef __cplusplus - } -# endif - -#endif /* __OMP_H */ Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/45/omp_lib.f.var =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/45/omp_lib.f.var (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/45/omp_lib.f.var (nonexistent) @@ -1,861 +0,0 @@ -! include/45/omp_lib.f.var - -! -!//===----------------------------------------------------------------------===// -!// -!// The LLVM Compiler Infrastructure -!// -!// This file is dual licensed under the MIT and the University of Illinois Open -!// Source Licenses. See LICENSE.txt for details. -!// -!//===----------------------------------------------------------------------===// -! - -!*** -!*** Some of the directives for the following routine extend past column 72, -!*** so process this file in 132-column mode. -!*** - -!dec$ fixedformlinesize:132 - - module omp_lib_kinds - - integer, parameter :: omp_integer_kind = 4 - integer, parameter :: omp_logical_kind = 4 - integer, parameter :: omp_real_kind = 4 - integer, parameter :: omp_lock_kind = int_ptr_kind() - integer, parameter :: omp_nest_lock_kind = int_ptr_kind() - integer, parameter :: omp_sched_kind = omp_integer_kind - integer, parameter :: omp_proc_bind_kind = omp_integer_kind - integer, parameter :: kmp_pointer_kind = int_ptr_kind() - integer, parameter :: kmp_size_t_kind = int_ptr_kind() - integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind() - integer, parameter :: kmp_cancel_kind = omp_integer_kind - integer, parameter :: omp_lock_hint_kind = omp_integer_kind - - end module omp_lib_kinds - - module omp_lib - - use omp_lib_kinds - - integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@ - character(*), parameter :: kmp_build_date = '@LIBOMP_BUILD_DATE@' - integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@ - - integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1 - integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2 - integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3 - integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4 - - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4 - - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_parallel = 1 - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_loop = 2 - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3 - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4 - - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_none = 0 - integer (kind=omp_lock_hint_kind), parameter :: 
omp_lock_hint_uncontended = 1 - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_contended = 2 - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_nonspeculative = 4 - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_speculative = 8 - integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_hle = 65536 - integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_rtm = 131072 - integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 262144 - - interface - -! *** -! *** omp_* entry points -! *** - - subroutine omp_set_num_threads(num_threads) - use omp_lib_kinds - integer (kind=omp_integer_kind) num_threads - end subroutine omp_set_num_threads - - subroutine omp_set_dynamic(dynamic_threads) - use omp_lib_kinds - logical (kind=omp_logical_kind) dynamic_threads - end subroutine omp_set_dynamic - - subroutine omp_set_nested(nested) - use omp_lib_kinds - logical (kind=omp_logical_kind) nested - end subroutine omp_set_nested - - function omp_get_num_threads() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_threads - end function omp_get_num_threads - - function omp_get_max_threads() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_threads - end function omp_get_max_threads - - function omp_get_thread_num() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_thread_num - end function omp_get_thread_num - - function omp_get_num_procs() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_procs - end function omp_get_num_procs - - function omp_in_parallel() - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_in_parallel - end function omp_in_parallel - - function omp_in_final() - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_in_final - end function omp_in_final - - function omp_get_dynamic() - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_get_dynamic - end function omp_get_dynamic - - function omp_get_nested() - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_get_nested - end function omp_get_nested - - function omp_get_thread_limit() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_thread_limit - end function omp_get_thread_limit - - subroutine omp_set_max_active_levels(max_levels) - use omp_lib_kinds - integer (kind=omp_integer_kind) max_levels - end subroutine omp_set_max_active_levels - - function omp_get_max_active_levels() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_active_levels - end function omp_get_max_active_levels - - function omp_get_level() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_level - end function omp_get_level - - function omp_get_active_level() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_active_level - end function omp_get_active_level - - function omp_get_ancestor_thread_num(level) - use omp_lib_kinds - integer (kind=omp_integer_kind) level - integer (kind=omp_integer_kind) omp_get_ancestor_thread_num - end function omp_get_ancestor_thread_num - - function omp_get_team_size(level) - use omp_lib_kinds - integer (kind=omp_integer_kind) level - integer (kind=omp_integer_kind) omp_get_team_size - end function omp_get_team_size - - subroutine omp_set_schedule(kind, chunk_size) - use omp_lib_kinds - integer (kind=omp_sched_kind) kind - integer (kind=omp_integer_kind) chunk_size - end subroutine omp_set_schedule - - subroutine omp_get_schedule(kind, chunk_size) - use omp_lib_kinds - integer (kind=omp_sched_kind) kind - integer (kind=omp_integer_kind) chunk_size 
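!       (illustrative sketch, not from the original file: a typical
!       pairing of the schedule entry points declared here; "kind" and
!       "chunk" are hypothetical locals)
!         integer (kind=omp_sched_kind) kind
!         integer (kind=omp_integer_kind) chunk
!         kind = omp_sched_dynamic
!         chunk = 4
!         call omp_set_schedule(kind, chunk)
!         call omp_get_schedule(kind, chunk)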
- end subroutine omp_get_schedule - - function omp_get_proc_bind() - use omp_lib_kinds - integer (kind=omp_proc_bind_kind) omp_get_proc_bind - end function omp_get_proc_bind - - function omp_get_num_places() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_places - end function omp_get_num_places - - function omp_get_place_num_procs(place_num) - use omp_lib_kinds - integer (kind=omp_integer_kind) place_num - integer (kind=omp_integer_kind) omp_get_place_num_procs - end function omp_get_place_num_procs - - subroutine omp_get_place_proc_ids(place_num, ids) - use omp_lib_kinds - integer (kind=omp_integer_kind) place_num - integer (kind=omp_integer_kind) ids(*) - end subroutine omp_get_place_proc_ids - - function omp_get_place_num() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_place_num - end function omp_get_place_num - - function omp_get_partition_num_places() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_partition_num_places - end function omp_get_partition_num_places - - subroutine omp_get_partition_place_nums(place_nums) - use omp_lib_kinds - integer (kind=omp_integer_kind) place_nums(*) - end subroutine omp_get_partition_place_nums - - function omp_get_wtime() - double precision omp_get_wtime - end function omp_get_wtime - - function omp_get_wtick () - double precision omp_get_wtick - end function omp_get_wtick - - function omp_get_default_device() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_default_device - end function omp_get_default_device - - subroutine omp_set_default_device(device_num) - use omp_lib_kinds - integer (kind=omp_integer_kind) device_num - end subroutine omp_set_default_device - - function omp_get_num_devices() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_devices - end function omp_get_num_devices - - function omp_get_num_teams() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_teams - end function omp_get_num_teams - - function omp_get_team_num() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_team_num - end function omp_get_team_num - - function omp_get_cancellation() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_cancellation - end function omp_get_cancellation - - function omp_is_initial_device() - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_is_initial_device - end function omp_is_initial_device - - function omp_get_initial_device() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_initial_device - end function omp_get_initial_device - - subroutine omp_init_lock(svar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_init_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) svar - end subroutine omp_init_lock - - subroutine omp_destroy_lock(svar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_destroy_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) svar - end subroutine omp_destroy_lock - - subroutine omp_set_lock(svar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_set_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) svar - end subroutine omp_set_lock - - subroutine omp_unset_lock(svar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_unset_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) svar - end subroutine omp_unset_lock - - function omp_test_lock(svar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes 
known_intrinsic :: omp_test_lock -!DIR$ ENDIF - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_test_lock - integer (kind=omp_lock_kind) svar - end function omp_test_lock - - subroutine omp_init_nest_lock(nvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_init_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_init_nest_lock - - subroutine omp_destroy_nest_lock(nvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_destroy_nest_lock - - subroutine omp_set_nest_lock(nvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_set_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_set_nest_lock - - subroutine omp_unset_nest_lock(nvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_unset_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_unset_nest_lock - - function omp_test_nest_lock(nvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_test_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_test_nest_lock - integer (kind=omp_nest_lock_kind) nvar - end function omp_test_nest_lock - - function omp_get_max_task_priority() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_task_priority - end function omp_get_max_task_priority - -! *** -! *** kmp_* entry points -! *** - - subroutine kmp_set_stacksize(size) - use omp_lib_kinds - integer (kind=omp_integer_kind) size - end subroutine kmp_set_stacksize - - subroutine kmp_set_stacksize_s(size) - use omp_lib_kinds - integer (kind=kmp_size_t_kind) size - end subroutine kmp_set_stacksize_s - - subroutine kmp_set_blocktime(msec) - use omp_lib_kinds - integer (kind=omp_integer_kind) msec - end subroutine kmp_set_blocktime - - subroutine kmp_set_library_serial() - end subroutine kmp_set_library_serial - - subroutine kmp_set_library_turnaround() - end subroutine kmp_set_library_turnaround - - subroutine kmp_set_library_throughput() - end subroutine kmp_set_library_throughput - - subroutine kmp_set_library(libnum) - use omp_lib_kinds - integer (kind=omp_integer_kind) libnum - end subroutine kmp_set_library - - subroutine kmp_set_defaults(string) - character*(*) string - end subroutine kmp_set_defaults - - function kmp_get_stacksize() - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_stacksize - end function kmp_get_stacksize - - function kmp_get_stacksize_s() - use omp_lib_kinds - integer (kind=kmp_size_t_kind) kmp_get_stacksize_s - end function kmp_get_stacksize_s - - function kmp_get_blocktime() - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_blocktime - end function kmp_get_blocktime - - function kmp_get_library() - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_library - end function kmp_get_library - - subroutine kmp_set_disp_num_buffers(num) - use omp_lib_kinds - integer (kind=omp_integer_kind) num - end subroutine kmp_set_disp_num_buffers - - function kmp_set_affinity(mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_set_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity - - function kmp_get_affinity(mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity - integer 
(kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity - - function kmp_get_affinity_max_proc() - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity_max_proc - end function kmp_get_affinity_max_proc - - subroutine kmp_create_affinity_mask(mask) - use omp_lib_kinds - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_create_affinity_mask - - subroutine kmp_destroy_affinity_mask(mask) - use omp_lib_kinds - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_destroy_affinity_mask - - function kmp_set_affinity_mask_proc(proc, mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc - integer (kind=omp_integer_kind) proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity_mask_proc - - function kmp_unset_affinity_mask_proc(proc, mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc - integer (kind=omp_integer_kind) proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_unset_affinity_mask_proc - - function kmp_get_affinity_mask_proc(proc, mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc - integer (kind=omp_integer_kind) proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity_mask_proc - - function kmp_malloc(size) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_malloc - integer (kind=kmp_size_t_kind) size - end function kmp_malloc - - function kmp_aligned_malloc(size, alignment) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_aligned_malloc - integer (kind=kmp_size_t_kind) size - integer (kind=kmp_size_t_kind) alignment - end function kmp_aligned_malloc - - function kmp_calloc(nelem, elsize) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_calloc - integer (kind=kmp_size_t_kind) nelem - integer (kind=kmp_size_t_kind) elsize - end function kmp_calloc - - function kmp_realloc(ptr, size) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_realloc - integer (kind=kmp_pointer_kind) ptr - integer (kind=kmp_size_t_kind) size - end function kmp_realloc - - subroutine kmp_free(ptr) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) ptr - end subroutine kmp_free - - subroutine kmp_set_warnings_on() - end subroutine kmp_set_warnings_on - - subroutine kmp_set_warnings_off() - end subroutine kmp_set_warnings_off - - function kmp_get_cancellation_status(cancelkind) - use omp_lib_kinds - integer (kind=kmp_cancel_kind) cancelkind - logical (kind=omp_logical_kind) kmp_get_cancellation_status - end function kmp_get_cancellation_status - - subroutine omp_init_lock_with_hint(svar, hint) - use omp_lib_kinds - integer (kind=omp_lock_kind) svar - integer (kind=omp_lock_hint_kind) hint - end subroutine omp_init_lock_with_hint - - subroutine omp_init_nest_lock_with_hint(nvar, hint) - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) nvar - integer (kind=omp_lock_hint_kind) hint - end subroutine omp_init_nest_lock_with_hint - - end interface - -!dec$ if defined(_WIN32) -!dec$ if defined(_WIN64) .or. defined(_M_AMD64) - -!*** -!*** The Fortran entry points must be in uppercase, even if the /Qlowercase -!*** option is specified. The alias attribute ensures that the specified -!*** string is used as the entry point. -!*** -!*** On the Windows* OS IA-32 architecture, the Fortran entry points have an -!*** underscore prepended. On the Windows* OS Intel(R) 64 -!*** architecture, no underscore is prepended. 
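!*** For example, a Fortran reference to omp_get_wtime binds to the entry
!*** point OMP_GET_WTIME on the Intel(R) 64 architecture and to
!*** _OMP_GET_WTIME on IA-32, as the alias lists below spell out.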
-!*** - -!dec$ attributes alias:'OMP_SET_NUM_THREADS' :: omp_set_num_threads -!dec$ attributes alias:'OMP_SET_DYNAMIC' :: omp_set_dynamic -!dec$ attributes alias:'OMP_SET_NESTED' :: omp_set_nested -!dec$ attributes alias:'OMP_GET_NUM_THREADS' :: omp_get_num_threads -!dec$ attributes alias:'OMP_GET_MAX_THREADS' :: omp_get_max_threads -!dec$ attributes alias:'OMP_GET_THREAD_NUM' :: omp_get_thread_num -!dec$ attributes alias:'OMP_GET_NUM_PROCS' :: omp_get_num_procs -!dec$ attributes alias:'OMP_IN_PARALLEL' :: omp_in_parallel -!dec$ attributes alias:'OMP_GET_DYNAMIC' :: omp_get_dynamic -!dec$ attributes alias:'OMP_GET_NESTED' :: omp_get_nested -!dec$ attributes alias:'OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit -!dec$ attributes alias:'OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels -!dec$ attributes alias:'OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels -!dec$ attributes alias:'OMP_GET_LEVEL' :: omp_get_level -!dec$ attributes alias:'OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level -!dec$ attributes alias:'OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num -!dec$ attributes alias:'OMP_GET_TEAM_SIZE' :: omp_get_team_size -!dec$ attributes alias:'OMP_SET_SCHEDULE' :: omp_set_schedule -!dec$ attributes alias:'OMP_GET_SCHEDULE' :: omp_get_schedule -!dec$ attributes alias:'OMP_GET_PROC_BIND' :: omp_get_proc_bind -!dec$ attributes alias:'OMP_GET_WTIME' :: omp_get_wtime -!dec$ attributes alias:'OMP_GET_WTICK' :: omp_get_wtick -!dec$ attributes alias:'OMP_GET_DEFAULT_DEVICE' :: omp_get_default_device -!dec$ attributes alias:'OMP_SET_DEFAULT_DEVICE' :: omp_set_default_device -!dec$ attributes alias:'OMP_GET_NUM_DEVICES' :: omp_get_num_devices -!dec$ attributes alias:'OMP_GET_NUM_TEAMS' :: omp_get_num_teams -!dec$ attributes alias:'OMP_GET_TEAM_NUM' :: omp_get_team_num -!dec$ attributes alias:'OMP_GET_CANCELLATION' :: omp_get_cancellation -!dec$ attributes alias:'OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device -!dec$ attributes alias:'OMP_GET_INITIAL_DEVICE' :: omp_get_initial_device -!dec$ attributes alias:'OMP_GET_MAX_TASK_PRIORITY' :: omp_get_max_task_priority - -!dec$ attributes alias:'omp_init_lock' :: omp_init_lock -!dec$ attributes alias:'omp_init_lock_with_hint' :: omp_init_lock_with_hint -!dec$ attributes alias:'omp_destroy_lock' :: omp_destroy_lock -!dec$ attributes alias:'omp_set_lock' :: omp_set_lock -!dec$ attributes alias:'omp_unset_lock' :: omp_unset_lock -!dec$ attributes alias:'omp_test_lock' :: omp_test_lock -!dec$ attributes alias:'omp_init_nest_lock' :: omp_init_nest_lock -!dec$ attributes alias:'omp_init_nest_lock_with_hint' :: omp_init_nest_lock_with_hint -!dec$ attributes alias:'omp_destroy_nest_lock' :: omp_destroy_nest_lock -!dec$ attributes alias:'omp_set_nest_lock' :: omp_set_nest_lock -!dec$ attributes alias:'omp_unset_nest_lock' :: omp_unset_nest_lock -!dec$ attributes alias:'omp_test_nest_lock' :: omp_test_nest_lock - -!dec$ attributes alias:'KMP_SET_STACKSIZE'::kmp_set_stacksize -!dec$ attributes alias:'KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s -!dec$ attributes alias:'KMP_SET_BLOCKTIME'::kmp_set_blocktime -!dec$ attributes alias:'KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial -!dec$ attributes alias:'KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround -!dec$ attributes alias:'KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput -!dec$ attributes alias:'KMP_SET_LIBRARY'::kmp_set_library -!dec$ attributes alias:'KMP_GET_STACKSIZE'::kmp_get_stacksize -!dec$ attributes alias:'KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s -!dec$ attributes 
alias:'KMP_GET_BLOCKTIME'::kmp_get_blocktime -!dec$ attributes alias:'KMP_GET_LIBRARY'::kmp_get_library -!dec$ attributes alias:'KMP_SET_AFFINITY'::kmp_set_affinity -!dec$ attributes alias:'KMP_GET_AFFINITY'::kmp_get_affinity -!dec$ attributes alias:'KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc -!dec$ attributes alias:'KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask -!dec$ attributes alias:'KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask -!dec$ attributes alias:'KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'KMP_MALLOC'::kmp_malloc -!dec$ attributes alias:'KMP_ALIGNED_MALLOC'::kmp_aligned_malloc -!dec$ attributes alias:'KMP_CALLOC'::kmp_calloc -!dec$ attributes alias:'KMP_REALLOC'::kmp_realloc -!dec$ attributes alias:'KMP_FREE'::kmp_free - -!dec$ attributes alias:'KMP_SET_WARNINGS_ON'::kmp_set_warnings_on -!dec$ attributes alias:'KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off - -!dec$ attributes alias:'KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status - -!dec$ else - -!*** -!*** On Windows* OS IA-32 architecture, the Fortran entry points have an underscore prepended. -!*** - -!dec$ attributes alias:'_OMP_SET_NUM_THREADS' :: omp_set_num_threads -!dec$ attributes alias:'_OMP_SET_DYNAMIC' :: omp_set_dynamic -!dec$ attributes alias:'_OMP_SET_NESTED' :: omp_set_nested -!dec$ attributes alias:'_OMP_GET_NUM_THREADS' :: omp_get_num_threads -!dec$ attributes alias:'_OMP_GET_MAX_THREADS' :: omp_get_max_threads -!dec$ attributes alias:'_OMP_GET_THREAD_NUM' :: omp_get_thread_num -!dec$ attributes alias:'_OMP_GET_NUM_PROCS' :: omp_get_num_procs -!dec$ attributes alias:'_OMP_IN_PARALLEL' :: omp_in_parallel -!dec$ attributes alias:'_OMP_GET_DYNAMIC' :: omp_get_dynamic -!dec$ attributes alias:'_OMP_GET_NESTED' :: omp_get_nested -!dec$ attributes alias:'_OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit -!dec$ attributes alias:'_OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels -!dec$ attributes alias:'_OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels -!dec$ attributes alias:'_OMP_GET_LEVEL' :: omp_get_level -!dec$ attributes alias:'_OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level -!dec$ attributes alias:'_OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num -!dec$ attributes alias:'_OMP_GET_TEAM_SIZE' :: omp_get_team_size -!dec$ attributes alias:'_OMP_SET_SCHEDULE' :: omp_set_schedule -!dec$ attributes alias:'_OMP_GET_SCHEDULE' :: omp_get_schedule -!dec$ attributes alias:'_OMP_GET_PROC_BIND' :: omp_get_proc_bind -!dec$ attributes alias:'_OMP_GET_WTIME' :: omp_get_wtime -!dec$ attributes alias:'_OMP_GET_WTICK' :: omp_get_wtick -!dec$ attributes alias:'_OMP_GET_DEFAULT_DEVICE' :: omp_get_default_device -!dec$ attributes alias:'_OMP_SET_DEFAULT_DEVICE' :: omp_set_default_device -!dec$ attributes alias:'_OMP_GET_NUM_DEVICES' :: omp_get_num_devices -!dec$ attributes alias:'_OMP_GET_NUM_TEAMS' :: omp_get_num_teams -!dec$ attributes alias:'_OMP_GET_TEAM_NUM' :: omp_get_team_num -!dec$ attributes alias:'_OMP_GET_CANCELLATION' :: omp_get_cancellation -!dec$ attributes alias:'_OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device -!dec$ attributes alias:'_OMP_GET_INITIAL_DEVICE' :: omp_get_initial_device -!dec$ attributes alias:'_OMP_GET_MAX_TASK_PRIORITY' :: omp_get_max_task_priority - -!dec$ attributes alias:'_omp_init_lock' :: omp_init_lock -!dec$ attributes
alias:'_omp_init_lock_with_hint' :: omp_init_lock_with_hint -!dec$ attributes alias:'_omp_destroy_lock' :: omp_destroy_lock -!dec$ attributes alias:'_omp_set_lock' :: omp_set_lock -!dec$ attributes alias:'_omp_unset_lock' :: omp_unset_lock -!dec$ attributes alias:'_omp_test_lock' :: omp_test_lock -!dec$ attributes alias:'_omp_init_nest_lock' :: omp_init_nest_lock -!dec$ attributes alias:'_omp_init_nest_lock_with_hint' :: omp_init_nest_lock_with_hint -!dec$ attributes alias:'_omp_destroy_nest_lock' :: omp_destroy_nest_lock -!dec$ attributes alias:'_omp_set_nest_lock' :: omp_set_nest_lock -!dec$ attributes alias:'_omp_unset_nest_lock' :: omp_unset_nest_lock -!dec$ attributes alias:'_omp_test_nest_lock' :: omp_test_nest_lock - -!dec$ attributes alias:'_KMP_SET_STACKSIZE'::kmp_set_stacksize -!dec$ attributes alias:'_KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s -!dec$ attributes alias:'_KMP_SET_BLOCKTIME'::kmp_set_blocktime -!dec$ attributes alias:'_KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial -!dec$ attributes alias:'_KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround -!dec$ attributes alias:'_KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput -!dec$ attributes alias:'_KMP_SET_LIBRARY'::kmp_set_library -!dec$ attributes alias:'_KMP_GET_STACKSIZE'::kmp_get_stacksize -!dec$ attributes alias:'_KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s -!dec$ attributes alias:'_KMP_GET_BLOCKTIME'::kmp_get_blocktime -!dec$ attributes alias:'_KMP_GET_LIBRARY'::kmp_get_library -!dec$ attributes alias:'_KMP_SET_AFFINITY'::kmp_set_affinity -!dec$ attributes alias:'_KMP_GET_AFFINITY'::kmp_get_affinity -!dec$ attributes alias:'_KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc -!dec$ attributes alias:'_KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask -!dec$ attributes alias:'_KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask -!dec$ attributes alias:'_KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'_KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'_KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'_KMP_MALLOC'::kmp_malloc -!dec$ attributes alias:'_KMP_ALIGNED_MALLOC'::kmp_aligned_malloc -!dec$ attributes alias:'_KMP_CALLOC'::kmp_calloc -!dec$ attributes alias:'_KMP_REALLOC'::kmp_realloc -!dec$ attributes alias:'_KMP_FREE'::kmp_free - -!dec$ attributes alias:'_KMP_SET_WARNINGS_ON'::kmp_set_warnings_on -!dec$ attributes alias:'_KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off - -!dec$ attributes alias:'_KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status - -!dec$ endif -!dec$ endif - -!dec$ if defined(__linux) - -!*** -!*** The Linux* OS entry points are in lowercase, with an underscore appended. 
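!*** For example, omp_get_thread_num binds to the external symbol
!*** omp_get_thread_num_, matching the alias list that follows.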
-!*** - -!dec$ attributes alias:'omp_set_num_threads_'::omp_set_num_threads -!dec$ attributes alias:'omp_set_dynamic_'::omp_set_dynamic -!dec$ attributes alias:'omp_set_nested_'::omp_set_nested -!dec$ attributes alias:'omp_get_num_threads_'::omp_get_num_threads -!dec$ attributes alias:'omp_get_max_threads_'::omp_get_max_threads -!dec$ attributes alias:'omp_get_thread_num_'::omp_get_thread_num -!dec$ attributes alias:'omp_get_num_procs_'::omp_get_num_procs -!dec$ attributes alias:'omp_in_parallel_'::omp_in_parallel -!dec$ attributes alias:'omp_get_dynamic_'::omp_get_dynamic -!dec$ attributes alias:'omp_get_nested_'::omp_get_nested -!dec$ attributes alias:'omp_get_thread_limit_'::omp_get_thread_limit -!dec$ attributes alias:'omp_set_max_active_levels_'::omp_set_max_active_levels -!dec$ attributes alias:'omp_get_max_active_levels_'::omp_get_max_active_levels -!dec$ attributes alias:'omp_get_level_'::omp_get_level -!dec$ attributes alias:'omp_get_active_level_'::omp_get_active_level -!dec$ attributes alias:'omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num -!dec$ attributes alias:'omp_get_team_size_'::omp_get_team_size -!dec$ attributes alias:'omp_set_schedule_'::omp_set_schedule -!dec$ attributes alias:'omp_get_schedule_'::omp_get_schedule -!dec$ attributes alias:'omp_get_proc_bind_' :: omp_get_proc_bind -!dec$ attributes alias:'omp_get_wtime_'::omp_get_wtime -!dec$ attributes alias:'omp_get_wtick_'::omp_get_wtick -!dec$ attributes alias:'omp_get_default_device_'::omp_get_default_device -!dec$ attributes alias:'omp_set_default_device_'::omp_set_default_device -!dec$ attributes alias:'omp_get_num_devices_'::omp_get_num_devices -!dec$ attributes alias:'omp_get_num_teams_'::omp_get_num_teams -!dec$ attributes alias:'omp_get_team_num_'::omp_get_team_num -!dec$ attributes alias:'omp_get_cancellation_'::omp_get_cancellation -!dec$ attributes alias:'omp_is_initial_device_'::omp_is_initial_device -!dec$ attributes alias:'omp_get_initial_device_'::omp_get_initial_device -!dec$ attributes alias:'omp_get_max_task_priority_'::omp_get_max_task_priority - -!dec$ attributes alias:'omp_init_lock_'::omp_init_lock -!dec$ attributes alias:'omp_init_lock_with_hint_'::omp_init_lock_with_hint -!dec$ attributes alias:'omp_destroy_lock_'::omp_destroy_lock -!dec$ attributes alias:'omp_set_lock_'::omp_set_lock -!dec$ attributes alias:'omp_unset_lock_'::omp_unset_lock -!dec$ attributes alias:'omp_test_lock_'::omp_test_lock -!dec$ attributes alias:'omp_init_nest_lock_'::omp_init_nest_lock -!dec$ attributes alias:'omp_init_nest_lock_with_hint_'::omp_init_nest_lock_with_hint -!dec$ attributes alias:'omp_destroy_nest_lock_'::omp_destroy_nest_lock -!dec$ attributes alias:'omp_set_nest_lock_'::omp_set_nest_lock -!dec$ attributes alias:'omp_unset_nest_lock_'::omp_unset_nest_lock -!dec$ attributes alias:'omp_test_nest_lock_'::omp_test_nest_lock - -!dec$ attributes alias:'kmp_set_stacksize_'::kmp_set_stacksize -!dec$ attributes alias:'kmp_set_stacksize_s_'::kmp_set_stacksize_s -!dec$ attributes alias:'kmp_set_blocktime_'::kmp_set_blocktime -!dec$ attributes alias:'kmp_set_library_serial_'::kmp_set_library_serial -!dec$ attributes alias:'kmp_set_library_turnaround_'::kmp_set_library_turnaround -!dec$ attributes alias:'kmp_set_library_throughput_'::kmp_set_library_throughput -!dec$ attributes alias:'kmp_set_library_'::kmp_set_library -!dec$ attributes alias:'kmp_get_stacksize_'::kmp_get_stacksize -!dec$ attributes alias:'kmp_get_stacksize_s_'::kmp_get_stacksize_s -!dec$ attributes 
alias:'kmp_get_blocktime_'::kmp_get_blocktime -!dec$ attributes alias:'kmp_get_library_'::kmp_get_library -!dec$ attributes alias:'kmp_set_affinity_'::kmp_set_affinity -!dec$ attributes alias:'kmp_get_affinity_'::kmp_get_affinity -!dec$ attributes alias:'kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc -!dec$ attributes alias:'kmp_create_affinity_mask_'::kmp_create_affinity_mask -!dec$ attributes alias:'kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask -!dec$ attributes alias:'kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'kmp_malloc_'::kmp_malloc -!dec$ attributes alias:'kmp_aligned_malloc_'::kmp_aligned_malloc -!dec$ attributes alias:'kmp_calloc_'::kmp_calloc -!dec$ attributes alias:'kmp_realloc_'::kmp_realloc -!dec$ attributes alias:'kmp_free_'::kmp_free - -!dec$ attributes alias:'kmp_set_warnings_on_'::kmp_set_warnings_on -!dec$ attributes alias:'kmp_set_warnings_off_'::kmp_set_warnings_off -!dec$ attributes alias:'kmp_get_cancellation_status_'::kmp_get_cancellation_status - -!dec$ endif - -!dec$ if defined(__APPLE__) - -!*** -!*** The Mac entry points are in lowercase, with both an underscore -!*** appended and an underscore prepended. -!*** - -!dec$ attributes alias:'_omp_set_num_threads_'::omp_set_num_threads -!dec$ attributes alias:'_omp_set_dynamic_'::omp_set_dynamic -!dec$ attributes alias:'_omp_set_nested_'::omp_set_nested -!dec$ attributes alias:'_omp_get_num_threads_'::omp_get_num_threads -!dec$ attributes alias:'_omp_get_max_threads_'::omp_get_max_threads -!dec$ attributes alias:'_omp_get_thread_num_'::omp_get_thread_num -!dec$ attributes alias:'_omp_get_num_procs_'::omp_get_num_procs -!dec$ attributes alias:'_omp_in_parallel_'::omp_in_parallel -!dec$ attributes alias:'_omp_get_dynamic_'::omp_get_dynamic -!dec$ attributes alias:'_omp_get_nested_'::omp_get_nested -!dec$ attributes alias:'_omp_get_thread_limit_'::omp_get_thread_limit -!dec$ attributes alias:'_omp_set_max_active_levels_'::omp_set_max_active_levels -!dec$ attributes alias:'_omp_get_max_active_levels_'::omp_get_max_active_levels -!dec$ attributes alias:'_omp_get_level_'::omp_get_level -!dec$ attributes alias:'_omp_get_active_level_'::omp_get_active_level -!dec$ attributes alias:'_omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num -!dec$ attributes alias:'_omp_get_team_size_'::omp_get_team_size -!dec$ attributes alias:'_omp_set_schedule_'::omp_set_schedule -!dec$ attributes alias:'_omp_get_schedule_'::omp_get_schedule -!dec$ attributes alias:'_omp_get_proc_bind_' :: omp_get_proc_bind -!dec$ attributes alias:'_omp_get_wtime_'::omp_get_wtime -!dec$ attributes alias:'_omp_get_wtick_'::omp_get_wtick -!dec$ attributes alias:'_omp_get_default_device_'::omp_get_default_device -!dec$ attributes alias:'_omp_set_default_device_'::omp_set_default_device -!dec$ attributes alias:'_omp_get_num_devices_'::omp_get_num_devices -!dec$ attributes alias:'_omp_get_num_teams_'::omp_get_num_teams -!dec$ attributes alias:'_omp_get_team_num_'::omp_get_team_num -!dec$ attributes alias:'_omp_get_cancellation_'::omp_get_cancellation -!dec$ attributes alias:'_omp_is_initial_device_'::omp_is_initial_device -!dec$ attributes alias:'_omp_get_initial_device_'::omp_get_initial_device -!dec$ attributes alias:'_omp_get_max_task_priority_'::omp_get_max_task_priority - -!dec$ attributes alias:'_omp_init_lock_'::omp_init_lock -!dec$
attributes alias:'_omp_init_lock_with_hint_'::omp_init_lock_with_hint -!dec$ attributes alias:'_omp_destroy_lock_'::omp_destroy_lock -!dec$ attributes alias:'_omp_set_lock_'::omp_set_lock -!dec$ attributes alias:'_omp_unset_lock_'::omp_unset_lock -!dec$ attributes alias:'_omp_test_lock_'::omp_test_lock -!dec$ attributes alias:'_omp_init_nest_lock_'::omp_init_nest_lock -!dec$ attributes alias:'_omp_init_nest_lock_with_hint_'::omp_init_nest_lock_with_hint -!dec$ attributes alias:'_omp_destroy_nest_lock_'::omp_destroy_nest_lock -!dec$ attributes alias:'_omp_set_nest_lock_'::omp_set_nest_lock -!dec$ attributes alias:'_omp_unset_nest_lock_'::omp_unset_nest_lock -!dec$ attributes alias:'_omp_test_nest_lock_'::omp_test_nest_lock - -!dec$ attributes alias:'_kmp_set_stacksize_'::kmp_set_stacksize -!dec$ attributes alias:'_kmp_set_stacksize_s_'::kmp_set_stacksize_s -!dec$ attributes alias:'_kmp_set_blocktime_'::kmp_set_blocktime -!dec$ attributes alias:'_kmp_set_library_serial_'::kmp_set_library_serial -!dec$ attributes alias:'_kmp_set_library_turnaround_'::kmp_set_library_turnaround -!dec$ attributes alias:'_kmp_set_library_throughput_'::kmp_set_library_throughput -!dec$ attributes alias:'_kmp_set_library_'::kmp_set_library -!dec$ attributes alias:'_kmp_get_stacksize_'::kmp_get_stacksize -!dec$ attributes alias:'_kmp_get_stacksize_s_'::kmp_get_stacksize_s -!dec$ attributes alias:'_kmp_get_blocktime_'::kmp_get_blocktime -!dec$ attributes alias:'_kmp_get_library_'::kmp_get_library -!dec$ attributes alias:'_kmp_set_affinity_'::kmp_set_affinity -!dec$ attributes alias:'_kmp_get_affinity_'::kmp_get_affinity -!dec$ attributes alias:'_kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc -!dec$ attributes alias:'_kmp_create_affinity_mask_'::kmp_create_affinity_mask -!dec$ attributes alias:'_kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask -!dec$ attributes alias:'_kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'_kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'_kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'_kmp_malloc_'::kmp_malloc -!dec$ attributes alias:'_kmp_aligned_malloc_'::kmp_aligned_malloc -!dec$ attributes alias:'_kmp_calloc_'::kmp_calloc -!dec$ attributes alias:'_kmp_realloc_'::kmp_realloc -!dec$ attributes alias:'_kmp_free_'::kmp_free - -!dec$ attributes alias:'_kmp_set_warnings_on_'::kmp_set_warnings_on -!dec$ attributes alias:'_kmp_set_warnings_off_'::kmp_set_warnings_off - -!dec$ attributes alias:'_kmp_get_cancellation_status_'::kmp_get_cancellation_status - -!dec$ endif - - end module omp_lib Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/45/omp_lib.h.var =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/45/omp_lib.h.var (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/45/omp_lib.h.var (nonexistent) @@ -1,645 +0,0 @@ -! include/45/omp_lib.h.var - -! -!//===----------------------------------------------------------------------===// -!// -!// The LLVM Compiler Infrastructure -!// -!// This file is dual licensed under the MIT and the University of Illinois Open -!// Source Licenses. See LICENSE.txt for details. -!// -!//===----------------------------------------------------------------------===// -! - -!*** -!*** Some of the directives for the following routine extend past column 72, -!*** so process this file in 132-column mode. 
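!*** (With the Intel Fortran compiler this typically corresponds to the
!*** -extend-source 132 command-line option; the option name is given as
!*** an illustration and is not taken from this file.)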
-!*** - -!DIR$ fixedformlinesize:132 - - integer, parameter :: omp_integer_kind = 4 - integer, parameter :: omp_logical_kind = 4 - integer, parameter :: omp_real_kind = 4 - integer, parameter :: omp_lock_kind = int_ptr_kind() - integer, parameter :: omp_nest_lock_kind = int_ptr_kind() - integer, parameter :: omp_sched_kind = omp_integer_kind - integer, parameter :: omp_proc_bind_kind = omp_integer_kind - integer, parameter :: kmp_pointer_kind = int_ptr_kind() - integer, parameter :: kmp_size_t_kind = int_ptr_kind() - integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind() - integer, parameter :: omp_lock_hint_kind = omp_integer_kind - - integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@ - integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@ - character(*) kmp_build_date - parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' ) - - integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1 - integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2 - integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3 - integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4 - - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4 - - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_none = 0 - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_uncontended = 1 - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_contended = 2 - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_nonspeculative = 4 - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_speculative = 8 - integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_hle = 65536 - integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_rtm = 131072 - integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 262144 - - interface - -! *** -! *** omp_* entry points -! 
*** - - subroutine omp_set_num_threads(num_threads) bind(c) - import - integer (kind=omp_integer_kind), value :: num_threads - end subroutine omp_set_num_threads - - subroutine omp_set_dynamic(dynamic_threads) bind(c) - import - logical (kind=omp_logical_kind), value :: dynamic_threads - end subroutine omp_set_dynamic - - subroutine omp_set_nested(nested) bind(c) - import - logical (kind=omp_logical_kind), value :: nested - end subroutine omp_set_nested - - function omp_get_num_threads() bind(c) - import - integer (kind=omp_integer_kind) omp_get_num_threads - end function omp_get_num_threads - - function omp_get_max_threads() bind(c) - import - integer (kind=omp_integer_kind) omp_get_max_threads - end function omp_get_max_threads - - function omp_get_thread_num() bind(c) - import - integer (kind=omp_integer_kind) omp_get_thread_num - end function omp_get_thread_num - - function omp_get_num_procs() bind(c) - import - integer (kind=omp_integer_kind) omp_get_num_procs - end function omp_get_num_procs - - function omp_in_parallel() bind(c) - import - logical (kind=omp_logical_kind) omp_in_parallel - end function omp_in_parallel - - function omp_in_final() bind(c) - import - logical (kind=omp_logical_kind) omp_in_final - end function omp_in_final - - function omp_get_dynamic() bind(c) - import - logical (kind=omp_logical_kind) omp_get_dynamic - end function omp_get_dynamic - - function omp_get_nested() bind(c) - import - logical (kind=omp_logical_kind) omp_get_nested - end function omp_get_nested - - function omp_get_thread_limit() bind(c) - import - integer (kind=omp_integer_kind) omp_get_thread_limit - end function omp_get_thread_limit - - subroutine omp_set_max_active_levels(max_levels) bind(c) - import - integer (kind=omp_integer_kind), value :: max_levels - end subroutine omp_set_max_active_levels - - function omp_get_max_active_levels() bind(c) - import - integer (kind=omp_integer_kind) omp_get_max_active_levels - end function omp_get_max_active_levels - - function omp_get_level() bind(c) - import - integer (kind=omp_integer_kind) omp_get_level - end function omp_get_level - - function omp_get_active_level() bind(c) - import - integer (kind=omp_integer_kind) omp_get_active_level - end function omp_get_active_level - - function omp_get_ancestor_thread_num(level) bind(c) - import - integer (kind=omp_integer_kind) omp_get_ancestor_thread_num - integer (kind=omp_integer_kind), value :: level - end function omp_get_ancestor_thread_num - - function omp_get_team_size(level) bind(c) - import - integer (kind=omp_integer_kind) omp_get_team_size - integer (kind=omp_integer_kind), value :: level - end function omp_get_team_size - - subroutine omp_set_schedule(kind, chunk_size) bind(c) - import - integer (kind=omp_sched_kind), value :: kind - integer (kind=omp_integer_kind), value :: chunk_size - end subroutine omp_set_schedule - - subroutine omp_get_schedule(kind, chunk_size) bind(c) - import - integer (kind=omp_sched_kind) kind - integer (kind=omp_integer_kind) chunk_size - end subroutine omp_get_schedule - - function omp_get_proc_bind() bind(c) - import - integer (kind=omp_proc_bind_kind) omp_get_proc_bind - end function omp_get_proc_bind - - function omp_get_num_places() bind(c) - import - integer (kind=omp_integer_kind) omp_get_num_places - end function omp_get_num_places - - function omp_get_place_num_procs(place_num) bind(c) - import - integer (kind=omp_integer_kind), value :: place_num - integer (kind=omp_integer_kind) omp_get_place_num_procs - end function omp_get_place_num_procs - - 
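For reference, a minimal C sketch of the place-query entry points whose Fortran interfaces appear here; omp_get_place_proc_ids is declared immediately below, and all three map onto the C prototypes in include/45/omp.h.var later in this diff. The file name and build line are illustrative assumptions, and omp_get_num_places may legitimately return 0 when no place list is configured:

    /* places.c -- enumerate OpenMP 4.5 places and their processor ids.
       Build (illustrative): cc -fopenmp places.c */
    #include <stdio.h>
    #include <stdlib.h>
    #include <omp.h>

    int main(void) {
      int nplaces = omp_get_num_places(); /* 0 if no place list is set */
      for (int p = 0; p < nplaces; ++p) {
        int nprocs = omp_get_place_num_procs(p);
        printf("place %d:", p);
        if (nprocs > 0) {
          int *ids = (int *)malloc(sizeof(int) * (size_t)nprocs);
          if (ids) {
            omp_get_place_proc_ids(p, ids); /* fills ids[0..nprocs-1] */
            for (int i = 0; i < nprocs; ++i)
              printf(" %d", ids[i]);
            free(ids);
          }
        }
        printf("\n");
      }
      return 0;
    }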
subroutine omp_get_place_proc_ids(place_num, ids) bind(c) - import - integer (kind=omp_integer_kind), value :: place_num - integer (kind=omp_integer_kind) ids(*) - end subroutine omp_get_place_proc_ids - - function omp_get_place_num() bind(c) - import - integer (kind=omp_integer_kind) omp_get_place_num - end function omp_get_place_num - - function omp_get_partition_num_places() bind(c) - import - integer (kind=omp_integer_kind) omp_get_partition_num_places - end function omp_get_partition_num_places - - subroutine omp_get_partition_place_nums(place_nums) bind(c) - import - integer (kind=omp_integer_kind) place_nums(*) - end subroutine omp_get_partition_place_nums - - function omp_get_wtime() bind(c) - double precision omp_get_wtime - end function omp_get_wtime - - function omp_get_wtick() bind(c) - double precision omp_get_wtick - end function omp_get_wtick - - function omp_get_default_device() bind(c) - import - integer (kind=omp_integer_kind) omp_get_default_device - end function omp_get_default_device - - subroutine omp_set_default_device(device_num) bind(c) - import - integer (kind=omp_integer_kind), value :: device_num - end subroutine omp_set_default_device - - function omp_get_num_devices() bind(c) - import - integer (kind=omp_integer_kind) omp_get_num_devices - end function omp_get_num_devices - - function omp_get_num_teams() bind(c) - import - integer (kind=omp_integer_kind) omp_get_num_teams - end function omp_get_num_teams - - function omp_get_team_num() bind(c) - import - integer (kind=omp_integer_kind) omp_get_team_num - end function omp_get_team_num - - function omp_is_initial_device() bind(c) - import - logical (kind=omp_logical_kind) omp_is_initial_device - end function omp_is_initial_device - - function omp_get_initial_device() bind(c) - import - integer (kind=omp_integer_kind) omp_get_initial_device - end function omp_get_initial_device - - subroutine omp_init_lock(svar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_init_lock -!DIR$ ENDIF - import - integer (kind=omp_lock_kind) svar - end subroutine omp_init_lock - - subroutine omp_destroy_lock(svar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_destroy_lock -!DIR$ ENDIF - import - integer (kind=omp_lock_kind) svar - end subroutine omp_destroy_lock - - subroutine omp_set_lock(svar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_set_lock -!DIR$ ENDIF - import - integer (kind=omp_lock_kind) svar - end subroutine omp_set_lock - - subroutine omp_unset_lock(svar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_unset_lock -!DIR$ ENDIF - import - integer (kind=omp_lock_kind) svar - end subroutine omp_unset_lock - - function omp_test_lock(svar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_test_lock -!DIR$ ENDIF - import - logical (kind=omp_logical_kind) omp_test_lock - integer (kind=omp_lock_kind) svar - end function omp_test_lock - - subroutine omp_init_nest_lock(nvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_init_nest_lock -!DIR$ ENDIF - import - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_init_nest_lock - - subroutine omp_destroy_nest_lock(nvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock -!DIR$ ENDIF - import - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_destroy_nest_lock - - subroutine 
omp_set_nest_lock(nvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_set_nest_lock -!DIR$ ENDIF - import - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_set_nest_lock - - subroutine omp_unset_nest_lock(nvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_unset_nest_lock -!DIR$ ENDIF - import - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_unset_nest_lock - - function omp_test_nest_lock(nvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_test_nest_lock -!DIR$ ENDIF - import - integer (kind=omp_integer_kind) omp_test_nest_lock - integer (kind=omp_nest_lock_kind) nvar - end function omp_test_nest_lock - - function omp_get_max_task_priority() bind(c) - import - integer (kind=omp_integer_kind) omp_get_max_task_priority - end function omp_get_max_task_priority - -! *** -! *** kmp_* entry points -! *** - - subroutine kmp_set_stacksize(size) bind(c) - import - integer (kind=omp_integer_kind), value :: size - end subroutine kmp_set_stacksize - - subroutine kmp_set_stacksize_s(size) bind(c) - import - integer (kind=kmp_size_t_kind), value :: size - end subroutine kmp_set_stacksize_s - - subroutine kmp_set_blocktime(msec) bind(c) - import - integer (kind=omp_integer_kind), value :: msec - end subroutine kmp_set_blocktime - - subroutine kmp_set_library_serial() bind(c) - end subroutine kmp_set_library_serial - - subroutine kmp_set_library_turnaround() bind(c) - end subroutine kmp_set_library_turnaround - - subroutine kmp_set_library_throughput() bind(c) - end subroutine kmp_set_library_throughput - - subroutine kmp_set_library(libnum) bind(c) - import - integer (kind=omp_integer_kind), value :: libnum - end subroutine kmp_set_library - - subroutine kmp_set_defaults(string) bind(c) - character string(*) - end subroutine kmp_set_defaults - - function kmp_get_stacksize() bind(c) - import - integer (kind=omp_integer_kind) kmp_get_stacksize - end function kmp_get_stacksize - - function kmp_get_stacksize_s() bind(c) - import - integer (kind=kmp_size_t_kind) kmp_get_stacksize_s - end function kmp_get_stacksize_s - - function kmp_get_blocktime() bind(c) - import - integer (kind=omp_integer_kind) kmp_get_blocktime - end function kmp_get_blocktime - - function kmp_get_library() bind(c) - import - integer (kind=omp_integer_kind) kmp_get_library - end function kmp_get_library - - subroutine kmp_set_disp_num_buffers(num) bind(c) - import - integer (kind=omp_integer_kind), value :: num - end subroutine kmp_set_disp_num_buffers - - function kmp_set_affinity(mask) bind(c) - import - integer (kind=omp_integer_kind) kmp_set_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity - - function kmp_get_affinity(mask) bind(c) - import - integer (kind=omp_integer_kind) kmp_get_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity - - function kmp_get_affinity_max_proc() bind(c) - import - integer (kind=omp_integer_kind) kmp_get_affinity_max_proc - end function kmp_get_affinity_max_proc - - subroutine kmp_create_affinity_mask(mask) bind(c) - import - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_create_affinity_mask - - subroutine kmp_destroy_affinity_mask(mask) bind(c) - import - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_destroy_affinity_mask - - function kmp_set_affinity_mask_proc(proc, mask) bind(c) - import - integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc - integer 
(kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity_mask_proc - - function kmp_unset_affinity_mask_proc(proc, mask) bind(c) - import - integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_unset_affinity_mask_proc - - function kmp_get_affinity_mask_proc(proc, mask) bind(c) - import - integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity_mask_proc - - function kmp_malloc(size) bind(c) - import - integer (kind=kmp_pointer_kind) kmp_malloc - integer (kind=kmp_size_t_kind), value :: size - end function kmp_malloc - - function kmp_aligned_malloc(size, alignment) bind(c) - import - integer (kind=kmp_pointer_kind) kmp_aligned_malloc - integer (kind=kmp_size_t_kind), value :: size - integer (kind=kmp_size_t_kind), value :: alignment - end function kmp_aligned_malloc - - function kmp_calloc(nelem, elsize) bind(c) - import - integer (kind=kmp_pointer_kind) kmp_calloc - integer (kind=kmp_size_t_kind), value :: nelem - integer (kind=kmp_size_t_kind), value :: elsize - end function kmp_calloc - - function kmp_realloc(ptr, size) bind(c) - import - integer (kind=kmp_pointer_kind) kmp_realloc - integer (kind=kmp_pointer_kind), value :: ptr - integer (kind=kmp_size_t_kind), value :: size - end function kmp_realloc - - subroutine kmp_free(ptr) bind(c) - import - integer (kind=kmp_pointer_kind), value :: ptr - end subroutine kmp_free - - subroutine kmp_set_warnings_on() bind(c) - end subroutine kmp_set_warnings_on - - subroutine kmp_set_warnings_off() bind(c) - end subroutine kmp_set_warnings_off - - subroutine omp_init_lock_with_hint(svar, hint) bind(c) - import - integer (kind=omp_lock_kind) svar - integer (kind=omp_lock_hint_kind), value :: hint - end subroutine omp_init_lock_with_hint - - subroutine omp_init_nest_lock_with_hint(nvar, hint) bind(c) - import - integer (kind=omp_nest_lock_kind) nvar - integer (kind=omp_lock_hint_kind), value :: hint - end subroutine omp_init_nest_lock_with_hint - - end interface - -!DIR$ IF DEFINED (__INTEL_OFFLOAD) -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_num_threads -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_dynamic -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_nested -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_threads -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_max_threads -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_thread_num -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_procs -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_in_parallel -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_in_final -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_dynamic -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_nested -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_thread_limit -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_max_active_levels -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_max_active_levels -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_level -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_active_level -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_ancestor_thread_num -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_team_size -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_schedule -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_schedule -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_proc_bind -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_wtime -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_wtick -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_default_device -!DIR$ ATTRIBUTES OFFLOAD:MIC :: 
omp_set_default_device -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_is_initial_device -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_initial_device -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_devices -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_teams -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_team_num -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_destroy_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_unset_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_test_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_nest_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_destroy_nest_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_nest_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_unset_nest_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_test_nest_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_max_task_priority -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_stacksize -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_stacksize_s -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_blocktime -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_serial -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_turnaround -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_throughput -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_defaults -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_stacksize -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_stacksize_s -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_blocktime -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_library -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_disp_num_buffers -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_affinity -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity_max_proc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_create_affinity_mask -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_destroy_affinity_mask -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_affinity_mask_proc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_unset_affinity_mask_proc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity_mask_proc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_malloc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_aligned_malloc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_calloc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_realloc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_free -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_warnings_on -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_warnings_off -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_lock_with_hint -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_nest_lock_with_hint - -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!$omp declare target(omp_set_num_threads ) -!$omp declare target(omp_set_dynamic ) -!$omp declare target(omp_set_nested ) -!$omp declare target(omp_get_num_threads ) -!$omp declare target(omp_get_max_threads ) -!$omp declare target(omp_get_thread_num ) -!$omp declare target(omp_get_num_procs ) -!$omp declare target(omp_in_parallel ) -!$omp declare target(omp_in_final ) -!$omp declare target(omp_get_dynamic ) -!$omp declare target(omp_get_nested ) -!$omp declare target(omp_get_thread_limit ) -!$omp declare target(omp_set_max_active_levels ) -!$omp declare target(omp_get_max_active_levels ) -!$omp declare target(omp_get_level ) -!$omp declare target(omp_get_active_level ) -!$omp declare target(omp_get_ancestor_thread_num ) -!$omp declare target(omp_get_team_size ) -!$omp declare target(omp_set_schedule ) -!$omp declare target(omp_get_schedule ) -!$omp declare target(omp_get_proc_bind ) -!$omp declare target(omp_get_wtime ) -!$omp declare target(omp_get_wtick ) -!$omp declare target(omp_get_default_device ) -!$omp declare target(omp_set_default_device 
) -!$omp declare target(omp_is_initial_device ) -!$omp declare target(omp_get_initial_device ) -!$omp declare target(omp_get_num_devices ) -!$omp declare target(omp_get_num_teams ) -!$omp declare target(omp_get_team_num ) -!$omp declare target(omp_init_lock ) -!$omp declare target(omp_destroy_lock ) -!$omp declare target(omp_set_lock ) -!$omp declare target(omp_unset_lock ) -!$omp declare target(omp_test_lock ) -!$omp declare target(omp_init_nest_lock ) -!$omp declare target(omp_destroy_nest_lock ) -!$omp declare target(omp_set_nest_lock ) -!$omp declare target(omp_unset_nest_lock ) -!$omp declare target(omp_test_nest_lock ) -!$omp declare target(omp_get_max_task_priority ) -!$omp declare target(kmp_set_stacksize ) -!$omp declare target(kmp_set_stacksize_s ) -!$omp declare target(kmp_set_blocktime ) -!$omp declare target(kmp_set_library_serial ) -!$omp declare target(kmp_set_library_turnaround ) -!$omp declare target(kmp_set_library_throughput ) -!$omp declare target(kmp_set_library ) -!$omp declare target(kmp_set_defaults ) -!$omp declare target(kmp_get_stacksize ) -!$omp declare target(kmp_get_stacksize_s ) -!$omp declare target(kmp_get_blocktime ) -!$omp declare target(kmp_get_library ) -!$omp declare target(kmp_set_disp_num_buffers ) -!$omp declare target(kmp_set_affinity ) -!$omp declare target(kmp_get_affinity ) -!$omp declare target(kmp_get_affinity_max_proc ) -!$omp declare target(kmp_create_affinity_mask ) -!$omp declare target(kmp_destroy_affinity_mask ) -!$omp declare target(kmp_set_affinity_mask_proc ) -!$omp declare target(kmp_unset_affinity_mask_proc ) -!$omp declare target(kmp_get_affinity_mask_proc ) -!$omp declare target(kmp_malloc ) -!$omp declare target(kmp_aligned_malloc ) -!$omp declare target(kmp_calloc ) -!$omp declare target(kmp_realloc ) -!$omp declare target(kmp_free ) -!$omp declare target(kmp_set_warnings_on ) -!$omp declare target(kmp_set_warnings_off ) -!$omp declare target(omp_init_lock_with_hint ) -!$omp declare target(omp_init_nest_lock_with_hint ) -!DIR$ ENDIF -!DIR$ ENDIF Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/45/omp_lib.f90.var =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/45/omp_lib.f90.var (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/45/omp_lib.f90.var (nonexistent) @@ -1,524 +0,0 @@ -! include/45/omp_lib.f90.var - -! -!//===----------------------------------------------------------------------===// -!// -!// The LLVM Compiler Infrastructure -!// -!// This file is dual licensed under the MIT and the University of Illinois Open -!// Source Licenses. See LICENSE.txt for details. -!// -!//===----------------------------------------------------------------------===// -! 
- - module omp_lib_kinds - - use, intrinsic :: iso_c_binding - - integer, parameter :: omp_integer_kind = c_int - integer, parameter :: omp_logical_kind = 4 - integer, parameter :: omp_real_kind = c_float - integer, parameter :: kmp_double_kind = c_double - integer, parameter :: omp_lock_kind = c_intptr_t - integer, parameter :: omp_nest_lock_kind = c_intptr_t - integer, parameter :: omp_sched_kind = omp_integer_kind - integer, parameter :: omp_proc_bind_kind = omp_integer_kind - integer, parameter :: kmp_pointer_kind = c_intptr_t - integer, parameter :: kmp_size_t_kind = c_size_t - integer, parameter :: kmp_affinity_mask_kind = c_intptr_t - integer, parameter :: kmp_cancel_kind = omp_integer_kind - integer, parameter :: omp_lock_hint_kind = omp_integer_kind - - end module omp_lib_kinds - - module omp_lib - - use omp_lib_kinds - - integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@ - integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@ - character(*) kmp_build_date - parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' ) - - integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1 - integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2 - integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3 - integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4 - - - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4 - - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_parallel = 1 - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_loop = 2 - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3 - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4 - - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_none = 0 - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_uncontended = 1 - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_contended = 2 - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_nonspeculative = 4 - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_speculative = 8 - integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_hle = 65536 - integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_rtm = 131072 - integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 262144 - - interface - -! *** -! *** omp_* entry points -! 
*** - - subroutine omp_set_num_threads(num_threads) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: num_threads - end subroutine omp_set_num_threads - - subroutine omp_set_dynamic(dynamic_threads) bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind), value :: dynamic_threads - end subroutine omp_set_dynamic - - subroutine omp_set_nested(nested) bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind), value :: nested - end subroutine omp_set_nested - - function omp_get_num_threads() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_threads - end function omp_get_num_threads - - function omp_get_max_threads() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_threads - end function omp_get_max_threads - - function omp_get_thread_num() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_thread_num - end function omp_get_thread_num - - function omp_get_num_procs() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_procs - end function omp_get_num_procs - - function omp_in_parallel() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_in_parallel - end function omp_in_parallel - - function omp_in_final() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_in_final - end function omp_in_final - - function omp_get_dynamic() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_get_dynamic - end function omp_get_dynamic - - function omp_get_nested() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_get_nested - end function omp_get_nested - - function omp_get_thread_limit() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_thread_limit - end function omp_get_thread_limit - - subroutine omp_set_max_active_levels(max_levels) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: max_levels - end subroutine omp_set_max_active_levels - - function omp_get_max_active_levels() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_active_levels - end function omp_get_max_active_levels - - function omp_get_level() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_level - end function omp_get_level - - function omp_get_active_level() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_active_level - end function omp_get_active_level - - function omp_get_ancestor_thread_num(level) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_ancestor_thread_num - integer (kind=omp_integer_kind), value :: level - end function omp_get_ancestor_thread_num - - function omp_get_team_size(level) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_team_size - integer (kind=omp_integer_kind), value :: level - end function omp_get_team_size - - subroutine omp_set_schedule(kind, chunk_size) bind(c) - use omp_lib_kinds - integer (kind=omp_sched_kind), value :: kind - integer (kind=omp_integer_kind), value :: chunk_size - end subroutine omp_set_schedule - - subroutine omp_get_schedule(kind, chunk_size) bind(c) - use omp_lib_kinds - integer (kind=omp_sched_kind) kind - integer (kind=omp_integer_kind) chunk_size - end subroutine omp_get_schedule - - function omp_get_proc_bind() bind(c) - use omp_lib_kinds - integer (kind=omp_proc_bind_kind) omp_get_proc_bind - end function omp_get_proc_bind - - function omp_get_num_places() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_places - end function 
omp_get_num_places - - function omp_get_place_num_procs(place_num) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: place_num - integer (kind=omp_integer_kind) omp_get_place_num_procs - end function omp_get_place_num_procs - - subroutine omp_get_place_proc_ids(place_num, ids) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: place_num - integer (kind=omp_integer_kind) ids(*) - end subroutine omp_get_place_proc_ids - - function omp_get_place_num() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_place_num - end function omp_get_place_num - - function omp_get_partition_num_places() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_partition_num_places - end function omp_get_partition_num_places - - subroutine omp_get_partition_place_nums(place_nums) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) place_nums(*) - end subroutine omp_get_partition_place_nums - - function omp_get_wtime() bind(c) - use omp_lib_kinds - real (kind=kmp_double_kind) omp_get_wtime - end function omp_get_wtime - - function omp_get_wtick() bind(c) - use omp_lib_kinds - real (kind=kmp_double_kind) omp_get_wtick - end function omp_get_wtick - - function omp_get_default_device() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_default_device - end function omp_get_default_device - - subroutine omp_set_default_device(device_num) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: device_num - end subroutine omp_set_default_device - - function omp_get_num_devices() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_devices - end function omp_get_num_devices - - function omp_get_num_teams() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_teams - end function omp_get_num_teams - - function omp_get_team_num() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_team_num - end function omp_get_team_num - - function omp_get_cancellation() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_cancellation - end function omp_get_cancellation - - function omp_is_initial_device() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_is_initial_device - end function omp_is_initial_device - - function omp_get_initial_device() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_initial_device - end function omp_get_initial_device - - subroutine omp_init_lock(svar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_init_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) svar - end subroutine omp_init_lock - - subroutine omp_destroy_lock(svar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_destroy_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) svar - end subroutine omp_destroy_lock - - subroutine omp_set_lock(svar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_set_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) svar - end subroutine omp_set_lock - - subroutine omp_unset_lock(svar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_unset_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) svar - end subroutine omp_unset_lock - - function omp_test_lock(svar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_test_lock -!DIR$ ENDIF - use 
omp_lib_kinds - logical (kind=omp_logical_kind) omp_test_lock - integer (kind=omp_lock_kind) svar - end function omp_test_lock - - subroutine omp_init_nest_lock(nvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_init_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_init_nest_lock - - subroutine omp_destroy_nest_lock(nvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_destroy_nest_lock - - subroutine omp_set_nest_lock(nvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_set_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_set_nest_lock - - subroutine omp_unset_nest_lock(nvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_unset_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) nvar - end subroutine omp_unset_nest_lock - - function omp_test_nest_lock(nvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_test_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_test_nest_lock - integer (kind=omp_nest_lock_kind) nvar - end function omp_test_nest_lock - - function omp_get_max_task_priority() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_task_priority - end function omp_get_max_task_priority - -! *** -! *** kmp_* entry points -! *** - - subroutine kmp_set_stacksize(size) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: size - end subroutine kmp_set_stacksize - - subroutine kmp_set_stacksize_s(size) bind(c) - use omp_lib_kinds - integer (kind=kmp_size_t_kind), value :: size - end subroutine kmp_set_stacksize_s - - subroutine kmp_set_blocktime(msec) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: msec - end subroutine kmp_set_blocktime - - subroutine kmp_set_library_serial() bind(c) - end subroutine kmp_set_library_serial - - subroutine kmp_set_library_turnaround() bind(c) - end subroutine kmp_set_library_turnaround - - subroutine kmp_set_library_throughput() bind(c) - end subroutine kmp_set_library_throughput - - subroutine kmp_set_library(libnum) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: libnum - end subroutine kmp_set_library - - subroutine kmp_set_defaults(string) bind(c) - use, intrinsic :: iso_c_binding - character (kind=c_char) :: string(*) - end subroutine kmp_set_defaults - - function kmp_get_stacksize() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_stacksize - end function kmp_get_stacksize - - function kmp_get_stacksize_s() bind(c) - use omp_lib_kinds - integer (kind=kmp_size_t_kind) kmp_get_stacksize_s - end function kmp_get_stacksize_s - - function kmp_get_blocktime() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_blocktime - end function kmp_get_blocktime - - function kmp_get_library() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_library - end function kmp_get_library - - subroutine kmp_set_disp_num_buffers(num) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: num - end subroutine kmp_set_disp_num_buffers - - function kmp_set_affinity(mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_set_affinity - 
integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity - - function kmp_get_affinity(mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity - - function kmp_get_affinity_max_proc() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity_max_proc - end function kmp_get_affinity_max_proc - - subroutine kmp_create_affinity_mask(mask) bind(c) - use omp_lib_kinds - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_create_affinity_mask - - subroutine kmp_destroy_affinity_mask(mask) bind(c) - use omp_lib_kinds - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_destroy_affinity_mask - - function kmp_set_affinity_mask_proc(proc, mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity_mask_proc - - function kmp_unset_affinity_mask_proc(proc, mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_unset_affinity_mask_proc - - function kmp_get_affinity_mask_proc(proc, mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity_mask_proc - - function kmp_malloc(size) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_malloc - integer (kind=kmp_size_t_kind), value :: size - end function kmp_malloc - - function kmp_aligned_malloc(size, alignment) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_aligned_malloc - integer (kind=kmp_size_t_kind), value :: size - integer (kind=kmp_size_t_kind), value :: alignment - end function kmp_aligned_malloc - - function kmp_calloc(nelem, elsize) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_calloc - integer (kind=kmp_size_t_kind), value :: nelem - integer (kind=kmp_size_t_kind), value :: elsize - end function kmp_calloc - - function kmp_realloc(ptr, size) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_realloc - integer (kind=kmp_pointer_kind), value :: ptr - integer (kind=kmp_size_t_kind), value :: size - end function kmp_realloc - - subroutine kmp_free(ptr) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind), value :: ptr - end subroutine kmp_free - - subroutine kmp_set_warnings_on() bind(c) - end subroutine kmp_set_warnings_on - - subroutine kmp_set_warnings_off() bind(c) - end subroutine kmp_set_warnings_off - - function kmp_get_cancellation_status(cancelkind) bind(c) - use omp_lib_kinds - integer (kind=kmp_cancel_kind), value :: cancelkind - logical (kind=omp_logical_kind) kmp_get_cancellation_status - end function kmp_get_cancellation_status - - subroutine omp_init_lock_with_hint(svar, hint) bind(c) - use omp_lib_kinds - integer (kind=omp_lock_kind) svar - integer (kind=omp_lock_hint_kind), value :: hint - end subroutine omp_init_lock_with_hint - - subroutine omp_init_nest_lock_with_hint(nvar, hint) bind(c) - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) nvar - integer (kind=omp_lock_hint_kind), value :: hint - end subroutine omp_init_nest_lock_with_hint - - end interface - - end module omp_lib Index: 
vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/45/omp.h.var =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/45/omp.h.var (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/include/45/omp.h.var (nonexistent) @@ -1,197 +0,0 @@ -/* - * include/45/omp.h.var - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#ifndef __OMP_H -# define __OMP_H - -# define KMP_VERSION_MAJOR @LIBOMP_VERSION_MAJOR@ -# define KMP_VERSION_MINOR @LIBOMP_VERSION_MINOR@ -# define KMP_VERSION_BUILD @LIBOMP_VERSION_BUILD@ -# define KMP_BUILD_DATE "@LIBOMP_BUILD_DATE@" - -# ifdef __cplusplus - extern "C" { -# endif - -# if defined(_WIN32) -# define __KAI_KMPC_CONVENTION __cdecl -# else -# define __KAI_KMPC_CONVENTION -# endif - - /* schedule kind constants */ - typedef enum omp_sched_t { - omp_sched_static = 1, - omp_sched_dynamic = 2, - omp_sched_guided = 3, - omp_sched_auto = 4 - } omp_sched_t; - - /* set API functions */ - extern void __KAI_KMPC_CONVENTION omp_set_num_threads (int); - extern void __KAI_KMPC_CONVENTION omp_set_dynamic (int); - extern void __KAI_KMPC_CONVENTION omp_set_nested (int); - extern void __KAI_KMPC_CONVENTION omp_set_max_active_levels (int); - extern void __KAI_KMPC_CONVENTION omp_set_schedule (omp_sched_t, int); - - /* query API functions */ - extern int __KAI_KMPC_CONVENTION omp_get_num_threads (void); - extern int __KAI_KMPC_CONVENTION omp_get_dynamic (void); - extern int __KAI_KMPC_CONVENTION omp_get_nested (void); - extern int __KAI_KMPC_CONVENTION omp_get_max_threads (void); - extern int __KAI_KMPC_CONVENTION omp_get_thread_num (void); - extern int __KAI_KMPC_CONVENTION omp_get_num_procs (void); - extern int __KAI_KMPC_CONVENTION omp_in_parallel (void); - extern int __KAI_KMPC_CONVENTION omp_in_final (void); - extern int __KAI_KMPC_CONVENTION omp_get_active_level (void); - extern int __KAI_KMPC_CONVENTION omp_get_level (void); - extern int __KAI_KMPC_CONVENTION omp_get_ancestor_thread_num (int); - extern int __KAI_KMPC_CONVENTION omp_get_team_size (int); - extern int __KAI_KMPC_CONVENTION omp_get_thread_limit (void); - extern int __KAI_KMPC_CONVENTION omp_get_max_active_levels (void); - extern void __KAI_KMPC_CONVENTION omp_get_schedule (omp_sched_t *, int *); - extern int __KAI_KMPC_CONVENTION omp_get_max_task_priority (void); - - /* lock API functions */ - typedef struct omp_lock_t { - void * _lk; - } omp_lock_t; - - extern void __KAI_KMPC_CONVENTION omp_init_lock (omp_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_set_lock (omp_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_unset_lock (omp_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_destroy_lock (omp_lock_t *); - extern int __KAI_KMPC_CONVENTION omp_test_lock (omp_lock_t *); - - /* nested lock API functions */ - typedef struct omp_nest_lock_t { - void * _lk; - } omp_nest_lock_t; - - extern void __KAI_KMPC_CONVENTION omp_init_nest_lock (omp_nest_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_set_nest_lock (omp_nest_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_unset_nest_lock (omp_nest_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_destroy_nest_lock (omp_nest_lock_t *); - extern 
int __KAI_KMPC_CONVENTION omp_test_nest_lock (omp_nest_lock_t *); - - /* lock hint type for dynamic user lock */ - typedef enum omp_lock_hint_t { - omp_lock_hint_none = 0, - omp_lock_hint_uncontended = 1, - omp_lock_hint_contended = (1<<1 ), - omp_lock_hint_nonspeculative = (1<<2 ), - omp_lock_hint_speculative = (1<<3 ), - kmp_lock_hint_hle = (1<<16), - kmp_lock_hint_rtm = (1<<17), - kmp_lock_hint_adaptive = (1<<18) - } omp_lock_hint_t; - - /* hinted lock initializers */ - extern void __KAI_KMPC_CONVENTION omp_init_lock_with_hint(omp_lock_t *, omp_lock_hint_t); - extern void __KAI_KMPC_CONVENTION omp_init_nest_lock_with_hint(omp_nest_lock_t *, omp_lock_hint_t); - - /* time API functions */ - extern double __KAI_KMPC_CONVENTION omp_get_wtime (void); - extern double __KAI_KMPC_CONVENTION omp_get_wtick (void); - - /* OpenMP 4.0 */ - extern int __KAI_KMPC_CONVENTION omp_get_default_device (void); - extern void __KAI_KMPC_CONVENTION omp_set_default_device (int); - extern int __KAI_KMPC_CONVENTION omp_is_initial_device (void); - extern int __KAI_KMPC_CONVENTION omp_get_num_devices (void); - extern int __KAI_KMPC_CONVENTION omp_get_num_teams (void); - extern int __KAI_KMPC_CONVENTION omp_get_team_num (void); - extern int __KAI_KMPC_CONVENTION omp_get_cancellation (void); - -# include <stdlib.h> - /* OpenMP 4.5 */ - extern int __KAI_KMPC_CONVENTION omp_get_initial_device (void); - extern void* __KAI_KMPC_CONVENTION omp_target_alloc(size_t, int); - extern void __KAI_KMPC_CONVENTION omp_target_free(void *, int); - extern int __KAI_KMPC_CONVENTION omp_target_is_present(void *, int); - extern int __KAI_KMPC_CONVENTION omp_target_memcpy(void *, void *, size_t, size_t, size_t, int, int); - extern int __KAI_KMPC_CONVENTION omp_target_memcpy_rect(void *, void *, size_t, int, const size_t *, - const size_t *, const size_t *, const size_t *, const size_t *, int, int); - extern int __KAI_KMPC_CONVENTION omp_target_associate_ptr(void *, void *, size_t, size_t, int); - extern int __KAI_KMPC_CONVENTION omp_target_disassociate_ptr(void *, int); - - /* kmp API functions */ - extern int __KAI_KMPC_CONVENTION kmp_get_stacksize (void); - extern void __KAI_KMPC_CONVENTION kmp_set_stacksize (int); - extern size_t __KAI_KMPC_CONVENTION kmp_get_stacksize_s (void); - extern void __KAI_KMPC_CONVENTION kmp_set_stacksize_s (size_t); - extern int __KAI_KMPC_CONVENTION kmp_get_blocktime (void); - extern int __KAI_KMPC_CONVENTION kmp_get_library (void); - extern void __KAI_KMPC_CONVENTION kmp_set_blocktime (int); - extern void __KAI_KMPC_CONVENTION kmp_set_library (int); - extern void __KAI_KMPC_CONVENTION kmp_set_library_serial (void); - extern void __KAI_KMPC_CONVENTION kmp_set_library_turnaround (void); - extern void __KAI_KMPC_CONVENTION kmp_set_library_throughput (void); - extern void __KAI_KMPC_CONVENTION kmp_set_defaults (char const *); - extern void __KAI_KMPC_CONVENTION kmp_set_disp_num_buffers (int); - - /* Intel affinity API */ - typedef void * kmp_affinity_mask_t; - - extern int __KAI_KMPC_CONVENTION kmp_set_affinity (kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_get_affinity (kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_get_affinity_max_proc (void); - extern void __KAI_KMPC_CONVENTION kmp_create_affinity_mask (kmp_affinity_mask_t *); - extern void __KAI_KMPC_CONVENTION kmp_destroy_affinity_mask (kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_set_affinity_mask_proc (int, kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_unset_affinity_mask_proc (int, 
kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_get_affinity_mask_proc (int, kmp_affinity_mask_t *); - - /* OpenMP 4.0 affinity API */ - typedef enum omp_proc_bind_t { - omp_proc_bind_false = 0, - omp_proc_bind_true = 1, - omp_proc_bind_master = 2, - omp_proc_bind_close = 3, - omp_proc_bind_spread = 4 - } omp_proc_bind_t; - - extern omp_proc_bind_t __KAI_KMPC_CONVENTION omp_get_proc_bind (void); - - /* OpenMP 4.5 affinity API */ - extern int __KAI_KMPC_CONVENTION omp_get_num_places (void); - extern int __KAI_KMPC_CONVENTION omp_get_place_num_procs (int); - extern void __KAI_KMPC_CONVENTION omp_get_place_proc_ids (int, int *); - extern int __KAI_KMPC_CONVENTION omp_get_place_num (void); - extern int __KAI_KMPC_CONVENTION omp_get_partition_num_places (void); - extern void __KAI_KMPC_CONVENTION omp_get_partition_place_nums (int *); - - extern void * __KAI_KMPC_CONVENTION kmp_malloc (size_t); - extern void * __KAI_KMPC_CONVENTION kmp_aligned_malloc (size_t, size_t); - extern void * __KAI_KMPC_CONVENTION kmp_calloc (size_t, size_t); - extern void * __KAI_KMPC_CONVENTION kmp_realloc (void *, size_t); - extern void __KAI_KMPC_CONVENTION kmp_free (void *); - - extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void); - extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void); - -# undef __KAI_KMPC_CONVENTION - - /* Warning: - The following typedefs are not standard, deprecated and will be removed in a future release. - */ - typedef int omp_int_t; - typedef double omp_wtime_t; - -# ifdef __cplusplus - } -# endif - -#endif /* __OMP_H */ Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_global.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_global.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_global.cpp (nonexistent) @@ -1,537 +0,0 @@ -/* - * kmp_global.cpp -- KPTS global variables for runtime support library - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#include "kmp.h" -#include "kmp_affinity.h" -#if KMP_USE_HIER_SCHED -#include "kmp_dispatch_hier.h" -#endif - -kmp_key_t __kmp_gtid_threadprivate_key; - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 -kmp_cpuinfo_t __kmp_cpuinfo = {0}; // Not initialized -#endif - -#if KMP_STATS_ENABLED -#include "kmp_stats.h" -// lock for modifying the global __kmp_stats_list -kmp_tas_lock_t __kmp_stats_lock; - -// global list of per thread stats, the head is a sentinel node which -// accumulates all stats produced before __kmp_create_worker is called. 
-kmp_stats_list *__kmp_stats_list; - -// thread local pointer to stats node within list -KMP_THREAD_LOCAL kmp_stats_list *__kmp_stats_thread_ptr = NULL; - -// gives reference tick for all events (considered the 0 tick) -tsc_tick_count __kmp_stats_start_time; -#endif - -/* ----------------------------------------------------- */ -/* INITIALIZATION VARIABLES */ -/* they are synchronized for writing during init, but read anytime */ -volatile int __kmp_init_serial = FALSE; -volatile int __kmp_init_gtid = FALSE; -volatile int __kmp_init_common = FALSE; -volatile int __kmp_init_middle = FALSE; -volatile int __kmp_init_parallel = FALSE; -#if KMP_USE_MONITOR -volatile int __kmp_init_monitor = - 0; /* 1 - launched, 2 - actually started (Windows* OS only) */ -#endif -volatile int __kmp_init_user_locks = FALSE; - -/* list of addresses of allocated caches for commons */ -kmp_cached_addr_t *__kmp_threadpriv_cache_list = NULL; - -int __kmp_init_counter = 0; -int __kmp_root_counter = 0; -int __kmp_version = 0; - -std::atomic<int> __kmp_team_counter = ATOMIC_VAR_INIT(0); -std::atomic<int> __kmp_task_counter = ATOMIC_VAR_INIT(0); - -unsigned int __kmp_init_wait = - KMP_DEFAULT_INIT_WAIT; /* initial number of spin-tests */ -unsigned int __kmp_next_wait = - KMP_DEFAULT_NEXT_WAIT; /* subsequent number of spin-tests */ - -size_t __kmp_stksize = KMP_DEFAULT_STKSIZE; -#if KMP_USE_MONITOR -size_t __kmp_monitor_stksize = 0; // auto adjust -#endif -size_t __kmp_stkoffset = KMP_DEFAULT_STKOFFSET; -int __kmp_stkpadding = KMP_MIN_STKPADDING; - -size_t __kmp_malloc_pool_incr = KMP_DEFAULT_MALLOC_POOL_INCR; - -// Barrier method defaults, settings, and strings. -// branch factor = 2^branch_bits (only relevant for tree & hyper barrier types) -kmp_uint32 __kmp_barrier_gather_bb_dflt = 2; -/* branch_factor = 4 */ /* hyper2: C78980 */ -kmp_uint32 __kmp_barrier_release_bb_dflt = 2; -/* branch_factor = 4 */ /* hyper2: C78980 */ - -kmp_bar_pat_e __kmp_barrier_gather_pat_dflt = bp_hyper_bar; -/* hyper2: C78980 */ -kmp_bar_pat_e __kmp_barrier_release_pat_dflt = bp_hyper_bar; -/* hyper2: C78980 */ - -kmp_uint32 __kmp_barrier_gather_branch_bits[bs_last_barrier] = {0}; -kmp_uint32 __kmp_barrier_release_branch_bits[bs_last_barrier] = {0}; -kmp_bar_pat_e __kmp_barrier_gather_pattern[bs_last_barrier] = {bp_linear_bar}; -kmp_bar_pat_e __kmp_barrier_release_pattern[bs_last_barrier] = {bp_linear_bar}; -char const *__kmp_barrier_branch_bit_env_name[bs_last_barrier] = { - "KMP_PLAIN_BARRIER", "KMP_FORKJOIN_BARRIER" -#if KMP_FAST_REDUCTION_BARRIER - , - "KMP_REDUCTION_BARRIER" -#endif // KMP_FAST_REDUCTION_BARRIER -}; -char const *__kmp_barrier_pattern_env_name[bs_last_barrier] = { - "KMP_PLAIN_BARRIER_PATTERN", "KMP_FORKJOIN_BARRIER_PATTERN" -#if KMP_FAST_REDUCTION_BARRIER - , - "KMP_REDUCTION_BARRIER_PATTERN" -#endif // KMP_FAST_REDUCTION_BARRIER -}; -char const *__kmp_barrier_type_name[bs_last_barrier] = {"plain", "forkjoin" -#if KMP_FAST_REDUCTION_BARRIER - , - "reduction" -#endif // KMP_FAST_REDUCTION_BARRIER -}; -char const *__kmp_barrier_pattern_name[bp_last_bar] = {"linear", "tree", - "hyper", "hierarchical"}; - -int __kmp_allThreadsSpecified = 0; -size_t __kmp_align_alloc = CACHE_LINE; - -int __kmp_generate_warnings = kmp_warnings_low; -int __kmp_reserve_warn = 0; -int __kmp_xproc = 0; -int __kmp_avail_proc = 0; -size_t __kmp_sys_min_stksize = KMP_MIN_STKSIZE; -int __kmp_sys_max_nth = KMP_MAX_NTH; -int __kmp_max_nth = 0; -int __kmp_cg_max_nth = 0; -int __kmp_teams_max_nth = 0; -int __kmp_threads_capacity = 0; -int __kmp_dflt_team_nth = 0; -int 
__kmp_dflt_team_nth_ub = 0; -int __kmp_tp_capacity = 0; -int __kmp_tp_cached = 0; -int __kmp_dflt_nested = FALSE; -int __kmp_dispatch_num_buffers = KMP_DFLT_DISP_NUM_BUFF; -int __kmp_dflt_max_active_levels = - KMP_MAX_ACTIVE_LEVELS_LIMIT; /* max_active_levels limit */ -#if KMP_NESTED_HOT_TEAMS -int __kmp_hot_teams_mode = 0; /* 0 - free extra threads when reduced */ -/* 1 - keep extra threads when reduced */ -int __kmp_hot_teams_max_level = 1; /* nesting level of hot teams */ -#endif -enum library_type __kmp_library = library_none; -enum sched_type __kmp_sched = - kmp_sch_default; /* scheduling method for runtime scheduling */ -enum sched_type __kmp_static = - kmp_sch_static_greedy; /* default static scheduling method */ -enum sched_type __kmp_guided = - kmp_sch_guided_iterative_chunked; /* default guided scheduling method */ -enum sched_type __kmp_auto = - kmp_sch_guided_analytical_chunked; /* default auto scheduling method */ -#if KMP_USE_HIER_SCHED -int __kmp_dispatch_hand_threading = 0; -int __kmp_hier_max_units[kmp_hier_layer_e::LAYER_LAST + 1]; -int __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_LAST + 1]; -kmp_hier_sched_env_t __kmp_hier_scheds = {0, 0, NULL, NULL, NULL}; -#endif -int __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME; -#if KMP_USE_MONITOR -int __kmp_monitor_wakeups = KMP_MIN_MONITOR_WAKEUPS; -int __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(KMP_DEFAULT_BLOCKTIME, - KMP_MIN_MONITOR_WAKEUPS); -#endif -#ifdef KMP_ADJUST_BLOCKTIME -int __kmp_zero_bt = FALSE; -#endif /* KMP_ADJUST_BLOCKTIME */ -#ifdef KMP_DFLT_NTH_CORES -int __kmp_ncores = 0; -#endif -int __kmp_chunk = 0; -int __kmp_abort_delay = 0; -#if KMP_OS_LINUX && defined(KMP_TDATA_GTID) -int __kmp_gtid_mode = 3; /* use __declspec(thread) TLS to store gtid */ -int __kmp_adjust_gtid_mode = FALSE; -#elif KMP_OS_WINDOWS -int __kmp_gtid_mode = 2; /* use TLS functions to store gtid */ -int __kmp_adjust_gtid_mode = FALSE; -#else -int __kmp_gtid_mode = 0; /* select method to get gtid based on #threads */ -int __kmp_adjust_gtid_mode = TRUE; -#endif /* KMP_OS_LINUX && defined(KMP_TDATA_GTID) */ -#ifdef KMP_TDATA_GTID -KMP_THREAD_LOCAL int __kmp_gtid = KMP_GTID_DNE; -#endif /* KMP_TDATA_GTID */ -int __kmp_tls_gtid_min = INT_MAX; -int __kmp_foreign_tp = TRUE; -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 -int __kmp_inherit_fp_control = TRUE; -kmp_int16 __kmp_init_x87_fpu_control_word = 0; -kmp_uint32 __kmp_init_mxcsr = 0; -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -#ifdef USE_LOAD_BALANCE -double __kmp_load_balance_interval = 1.0; -#endif /* USE_LOAD_BALANCE */ - -kmp_nested_nthreads_t __kmp_nested_nth = {NULL, 0, 0}; - -#if KMP_USE_ADAPTIVE_LOCKS - -kmp_adaptive_backoff_params_t __kmp_adaptive_backoff_params = { - 1, 1024}; // TODO: tune it! 
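As an aside on the barrier defaults declared above, the "branch factor = 2^branch_bits" comment means that the KMP_PLAIN_BARRIER/KMP_FORKJOIN_BARRIER settings store branch bits, not the tree fan-out itself. A self-contained sketch of that relationship, with kmp_uint32 stood in by a local typedef rather than the runtime's own header:

    #include <stdio.h>

    typedef unsigned int kmp_uint32; /* stand-in for the runtime's type */

    /* b branch bits yield a 2^b-way gather/release tree. */
    static kmp_uint32 branch_factor(kmp_uint32 branch_bits) {
      return (kmp_uint32)1 << branch_bits;
    }

    int main(void) {
      printf("bits=2 -> fan-out %u\n", branch_factor(2)); /* 4, the default */
      printf("bits=3 -> fan-out %u\n", branch_factor(3)); /* 8 */
      return 0;
    }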
- -#if KMP_DEBUG_ADAPTIVE_LOCKS -const char *__kmp_speculative_statsfile = "-"; -#endif - -#endif // KMP_USE_ADAPTIVE_LOCKS - -#if OMP_40_ENABLED -int __kmp_display_env = FALSE; -int __kmp_display_env_verbose = FALSE; -int __kmp_omp_cancellation = FALSE; -#endif - -/* map OMP 3.0 schedule types with our internal schedule types */ -enum sched_type __kmp_sch_map[kmp_sched_upper - kmp_sched_lower_ext + - kmp_sched_upper_std - kmp_sched_lower - 2] = { - kmp_sch_static_chunked, // ==> kmp_sched_static = 1 - kmp_sch_dynamic_chunked, // ==> kmp_sched_dynamic = 2 - kmp_sch_guided_chunked, // ==> kmp_sched_guided = 3 - kmp_sch_auto, // ==> kmp_sched_auto = 4 - kmp_sch_trapezoidal // ==> kmp_sched_trapezoidal = 101 - // will likely not be used, introduced here just to debug the code - // of public intel extension schedules -}; - -#if KMP_OS_LINUX -enum clock_function_type __kmp_clock_function; -int __kmp_clock_function_param; -#endif /* KMP_OS_LINUX */ - -#if KMP_MIC_SUPPORTED -enum mic_type __kmp_mic_type = non_mic; -#endif - -#if KMP_AFFINITY_SUPPORTED - -KMPAffinity *__kmp_affinity_dispatch = NULL; - -#if KMP_USE_HWLOC -int __kmp_hwloc_error = FALSE; -hwloc_topology_t __kmp_hwloc_topology = NULL; -int __kmp_numa_detected = FALSE; -int __kmp_tile_depth = 0; -#endif - -#if KMP_OS_WINDOWS -#if KMP_GROUP_AFFINITY -int __kmp_num_proc_groups = 1; -#endif /* KMP_GROUP_AFFINITY */ -kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount = NULL; -kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount = NULL; -kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity = NULL; -kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity = NULL; -#endif /* KMP_OS_WINDOWS */ - -size_t __kmp_affin_mask_size = 0; -enum affinity_type __kmp_affinity_type = affinity_default; -enum affinity_gran __kmp_affinity_gran = affinity_gran_default; -int __kmp_affinity_gran_levels = -1; -int __kmp_affinity_dups = TRUE; -enum affinity_top_method __kmp_affinity_top_method = - affinity_top_method_default; -int __kmp_affinity_compact = 0; -int __kmp_affinity_offset = 0; -int __kmp_affinity_verbose = FALSE; -int __kmp_affinity_warnings = TRUE; -int __kmp_affinity_respect_mask = affinity_respect_mask_default; -char *__kmp_affinity_proclist = NULL; -kmp_affin_mask_t *__kmp_affinity_masks = NULL; -unsigned __kmp_affinity_num_masks = 0; - -char *__kmp_cpuinfo_file = NULL; - -#endif /* KMP_AFFINITY_SUPPORTED */ - -#if OMP_40_ENABLED -kmp_nested_proc_bind_t __kmp_nested_proc_bind = {NULL, 0, 0}; -int __kmp_affinity_num_places = 0; -#endif - -#if OMP_50_ENABLED -int __kmp_display_affinity = FALSE; -char *__kmp_affinity_format = NULL; -#endif // OMP_50_ENABLED - -kmp_hws_item_t __kmp_hws_socket = {0, 0}; -kmp_hws_item_t __kmp_hws_node = {0, 0}; -kmp_hws_item_t __kmp_hws_tile = {0, 0}; -kmp_hws_item_t __kmp_hws_core = {0, 0}; -kmp_hws_item_t __kmp_hws_proc = {0, 0}; -int __kmp_hws_requested = 0; -int __kmp_hws_abs_flag = 0; // absolute or per-item number requested - -#if OMP_40_ENABLED -kmp_int32 __kmp_default_device = 0; -#endif - -kmp_tasking_mode_t __kmp_tasking_mode = tskm_task_teams; -#if OMP_45_ENABLED -kmp_int32 __kmp_max_task_priority = 0; -kmp_uint64 __kmp_taskloop_min_tasks = 0; -#endif - -#if OMP_50_ENABLED -int __kmp_memkind_available = 0; -int __kmp_hbw_mem_available = 0; -const omp_allocator_t *OMP_NULL_ALLOCATOR = NULL; -const omp_allocator_t *omp_default_mem_alloc = (const omp_allocator_t *)1; -const omp_allocator_t *omp_large_cap_mem_alloc = (const omp_allocator_t *)2; -const omp_allocator_t 
*omp_const_mem_alloc = (const omp_allocator_t *)3; -const omp_allocator_t *omp_high_bw_mem_alloc = (const omp_allocator_t *)4; -const omp_allocator_t *omp_low_lat_mem_alloc = (const omp_allocator_t *)5; -const omp_allocator_t *omp_cgroup_mem_alloc = (const omp_allocator_t *)6; -const omp_allocator_t *omp_pteam_mem_alloc = (const omp_allocator_t *)7; -const omp_allocator_t *omp_thread_mem_alloc = (const omp_allocator_t *)8; -void *const *__kmp_def_allocator = omp_default_mem_alloc; -#endif - -/* This check ensures that the compiler is passing the correct data type for the - flags formal parameter of the function kmpc_omp_task_alloc(). If the type is - not a 4-byte type, then give an error message about a non-positive length - array pointing here. If that happens, the kmp_tasking_flags_t structure must - be redefined to have exactly 32 bits. */ -KMP_BUILD_ASSERT(sizeof(kmp_tasking_flags_t) == 4); - -int __kmp_task_stealing_constraint = 1; /* Constrain task stealing by default */ - -#ifdef DEBUG_SUSPEND -int __kmp_suspend_count = 0; -#endif - -int __kmp_settings = FALSE; -int __kmp_duplicate_library_ok = 0; -#if USE_ITT_BUILD -int __kmp_forkjoin_frames = 1; -int __kmp_forkjoin_frames_mode = 3; -#endif -PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method = - reduction_method_not_defined; -int __kmp_determ_red = FALSE; - -#ifdef KMP_DEBUG -int kmp_a_debug = 0; -int kmp_b_debug = 0; -int kmp_c_debug = 0; -int kmp_d_debug = 0; -int kmp_e_debug = 0; -int kmp_f_debug = 0; -int kmp_diag = 0; -#endif - -/* For debug information logging using rotating buffer */ -int __kmp_debug_buf = - FALSE; /* TRUE means use buffer, FALSE means print to stderr */ -int __kmp_debug_buf_lines = - KMP_DEBUG_BUF_LINES_INIT; /* Lines of debug stored in buffer */ -int __kmp_debug_buf_chars = - KMP_DEBUG_BUF_CHARS_INIT; /* Characters allowed per line in buffer */ -int __kmp_debug_buf_atomic = - FALSE; /* TRUE means use atomic update of buffer entry pointer */ - -char *__kmp_debug_buffer = NULL; /* Debug buffer itself */ -std::atomic<int> __kmp_debug_count = - ATOMIC_VAR_INIT(0); /* number of lines printed in buffer so far */ -int __kmp_debug_buf_warn_chars = - 0; /* Keep track of char increase recommended in warnings */ -/* end rotating debug buffer */ - -#ifdef KMP_DEBUG -int __kmp_par_range; /* +1 => only go par for constructs in range */ -/* -1 => only go par for constructs outside range */ -char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN] = {'\0'}; -char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN] = {'\0'}; -int __kmp_par_range_lb = 0; -int __kmp_par_range_ub = INT_MAX; -#endif /* KMP_DEBUG */ - -/* For printing out dynamic storage map for threads and teams */ -int __kmp_storage_map = - FALSE; /* True means print storage map for threads and teams */ -int __kmp_storage_map_verbose = - FALSE; /* True means storage map includes placement info */ -int __kmp_storage_map_verbose_specified = FALSE; -/* Initialize the library data structures when we fork a child process, defaults - * to TRUE */ -int __kmp_need_register_atfork = - TRUE; /* At initialization, call pthread_atfork to install fork handler */ -int __kmp_need_register_atfork_specified = TRUE; - -int __kmp_env_stksize = FALSE; /* KMP_STACKSIZE specified? */ -int __kmp_env_blocktime = FALSE; /* KMP_BLOCKTIME specified? */ -int __kmp_env_checks = FALSE; /* KMP_CHECKS specified? */ -int __kmp_env_consistency_check = FALSE; /* KMP_CONSISTENCY_CHECK specified?
*/ - -kmp_uint32 __kmp_yield_init = KMP_INIT_WAIT; -kmp_uint32 __kmp_yield_next = KMP_NEXT_WAIT; - -#if KMP_USE_MONITOR -kmp_uint32 __kmp_yielding_on = 1; -#endif -#if KMP_OS_CNK -kmp_uint32 __kmp_yield_cycle = 0; -#else -kmp_uint32 __kmp_yield_cycle = 1; /* Yield-cycle is on by default */ -#endif -kmp_int32 __kmp_yield_on_count = - 10; /* By default, yielding is on for 10 monitor periods. */ -kmp_int32 __kmp_yield_off_count = - 1; /* By default, yielding is off for 1 monitor period. */ - -/* ------------------------------------------------------ */ -/* STATE mostly synchronized with global lock */ -/* data written to rarely by masters, read often by workers */ -/* TODO: None of this global padding stuff works consistently because the order - of declaration is not necessarily correlated to storage order. To fix this, - all the important globals must be put in a big structure instead. */ -KMP_ALIGN_CACHE -kmp_info_t **__kmp_threads = NULL; -kmp_root_t **__kmp_root = NULL; - -/* data read/written to often by masters */ -KMP_ALIGN_CACHE -volatile int __kmp_nth = 0; -volatile int __kmp_all_nth = 0; -int __kmp_thread_pool_nth = 0; -volatile kmp_info_t *__kmp_thread_pool = NULL; -volatile kmp_team_t *__kmp_team_pool = NULL; - -KMP_ALIGN_CACHE -std::atomic<int> __kmp_thread_pool_active_nth = ATOMIC_VAR_INIT(0); - -/* ------------------------------------------------- - * GLOBAL/ROOT STATE */ -KMP_ALIGN_CACHE -kmp_global_t __kmp_global = {{0}}; - -/* ----------------------------------------------- */ -/* GLOBAL SYNCHRONIZATION LOCKS */ -/* TODO verify the need for these locks and if they need to be global */ - -#if KMP_USE_INTERNODE_ALIGNMENT -/* Multinode systems have larger cache line granularity which can cause - * false sharing if the alignment is not large enough for these locks */ -KMP_ALIGN_CACHE_INTERNODE - -KMP_BOOTSTRAP_LOCK_INIT(__kmp_initz_lock); /* Control initializations */ -KMP_ALIGN_CACHE_INTERNODE -KMP_BOOTSTRAP_LOCK_INIT(__kmp_forkjoin_lock); /* control fork/join access */ -KMP_ALIGN_CACHE_INTERNODE -KMP_BOOTSTRAP_LOCK_INIT(__kmp_exit_lock); /* exit() is not always thread-safe */ -#if KMP_USE_MONITOR -/* control monitor thread creation */ -KMP_ALIGN_CACHE_INTERNODE -KMP_BOOTSTRAP_LOCK_INIT(__kmp_monitor_lock); -#endif -/* used for the hack to allow threadprivate cache and __kmp_threads expansion - to co-exist */ -KMP_ALIGN_CACHE_INTERNODE -KMP_BOOTSTRAP_LOCK_INIT(__kmp_tp_cached_lock); - -KMP_ALIGN_CACHE_INTERNODE -KMP_LOCK_INIT(__kmp_global_lock); /* Control OS/global access */ -KMP_ALIGN_CACHE_INTERNODE -kmp_queuing_lock_t __kmp_dispatch_lock; /* Control dispatch access */ -KMP_ALIGN_CACHE_INTERNODE -KMP_LOCK_INIT(__kmp_debug_lock); /* Control I/O access for KMP_DEBUG */ -#else -KMP_ALIGN_CACHE - -KMP_BOOTSTRAP_LOCK_INIT(__kmp_initz_lock); /* Control initializations */ -KMP_BOOTSTRAP_LOCK_INIT(__kmp_forkjoin_lock); /* control fork/join access */ -KMP_BOOTSTRAP_LOCK_INIT(__kmp_exit_lock); /* exit() is not always thread-safe */ -#if KMP_USE_MONITOR -/* control monitor thread creation */ -KMP_BOOTSTRAP_LOCK_INIT(__kmp_monitor_lock); -#endif -/* used for the hack to allow threadprivate cache and __kmp_threads expansion - to co-exist */ -KMP_BOOTSTRAP_LOCK_INIT(__kmp_tp_cached_lock); - -KMP_ALIGN(128) -KMP_LOCK_INIT(__kmp_global_lock); /* Control OS/global access */ -KMP_ALIGN(128) -kmp_queuing_lock_t __kmp_dispatch_lock; /* Control dispatch access */ -KMP_ALIGN(128) -KMP_LOCK_INIT(__kmp_debug_lock); /* Control I/O access for KMP_DEBUG */ -#endif - -/*
----------------------------------------------- */ - -#if KMP_HANDLE_SIGNALS -/* Signal handling is disabled by default, because it confuses users: In case of - sigsegv (or other trouble) in user code signal handler catches the signal, - which then "appears" in the monitor thread (when the monitor executes raise() - function). Users see signal in the monitor thread and blame OpenMP RTL. - - Grant said signal handling required on some older OSes (Irix?) supported by - KAI, because bad applications hung but not aborted. Currently it is not a - problem for Linux* OS, OS X* and Windows* OS. - - Grant: Found new hangs for EL4, EL5, and a Fedora Core machine. So I'm - putting the default back for now to see if that fixes hangs on those - machines. - - 2010-04013 Lev: It was a bug in Fortran RTL. Fortran RTL prints a kind of - stack backtrace when program is aborting, but the code is not signal-safe. - When multiple signals raised at the same time (which occurs in dynamic - negative tests because all the worker threads detects the same error), - Fortran RTL may hang. The bug finally fixed in Fortran RTL library provided - by Steve R., and will be available soon. */ -int __kmp_handle_signals = FALSE; -#endif - -#ifdef DEBUG_SUSPEND -int get_suspend_count_(void) { - int count = __kmp_suspend_count; - __kmp_suspend_count = 0; - return count; -} -void set_suspend_count_(int *value) { __kmp_suspend_count = *value; } -#endif - -// Symbols for MS mutual detection. -int _You_must_link_with_exactly_one_OpenMP_library = 1; -int _You_must_link_with_Intel_OpenMP_library = 1; -#if KMP_OS_WINDOWS && (KMP_VERSION_MAJOR > 4) -int _You_must_link_with_Microsoft_OpenMP_library = 1; -#endif - -#if OMP_50_ENABLED -kmp_target_offload_kind_t __kmp_target_offload = tgt_default; -#endif -// end of file // Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_global.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_cancel.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_cancel.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_cancel.cpp (nonexistent) @@ -1,336 +0,0 @@ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#include "kmp.h" -#include "kmp_i18n.h" -#include "kmp_io.h" -#include "kmp_str.h" -#if OMPT_SUPPORT -#include "ompt-specific.h" -#endif - -#if OMP_40_ENABLED - -/*! -@ingroup CANCELLATION -@param loc_ref location of the original task directive -@param gtid Global thread ID of encountering thread -@param cncl_kind Cancellation kind (parallel, for, sections, taskgroup) - -@return returns true if the cancellation request has been activated and the -execution thread needs to proceed to the end of the canceled region. - -Request cancellation of the binding OpenMP region. 
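The request is installed with an atomic compare-and-exchange, so only the first request for a region (or a repeated request of the same kind) activates cancellation; when the OMP_CANCELLATION ICV is false, the request is ignored entirely.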
-*/ -kmp_int32 __kmpc_cancel(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind) { - kmp_info_t *this_thr = __kmp_threads[gtid]; - - KC_TRACE(10, ("__kmpc_cancel: T#%d request %d OMP_CANCELLATION=%d\n", gtid, - cncl_kind, __kmp_omp_cancellation)); - - KMP_DEBUG_ASSERT(cncl_kind != cancel_noreq); - KMP_DEBUG_ASSERT(cncl_kind == cancel_parallel || cncl_kind == cancel_loop || - cncl_kind == cancel_sections || - cncl_kind == cancel_taskgroup); - KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid); - - if (__kmp_omp_cancellation) { - switch (cncl_kind) { - case cancel_parallel: - case cancel_loop: - case cancel_sections: - // cancellation requests for parallel and worksharing constructs - // are handled through the team structure - { - kmp_team_t *this_team = this_thr->th.th_team; - KMP_DEBUG_ASSERT(this_team); - kmp_int32 old = cancel_noreq; - this_team->t.t_cancel_request.compare_exchange_strong(old, cncl_kind); - if (old == cancel_noreq || old == cncl_kind) { -// we do not have a cancellation request in this team or we do have -// one that matches the current request -> cancel -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.ompt_callback_cancel) { - ompt_data_t *task_data; - __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, - NULL); - ompt_cancel_flag_t type = ompt_cancel_parallel; - if (cncl_kind == cancel_parallel) - type = ompt_cancel_parallel; - else if (cncl_kind == cancel_loop) - type = ompt_cancel_loop; - else if (cncl_kind == cancel_sections) - type = ompt_cancel_sections; - ompt_callbacks.ompt_callback(ompt_callback_cancel)( - task_data, type | ompt_cancel_activated, - OMPT_GET_RETURN_ADDRESS(0)); - } -#endif - return 1 /* true */; - } - break; - } - case cancel_taskgroup: - // cancellation requests for a task group - // are handled through the taskgroup structure - { - kmp_taskdata_t *task; - kmp_taskgroup_t *taskgroup; - - task = this_thr->th.th_current_task; - KMP_DEBUG_ASSERT(task); - - taskgroup = task->td_taskgroup; - if (taskgroup) { - kmp_int32 old = cancel_noreq; - taskgroup->cancel_request.compare_exchange_strong(old, cncl_kind); - if (old == cancel_noreq || old == cncl_kind) { -// we do not have a cancellation request in this taskgroup or we do -// have one that matches the current request -> cancel -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.ompt_callback_cancel) { - ompt_data_t *task_data; - __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, - NULL); - ompt_callbacks.ompt_callback(ompt_callback_cancel)( - task_data, ompt_cancel_taskgroup | ompt_cancel_activated, - OMPT_GET_RETURN_ADDRESS(0)); - } -#endif - return 1 /* true */; - } - } else { - // TODO: what needs to happen here? - // the specification disallows cancellation w/o taskgroups - // so we might do anything here, let's abort for now - KMP_ASSERT(0 /* false */); - } - } - break; - default: - KMP_ASSERT(0 /* false */); - } - } - - // ICV OMP_CANCELLATION=false, so we ignored this cancel request - KMP_DEBUG_ASSERT(!__kmp_omp_cancellation); - return 0 /* false */; -} - -/*! -@ingroup CANCELLATION -@param loc_ref location of the original task directive -@param gtid Global thread ID of encountering thread -@param cncl_kind Cancellation kind (parallel, for, sections, taskgroup) - -@return returns true if a matching cancellation request has been flagged in the -RTL and the encountering thread has to cancel.. - -Cancellation point for the encountering thread. 
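A cancellation point only fires for a pending request of its own kind: with no request pending it is a no-op, and a pending request of a different kind trips an assertion in debug builds.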
-*/ -kmp_int32 __kmpc_cancellationpoint(ident_t *loc_ref, kmp_int32 gtid, - kmp_int32 cncl_kind) { - kmp_info_t *this_thr = __kmp_threads[gtid]; - - KC_TRACE(10, - ("__kmpc_cancellationpoint: T#%d request %d OMP_CANCELLATION=%d\n", - gtid, cncl_kind, __kmp_omp_cancellation)); - - KMP_DEBUG_ASSERT(cncl_kind != cancel_noreq); - KMP_DEBUG_ASSERT(cncl_kind == cancel_parallel || cncl_kind == cancel_loop || - cncl_kind == cancel_sections || - cncl_kind == cancel_taskgroup); - KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid); - - if (__kmp_omp_cancellation) { - switch (cncl_kind) { - case cancel_parallel: - case cancel_loop: - case cancel_sections: - // cancellation requests for parallel and worksharing constructs - // are handled through the team structure - { - kmp_team_t *this_team = this_thr->th.th_team; - KMP_DEBUG_ASSERT(this_team); - if (this_team->t.t_cancel_request) { - if (cncl_kind == this_team->t.t_cancel_request) { -// the request in the team structure matches the type of -// cancellation point so we can cancel -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.ompt_callback_cancel) { - ompt_data_t *task_data; - __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, - NULL); - ompt_cancel_flag_t type = ompt_cancel_parallel; - if (cncl_kind == cancel_parallel) - type = ompt_cancel_parallel; - else if (cncl_kind == cancel_loop) - type = ompt_cancel_loop; - else if (cncl_kind == cancel_sections) - type = ompt_cancel_sections; - ompt_callbacks.ompt_callback(ompt_callback_cancel)( - task_data, type | ompt_cancel_detected, - OMPT_GET_RETURN_ADDRESS(0)); - } -#endif - return 1 /* true */; - } - KMP_ASSERT(0 /* false */); - } else { - // we do not have a cancellation request pending, so we just - // ignore this cancellation point - return 0; - } - break; - } - case cancel_taskgroup: - // cancellation requests for a task group - // are handled through the taskgroup structure - { - kmp_taskdata_t *task; - kmp_taskgroup_t *taskgroup; - - task = this_thr->th.th_current_task; - KMP_DEBUG_ASSERT(task); - - taskgroup = task->td_taskgroup; - if (taskgroup) { -// return the current status of cancellation for the taskgroup -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.ompt_callback_cancel && - !!taskgroup->cancel_request) { - ompt_data_t *task_data; - __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, - NULL); - ompt_callbacks.ompt_callback(ompt_callback_cancel)( - task_data, ompt_cancel_taskgroup | ompt_cancel_detected, - OMPT_GET_RETURN_ADDRESS(0)); - } -#endif - return !!taskgroup->cancel_request; - } else { - // if a cancellation point is encountered by a task that does not - // belong to a taskgroup, it is OK to ignore it - return 0 /* false */; - } - } - default: - KMP_ASSERT(0 /* false */); - } - } - - // ICV OMP_CANCELLATION=false, so we ignore the cancellation point - KMP_DEBUG_ASSERT(!__kmp_omp_cancellation); - return 0 /* false */; -} - -/*! -@ingroup CANCELLATION -@param loc_ref location of the original task directive -@param gtid Global thread ID of encountering thread - -@return returns true if a matching cancellation request has been flagged in the -RTL and the encountering thread has to cancel.. - -Barrier with cancellation point to send threads from the barrier to the -end of the parallel region. Needs a special code pattern as documented -in the design document for the cancellation feature. 
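In short: all threads first join a regular barrier; if a parallel or worksharing cancellation request is then pending, one or two further barriers guarantee that every thread has observed the flag before it is reset to cancel_noreq.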
-*/ -kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 gtid) { - int ret = 0 /* false */; - kmp_info_t *this_thr = __kmp_threads[gtid]; - kmp_team_t *this_team = this_thr->th.th_team; - - KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid); - - // call into the standard barrier - __kmpc_barrier(loc, gtid); - - // if cancellation is active, check cancellation flag - if (__kmp_omp_cancellation) { - // depending on which construct to cancel, check the flag and - // reset the flag - switch (KMP_ATOMIC_LD_RLX(&(this_team->t.t_cancel_request))) { - case cancel_parallel: - ret = 1; - // ensure that threads have checked the flag, when - // leaving the above barrier - __kmpc_barrier(loc, gtid); - this_team->t.t_cancel_request = cancel_noreq; - // the next barrier is the fork/join barrier, which - // synchronizes the threads leaving here - break; - case cancel_loop: - case cancel_sections: - ret = 1; - // ensure that threads have checked the flag, when - // leaving the above barrier - __kmpc_barrier(loc, gtid); - this_team->t.t_cancel_request = cancel_noreq; - // synchronize the threads again to make sure we do not have any run-away - // threads that cause a race on the cancellation flag - __kmpc_barrier(loc, gtid); - break; - case cancel_taskgroup: - // this case should not occur - KMP_ASSERT(0 /* false */); - break; - case cancel_noreq: - // do nothing - break; - default: - KMP_ASSERT(0 /* false */); - } - } - - return ret; -} - -/*! -@ingroup CANCELLATION -@param loc_ref location of the original task directive -@param gtid Global thread ID of encountering thread - -@return returns true if a matching cancellation request has been flagged in the -RTL and the encountering thread has to cancel.. - -Query function to query the current status of cancellation requests. -Can be used to implement the following pattern: - -if (kmp_get_cancellation_status(kmp_cancel_parallel)) { - perform_cleanup(); - #pragma omp cancellation point parallel -} -*/ -int __kmp_get_cancellation_status(int cancel_kind) { - if (__kmp_omp_cancellation) { - kmp_info_t *this_thr = __kmp_entry_thread(); - - switch (cancel_kind) { - case cancel_parallel: - case cancel_loop: - case cancel_sections: { - kmp_team_t *this_team = this_thr->th.th_team; - return this_team->t.t_cancel_request == cancel_kind; - } - case cancel_taskgroup: { - kmp_taskdata_t *task; - kmp_taskgroup_t *taskgroup; - task = this_thr->th.th_current_task; - taskgroup = task->td_taskgroup; - return taskgroup && taskgroup->cancel_request; - } - } - } - - return 0 /* false */; -} - -#endif Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_cancel.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/z_Linux_util.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/z_Linux_util.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/z_Linux_util.cpp (nonexistent) @@ -1,2422 +0,0 @@ -/* - * z_Linux_util.cpp -- platform specific routines. 
- */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#include "kmp.h" -#include "kmp_affinity.h" -#include "kmp_i18n.h" -#include "kmp_io.h" -#include "kmp_itt.h" -#include "kmp_lock.h" -#include "kmp_stats.h" -#include "kmp_str.h" -#include "kmp_wait_release.h" -#include "kmp_wrapper_getpid.h" - -#if !KMP_OS_DRAGONFLY && !KMP_OS_FREEBSD && !KMP_OS_NETBSD && !KMP_OS_OPENBSD -#include <alloca.h> -#endif -#include <math.h> // HUGE_VAL. -#include <sys/resource.h> -#include <sys/syscall.h> -#include <sys/time.h> -#include <sys/times.h> -#include <unistd.h> - -#if KMP_OS_LINUX && !KMP_OS_CNK -#include <sys/sysinfo.h> -#if KMP_USE_FUTEX -// We should really include <futex.h>, but that causes compatibility problems on -// different Linux* OS distributions that either require that you include (or -// break when you try to include) <sys/futex.h>. Since all we need is the two -// macros below (which are part of the kernel ABI, so can't change) we just -// define the constants here and don't include <futex.h> -#ifndef FUTEX_WAIT -#define FUTEX_WAIT 0 -#endif -#ifndef FUTEX_WAKE -#define FUTEX_WAKE 1 -#endif -#endif -#elif KMP_OS_DARWIN -#include <mach/mach.h> -#include <sys/sysctl.h> -#elif KMP_OS_DRAGONFLY || KMP_OS_FREEBSD -#include <pthread_np.h> -#elif KMP_OS_NETBSD -#include <sys/types.h> -#include <sys/sysctl.h> -#endif - -#include <ctype.h> -#include <dirent.h> -#include <fcntl.h> - -#include "tsan_annotations.h" - -struct kmp_sys_timer { - struct timespec start; -}; - -// Convert timespec to nanoseconds. -#define TS2NS(timespec) (((timespec).tv_sec * 1e9) + (timespec).tv_nsec) - -static struct kmp_sys_timer __kmp_sys_timer_data; - -#if KMP_HANDLE_SIGNALS -typedef void (*sig_func_t)(int); -STATIC_EFI2_WORKAROUND struct sigaction __kmp_sighldrs[NSIG]; -static sigset_t __kmp_sigset; -#endif - -static int __kmp_init_runtime = FALSE; - -static int __kmp_fork_count = 0; - -static pthread_condattr_t __kmp_suspend_cond_attr; -static pthread_mutexattr_t __kmp_suspend_mutex_attr; - -static kmp_cond_align_t __kmp_wait_cv; -static kmp_mutex_align_t __kmp_wait_mx; - -kmp_uint64 __kmp_ticks_per_msec = 1000000; - -#ifdef DEBUG_SUSPEND -static void __kmp_print_cond(char *buffer, kmp_cond_align_t *cond) { - KMP_SNPRINTF(buffer, 128, "(cond (lock (%ld, %d)), (descr (%p)))", - cond->c_cond.__c_lock.__status, cond->c_cond.__c_lock.__spinlock, - cond->c_cond.__c_waiting); -} -#endif - -#if (KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED) - -/* Affinity support */ - -void __kmp_affinity_bind_thread(int which) { - KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), - "Illegal set affinity operation when not capable"); - - kmp_affin_mask_t *mask; - KMP_CPU_ALLOC_ON_STACK(mask); - KMP_CPU_ZERO(mask); - KMP_CPU_SET(which, mask); - __kmp_set_system_affinity(mask, TRUE); - KMP_CPU_FREE_FROM_STACK(mask); -} - -/* Determine if we can access affinity functionality on this version of - * Linux* OS by checking __NR_sched_{get,set}affinity system calls, and set - * __kmp_affin_mask_size to the appropriate value (0 means not capable). */ -void __kmp_affinity_determine_capable(const char *env_var) { -// Check and see if the OS supports thread affinity. - -#define KMP_CPU_SET_SIZE_LIMIT (1024 * 1024) - - int gCode; - int sCode; - unsigned char *buf; - buf = (unsigned char *)KMP_INTERNAL_MALLOC(KMP_CPU_SET_SIZE_LIMIT); - - // If Linux* OS: - // If the syscall fails or returns a suggestion for the size, - // then we don't have to search for an appropriate size.
- gCode = syscall(__NR_sched_getaffinity, 0, KMP_CPU_SET_SIZE_LIMIT, buf); - KA_TRACE(30, ("__kmp_affinity_determine_capable: " - "initial getaffinity call returned %d errno = %d\n", - gCode, errno)); - - // if ((gCode < 0) && (errno == ENOSYS)) - if (gCode < 0) { - // System call not supported - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none) && - (__kmp_affinity_type != affinity_default) && - (__kmp_affinity_type != affinity_disabled))) { - int error = errno; - kmp_msg_t err_code = KMP_ERR(error); - __kmp_msg(kmp_ms_warning, KMP_MSG(GetAffSysCallNotSupported, env_var), - err_code, __kmp_msg_null); - if (__kmp_generate_warnings == kmp_warnings_off) { - __kmp_str_free(&err_code.str); - } - } - KMP_AFFINITY_DISABLE(); - KMP_INTERNAL_FREE(buf); - return; - } - if (gCode > 0) { // Linux* OS only - // The optimal situation: the OS returns the size of the buffer it expects. - // - // A verification of correct behavior is that setaffinity on a NULL - // buffer with the same size fails with errno set to EFAULT. - sCode = syscall(__NR_sched_setaffinity, 0, gCode, NULL); - KA_TRACE(30, ("__kmp_affinity_determine_capable: " - "setaffinity for mask size %d returned %d errno = %d\n", - gCode, sCode, errno)); - if (sCode < 0) { - if (errno == ENOSYS) { - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && - (__kmp_affinity_type != affinity_none) && - (__kmp_affinity_type != affinity_default) && - (__kmp_affinity_type != affinity_disabled))) { - int error = errno; - kmp_msg_t err_code = KMP_ERR(error); - __kmp_msg(kmp_ms_warning, KMP_MSG(SetAffSysCallNotSupported, env_var), - err_code, __kmp_msg_null); - if (__kmp_generate_warnings == kmp_warnings_off) { - __kmp_str_free(&err_code.str); - } - } - KMP_AFFINITY_DISABLE(); - KMP_INTERNAL_FREE(buf); - } - if (errno == EFAULT) { - KMP_AFFINITY_ENABLE(gCode); - KA_TRACE(10, ("__kmp_affinity_determine_capable: " - "affinity supported (mask size %d)\n", - (int)__kmp_affin_mask_size)); - KMP_INTERNAL_FREE(buf); - return; - } - } - } - - // Call the getaffinity system call repeatedly with increasing set sizes - // until we succeed, or reach an upper bound on the search.
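The probing strategy these comments describe stands on its own. The following is an editorial sketch of it, assuming a Linux system; the function name and signature are hypothetical, not the runtime's, but the raw syscalls and the EFAULT verification trick are the same ones used here.

#include <errno.h>
#include <sys/syscall.h>
#include <unistd.h>

// Grow the mask until sched_getaffinity() accepts it, then confirm the
// size via the EFAULT check. Returns usable bytes, or 0 if unsupported.
static size_t probe_cpumask_size(unsigned char *buf, size_t limit) {
  for (size_t size = 1; size <= limit; size *= 2) {
    long g = syscall(__NR_sched_getaffinity, 0, size, buf);
    if (g < 0) {
      if (errno == ENOSYS)
        return 0; // no affinity syscalls on this kernel
      continue; // buffer still smaller than the kernel's cpumask
    }
    // A kernel that truly accepted this size must reject a NULL buffer of
    // the same size with EFAULT rather than succeed.
    if (syscall(__NR_sched_setaffinity, 0, g, NULL) < 0 && errno == EFAULT)
      return (size_t)g;
  }
  return 0; // search exhausted: treat affinity as unsupported
}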
- KA_TRACE(30, ("__kmp_affinity_determine_capable: " - "searching for proper set size\n")); - int size; - for (size = 1; size <= KMP_CPU_SET_SIZE_LIMIT; size *= 2) { - gCode = syscall(__NR_sched_getaffinity, 0, size, buf); - KA_TRACE(30, ("__kmp_affinity_determine_capable: " - "getaffinity for mask size %d returned %d errno = %d\n", - size, gCode, errno)); - - if (gCode < 0) { - if (errno == ENOSYS) { - // We shouldn't get here - KA_TRACE(30, ("__kmp_affinity_determine_capable: " - "inconsistent OS call behavior: errno == ENOSYS for mask " - "size %d\n", - size)); - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && - (__kmp_affinity_type != affinity_none) && - (__kmp_affinity_type != affinity_default) && - (__kmp_affinity_type != affinity_disabled))) { - int error = errno; - kmp_msg_t err_code = KMP_ERR(error); - __kmp_msg(kmp_ms_warning, KMP_MSG(GetAffSysCallNotSupported, env_var), - err_code, __kmp_msg_null); - if (__kmp_generate_warnings == kmp_warnings_off) { - __kmp_str_free(&err_code.str); - } - } - KMP_AFFINITY_DISABLE(); - KMP_INTERNAL_FREE(buf); - return; - } - continue; - } - - sCode = syscall(__NR_sched_setaffinity, 0, gCode, NULL); - KA_TRACE(30, ("__kmp_affinity_determine_capable: " - "setaffinity for mask size %d returned %d errno = %d\n", - gCode, sCode, errno)); - if (sCode < 0) { - if (errno == ENOSYS) { // Linux* OS only - // We shouldn't get here - KA_TRACE(30, ("__kmp_affinity_determine_capable: " - "inconsistent OS call behavior: errno == ENOSYS for mask " - "size %d\n", - size)); - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && - (__kmp_affinity_type != affinity_none) && - (__kmp_affinity_type != affinity_default) && - (__kmp_affinity_type != affinity_disabled))) { - int error = errno; - kmp_msg_t err_code = KMP_ERR(error); - __kmp_msg(kmp_ms_warning, KMP_MSG(SetAffSysCallNotSupported, env_var), - err_code, __kmp_msg_null); - if (__kmp_generate_warnings == kmp_warnings_off) { - __kmp_str_free(&err_code.str); - } - } - KMP_AFFINITY_DISABLE(); - KMP_INTERNAL_FREE(buf); - return; - } - if (errno == EFAULT) { - KMP_AFFINITY_ENABLE(gCode); - KA_TRACE(10, ("__kmp_affinity_determine_capable: " - "affinity supported (mask size %d)\n", - (int)__kmp_affin_mask_size)); - KMP_INTERNAL_FREE(buf); - return; - } - } - } - // save uncaught error code - // int error = errno; - KMP_INTERNAL_FREE(buf); - // restore uncaught error code, will be printed at the next KMP_WARNING below - // errno = error; - - // Affinity is not supported - KMP_AFFINITY_DISABLE(); - KA_TRACE(10, ("__kmp_affinity_determine_capable: " - "cannot determine mask size - affinity not supported\n")); - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none) && - (__kmp_affinity_type != affinity_default) && - (__kmp_affinity_type != affinity_disabled))) { - KMP_WARNING(AffCantGetMaskSize, env_var); - } -} - -#endif // KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED - -#if KMP_USE_FUTEX - -int __kmp_futex_determine_capable() { - int loc = 0; - int rc = syscall(__NR_futex, &loc, FUTEX_WAKE, 1, NULL, NULL, 0); - int retval = (rc == 0) || (errno != ENOSYS); - - KA_TRACE(10, - ("__kmp_futex_determine_capable: rc = %d errno = %d\n", rc, errno)); - KA_TRACE(10, ("__kmp_futex_determine_capable: futex syscall%s supported\n", - retval ? 
"" : " not")); - - return retval; -} - -#endif // KMP_USE_FUTEX - -#if (KMP_ARCH_X86 || KMP_ARCH_X86_64) && (!KMP_ASM_INTRINS) -/* Only 32-bit "add-exchange" instruction on IA-32 architecture causes us to - use compare_and_store for these routines */ - -kmp_int8 __kmp_test_then_or8(volatile kmp_int8 *p, kmp_int8 d) { - kmp_int8 old_value, new_value; - - old_value = TCR_1(*p); - new_value = old_value | d; - - while (!KMP_COMPARE_AND_STORE_REL8(p, old_value, new_value)) { - KMP_CPU_PAUSE(); - old_value = TCR_1(*p); - new_value = old_value | d; - } - return old_value; -} - -kmp_int8 __kmp_test_then_and8(volatile kmp_int8 *p, kmp_int8 d) { - kmp_int8 old_value, new_value; - - old_value = TCR_1(*p); - new_value = old_value & d; - - while (!KMP_COMPARE_AND_STORE_REL8(p, old_value, new_value)) { - KMP_CPU_PAUSE(); - old_value = TCR_1(*p); - new_value = old_value & d; - } - return old_value; -} - -kmp_uint32 __kmp_test_then_or32(volatile kmp_uint32 *p, kmp_uint32 d) { - kmp_uint32 old_value, new_value; - - old_value = TCR_4(*p); - new_value = old_value | d; - - while (!KMP_COMPARE_AND_STORE_REL32(p, old_value, new_value)) { - KMP_CPU_PAUSE(); - old_value = TCR_4(*p); - new_value = old_value | d; - } - return old_value; -} - -kmp_uint32 __kmp_test_then_and32(volatile kmp_uint32 *p, kmp_uint32 d) { - kmp_uint32 old_value, new_value; - - old_value = TCR_4(*p); - new_value = old_value & d; - - while (!KMP_COMPARE_AND_STORE_REL32(p, old_value, new_value)) { - KMP_CPU_PAUSE(); - old_value = TCR_4(*p); - new_value = old_value & d; - } - return old_value; -} - -#if KMP_ARCH_X86 -kmp_int8 __kmp_test_then_add8(volatile kmp_int8 *p, kmp_int8 d) { - kmp_int8 old_value, new_value; - - old_value = TCR_1(*p); - new_value = old_value + d; - - while (!KMP_COMPARE_AND_STORE_REL8(p, old_value, new_value)) { - KMP_CPU_PAUSE(); - old_value = TCR_1(*p); - new_value = old_value + d; - } - return old_value; -} - -kmp_int64 __kmp_test_then_add64(volatile kmp_int64 *p, kmp_int64 d) { - kmp_int64 old_value, new_value; - - old_value = TCR_8(*p); - new_value = old_value + d; - - while (!KMP_COMPARE_AND_STORE_REL64(p, old_value, new_value)) { - KMP_CPU_PAUSE(); - old_value = TCR_8(*p); - new_value = old_value + d; - } - return old_value; -} -#endif /* KMP_ARCH_X86 */ - -kmp_uint64 __kmp_test_then_or64(volatile kmp_uint64 *p, kmp_uint64 d) { - kmp_uint64 old_value, new_value; - - old_value = TCR_8(*p); - new_value = old_value | d; - while (!KMP_COMPARE_AND_STORE_REL64(p, old_value, new_value)) { - KMP_CPU_PAUSE(); - old_value = TCR_8(*p); - new_value = old_value | d; - } - return old_value; -} - -kmp_uint64 __kmp_test_then_and64(volatile kmp_uint64 *p, kmp_uint64 d) { - kmp_uint64 old_value, new_value; - - old_value = TCR_8(*p); - new_value = old_value & d; - while (!KMP_COMPARE_AND_STORE_REL64(p, old_value, new_value)) { - KMP_CPU_PAUSE(); - old_value = TCR_8(*p); - new_value = old_value & d; - } - return old_value; -} - -#endif /* (KMP_ARCH_X86 || KMP_ARCH_X86_64) && (! KMP_ASM_INTRINS) */ - -void __kmp_terminate_thread(int gtid) { - int status; - kmp_info_t *th = __kmp_threads[gtid]; - - if (!th) - return; - -#ifdef KMP_CANCEL_THREADS - KA_TRACE(10, ("__kmp_terminate_thread: kill (%d)\n", gtid)); - status = pthread_cancel(th->th.th_info.ds.ds_thread); - if (status != 0 && status != ESRCH) { - __kmp_fatal(KMP_MSG(CantTerminateWorkerThread), KMP_ERR(status), - __kmp_msg_null); - } -#endif - __kmp_yield(TRUE); -} // - -/* Set thread stack info according to values returned by pthread_getattr_np(). 
- If values are unreasonable, assume call failed and use incremental stack - refinement method instead. Returns TRUE if the stack parameters could be - determined exactly, FALSE if incremental refinement is necessary. */ -static kmp_int32 __kmp_set_stack_info(int gtid, kmp_info_t *th) { - int stack_data; -#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ - KMP_OS_HURD - pthread_attr_t attr; - int status; - size_t size = 0; - void *addr = 0; - - /* Always do incremental stack refinement for ubermaster threads since the - initial thread stack range can be reduced by sibling thread creation so - pthread_attr_getstack may cause thread gtid aliasing */ - if (!KMP_UBER_GTID(gtid)) { - - /* Fetch the real thread attributes */ - status = pthread_attr_init(&attr); - KMP_CHECK_SYSFAIL("pthread_attr_init", status); -#if KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD - status = pthread_attr_get_np(pthread_self(), &attr); - KMP_CHECK_SYSFAIL("pthread_attr_get_np", status); -#else - status = pthread_getattr_np(pthread_self(), &attr); - KMP_CHECK_SYSFAIL("pthread_getattr_np", status); -#endif - status = pthread_attr_getstack(&attr, &addr, &size); - KMP_CHECK_SYSFAIL("pthread_attr_getstack", status); - KA_TRACE(60, - ("__kmp_set_stack_info: T#%d pthread_attr_getstack returned size:" - " %lu, low addr: %p\n", - gtid, size, addr)); - status = pthread_attr_destroy(&attr); - KMP_CHECK_SYSFAIL("pthread_attr_destroy", status); - } - - if (size != 0 && addr != 0) { // was stack parameter determination successful? - /* Store the correct base and size */ - TCW_PTR(th->th.th_info.ds.ds_stackbase, (((char *)addr) + size)); - TCW_PTR(th->th.th_info.ds.ds_stacksize, size); - TCW_4(th->th.th_info.ds.ds_stackgrow, FALSE); - return TRUE; - } -#endif /* KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || - KMP_OS_HURD */ - /* Use incremental refinement starting from initial conservative estimate */ - TCW_PTR(th->th.th_info.ds.ds_stacksize, 0); - TCW_PTR(th->th.th_info.ds.ds_stackbase, &stack_data); - TCW_4(th->th.th_info.ds.ds_stackgrow, TRUE); - return FALSE; -} - -static void *__kmp_launch_worker(void *thr) { - int status, old_type, old_state; -#ifdef KMP_BLOCK_SIGNALS - sigset_t new_set, old_set; -#endif /* KMP_BLOCK_SIGNALS */ - void *exit_val; -#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ - KMP_OS_OPENBSD || KMP_OS_HURD - void *volatile padding = 0; -#endif - int gtid; - - gtid = ((kmp_info_t *)thr)->th.th_info.ds.ds_gtid; - __kmp_gtid_set_specific(gtid); -#ifdef KMP_TDATA_GTID - __kmp_gtid = gtid; -#endif -#if KMP_STATS_ENABLED - // set thread local index to point to thread-specific stats - __kmp_stats_thread_ptr = ((kmp_info_t *)thr)->th.th_stats; - __kmp_stats_thread_ptr->startLife(); - KMP_SET_THREAD_STATE(IDLE); - KMP_INIT_PARTITIONED_TIMERS(OMP_idle); -#endif - -#if USE_ITT_BUILD - __kmp_itt_thread_name(gtid); -#endif /* USE_ITT_BUILD */ - -#if KMP_AFFINITY_SUPPORTED - __kmp_affinity_set_init_mask(gtid, FALSE); -#endif - -#ifdef KMP_CANCEL_THREADS - status = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old_type); - KMP_CHECK_SYSFAIL("pthread_setcanceltype", status); - // josh todo: isn't PTHREAD_CANCEL_ENABLE default for newly-created threads? - status = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &old_state); - KMP_CHECK_SYSFAIL("pthread_setcancelstate", status); -#endif - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - // Set FP control regs to be a copy of the parallel initialization thread's. 
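// Propagating the captured x87 control word and MXCSR gives every worker
// the same rounding mode, precision, and exception masks as the thread
// that initialized the runtime, so numeric results do not depend on which
// thread executes a given iteration.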
- __kmp_clear_x87_fpu_status_word(); - __kmp_load_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word); - __kmp_load_mxcsr(&__kmp_init_mxcsr); -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -#ifdef KMP_BLOCK_SIGNALS - status = sigfillset(&new_set); - KMP_CHECK_SYSFAIL_ERRNO("sigfillset", status); - status = pthread_sigmask(SIG_BLOCK, &new_set, &old_set); - KMP_CHECK_SYSFAIL("pthread_sigmask", status); -#endif /* KMP_BLOCK_SIGNALS */ - -#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ - KMP_OS_OPENBSD - if (__kmp_stkoffset > 0 && gtid > 0) { - padding = KMP_ALLOCA(gtid * __kmp_stkoffset); - } -#endif - - KMP_MB(); - __kmp_set_stack_info(gtid, (kmp_info_t *)thr); - - __kmp_check_stack_overlap((kmp_info_t *)thr); - - exit_val = __kmp_launch_thread((kmp_info_t *)thr); - -#ifdef KMP_BLOCK_SIGNALS - status = pthread_sigmask(SIG_SETMASK, &old_set, NULL); - KMP_CHECK_SYSFAIL("pthread_sigmask", status); -#endif /* KMP_BLOCK_SIGNALS */ - - return exit_val; -} - -#if KMP_USE_MONITOR -/* The monitor thread controls all of the threads in the complex */ - -static void *__kmp_launch_monitor(void *thr) { - int status, old_type, old_state; -#ifdef KMP_BLOCK_SIGNALS - sigset_t new_set; -#endif /* KMP_BLOCK_SIGNALS */ - struct timespec interval; - int yield_count; - int yield_cycles = 0; - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - KA_TRACE(10, ("__kmp_launch_monitor: #1 launched\n")); - - /* register us as the monitor thread */ - __kmp_gtid_set_specific(KMP_GTID_MONITOR); -#ifdef KMP_TDATA_GTID - __kmp_gtid = KMP_GTID_MONITOR; -#endif - - KMP_MB(); - -#if USE_ITT_BUILD - // Instruct Intel(R) Threading Tools to ignore monitor thread. - __kmp_itt_thread_ignore(); -#endif /* USE_ITT_BUILD */ - - __kmp_set_stack_info(((kmp_info_t *)thr)->th.th_info.ds.ds_gtid, - (kmp_info_t *)thr); - - __kmp_check_stack_overlap((kmp_info_t *)thr); - -#ifdef KMP_CANCEL_THREADS - status = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old_type); - KMP_CHECK_SYSFAIL("pthread_setcanceltype", status); - // josh todo: isn't PTHREAD_CANCEL_ENABLE default for newly-created threads? - status = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &old_state); - KMP_CHECK_SYSFAIL("pthread_setcancelstate", status); -#endif - -#if KMP_REAL_TIME_FIX - // This is a potential fix which allows applications with a real-time - // scheduling policy to work. However, a decision about the fix has not been - // made yet, so it is disabled by default. - { // Was the program started with a real-time scheduling policy? - int sched = sched_getscheduler(0); - if (sched == SCHED_FIFO || sched == SCHED_RR) { - // Yes, we are part of a real-time application. Try to increase the - // priority of the monitor. - struct sched_param param; - int max_priority = sched_get_priority_max(sched); - int rc; - KMP_WARNING(RealTimeSchedNotSupported); - sched_getparam(0, &param); - if (param.sched_priority < max_priority) { - param.sched_priority += 1; - rc = sched_setscheduler(0, sched, &param); - if (rc != 0) { - int error = errno; - kmp_msg_t err_code = KMP_ERR(error); - __kmp_msg(kmp_ms_warning, KMP_MSG(CantChangeMonitorPriority), - err_code, KMP_MSG(MonitorWillStarve), __kmp_msg_null); - if (__kmp_generate_warnings == kmp_warnings_off) { - __kmp_str_free(&err_code.str); - } - } - } else { - // We cannot abort here, because the number of CPUs may be enough for - // all the threads, including the monitor thread, so the application - // could potentially work...
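// Starvation matters because (with KMP_USE_MONITOR) this thread advances
// the global time stamp that blocktime expiry is measured against; if
// real-time workers never cede the CPU to it, blocked threads may sleep
// far longer than requested, hence the explicit warning below.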
- __kmp_msg(kmp_ms_warning, KMP_MSG(RunningAtMaxPriority), - KMP_MSG(MonitorWillStarve), KMP_HNT(RunningAtMaxPriority), - __kmp_msg_null); - } - } - // AC: free thread that waits for monitor started - TCW_4(__kmp_global.g.g_time.dt.t_value, 0); - } -#endif // KMP_REAL_TIME_FIX - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - if (__kmp_monitor_wakeups == 1) { - interval.tv_sec = 1; - interval.tv_nsec = 0; - } else { - interval.tv_sec = 0; - interval.tv_nsec = (KMP_NSEC_PER_SEC / __kmp_monitor_wakeups); - } - - KA_TRACE(10, ("__kmp_launch_monitor: #2 monitor\n")); - - if (__kmp_yield_cycle) { - __kmp_yielding_on = 0; /* Start out with yielding shut off */ - yield_count = __kmp_yield_off_count; - } else { - __kmp_yielding_on = 1; /* Yielding is on permanently */ - } - - while (!TCR_4(__kmp_global.g.g_done)) { - struct timespec now; - struct timeval tval; - - /* This thread monitors the state of the system */ - - KA_TRACE(15, ("__kmp_launch_monitor: update\n")); - - status = gettimeofday(&tval, NULL); - KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status); - TIMEVAL_TO_TIMESPEC(&tval, &now); - - now.tv_sec += interval.tv_sec; - now.tv_nsec += interval.tv_nsec; - - if (now.tv_nsec >= KMP_NSEC_PER_SEC) { - now.tv_sec += 1; - now.tv_nsec -= KMP_NSEC_PER_SEC; - } - - status = pthread_mutex_lock(&__kmp_wait_mx.m_mutex); - KMP_CHECK_SYSFAIL("pthread_mutex_lock", status); - // AC: the monitor should not fall asleep if g_done has been set - if (!TCR_4(__kmp_global.g.g_done)) { // check once more under mutex - status = pthread_cond_timedwait(&__kmp_wait_cv.c_cond, - &__kmp_wait_mx.m_mutex, &now); - if (status != 0) { - if (status != ETIMEDOUT && status != EINTR) { - KMP_SYSFAIL("pthread_cond_timedwait", status); - } - } - } - status = pthread_mutex_unlock(&__kmp_wait_mx.m_mutex); - KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status); - - if (__kmp_yield_cycle) { - yield_cycles++; - if ((yield_cycles % yield_count) == 0) { - if (__kmp_yielding_on) { - __kmp_yielding_on = 0; /* Turn it off now */ - yield_count = __kmp_yield_off_count; - } else { - __kmp_yielding_on = 1; /* Turn it on now */ - yield_count = __kmp_yield_on_count; - } - yield_cycles = 0; - } - } else { - __kmp_yielding_on = 1; - } - - TCW_4(__kmp_global.g.g_time.dt.t_value, - TCR_4(__kmp_global.g.g_time.dt.t_value) + 1); - - KMP_MB(); /* Flush all pending memory write invalidates. */ - } - - KA_TRACE(10, ("__kmp_launch_monitor: #3 cleanup\n")); - -#ifdef KMP_BLOCK_SIGNALS - status = sigfillset(&new_set); - KMP_CHECK_SYSFAIL_ERRNO("sigfillset", status); - status = pthread_sigmask(SIG_UNBLOCK, &new_set, NULL); - KMP_CHECK_SYSFAIL("pthread_sigmask", status); -#endif /* KMP_BLOCK_SIGNALS */ - - KA_TRACE(10, ("__kmp_launch_monitor: #4 finished\n")); - - if (__kmp_global.g.g_abort != 0) { - /* now we need to terminate the worker threads */ - /* the value of t_abort is the signal we caught */ - - int gtid; - - KA_TRACE(10, ("__kmp_launch_monitor: #5 terminate sig=%d\n", - __kmp_global.g.g_abort)); - - /* terminate the OpenMP worker threads */ - /* TODO this is not valid for sibling threads!! - * the uber master might not be 0 anymore.. 
*/ - for (gtid = 1; gtid < __kmp_threads_capacity; ++gtid) - __kmp_terminate_thread(gtid); - - __kmp_cleanup(); - - KA_TRACE(10, ("__kmp_launch_monitor: #6 raise sig=%d\n", - __kmp_global.g.g_abort)); - - if (__kmp_global.g.g_abort > 0) - raise(__kmp_global.g.g_abort); - } - - KA_TRACE(10, ("__kmp_launch_monitor: #7 exit\n")); - - return thr; -} -#endif // KMP_USE_MONITOR - -void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size) { - pthread_t handle; - pthread_attr_t thread_attr; - int status; - - th->th.th_info.ds.ds_gtid = gtid; - -#if KMP_STATS_ENABLED - // sets up worker thread stats - __kmp_acquire_tas_lock(&__kmp_stats_lock, gtid); - - // th->th.th_stats is used to transfer thread-specific stats-pointer to - // __kmp_launch_worker. So when thread is created (goes into - // __kmp_launch_worker) it will set its thread local pointer to - // th->th.th_stats - if (!KMP_UBER_GTID(gtid)) { - th->th.th_stats = __kmp_stats_list->push_back(gtid); - } else { - // For root threads, __kmp_stats_thread_ptr is set in __kmp_register_root(), - // so set the th->th.th_stats field to it. - th->th.th_stats = __kmp_stats_thread_ptr; - } - __kmp_release_tas_lock(&__kmp_stats_lock, gtid); - -#endif // KMP_STATS_ENABLED - - if (KMP_UBER_GTID(gtid)) { - KA_TRACE(10, ("__kmp_create_worker: uber thread (%d)\n", gtid)); - th->th.th_info.ds.ds_thread = pthread_self(); - __kmp_set_stack_info(gtid, th); - __kmp_check_stack_overlap(th); - return; - } - - KA_TRACE(10, ("__kmp_create_worker: try to create thread (%d)\n", gtid)); - - KMP_MB(); /* Flush all pending memory write invalidates. */ - -#ifdef KMP_THREAD_ATTR - status = pthread_attr_init(&thread_attr); - if (status != 0) { - __kmp_fatal(KMP_MSG(CantInitThreadAttrs), KMP_ERR(status), __kmp_msg_null); - } - status = pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE); - if (status != 0) { - __kmp_fatal(KMP_MSG(CantSetWorkerState), KMP_ERR(status), __kmp_msg_null); - } - - /* Set stack size for this thread now. - The multiple of 2 is there because on some machines, requesting an unusual - stacksize causes the thread to have an offset before the dummy alloca() - takes place to create the offset. 
Since we want the user to have a - sufficient stacksize AND support a stack offset, we alloca() twice the - offset so that the upcoming alloca() does not eliminate any premade offset, - and also gives the user the stack space they requested for all threads */ - stack_size += gtid * __kmp_stkoffset * 2; - - KA_TRACE(10, ("__kmp_create_worker: T#%d, default stacksize = %lu bytes, " - "__kmp_stksize = %lu bytes, final stacksize = %lu bytes\n", - gtid, KMP_DEFAULT_STKSIZE, __kmp_stksize, stack_size)); - -#ifdef _POSIX_THREAD_ATTR_STACKSIZE - status = pthread_attr_setstacksize(&thread_attr, stack_size); -#ifdef KMP_BACKUP_STKSIZE - if (status != 0) { - if (!__kmp_env_stksize) { - stack_size = KMP_BACKUP_STKSIZE + gtid * __kmp_stkoffset; - __kmp_stksize = KMP_BACKUP_STKSIZE; - KA_TRACE(10, ("__kmp_create_worker: T#%d, default stacksize = %lu bytes, " - "__kmp_stksize = %lu bytes, (backup) final stacksize = %lu " - "bytes\n", - gtid, KMP_DEFAULT_STKSIZE, __kmp_stksize, stack_size)); - status = pthread_attr_setstacksize(&thread_attr, stack_size); - } - } -#endif /* KMP_BACKUP_STKSIZE */ - if (status != 0) { - __kmp_fatal(KMP_MSG(CantSetWorkerStackSize, stack_size), KMP_ERR(status), - KMP_HNT(ChangeWorkerStackSize), __kmp_msg_null); - } -#endif /* _POSIX_THREAD_ATTR_STACKSIZE */ - -#endif /* KMP_THREAD_ATTR */ - - status = - pthread_create(&handle, &thread_attr, __kmp_launch_worker, (void *)th); - if (status != 0 || !handle) { // ??? Why do we check handle?? -#ifdef _POSIX_THREAD_ATTR_STACKSIZE - if (status == EINVAL) { - __kmp_fatal(KMP_MSG(CantSetWorkerStackSize, stack_size), KMP_ERR(status), - KMP_HNT(IncreaseWorkerStackSize), __kmp_msg_null); - } - if (status == ENOMEM) { - __kmp_fatal(KMP_MSG(CantSetWorkerStackSize, stack_size), KMP_ERR(status), - KMP_HNT(DecreaseWorkerStackSize), __kmp_msg_null); - } -#endif /* _POSIX_THREAD_ATTR_STACKSIZE */ - if (status == EAGAIN) { - __kmp_fatal(KMP_MSG(NoResourcesForWorkerThread), KMP_ERR(status), - KMP_HNT(Decrease_NUM_THREADS), __kmp_msg_null); - } - KMP_SYSFAIL("pthread_create", status); - } - - th->th.th_info.ds.ds_thread = handle; - -#ifdef KMP_THREAD_ATTR - status = pthread_attr_destroy(&thread_attr); - if (status) { - kmp_msg_t err_code = KMP_ERR(status); - __kmp_msg(kmp_ms_warning, KMP_MSG(CantDestroyThreadAttrs), err_code, - __kmp_msg_null); - if (__kmp_generate_warnings == kmp_warnings_off) { - __kmp_str_free(&err_code.str); - } - } -#endif /* KMP_THREAD_ATTR */ - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - KA_TRACE(10, ("__kmp_create_worker: done creating thread (%d)\n", gtid)); - -} // __kmp_create_worker - -#if KMP_USE_MONITOR -void __kmp_create_monitor(kmp_info_t *th) { - pthread_t handle; - pthread_attr_t thread_attr; - size_t size; - int status; - int auto_adj_size = FALSE; - - if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) { - // We don't need monitor thread in case of MAX_BLOCKTIME - KA_TRACE(10, ("__kmp_create_monitor: skipping monitor thread because of " - "MAX blocktime\n")); - th->th.th_info.ds.ds_tid = 0; // this makes reap_monitor no-op - th->th.th_info.ds.ds_gtid = 0; - return; - } - KA_TRACE(10, ("__kmp_create_monitor: try to create monitor\n")); - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - th->th.th_info.ds.ds_tid = KMP_GTID_MONITOR; - th->th.th_info.ds.ds_gtid = KMP_GTID_MONITOR; -#if KMP_REAL_TIME_FIX - TCW_4(__kmp_global.g.g_time.dt.t_value, - -1); // Will use it for synchronization a bit later. 
-#else - TCW_4(__kmp_global.g.g_time.dt.t_value, 0); -#endif // KMP_REAL_TIME_FIX - -#ifdef KMP_THREAD_ATTR - if (__kmp_monitor_stksize == 0) { - __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE; - auto_adj_size = TRUE; - } - status = pthread_attr_init(&thread_attr); - if (status != 0) { - __kmp_fatal(KMP_MSG(CantInitThreadAttrs), KMP_ERR(status), __kmp_msg_null); - } - status = pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE); - if (status != 0) { - __kmp_fatal(KMP_MSG(CantSetMonitorState), KMP_ERR(status), __kmp_msg_null); - } - -#ifdef _POSIX_THREAD_ATTR_STACKSIZE - status = pthread_attr_getstacksize(&thread_attr, &size); - KMP_CHECK_SYSFAIL("pthread_attr_getstacksize", status); -#else - size = __kmp_sys_min_stksize; -#endif /* _POSIX_THREAD_ATTR_STACKSIZE */ -#endif /* KMP_THREAD_ATTR */ - - if (__kmp_monitor_stksize == 0) { - __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE; - } - if (__kmp_monitor_stksize < __kmp_sys_min_stksize) { - __kmp_monitor_stksize = __kmp_sys_min_stksize; - } - - KA_TRACE(10, ("__kmp_create_monitor: default stacksize = %lu bytes," - "requested stacksize = %lu bytes\n", - size, __kmp_monitor_stksize)); - -retry: - -/* Set stack size for this thread now. */ -#ifdef _POSIX_THREAD_ATTR_STACKSIZE - KA_TRACE(10, ("__kmp_create_monitor: setting stacksize = %lu bytes,", - __kmp_monitor_stksize)); - status = pthread_attr_setstacksize(&thread_attr, __kmp_monitor_stksize); - if (status != 0) { - if (auto_adj_size) { - __kmp_monitor_stksize *= 2; - goto retry; - } - kmp_msg_t err_code = KMP_ERR(status); - __kmp_msg(kmp_ms_warning, // should this be fatal? BB - KMP_MSG(CantSetMonitorStackSize, (long int)__kmp_monitor_stksize), - err_code, KMP_HNT(ChangeMonitorStackSize), __kmp_msg_null); - if (__kmp_generate_warnings == kmp_warnings_off) { - __kmp_str_free(&err_code.str); - } - } -#endif /* _POSIX_THREAD_ATTR_STACKSIZE */ - - status = - pthread_create(&handle, &thread_attr, __kmp_launch_monitor, (void *)th); - - if (status != 0) { -#ifdef _POSIX_THREAD_ATTR_STACKSIZE - if (status == EINVAL) { - if (auto_adj_size && (__kmp_monitor_stksize < (size_t)0x40000000)) { - __kmp_monitor_stksize *= 2; - goto retry; - } - __kmp_fatal(KMP_MSG(CantSetMonitorStackSize, __kmp_monitor_stksize), - KMP_ERR(status), KMP_HNT(IncreaseMonitorStackSize), - __kmp_msg_null); - } - if (status == ENOMEM) { - __kmp_fatal(KMP_MSG(CantSetMonitorStackSize, __kmp_monitor_stksize), - KMP_ERR(status), KMP_HNT(DecreaseMonitorStackSize), - __kmp_msg_null); - } -#endif /* _POSIX_THREAD_ATTR_STACKSIZE */ - if (status == EAGAIN) { - __kmp_fatal(KMP_MSG(NoResourcesForMonitorThread), KMP_ERR(status), - KMP_HNT(DecreaseNumberOfThreadsInUse), __kmp_msg_null); - } - KMP_SYSFAIL("pthread_create", status); - } - - th->th.th_info.ds.ds_thread = handle; - -#if KMP_REAL_TIME_FIX - // Wait for the monitor thread is really started and set its *priority*. - KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == - sizeof(__kmp_global.g.g_time.dt.t_value)); - __kmp_wait_yield_4((kmp_uint32 volatile *)&__kmp_global.g.g_time.dt.t_value, - -1, &__kmp_neq_4, NULL); -#endif // KMP_REAL_TIME_FIX - -#ifdef KMP_THREAD_ATTR - status = pthread_attr_destroy(&thread_attr); - if (status != 0) { - kmp_msg_t err_code = KMP_ERR(status); - __kmp_msg(kmp_ms_warning, KMP_MSG(CantDestroyThreadAttrs), err_code, - __kmp_msg_null); - if (__kmp_generate_warnings == kmp_warnings_off) { - __kmp_str_free(&err_code.str); - } - } -#endif - - KMP_MB(); /* Flush all pending memory write invalidates. 
*/ - - KA_TRACE(10, ("__kmp_create_monitor: monitor created %#.8lx\n", - th->th.th_info.ds.ds_thread)); - -} // __kmp_create_monitor -#endif // KMP_USE_MONITOR - -void __kmp_exit_thread(int exit_status) { - pthread_exit((void *)(intptr_t)exit_status); -} // __kmp_exit_thread - -#if KMP_USE_MONITOR -void __kmp_resume_monitor(); - -void __kmp_reap_monitor(kmp_info_t *th) { - int status; - void *exit_val; - - KA_TRACE(10, ("__kmp_reap_monitor: try to reap monitor thread with handle" - " %#.8lx\n", - th->th.th_info.ds.ds_thread)); - - // If monitor has been created, its tid and gtid should be KMP_GTID_MONITOR. - // If both tid and gtid are 0, it means the monitor did not ever start. - // If both tid and gtid are KMP_GTID_DNE, the monitor has been shut down. - KMP_DEBUG_ASSERT(th->th.th_info.ds.ds_tid == th->th.th_info.ds.ds_gtid); - if (th->th.th_info.ds.ds_gtid != KMP_GTID_MONITOR) { - KA_TRACE(10, ("__kmp_reap_monitor: monitor did not start, returning\n")); - return; - } - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - /* First, check to see whether the monitor thread exists to wake it up. This - is to avoid performance problem when the monitor sleeps during - blocktime-size interval */ - - status = pthread_kill(th->th.th_info.ds.ds_thread, 0); - if (status != ESRCH) { - __kmp_resume_monitor(); // Wake up the monitor thread - } - KA_TRACE(10, ("__kmp_reap_monitor: try to join with monitor\n")); - status = pthread_join(th->th.th_info.ds.ds_thread, &exit_val); - if (exit_val != th) { - __kmp_fatal(KMP_MSG(ReapMonitorError), KMP_ERR(status), __kmp_msg_null); - } - - th->th.th_info.ds.ds_tid = KMP_GTID_DNE; - th->th.th_info.ds.ds_gtid = KMP_GTID_DNE; - - KA_TRACE(10, ("__kmp_reap_monitor: done reaping monitor thread with handle" - " %#.8lx\n", - th->th.th_info.ds.ds_thread)); - - KMP_MB(); /* Flush all pending memory write invalidates. */ -} -#endif // KMP_USE_MONITOR - -void __kmp_reap_worker(kmp_info_t *th) { - int status; - void *exit_val; - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - KA_TRACE( - 10, ("__kmp_reap_worker: try to reap T#%d\n", th->th.th_info.ds.ds_gtid)); - - status = pthread_join(th->th.th_info.ds.ds_thread, &exit_val); -#ifdef KMP_DEBUG - /* Don't expose these to the user until we understand when they trigger */ - if (status != 0) { - __kmp_fatal(KMP_MSG(ReapWorkerError), KMP_ERR(status), __kmp_msg_null); - } - if (exit_val != th) { - KA_TRACE(10, ("__kmp_reap_worker: worker T#%d did not reap properly, " - "exit_val = %p\n", - th->th.th_info.ds.ds_gtid, exit_val)); - } -#endif /* KMP_DEBUG */ - - KA_TRACE(10, ("__kmp_reap_worker: done reaping T#%d\n", - th->th.th_info.ds.ds_gtid)); - - KMP_MB(); /* Flush all pending memory write invalidates. */ -} - -#if KMP_HANDLE_SIGNALS - -static void __kmp_null_handler(int signo) { - // Do nothing, for doing SIG_IGN-type actions. -} // __kmp_null_handler - -static void __kmp_team_handler(int signo) { - if (__kmp_global.g.g_abort == 0) { -/* Stage 1 signal handler, let's shut down all of the threads */ -#ifdef KMP_DEBUG - __kmp_debug_printf("__kmp_team_handler: caught signal = %d\n", signo); -#endif - switch (signo) { - case SIGHUP: - case SIGINT: - case SIGQUIT: - case SIGILL: - case SIGABRT: - case SIGFPE: - case SIGBUS: - case SIGSEGV: -#ifdef SIGSYS - case SIGSYS: -#endif - case SIGTERM: - if (__kmp_debug_buf) { - __kmp_dump_debug_buffer(); - } - KMP_MB(); // Flush all pending memory write invalidates. 
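// Note the deliberate store ordering below: g_abort (which signal) is made
// globally visible before g_done (shut everything down) flips, so any
// thread that observes g_done also observes a valid signal number; the
// KMP_MB() fences between the stores enforce exactly that.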
- TCW_4(__kmp_global.g.g_abort, signo); - KMP_MB(); // Flush all pending memory write invalidates. - TCW_4(__kmp_global.g.g_done, TRUE); - KMP_MB(); // Flush all pending memory write invalidates. - break; - default: -#ifdef KMP_DEBUG - __kmp_debug_printf("__kmp_team_handler: unknown signal type"); -#endif - break; - } - } -} // __kmp_team_handler - -static void __kmp_sigaction(int signum, const struct sigaction *act, - struct sigaction *oldact) { - int rc = sigaction(signum, act, oldact); - KMP_CHECK_SYSFAIL_ERRNO("sigaction", rc); -} - -static void __kmp_install_one_handler(int sig, sig_func_t handler_func, - int parallel_init) { - KMP_MB(); // Flush all pending memory write invalidates. - KB_TRACE(60, - ("__kmp_install_one_handler( %d, ..., %d )\n", sig, parallel_init)); - if (parallel_init) { - struct sigaction new_action; - struct sigaction old_action; - new_action.sa_handler = handler_func; - new_action.sa_flags = 0; - sigfillset(&new_action.sa_mask); - __kmp_sigaction(sig, &new_action, &old_action); - if (old_action.sa_handler == __kmp_sighldrs[sig].sa_handler) { - sigaddset(&__kmp_sigset, sig); - } else { - // Restore/keep user's handler if one previously installed. - __kmp_sigaction(sig, &old_action, NULL); - } - } else { - // Save initial/system signal handlers to see if user handlers installed. - __kmp_sigaction(sig, NULL, &__kmp_sighldrs[sig]); - } - KMP_MB(); // Flush all pending memory write invalidates. -} // __kmp_install_one_handler - -static void __kmp_remove_one_handler(int sig) { - KB_TRACE(60, ("__kmp_remove_one_handler( %d )\n", sig)); - if (sigismember(&__kmp_sigset, sig)) { - struct sigaction old; - KMP_MB(); // Flush all pending memory write invalidates. - __kmp_sigaction(sig, &__kmp_sighldrs[sig], &old); - if ((old.sa_handler != __kmp_team_handler) && - (old.sa_handler != __kmp_null_handler)) { - // Restore the users signal handler. - KB_TRACE(10, ("__kmp_remove_one_handler: oops, not our handler, " - "restoring: sig=%d\n", - sig)); - __kmp_sigaction(sig, &old, NULL); - } - sigdelset(&__kmp_sigset, sig); - KMP_MB(); // Flush all pending memory write invalidates. - } -} // __kmp_remove_one_handler - -void __kmp_install_signals(int parallel_init) { - KB_TRACE(10, ("__kmp_install_signals( %d )\n", parallel_init)); - if (__kmp_handle_signals || !parallel_init) { - // If ! parallel_init, we do not install handlers, just save original - // handlers. Let us do it even __handle_signals is 0. 
-    sigemptyset(&__kmp_sigset);
-    __kmp_install_one_handler(SIGHUP, __kmp_team_handler, parallel_init);
-    __kmp_install_one_handler(SIGINT, __kmp_team_handler, parallel_init);
-    __kmp_install_one_handler(SIGQUIT, __kmp_team_handler, parallel_init);
-    __kmp_install_one_handler(SIGILL, __kmp_team_handler, parallel_init);
-    __kmp_install_one_handler(SIGABRT, __kmp_team_handler, parallel_init);
-    __kmp_install_one_handler(SIGFPE, __kmp_team_handler, parallel_init);
-    __kmp_install_one_handler(SIGBUS, __kmp_team_handler, parallel_init);
-    __kmp_install_one_handler(SIGSEGV, __kmp_team_handler, parallel_init);
-#ifdef SIGSYS
-    __kmp_install_one_handler(SIGSYS, __kmp_team_handler, parallel_init);
-#endif // SIGSYS
-    __kmp_install_one_handler(SIGTERM, __kmp_team_handler, parallel_init);
-#ifdef SIGPIPE
-    __kmp_install_one_handler(SIGPIPE, __kmp_team_handler, parallel_init);
-#endif // SIGPIPE
-  }
-} // __kmp_install_signals
-
-void __kmp_remove_signals(void) {
-  int sig;
-  KB_TRACE(10, ("__kmp_remove_signals()\n"));
-  for (sig = 1; sig < NSIG; ++sig) {
-    __kmp_remove_one_handler(sig);
-  }
-} // __kmp_remove_signals
-
-#endif // KMP_HANDLE_SIGNALS
-
-void __kmp_enable(int new_state) {
-#ifdef KMP_CANCEL_THREADS
-  int status, old_state;
-  status = pthread_setcancelstate(new_state, &old_state);
-  KMP_CHECK_SYSFAIL("pthread_setcancelstate", status);
-  KMP_DEBUG_ASSERT(old_state == PTHREAD_CANCEL_DISABLE);
-#endif
-}
-
-void __kmp_disable(int *old_state) {
-#ifdef KMP_CANCEL_THREADS
-  int status;
-  status = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, old_state);
-  KMP_CHECK_SYSFAIL("pthread_setcancelstate", status);
-#endif
-}
-
-static void __kmp_atfork_prepare(void) {
-  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
-  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
-}
-
-static void __kmp_atfork_parent(void) {
-  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
-  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
-}
-
-/* Reset the library so execution in the child starts "all over again" with
-   clean data structures in initial states. Don't worry about freeing memory
-   allocated by parent, just abandon it to be safe. */
-static void __kmp_atfork_child(void) {
-  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
-  /* TODO make sure this is done right for nested/sibling */
-  // ATT: Memory leaks are here? TODO: Check it and fix.
-  /* KMP_ASSERT( 0 ); */
-
-  ++__kmp_fork_count;
-
-#if KMP_AFFINITY_SUPPORTED
-#if KMP_OS_LINUX
-  // reset the affinity in the child to the initial thread
-  // affinity in the parent
-  kmp_set_thread_affinity_mask_initial();
-#endif
-  // Set default not to bind threads tightly in the child (we're expecting
-  // over-subscription after the fork and this can improve things for
-  // scripting languages that use OpenMP inside process-parallel code).
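-  /* Editor's note: a minimal sketch of the "widen the child's affinity back
-     out" idea referred to above, using the plain Linux API (an illustration,
-     not the runtime's actual implementation):
-
-       #define _GNU_SOURCE
-       #include <sched.h>
-
-       static void unpin_current_thread(int ncpus) {
-         cpu_set_t mask;
-         CPU_ZERO(&mask);
-         for (int i = 0; i < ncpus; ++i)
-           CPU_SET(i, &mask);                  // allow every CPU again
-         sched_setaffinity(0, sizeof(mask), &mask); // 0 == calling thread
-       }
-  */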
-  __kmp_affinity_type = affinity_none;
-#if OMP_40_ENABLED
-  if (__kmp_nested_proc_bind.bind_types != NULL) {
-    __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
-  }
-#endif // OMP_40_ENABLED
-#endif // KMP_AFFINITY_SUPPORTED
-
-  __kmp_init_runtime = FALSE;
-#if KMP_USE_MONITOR
-  __kmp_init_monitor = 0;
-#endif
-  __kmp_init_parallel = FALSE;
-  __kmp_init_middle = FALSE;
-  __kmp_init_serial = FALSE;
-  TCW_4(__kmp_init_gtid, FALSE);
-  __kmp_init_common = FALSE;
-
-  TCW_4(__kmp_init_user_locks, FALSE);
-#if !KMP_USE_DYNAMIC_LOCK
-  __kmp_user_lock_table.used = 1;
-  __kmp_user_lock_table.allocated = 0;
-  __kmp_user_lock_table.table = NULL;
-  __kmp_lock_blocks = NULL;
-#endif
-
-  __kmp_all_nth = 0;
-  TCW_4(__kmp_nth, 0);
-
-  __kmp_thread_pool = NULL;
-  __kmp_thread_pool_insert_pt = NULL;
-  __kmp_team_pool = NULL;
-
-  /* Must actually zero all the *cache arguments passed to __kmpc_threadprivate
-     here so threadprivate doesn't use stale data */
-  KA_TRACE(10, ("__kmp_atfork_child: checking cache address list %p\n",
-                __kmp_threadpriv_cache_list));
-
-  while (__kmp_threadpriv_cache_list != NULL) {
-
-    if (*__kmp_threadpriv_cache_list->addr != NULL) {
-      KC_TRACE(50, ("__kmp_atfork_child: zeroing cache at address %p\n",
-                    &(*__kmp_threadpriv_cache_list->addr)));
-
-      *__kmp_threadpriv_cache_list->addr = NULL;
-    }
-    __kmp_threadpriv_cache_list = __kmp_threadpriv_cache_list->next;
-  }
-
-  __kmp_init_runtime = FALSE;
-
-  /* reset statically initialized locks */
-  __kmp_init_bootstrap_lock(&__kmp_initz_lock);
-  __kmp_init_bootstrap_lock(&__kmp_stdio_lock);
-  __kmp_init_bootstrap_lock(&__kmp_console_lock);
-  __kmp_init_bootstrap_lock(&__kmp_task_team_lock);
-
-#if USE_ITT_BUILD
-  __kmp_itt_reset(); // reset ITT's global state
-#endif /* USE_ITT_BUILD */
-
-  /* This is necessary to make sure no stale data is left around */
-  /* AC: customers complain that we use unsafe routines in the atfork
-     handler. Mathworks: dlsym() is unsafe. We call dlsym and dlopen
-     in dynamic_link when checking for the presence of the shared tbbmalloc
-     library. The suggestion is to make the library initialization lazier,
-     similar to what is done for __kmpc_begin(). */
-  // TODO: synchronize all static initializations with regular library
-  // startup; look at kmp_global.cpp etc.
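-  /* Editor's note: the three __kmp_atfork_* callbacks above are wired up via
-     pthread_atfork() in __kmp_register_atfork() below. A minimal standalone
-     sketch of the lock-ordering discipline they implement (hypothetical
-     names, assuming POSIX):
-
-       #include <pthread.h>
-
-       static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;
-
-       static void prepare(void) { pthread_mutex_lock(&big_lock); }   // pre-fork
-       static void parent(void)  { pthread_mutex_unlock(&big_lock); } // in parent
-       static void child(void)   { pthread_mutex_unlock(&big_lock); } // in child
-
-       // Call once, e.g. from a library constructor:
-       //   pthread_atfork(prepare, parent, child);
-
-     Taking every lock in prepare() guarantees no other thread holds it across
-     fork(), so the child's copy of the lock is never left locked by a thread
-     that does not exist in the child. */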
-  //__kmp_internal_begin ();
-}
-
-void __kmp_register_atfork(void) {
-  if (__kmp_need_register_atfork) {
-    int status = pthread_atfork(__kmp_atfork_prepare, __kmp_atfork_parent,
-                                __kmp_atfork_child);
-    KMP_CHECK_SYSFAIL("pthread_atfork", status);
-    __kmp_need_register_atfork = FALSE;
-  }
-}
-
-void __kmp_suspend_initialize(void) {
-  int status;
-  status = pthread_mutexattr_init(&__kmp_suspend_mutex_attr);
-  KMP_CHECK_SYSFAIL("pthread_mutexattr_init", status);
-  status = pthread_condattr_init(&__kmp_suspend_cond_attr);
-  KMP_CHECK_SYSFAIL("pthread_condattr_init", status);
-}
-
-static void __kmp_suspend_initialize_thread(kmp_info_t *th) {
-  ANNOTATE_HAPPENS_AFTER(&th->th.th_suspend_init_count);
-  if (th->th.th_suspend_init_count <= __kmp_fork_count) {
-    /* this means we haven't initialized the suspension pthread objects for this
-       thread in this instance of the process */
-    int status;
-    status = pthread_cond_init(&th->th.th_suspend_cv.c_cond,
-                               &__kmp_suspend_cond_attr);
-    KMP_CHECK_SYSFAIL("pthread_cond_init", status);
-    status = pthread_mutex_init(&th->th.th_suspend_mx.m_mutex,
-                                &__kmp_suspend_mutex_attr);
-    KMP_CHECK_SYSFAIL("pthread_mutex_init", status);
-    *(volatile int *)&th->th.th_suspend_init_count = __kmp_fork_count + 1;
-    ANNOTATE_HAPPENS_BEFORE(&th->th.th_suspend_init_count);
-  }
-}
-
-void __kmp_suspend_uninitialize_thread(kmp_info_t *th) {
-  if (th->th.th_suspend_init_count > __kmp_fork_count) {
-    /* this means we have initialized the suspension pthread objects for this
-       thread in this instance of the process */
-    int status;
-
-    status = pthread_cond_destroy(&th->th.th_suspend_cv.c_cond);
-    if (status != 0 && status != EBUSY) {
-      KMP_SYSFAIL("pthread_cond_destroy", status);
-    }
-    status = pthread_mutex_destroy(&th->th.th_suspend_mx.m_mutex);
-    if (status != 0 && status != EBUSY) {
-      KMP_SYSFAIL("pthread_mutex_destroy", status);
-    }
-    --th->th.th_suspend_init_count;
-    KMP_DEBUG_ASSERT(th->th.th_suspend_init_count == __kmp_fork_count);
-  }
-}
-
-/* This routine puts the calling thread to sleep after setting the
-   sleep bit for the indicated flag variable to true. */
-template <class C>
-static inline void __kmp_suspend_template(int th_gtid, C *flag) {
-  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_suspend);
-  kmp_info_t *th = __kmp_threads[th_gtid];
-  int status;
-  typename C::flag_t old_spin;
-
-  KF_TRACE(30, ("__kmp_suspend_template: T#%d enter for flag = %p\n", th_gtid,
-                flag->get()));
-
-  __kmp_suspend_initialize_thread(th);
-
-  status = pthread_mutex_lock(&th->th.th_suspend_mx.m_mutex);
-  KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
-
-  KF_TRACE(10, ("__kmp_suspend_template: T#%d setting sleep bit for spin(%p)\n",
-                th_gtid, flag->get()));
-
-  /* TODO: shouldn't this use release semantics to ensure that
-     __kmp_suspend_initialize_thread gets called first?
-  */
-  old_spin = flag->set_sleeping();
-
-  KF_TRACE(5, ("__kmp_suspend_template: T#%d set sleep bit for spin(%p)==%x,"
-               " was %x\n",
-               th_gtid, flag->get(), flag->load(), old_spin));
-
-  if (flag->done_check_val(old_spin)) {
-    old_spin = flag->unset_sleeping();
-    KF_TRACE(5, ("__kmp_suspend_template: T#%d false alarm, reset sleep bit "
-                 "for spin(%p)\n",
-                 th_gtid, flag->get()));
-  } else {
-    /* Encapsulate in a loop as the documentation states that this may
-       "with low probability" return when the condition variable has
-       not been signaled or broadcast */
-    int deactivated = FALSE;
-    TCW_PTR(th->th.th_sleep_loc, (void *)flag);
-
-    while (flag->is_sleeping()) {
-#ifdef DEBUG_SUSPEND
-      char buffer[128];
-      __kmp_suspend_count++;
-      __kmp_print_cond(buffer, &th->th.th_suspend_cv);
-      __kmp_printf("__kmp_suspend_template: suspending T#%d: %s\n", th_gtid,
-                   buffer);
-#endif
-      // Mark the thread as no longer active (only in the first iteration of
-      // the loop).
-      if (!deactivated) {
-        th->th.th_active = FALSE;
-        if (th->th.th_active_in_pool) {
-          th->th.th_active_in_pool = FALSE;
-          KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
-          KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
-        }
-        deactivated = TRUE;
-      }
-
-#if USE_SUSPEND_TIMEOUT
-      struct timespec now;
-      struct timeval tval;
-      int msecs;
-
-      status = gettimeofday(&tval, NULL);
-      KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
-      TIMEVAL_TO_TIMESPEC(&tval, &now);
-
-      msecs = (4 * __kmp_dflt_blocktime) + 200;
-      now.tv_sec += msecs / 1000;
-      now.tv_nsec += (msecs % 1000) * 1000;
-
-      KF_TRACE(15, ("__kmp_suspend_template: T#%d about to perform "
-                    "pthread_cond_timedwait\n",
-                    th_gtid));
-      status = pthread_cond_timedwait(&th->th.th_suspend_cv.c_cond,
-                                      &th->th.th_suspend_mx.m_mutex, &now);
-#else
-      KF_TRACE(15, ("__kmp_suspend_template: T#%d about to perform"
-                    " pthread_cond_wait\n",
-                    th_gtid));
-      status = pthread_cond_wait(&th->th.th_suspend_cv.c_cond,
-                                 &th->th.th_suspend_mx.m_mutex);
-#endif
-
-      if ((status != 0) && (status != EINTR) && (status != ETIMEDOUT)) {
-        KMP_SYSFAIL("pthread_cond_wait", status);
-      }
-#ifdef KMP_DEBUG
-      if (status == ETIMEDOUT) {
-        if (flag->is_sleeping()) {
-          KF_TRACE(100,
-                   ("__kmp_suspend_template: T#%d timeout wakeup\n", th_gtid));
-        } else {
-          KF_TRACE(2, ("__kmp_suspend_template: T#%d timeout wakeup, sleep bit "
-                       "not set!\n",
-                       th_gtid));
-        }
-      } else if (flag->is_sleeping()) {
-        KF_TRACE(100,
-                 ("__kmp_suspend_template: T#%d spurious wakeup\n", th_gtid));
-      }
-#endif
-    } // while
-
-    // Mark the thread as active again (if it was previously marked as
-    // inactive)
-    if (deactivated) {
-      th->th.th_active = TRUE;
-      if (TCR_4(th->th.th_in_pool)) {
-        KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
-        th->th.th_active_in_pool = TRUE;
-      }
-    }
-  }
-#ifdef DEBUG_SUSPEND
-  {
-    char buffer[128];
-    __kmp_print_cond(buffer, &th->th.th_suspend_cv);
-    __kmp_printf("__kmp_suspend_template: T#%d has awakened: %s\n", th_gtid,
-                 buffer);
-  }
-#endif
-
-  status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
-  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
-  KF_TRACE(30, ("__kmp_suspend_template: T#%d exit\n", th_gtid));
-}
-
-void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag) {
-  __kmp_suspend_template(th_gtid, flag);
-}
-void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag) {
-  __kmp_suspend_template(th_gtid, flag);
-}
-void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
-  __kmp_suspend_template(th_gtid, flag);
-}
-
-/* This routine signals the thread specified by target_gtid to wake up
-   after
setting the sleep bit indicated by the flag argument to FALSE.
-   The target thread must already have called __kmp_suspend_template() */
-template <class C>
-static inline void __kmp_resume_template(int target_gtid, C *flag) {
-  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume);
-  kmp_info_t *th = __kmp_threads[target_gtid];
-  int status;
-
-#ifdef KMP_DEBUG
-  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
-#endif
-
-  KF_TRACE(30, ("__kmp_resume_template: T#%d wants to wakeup T#%d enter\n",
-                gtid, target_gtid));
-  KMP_DEBUG_ASSERT(gtid != target_gtid);
-
-  __kmp_suspend_initialize_thread(th);
-
-  status = pthread_mutex_lock(&th->th.th_suspend_mx.m_mutex);
-  KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
-
-  if (!flag) { // coming from __kmp_null_resume_wrapper
-    flag = (C *)CCAST(void *, th->th.th_sleep_loc);
-  }
-
-  // First, check if the flag is null or its type has changed. If so, someone
-  // else woke it up.
-  if (!flag || flag->get_type() != flag->get_ptr_type()) { // get_ptr_type
-    // simply shows what
-    // flag was cast to
-    KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
-                 "awake: flag(%p)\n",
-                 gtid, target_gtid, NULL));
-    status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
-    KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
-    return;
-  } else { // if multiple threads are sleeping, flag should be internally
-    // referring to a specific thread here
-    typename C::flag_t old_spin = flag->unset_sleeping();
-    if (!flag->is_sleeping_val(old_spin)) {
-      KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
-                   "awake: flag(%p): "
-                   "%u => %u\n",
-                   gtid, target_gtid, flag->get(), old_spin, flag->load()));
-      status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
-      KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
-      return;
-    }
-    KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset "
-                 "sleep bit for flag's loc(%p): "
-                 "%u => %u\n",
-                 gtid, target_gtid, flag->get(), old_spin, flag->load()));
-  }
-  TCW_PTR(th->th.th_sleep_loc, NULL);
-
-#ifdef DEBUG_SUSPEND
-  {
-    char buffer[128];
-    __kmp_print_cond(buffer, &th->th.th_suspend_cv);
-    __kmp_printf("__kmp_resume_template: T#%d resuming T#%d: %s\n", gtid,
-                 target_gtid, buffer);
-  }
-#endif
-  status = pthread_cond_signal(&th->th.th_suspend_cv.c_cond);
-  KMP_CHECK_SYSFAIL("pthread_cond_signal", status);
-  status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
-  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
-  KF_TRACE(30, ("__kmp_resume_template: T#%d exiting after signaling wake up"
-                " for T#%d\n",
-                gtid, target_gtid));
-}
-
-void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag) {
-  __kmp_resume_template(target_gtid, flag);
-}
-void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag) {
-  __kmp_resume_template(target_gtid, flag);
-}
-void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
-  __kmp_resume_template(target_gtid, flag);
-}
-
-#if KMP_USE_MONITOR
-void __kmp_resume_monitor() {
-  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume);
-  int status;
-#ifdef KMP_DEBUG
-  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
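-  /* Editor's note: __kmp_suspend_template / __kmp_resume_template above are
-     the classic mutex + condition-variable sleep/wake pairing. A minimal
-     standalone sketch of the protocol (hypothetical names, assuming POSIX):
-
-       #include <pthread.h>
-
-       static pthread_mutex_t mx = PTHREAD_MUTEX_INITIALIZER;
-       static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
-       static int sleeping; // the "sleep bit", always touched under mx
-
-       static void suspend(void) {
-         pthread_mutex_lock(&mx);
-         sleeping = 1;
-         while (sleeping)               // loop guards against spurious wakeups
-           pthread_cond_wait(&cv, &mx); // atomically unlocks mx while waiting
-         pthread_mutex_unlock(&mx);
-       }
-
-       static void resume(void) {
-         pthread_mutex_lock(&mx);       // take mx so the wakeup cannot be lost
-         sleeping = 0;
-         pthread_cond_signal(&cv);
-         pthread_mutex_unlock(&mx);
-       }
-  */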
-  KF_TRACE(30, ("__kmp_resume_monitor: T#%d wants to wakeup T#%d enter\n", gtid,
-                KMP_GTID_MONITOR));
-  KMP_DEBUG_ASSERT(gtid != KMP_GTID_MONITOR);
-#endif
-  status = pthread_mutex_lock(&__kmp_wait_mx.m_mutex);
-  KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
-#ifdef DEBUG_SUSPEND
-  {
-    char buffer[128];
-    __kmp_print_cond(buffer, &__kmp_wait_cv.c_cond);
-    __kmp_printf("__kmp_resume_monitor: T#%d resuming T#%d: %s\n", gtid,
-                 KMP_GTID_MONITOR, buffer);
-  }
-#endif
-  status = pthread_cond_signal(&__kmp_wait_cv.c_cond);
-  KMP_CHECK_SYSFAIL("pthread_cond_signal", status);
-  status = pthread_mutex_unlock(&__kmp_wait_mx.m_mutex);
-  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
-  KF_TRACE(30, ("__kmp_resume_monitor: T#%d exiting after signaling wake up"
-                " for T#%d\n",
-                gtid, KMP_GTID_MONITOR));
-}
-#endif // KMP_USE_MONITOR
-
-void __kmp_yield(int cond) {
-  if (!cond)
-    return;
-#if KMP_USE_MONITOR
-  if (!__kmp_yielding_on)
-    return;
-#else
-  if (__kmp_yield_cycle && !KMP_YIELD_NOW())
-    return;
-#endif
-  sched_yield();
-}
-
-void __kmp_gtid_set_specific(int gtid) {
-  if (__kmp_init_gtid) {
-    int status;
-    status = pthread_setspecific(__kmp_gtid_threadprivate_key,
-                                 (void *)(intptr_t)(gtid + 1));
-    KMP_CHECK_SYSFAIL("pthread_setspecific", status);
-  } else {
-    KA_TRACE(50, ("__kmp_gtid_set_specific: runtime shutdown, returning\n"));
-  }
-}
-
-int __kmp_gtid_get_specific() {
-  int gtid;
-  if (!__kmp_init_gtid) {
-    KA_TRACE(50, ("__kmp_gtid_get_specific: runtime shutdown, returning "
-                  "KMP_GTID_SHUTDOWN\n"));
-    return KMP_GTID_SHUTDOWN;
-  }
-  gtid = (int)(size_t)pthread_getspecific(__kmp_gtid_threadprivate_key);
-  if (gtid == 0) {
-    gtid = KMP_GTID_DNE;
-  } else {
-    gtid--;
-  }
-  KA_TRACE(50, ("__kmp_gtid_get_specific: key:%d gtid:%d\n",
-                __kmp_gtid_threadprivate_key, gtid));
-  return gtid;
-}
-
-double __kmp_read_cpu_time(void) {
-  /*clock_t t;*/
-  struct tms buffer;
-
-  /*t =*/times(&buffer);
-
-  return (buffer.tms_utime + buffer.tms_cutime) / (double)CLOCKS_PER_SEC;
-}
-
-int __kmp_read_system_info(struct kmp_sys_info *info) {
-  int status;
-  struct rusage r_usage;
-
-  memset(info, 0, sizeof(*info));
-
-  status = getrusage(RUSAGE_SELF, &r_usage);
-  KMP_CHECK_SYSFAIL_ERRNO("getrusage", status);
-
-  // The maximum resident set size utilized (in kilobytes)
-  info->maxrss = r_usage.ru_maxrss;
-  // The number of page faults serviced without any I/O
-  info->minflt = r_usage.ru_minflt;
-  // The number of page faults serviced that required I/O
-  info->majflt = r_usage.ru_majflt;
-  // The number of times a process was "swapped" out of memory
-  info->nswap = r_usage.ru_nswap;
-  // The number of times the file system had to perform input
-  info->inblock = r_usage.ru_inblock;
-  // The number of times the file system had to perform output
-  info->oublock = r_usage.ru_oublock;
-  // The number of times a context switch was performed voluntarily
-  info->nvcsw = r_usage.ru_nvcsw;
-  // The number of times a context switch was forced
-  info->nivcsw = r_usage.ru_nivcsw;
-
-  return (status != 0);
-}
-
-void __kmp_read_system_time(double *delta) {
-  double t_ns;
-  struct timeval tval;
-  struct timespec stop;
-  int status;
-
-  status = gettimeofday(&tval, NULL);
-  KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
-  TIMEVAL_TO_TIMESPEC(&tval, &stop);
-  t_ns = TS2NS(stop) - TS2NS(__kmp_sys_timer_data.start);
-  *delta = (t_ns * 1e-9);
-}
-
-void __kmp_clear_system_time(void) {
-  struct timeval tval;
-  int status;
-  status = gettimeofday(&tval, NULL);
-  KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
-  TIMEVAL_TO_TIMESPEC(&tval, &__kmp_sys_timer_data.start);
-}
-
-static int __kmp_get_xproc(void) {
-
-  int r = 0;
-
-#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
-    KMP_OS_OPENBSD || KMP_OS_HURD
-
-  r = sysconf(_SC_NPROCESSORS_ONLN);
-
-#elif KMP_OS_DARWIN
-
-  // Bug C77011 High "OpenMP Threads and number of active cores".
-
-  // Find the number of available CPUs.
-  kern_return_t rc;
-  host_basic_info_data_t info;
-  mach_msg_type_number_t num = HOST_BASIC_INFO_COUNT;
-  rc = host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&info, &num);
-  if (rc == 0 && num == HOST_BASIC_INFO_COUNT) {
-    // Cannot use KA_TRACE() here because this code works before trace support
-    // is initialized.
-    r = info.avail_cpus;
-  } else {
-    KMP_WARNING(CantGetNumAvailCPU);
-    KMP_INFORM(AssumedNumCPU);
-  }
-
-#else
-
-#error "Unknown or unsupported OS."
-
-#endif
-
-  return r > 0 ? r : 2; /* guess value of 2 if OS told us 0 */
-
-} // __kmp_get_xproc
-
-int __kmp_read_from_file(char const *path, char const *format, ...) {
-  int result;
-  va_list args;
-
-  va_start(args, format);
-  FILE *f = fopen(path, "rb");
-  if (f == NULL) {
-    va_end(args); // don't leak the va_list on the early return
-    return 0;
-  }
-  result = vfscanf(f, format, args);
-  va_end(args);
-  fclose(f);
-
-  return result;
-}
-
-void __kmp_runtime_initialize(void) {
-  int status;
-  pthread_mutexattr_t mutex_attr;
-  pthread_condattr_t cond_attr;
-
-  if (__kmp_init_runtime) {
-    return;
-  }
-
-#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
-  if (!__kmp_cpuinfo.initialized) {
-    __kmp_query_cpuid(&__kmp_cpuinfo);
-  }
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
-  __kmp_xproc = __kmp_get_xproc();
-
-  if (sysconf(_SC_THREADS)) {
-
-    /* Query the maximum number of threads */
-    __kmp_sys_max_nth = sysconf(_SC_THREAD_THREADS_MAX);
-    if (__kmp_sys_max_nth == -1) {
-      /* Unlimited threads for NPTL */
-      __kmp_sys_max_nth = INT_MAX;
-    } else if (__kmp_sys_max_nth <= 1) {
-      /* Can't tell, just use PTHREAD_THREADS_MAX */
-      __kmp_sys_max_nth = KMP_MAX_NTH;
-    }
-
-    /* Query the minimum stack size */
-    __kmp_sys_min_stksize = sysconf(_SC_THREAD_STACK_MIN);
-    if (__kmp_sys_min_stksize <= 1) {
-      __kmp_sys_min_stksize = KMP_MIN_STKSIZE;
-    }
-  }
-
-  /* Set up minimum number of threads to switch to TLS gtid */
-  __kmp_tls_gtid_min = KMP_TLS_GTID_MIN;
-
-  status = pthread_key_create(&__kmp_gtid_threadprivate_key,
-                              __kmp_internal_end_dest);
-  KMP_CHECK_SYSFAIL("pthread_key_create", status);
-  status = pthread_mutexattr_init(&mutex_attr);
-  KMP_CHECK_SYSFAIL("pthread_mutexattr_init", status);
-  status = pthread_mutex_init(&__kmp_wait_mx.m_mutex, &mutex_attr);
-  KMP_CHECK_SYSFAIL("pthread_mutex_init", status);
-  status = pthread_condattr_init(&cond_attr);
-  KMP_CHECK_SYSFAIL("pthread_condattr_init", status);
-  status = pthread_cond_init(&__kmp_wait_cv.c_cond, &cond_attr);
-  KMP_CHECK_SYSFAIL("pthread_cond_init", status);
-#if USE_ITT_BUILD
-  __kmp_itt_initialize();
-#endif /* USE_ITT_BUILD */
-
-  __kmp_init_runtime = TRUE;
-}
-
-void __kmp_runtime_destroy(void) {
-  int status;
-
-  if (!__kmp_init_runtime) {
-    return; // Nothing to do.
-  }
-
-#if USE_ITT_BUILD
-  __kmp_itt_destroy();
-#endif /* USE_ITT_BUILD */
-
-  status = pthread_key_delete(__kmp_gtid_threadprivate_key);
-  KMP_CHECK_SYSFAIL("pthread_key_delete", status);
-
-  status = pthread_mutex_destroy(&__kmp_wait_mx.m_mutex);
-  if (status != 0 && status != EBUSY) {
-    KMP_SYSFAIL("pthread_mutex_destroy", status);
-  }
-  status = pthread_cond_destroy(&__kmp_wait_cv.c_cond);
-  if (status != 0 && status != EBUSY) {
-    KMP_SYSFAIL("pthread_cond_destroy", status);
-  }
-#if KMP_AFFINITY_SUPPORTED
-  __kmp_affinity_uninitialize();
-#endif
-
-  __kmp_init_runtime = FALSE;
-}
-
-/* Put the thread to sleep for a time period */
-/* NOTE: not currently used anywhere */
-void __kmp_thread_sleep(int millis) { sleep((millis + 500) / 1000); }
-
-/* Calculate the elapsed wall clock time for the user */
-void __kmp_elapsed(double *t) {
-  int status;
-#ifdef FIX_SGI_CLOCK
-  struct timespec ts;
-
-  status = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
-  KMP_CHECK_SYSFAIL_ERRNO("clock_gettime", status);
-  *t =
-      (double)ts.tv_nsec * (1.0 / (double)KMP_NSEC_PER_SEC) + (double)ts.tv_sec;
-#else
-  struct timeval tv;
-
-  status = gettimeofday(&tv, NULL);
-  KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
-  *t =
-      (double)tv.tv_usec * (1.0 / (double)KMP_USEC_PER_SEC) + (double)tv.tv_sec;
-#endif
-}
-
-/* Calculate the elapsed wall clock tick for the user */
-void __kmp_elapsed_tick(double *t) { *t = 1 / (double)CLOCKS_PER_SEC; }
-
-/* Return the current time stamp in nsec */
-kmp_uint64 __kmp_now_nsec() {
-  struct timeval t;
-  gettimeofday(&t, NULL);
-  kmp_uint64 nsec = (kmp_uint64)KMP_NSEC_PER_SEC * (kmp_uint64)t.tv_sec +
-                    (kmp_uint64)1000 * (kmp_uint64)t.tv_usec;
-  return nsec;
-}
-
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-/* Measure clock ticks per millisecond */
-void __kmp_initialize_system_tick() {
-  kmp_uint64 now, nsec2, diff;
-  kmp_uint64 delay = 100000; // 50~100 usec on most machines.
-  kmp_uint64 nsec = __kmp_now_nsec();
-  kmp_uint64 goal = __kmp_hardware_timestamp() + delay;
-  while ((now = __kmp_hardware_timestamp()) < goal)
-    ;
-  nsec2 = __kmp_now_nsec();
-  diff = nsec2 - nsec;
-  if (diff > 0) {
-    kmp_uint64 tpms = (kmp_uint64)(1e6 * (delay + (now - goal)) / diff);
-    if (tpms > 0)
-      __kmp_ticks_per_msec = tpms;
-  }
-}
-#endif
-
-/* Determine whether the given address is mapped into the current address
-   space. */
-
-int __kmp_is_address_mapped(void *addr) {
-
-  int found = 0;
-  int rc;
-
-#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_HURD
-
-  /* On GNUish OSes, read the /proc/<pid>/maps pseudo-file to get all the
-     address ranges mapped into the address space. */
-
-  char *name = __kmp_str_format("/proc/%d/maps", getpid());
-  FILE *file = NULL;
-
-  file = fopen(name, "r");
-  KMP_ASSERT(file != NULL);
-
-  for (;;) {
-
-    void *beginning = NULL;
-    void *ending = NULL;
-    char perms[5];
-
-    rc = fscanf(file, "%p-%p %4s %*[^\n]\n", &beginning, &ending, perms);
-    if (rc == EOF) {
-      break;
-    }
-    KMP_ASSERT(rc == 3 &&
-               KMP_STRLEN(perms) == 4); // Make sure all fields are read.
-
-    // Ending address is not included in the region, but beginning is.
-    if ((addr >= beginning) && (addr < ending)) {
-      perms[2] = 0; // 3rd and 4th characters do not matter.
-      if (strcmp(perms, "rw") == 0) {
-        // Memory we are looking for should be readable and writable.
-        found = 1;
-      }
-      break;
-    }
-  }
-
-  // Free resources.
-  fclose(file);
-  KMP_INTERNAL_FREE(name);
-
-#elif KMP_OS_DARWIN
-
-  /* On OS X*, /proc pseudo filesystem is not available. Try to read memory
-     using vm interface.
-  */
-
-  int buffer;
-  vm_size_t count;
-  rc = vm_read_overwrite(
-      mach_task_self(), // Task to read memory of.
-      (vm_address_t)(addr), // Address to read from.
-      1, // Number of bytes to be read.
-      (vm_address_t)(&buffer), // Address of buffer to save read bytes in.
-      &count // Address of var to save number of read bytes in.
-      );
-  if (rc == 0) {
-    // Memory successfully read.
-    found = 1;
-  }
-
-#elif KMP_OS_NETBSD
-
-  int mib[5];
-  mib[0] = CTL_VM;
-  mib[1] = VM_PROC;
-  mib[2] = VM_PROC_MAP;
-  mib[3] = getpid();
-  mib[4] = sizeof(struct kinfo_vmentry);
-
-  size_t size;
-  rc = sysctl(mib, __arraycount(mib), NULL, &size, NULL, 0);
-  KMP_ASSERT(!rc);
-  KMP_ASSERT(size);
-
-  size = size * 4 / 3;
-  struct kinfo_vmentry *kiv = (struct kinfo_vmentry *)KMP_INTERNAL_MALLOC(size);
-  KMP_ASSERT(kiv);
-
-  rc = sysctl(mib, __arraycount(mib), kiv, &size, NULL, 0);
-  KMP_ASSERT(!rc);
-  KMP_ASSERT(size);
-
-  for (size_t i = 0; i < size; i++) {
-    // The region contains addr if kve_start <= addr < kve_end.
-    if (kiv[i].kve_start <= (uint64_t)addr &&
-        (uint64_t)addr < kiv[i].kve_end) {
-      found = 1;
-      break;
-    }
-  }
-  KMP_INTERNAL_FREE(kiv);
-#elif KMP_OS_DRAGONFLY || KMP_OS_OPENBSD
-
-  // FIXME(DragonFly, OpenBSD): Implement this
-  found = 1;
-
-#else
-
-#error "Unknown or unsupported OS"
-
-#endif
-
-  return found;
-
-} // __kmp_is_address_mapped
-
-#ifdef USE_LOAD_BALANCE
-
-#if KMP_OS_DARWIN || KMP_OS_NETBSD
-
-// The function returns the rounded value of the system load average
-// during given time interval which depends on the value of
-// __kmp_load_balance_interval variable (default is 60 sec, other values
-// may be 300 sec or 900 sec).
-// It returns -1 in case of error.
-int __kmp_get_load_balance(int max) {
-  double averages[3];
-  int ret_avg = 0;
-
-  int res = getloadavg(averages, 3);
-
-  // Check __kmp_load_balance_interval to determine which of averages to use.
-  // getloadavg() may return fewer samples than requested, i.e. fewer than 3.
-  if (__kmp_load_balance_interval < 180 && (res >= 1)) {
-    ret_avg = averages[0]; // 1 min
-  } else if ((__kmp_load_balance_interval >= 180 &&
-              __kmp_load_balance_interval < 600) &&
-             (res >= 2)) {
-    ret_avg = averages[1]; // 5 min
-  } else if ((__kmp_load_balance_interval >= 600) && (res == 3)) {
-    ret_avg = averages[2]; // 15 min
-  } else { // Error occurred
-    return -1;
-  }
-
-  return ret_avg;
-}
-
-#else // Linux* OS
-
-// The function returns the number of running (not sleeping) threads, or -1 in
-// case of error. An error could be reported if the Linux* OS kernel is too old
-// (without "/proc" support). Counting running threads stops if max running
-// threads are encountered.
-int __kmp_get_load_balance(int max) {
-  static int permanent_error = 0;
-  static int glb_running_threads = 0; // Saved count of the running threads for
-  // the thread balance algorithm
-  static double glb_call_time = 0; /* Thread balance algorithm call time */
-
-  int running_threads = 0; // Number of running threads in the system.
-
-  DIR *proc_dir = NULL; // Handle of "/proc/" directory.
-  struct dirent *proc_entry = NULL;
-
-  kmp_str_buf_t task_path; // "/proc/<pid>/task/<tid>/" path.
-  DIR *task_dir = NULL; // Handle of "/proc/<pid>/task/<tid>/" directory.
-  struct dirent *task_entry = NULL;
-  int task_path_fixed_len;
-
-  kmp_str_buf_t stat_path; // "/proc/<pid>/task/<tid>/stat" path.
-  int stat_file = -1;
-  int stat_path_fixed_len;
-
-  int total_processes = 0; // Total number of processes in system.
-  int total_threads = 0; // Total number of threads in system.
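-  /* Editor's note: the Darwin/NetBSD branch above maps the configured
-     interval onto the classic 1/5/15-minute load averages. A minimal
-     standalone illustration of getloadavg() (hypothetical wrapper name):
-
-       #include <stdlib.h> // getloadavg() on BSD/macOS and glibc
-
-       static int rounded_load(void) {
-         double avg[3];
-         int n = getloadavg(avg, 3); // returns how many samples were filled
-         return (n >= 1) ? (int)(avg[0] + 0.5) : -1; // 1-minute average
-       }
-  */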
-
-  double call_time = 0.0;
-
-  __kmp_str_buf_init(&task_path);
-  __kmp_str_buf_init(&stat_path);
-
-  __kmp_elapsed(&call_time);
-
-  if (glb_call_time &&
-      (call_time - glb_call_time < __kmp_load_balance_interval)) {
-    running_threads = glb_running_threads;
-    goto finish;
-  }
-
-  glb_call_time = call_time;
-
-  // Do not spend time on scanning "/proc/" if we have a permanent error.
-  if (permanent_error) {
-    running_threads = -1;
-    goto finish;
-  }
-
-  if (max <= 0) {
-    max = INT_MAX;
-  }
-
-  // Open "/proc/" directory.
-  proc_dir = opendir("/proc");
-  if (proc_dir == NULL) {
-    // Cannot open "/proc/". Probably the kernel does not support it. Return an
-    // error now and in subsequent calls.
-    running_threads = -1;
-    permanent_error = 1;
-    goto finish;
-  }
-
-  // Initialize fixed part of task_path. This part will not change.
-  __kmp_str_buf_cat(&task_path, "/proc/", 6);
-  task_path_fixed_len = task_path.used; // Remember number of used characters.
-
-  proc_entry = readdir(proc_dir);
-  while (proc_entry != NULL) {
-    // Proc entry is a directory and name starts with a digit. Assume it is a
-    // process' directory.
-    if (proc_entry->d_type == DT_DIR && isdigit(proc_entry->d_name[0])) {
-
-      ++total_processes;
-      // Make sure init process is the very first in "/proc", so we can replace
-      // strcmp( proc_entry->d_name, "1" ) == 0 with simpler total_processes ==
-      // 1. We are going to check that total_processes == 1 => d_name == "1" is
-      // true (where "=>" is implication). Since C++ does not have => operator,
-      // let us replace it with its equivalent: a => b == ! a || b.
-      KMP_DEBUG_ASSERT(total_processes != 1 ||
-                       strcmp(proc_entry->d_name, "1") == 0);
-
-      // Construct task_path.
-      task_path.used = task_path_fixed_len; // Reset task_path to "/proc/".
-      __kmp_str_buf_cat(&task_path, proc_entry->d_name,
-                        KMP_STRLEN(proc_entry->d_name));
-      __kmp_str_buf_cat(&task_path, "/task", 5);
-
-      task_dir = opendir(task_path.str);
-      if (task_dir == NULL) {
-        // Process can finish between reading "/proc/" directory entry and
-        // opening process' "task/" directory. So, in general case we should not
-        // complain, but have to skip this process and read the next one. But on
-        // systems with no "task/" support we will spend a lot of time to scan
-        // "/proc/" tree again and again without any benefit. "init" process
-        // (its pid is 1) should exist always, so, if we cannot open
-        // "/proc/1/task/" directory, it means "task/" is not supported by
-        // kernel. Report an error now and in the future.
-        if (strcmp(proc_entry->d_name, "1") == 0) {
-          running_threads = -1;
-          permanent_error = 1;
-          goto finish;
-        }
-      } else {
-        // Construct fixed part of stat file path.
-        __kmp_str_buf_clear(&stat_path);
-        __kmp_str_buf_cat(&stat_path, task_path.str, task_path.used);
-        __kmp_str_buf_cat(&stat_path, "/", 1);
-        stat_path_fixed_len = stat_path.used;
-
-        task_entry = readdir(task_dir);
-        while (task_entry != NULL) {
-          // It is a directory and name starts with a digit.
-          if (proc_entry->d_type == DT_DIR && isdigit(task_entry->d_name[0])) {
-            ++total_threads;
-
-            // Construct complete stat file path. Easiest way would be:
-            // __kmp_str_buf_print( & stat_path, "%s/%s/stat", task_path.str,
-            // task_entry->d_name );
-            // but a series of __kmp_str_buf_cat works a bit faster.
-            stat_path.used =
-                stat_path_fixed_len; // Reset stat path to its fixed part.
-            __kmp_str_buf_cat(&stat_path, task_entry->d_name,
-                              KMP_STRLEN(task_entry->d_name));
-            __kmp_str_buf_cat(&stat_path, "/stat", 5);
-
-            // Note: Low-level API (open/read/close) is used. High-level API
-            // (fopen/fclose) works ~ 30 % slower.
-            stat_file = open(stat_path.str, O_RDONLY);
-            if (stat_file == -1) {
-              // We cannot report an error because task (thread) can terminate
-              // just before reading this file.
-            } else {
-              /* Content of "stat" file looks like:
-                 24285 (program) S ...
-
-                 It is a single line (if the program name does not include funny
-                 symbols). The first number is a thread id, then the name of the
-                 executable file in parentheses, then the state of the thread.
-                 We need just the thread state.
-
-                 Good news: Length of program name is 15 characters max. Longer
-                 names are truncated.
-
-                 Thus, we need a rather short buffer: 15 chars for program name +
-                 2 parentheses, + 3 spaces + ~7 digits of pid = 37.
-
-                 Bad news: Program name may contain special symbols like space,
-                 closing parenthesis, or even new line. This makes parsing
-                 "stat" file not 100 % reliable. In case of funny program names
-                 parsing may fail (report incorrect thread state).
-
-                 Parsing "status" file looks more promising (due to different
-                 file structure and escaping special symbols) but reading and
-                 parsing of "status" file works slower.
-                  -- ln
-              */
-              char buffer[65];
-              int len;
-              len = read(stat_file, buffer, sizeof(buffer) - 1);
-              if (len >= 0) {
-                buffer[len] = 0;
-                // Using scanf:
-                // sscanf( buffer, "%*d (%*s) %c ", & state );
-                // looks very nice, but searching for a closing parenthesis
-                // works a bit faster.
-                char *close_parent = strstr(buffer, ") ");
-                if (close_parent != NULL) {
-                  char state = *(close_parent + 2);
-                  if (state == 'R') {
-                    ++running_threads;
-                    if (running_threads >= max) {
-                      goto finish;
-                    }
-                  }
-                }
-              }
-              close(stat_file);
-              stat_file = -1;
-            }
-          }
-          task_entry = readdir(task_dir);
-        }
-        closedir(task_dir);
-        task_dir = NULL;
-      }
-    }
-    proc_entry = readdir(proc_dir);
-  }
-
-  // There _might_ be a timing hole where the thread executing this
-  // code gets skipped in the load balance, and running_threads is 0.
-  // Assert in the debug builds only!!!
-  KMP_DEBUG_ASSERT(running_threads > 0);
-  if (running_threads <= 0) {
-    running_threads = 1;
-  }
-
-finish: // Clean up and exit.
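-  /* Editor's note: a minimal standalone version of the state-parsing trick
-     used above, applied to the calling thread's own /proc/self/stat
-     (hypothetical helper, Linux only):
-
-       #include <fcntl.h>
-       #include <string.h>
-       #include <unistd.h>
-
-       static char my_thread_state(void) {
-         char buf[65];
-         int fd = open("/proc/self/stat", O_RDONLY);
-         if (fd == -1)
-           return '?';
-         ssize_t len = read(fd, buf, sizeof(buf) - 1);
-         close(fd);
-         if (len <= 0)
-           return '?';
-         buf[len] = 0;
-         char *p = strstr(buf, ") "); // skip past "pid (comm" safely
-         return p ? p[2] : '?';       // 'R' == running
-       }
-  */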
-  if (proc_dir != NULL) {
-    closedir(proc_dir);
-  }
-  __kmp_str_buf_free(&task_path);
-  if (task_dir != NULL) {
-    closedir(task_dir);
-  }
-  __kmp_str_buf_free(&stat_path);
-  if (stat_file != -1) {
-    close(stat_file);
-  }
-
-  glb_running_threads = running_threads;
-
-  return running_threads;
-
-} // __kmp_get_load_balance
-
-#endif // KMP_OS_DARWIN
-
-#endif // USE_LOAD_BALANCE
-
-#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC ||                            \
-      ((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) || KMP_ARCH_PPC64)
-
-// we really only need the case with 1 argument, because CLANG always builds
-// a struct of pointers to shared variables referenced in the outlined function
-int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
-                           void *p_argv[]
-#if OMPT_SUPPORT
-                           ,
-                           void **exit_frame_ptr
-#endif
-                           ) {
-#if OMPT_SUPPORT
-  *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
-#endif
-
-  switch (argc) {
-  default:
-    fprintf(stderr, "Too many args to microtask: %d!\n", argc);
-    fflush(stderr);
-    exit(-1);
-  case 0:
-    (*pkfn)(&gtid, &tid);
-    break;
-  case 1:
-    (*pkfn)(&gtid, &tid, p_argv[0]);
-    break;
-  case 2:
-    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1]);
-    break;
-  case 3:
-    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2]);
-    break;
-  case 4:
-    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3]);
-    break;
-  case 5:
-    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4]);
-    break;
-  case 6:
-    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
-            p_argv[5]);
-    break;
-  case 7:
-    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
-            p_argv[5], p_argv[6]);
-    break;
-  case 8:
-    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
-            p_argv[5], p_argv[6], p_argv[7]);
-    break;
-  case 9:
-    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
-            p_argv[5], p_argv[6], p_argv[7], p_argv[8]);
-    break;
-  case 10:
-    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
-            p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9]);
-    break;
-  case 11:
-    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
-            p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10]);
-    break;
-  case 12:
-    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
-            p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
-            p_argv[11]);
-    break;
-  case 13:
-    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
-            p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
-            p_argv[11], p_argv[12]);
-    break;
-  case 14:
-    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
-            p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
-            p_argv[11], p_argv[12], p_argv[13]);
-    break;
-  case 15:
-    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
-            p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
-            p_argv[11], p_argv[12], p_argv[13], p_argv[14]);
-    break;
-  }
-
-#if OMPT_SUPPORT
-  *exit_frame_ptr = 0;
-#endif
-
-  return 1;
-}
-
-#endif
-
-// end of file //

Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/z_Linux_util.cpp
___________________________________________________________________
Deleted: svn:eol-style
## -1 +0,0 ##
-native
\ No newline at end of property
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Deleted: svn:mime-type
## -1 +0,0 ##
-text/plain
\ No newline at end of property
Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_atomic.cpp
===================================================================
--- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_atomic.cpp (revision 348960)
+++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_atomic.cpp (nonexistent)
@@ -1,3630 +0,0 @@
-/*
- * kmp_atomic.cpp -- ATOMIC implementation routines
- */
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "kmp_atomic.h"
-#include "kmp.h" // TRUE, asm routines prototypes
-
-typedef unsigned char uchar;
-typedef unsigned short ushort;
-
-/*!
-@defgroup ATOMIC_OPS Atomic Operations
-These functions are used for implementing the many different varieties of atomic
-operations.
-
-The compiler is at liberty to inline atomic operations that are naturally
-supported by the target architecture. For instance on IA-32 architecture an
-atomic like this can be inlined
-@code
-static int s = 0;
-#pragma omp atomic
-    s++;
-@endcode
-using the single instruction: `lock; incl s`
-
-However the runtime does provide entrypoints for these operations to support
-compilers that choose not to inline them. (For instance,
-`__kmpc_atomic_fixed4_add` could be used to perform the increment above.)
-
-The names of the functions are encoded by using the data type name and the
-operation name, as in these tables.
-
-Data Type | Data type encoding
------------|---------------
-int8_t | `fixed1`
-uint8_t | `fixed1u`
-int16_t | `fixed2`
-uint16_t | `fixed2u`
-int32_t | `fixed4`
-uint32_t | `fixed4u`
-int64_t | `fixed8`
-uint64_t | `fixed8u`
-float | `float4`
-double | `float8`
-float 10 (8087 eighty bit float) | `float10`
-complex<float> | `cmplx4`
-complex<double> | `cmplx8`
-complex<long double> | `cmplx10`
-
-Operation | Operation encoding
-----------|-------------------
-+ | add
-- | sub
-\* | mul
-/ | div
-& | andb
-<< | shl
-\>\> | shr
-\| | orb
-^ | xor
-&& | andl
-\|\| | orl
-maximum | max
-minimum | min
-.eqv. | eqv
-.neqv. | neqv
-
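-For example, combining the two tables (editor's illustration): the name
-`__kmpc_atomic_float8_add` denotes the atomic add entrypoint for `double`
-operands, i.e. it implements
-@code
-#pragma omp atomic
-  *lhs += rhs;   // with double *lhs, double rhs
-@endcode
-and `__kmpc_atomic_fixed4u_div` is the unsigned 32-bit integer divide.
-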
-For non-commutative operations, `_rev` can also be added for the reversed
-operation. For the functions that capture the result, the suffix `_cpt` is
-added.
-
-Update Functions
-================
-The general form of an atomic function that just performs an update (without a
-`capture`)
-@code
-void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE *
-lhs, TYPE rhs );
-@endcode
-@param id_ref a pointer to source location
-@param gtid the global thread id
-@param lhs a pointer to the left operand
-@param rhs the right operand
-
-`capture` functions
-===================
-The capture functions perform an atomic update and return a result, which is
-either the value before the capture, or that after. They take an additional
-argument to determine which result is returned.
-Their general form is therefore
-@code
-TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE *
-lhs, TYPE rhs, int flag );
-@endcode
-@param id_ref a pointer to source location
-@param gtid the global thread id
-@param lhs a pointer to the left operand
-@param rhs the right operand
-@param flag one if the result is to be captured *after* the operation, zero if
-captured *before*.
-
-The one set of exceptions to this is the `complex<float>` type where the value
-is not returned, rather an extra argument pointer is passed.
-
-They look like
-@code
-void __kmpc_atomic_cmplx4_<operation>_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 *
-lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
-@endcode
-
-Read and Write Operations
-=========================
-The OpenMP* standard now supports atomic operations that simply
-ensure that the value is read or written atomically, with no modification
-performed. In many cases on IA-32 architecture these operations can be inlined
-since the architecture guarantees that no tearing occurs on aligned objects
-accessed with a single memory operation of up to 64 bits in size.
-
-The general form of the read operations is
-@code
-TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc );
-@endcode
-
-For the write operations the form is
-@code
-void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs
-);
-@endcode
-
-Full list of functions
-======================
-This leads to the generation of 376 atomic functions, as follows.
-
-Functions for integers
----------------------
-There are versions here for integers of size 1,2,4 and 8 bytes both signed and
-unsigned (where that matters).
-@code - __kmpc_atomic_fixed1_add - __kmpc_atomic_fixed1_add_cpt - __kmpc_atomic_fixed1_add_fp - __kmpc_atomic_fixed1_andb - __kmpc_atomic_fixed1_andb_cpt - __kmpc_atomic_fixed1_andl - __kmpc_atomic_fixed1_andl_cpt - __kmpc_atomic_fixed1_div - __kmpc_atomic_fixed1_div_cpt - __kmpc_atomic_fixed1_div_cpt_rev - __kmpc_atomic_fixed1_div_float8 - __kmpc_atomic_fixed1_div_fp - __kmpc_atomic_fixed1_div_rev - __kmpc_atomic_fixed1_eqv - __kmpc_atomic_fixed1_eqv_cpt - __kmpc_atomic_fixed1_max - __kmpc_atomic_fixed1_max_cpt - __kmpc_atomic_fixed1_min - __kmpc_atomic_fixed1_min_cpt - __kmpc_atomic_fixed1_mul - __kmpc_atomic_fixed1_mul_cpt - __kmpc_atomic_fixed1_mul_float8 - __kmpc_atomic_fixed1_mul_fp - __kmpc_atomic_fixed1_neqv - __kmpc_atomic_fixed1_neqv_cpt - __kmpc_atomic_fixed1_orb - __kmpc_atomic_fixed1_orb_cpt - __kmpc_atomic_fixed1_orl - __kmpc_atomic_fixed1_orl_cpt - __kmpc_atomic_fixed1_rd - __kmpc_atomic_fixed1_shl - __kmpc_atomic_fixed1_shl_cpt - __kmpc_atomic_fixed1_shl_cpt_rev - __kmpc_atomic_fixed1_shl_rev - __kmpc_atomic_fixed1_shr - __kmpc_atomic_fixed1_shr_cpt - __kmpc_atomic_fixed1_shr_cpt_rev - __kmpc_atomic_fixed1_shr_rev - __kmpc_atomic_fixed1_sub - __kmpc_atomic_fixed1_sub_cpt - __kmpc_atomic_fixed1_sub_cpt_rev - __kmpc_atomic_fixed1_sub_fp - __kmpc_atomic_fixed1_sub_rev - __kmpc_atomic_fixed1_swp - __kmpc_atomic_fixed1_wr - __kmpc_atomic_fixed1_xor - __kmpc_atomic_fixed1_xor_cpt - __kmpc_atomic_fixed1u_add_fp - __kmpc_atomic_fixed1u_sub_fp - __kmpc_atomic_fixed1u_mul_fp - __kmpc_atomic_fixed1u_div - __kmpc_atomic_fixed1u_div_cpt - __kmpc_atomic_fixed1u_div_cpt_rev - __kmpc_atomic_fixed1u_div_fp - __kmpc_atomic_fixed1u_div_rev - __kmpc_atomic_fixed1u_shr - __kmpc_atomic_fixed1u_shr_cpt - __kmpc_atomic_fixed1u_shr_cpt_rev - __kmpc_atomic_fixed1u_shr_rev - __kmpc_atomic_fixed2_add - __kmpc_atomic_fixed2_add_cpt - __kmpc_atomic_fixed2_add_fp - __kmpc_atomic_fixed2_andb - __kmpc_atomic_fixed2_andb_cpt - __kmpc_atomic_fixed2_andl - __kmpc_atomic_fixed2_andl_cpt - __kmpc_atomic_fixed2_div - __kmpc_atomic_fixed2_div_cpt - __kmpc_atomic_fixed2_div_cpt_rev - __kmpc_atomic_fixed2_div_float8 - __kmpc_atomic_fixed2_div_fp - __kmpc_atomic_fixed2_div_rev - __kmpc_atomic_fixed2_eqv - __kmpc_atomic_fixed2_eqv_cpt - __kmpc_atomic_fixed2_max - __kmpc_atomic_fixed2_max_cpt - __kmpc_atomic_fixed2_min - __kmpc_atomic_fixed2_min_cpt - __kmpc_atomic_fixed2_mul - __kmpc_atomic_fixed2_mul_cpt - __kmpc_atomic_fixed2_mul_float8 - __kmpc_atomic_fixed2_mul_fp - __kmpc_atomic_fixed2_neqv - __kmpc_atomic_fixed2_neqv_cpt - __kmpc_atomic_fixed2_orb - __kmpc_atomic_fixed2_orb_cpt - __kmpc_atomic_fixed2_orl - __kmpc_atomic_fixed2_orl_cpt - __kmpc_atomic_fixed2_rd - __kmpc_atomic_fixed2_shl - __kmpc_atomic_fixed2_shl_cpt - __kmpc_atomic_fixed2_shl_cpt_rev - __kmpc_atomic_fixed2_shl_rev - __kmpc_atomic_fixed2_shr - __kmpc_atomic_fixed2_shr_cpt - __kmpc_atomic_fixed2_shr_cpt_rev - __kmpc_atomic_fixed2_shr_rev - __kmpc_atomic_fixed2_sub - __kmpc_atomic_fixed2_sub_cpt - __kmpc_atomic_fixed2_sub_cpt_rev - __kmpc_atomic_fixed2_sub_fp - __kmpc_atomic_fixed2_sub_rev - __kmpc_atomic_fixed2_swp - __kmpc_atomic_fixed2_wr - __kmpc_atomic_fixed2_xor - __kmpc_atomic_fixed2_xor_cpt - __kmpc_atomic_fixed2u_add_fp - __kmpc_atomic_fixed2u_sub_fp - __kmpc_atomic_fixed2u_mul_fp - __kmpc_atomic_fixed2u_div - __kmpc_atomic_fixed2u_div_cpt - __kmpc_atomic_fixed2u_div_cpt_rev - __kmpc_atomic_fixed2u_div_fp - __kmpc_atomic_fixed2u_div_rev - __kmpc_atomic_fixed2u_shr - __kmpc_atomic_fixed2u_shr_cpt - __kmpc_atomic_fixed2u_shr_cpt_rev - 
__kmpc_atomic_fixed2u_shr_rev - __kmpc_atomic_fixed4_add - __kmpc_atomic_fixed4_add_cpt - __kmpc_atomic_fixed4_add_fp - __kmpc_atomic_fixed4_andb - __kmpc_atomic_fixed4_andb_cpt - __kmpc_atomic_fixed4_andl - __kmpc_atomic_fixed4_andl_cpt - __kmpc_atomic_fixed4_div - __kmpc_atomic_fixed4_div_cpt - __kmpc_atomic_fixed4_div_cpt_rev - __kmpc_atomic_fixed4_div_float8 - __kmpc_atomic_fixed4_div_fp - __kmpc_atomic_fixed4_div_rev - __kmpc_atomic_fixed4_eqv - __kmpc_atomic_fixed4_eqv_cpt - __kmpc_atomic_fixed4_max - __kmpc_atomic_fixed4_max_cpt - __kmpc_atomic_fixed4_min - __kmpc_atomic_fixed4_min_cpt - __kmpc_atomic_fixed4_mul - __kmpc_atomic_fixed4_mul_cpt - __kmpc_atomic_fixed4_mul_float8 - __kmpc_atomic_fixed4_mul_fp - __kmpc_atomic_fixed4_neqv - __kmpc_atomic_fixed4_neqv_cpt - __kmpc_atomic_fixed4_orb - __kmpc_atomic_fixed4_orb_cpt - __kmpc_atomic_fixed4_orl - __kmpc_atomic_fixed4_orl_cpt - __kmpc_atomic_fixed4_rd - __kmpc_atomic_fixed4_shl - __kmpc_atomic_fixed4_shl_cpt - __kmpc_atomic_fixed4_shl_cpt_rev - __kmpc_atomic_fixed4_shl_rev - __kmpc_atomic_fixed4_shr - __kmpc_atomic_fixed4_shr_cpt - __kmpc_atomic_fixed4_shr_cpt_rev - __kmpc_atomic_fixed4_shr_rev - __kmpc_atomic_fixed4_sub - __kmpc_atomic_fixed4_sub_cpt - __kmpc_atomic_fixed4_sub_cpt_rev - __kmpc_atomic_fixed4_sub_fp - __kmpc_atomic_fixed4_sub_rev - __kmpc_atomic_fixed4_swp - __kmpc_atomic_fixed4_wr - __kmpc_atomic_fixed4_xor - __kmpc_atomic_fixed4_xor_cpt - __kmpc_atomic_fixed4u_add_fp - __kmpc_atomic_fixed4u_sub_fp - __kmpc_atomic_fixed4u_mul_fp - __kmpc_atomic_fixed4u_div - __kmpc_atomic_fixed4u_div_cpt - __kmpc_atomic_fixed4u_div_cpt_rev - __kmpc_atomic_fixed4u_div_fp - __kmpc_atomic_fixed4u_div_rev - __kmpc_atomic_fixed4u_shr - __kmpc_atomic_fixed4u_shr_cpt - __kmpc_atomic_fixed4u_shr_cpt_rev - __kmpc_atomic_fixed4u_shr_rev - __kmpc_atomic_fixed8_add - __kmpc_atomic_fixed8_add_cpt - __kmpc_atomic_fixed8_add_fp - __kmpc_atomic_fixed8_andb - __kmpc_atomic_fixed8_andb_cpt - __kmpc_atomic_fixed8_andl - __kmpc_atomic_fixed8_andl_cpt - __kmpc_atomic_fixed8_div - __kmpc_atomic_fixed8_div_cpt - __kmpc_atomic_fixed8_div_cpt_rev - __kmpc_atomic_fixed8_div_float8 - __kmpc_atomic_fixed8_div_fp - __kmpc_atomic_fixed8_div_rev - __kmpc_atomic_fixed8_eqv - __kmpc_atomic_fixed8_eqv_cpt - __kmpc_atomic_fixed8_max - __kmpc_atomic_fixed8_max_cpt - __kmpc_atomic_fixed8_min - __kmpc_atomic_fixed8_min_cpt - __kmpc_atomic_fixed8_mul - __kmpc_atomic_fixed8_mul_cpt - __kmpc_atomic_fixed8_mul_float8 - __kmpc_atomic_fixed8_mul_fp - __kmpc_atomic_fixed8_neqv - __kmpc_atomic_fixed8_neqv_cpt - __kmpc_atomic_fixed8_orb - __kmpc_atomic_fixed8_orb_cpt - __kmpc_atomic_fixed8_orl - __kmpc_atomic_fixed8_orl_cpt - __kmpc_atomic_fixed8_rd - __kmpc_atomic_fixed8_shl - __kmpc_atomic_fixed8_shl_cpt - __kmpc_atomic_fixed8_shl_cpt_rev - __kmpc_atomic_fixed8_shl_rev - __kmpc_atomic_fixed8_shr - __kmpc_atomic_fixed8_shr_cpt - __kmpc_atomic_fixed8_shr_cpt_rev - __kmpc_atomic_fixed8_shr_rev - __kmpc_atomic_fixed8_sub - __kmpc_atomic_fixed8_sub_cpt - __kmpc_atomic_fixed8_sub_cpt_rev - __kmpc_atomic_fixed8_sub_fp - __kmpc_atomic_fixed8_sub_rev - __kmpc_atomic_fixed8_swp - __kmpc_atomic_fixed8_wr - __kmpc_atomic_fixed8_xor - __kmpc_atomic_fixed8_xor_cpt - __kmpc_atomic_fixed8u_add_fp - __kmpc_atomic_fixed8u_sub_fp - __kmpc_atomic_fixed8u_mul_fp - __kmpc_atomic_fixed8u_div - __kmpc_atomic_fixed8u_div_cpt - __kmpc_atomic_fixed8u_div_cpt_rev - __kmpc_atomic_fixed8u_div_fp - __kmpc_atomic_fixed8u_div_rev - __kmpc_atomic_fixed8u_shr - __kmpc_atomic_fixed8u_shr_cpt - 
__kmpc_atomic_fixed8u_shr_cpt_rev
-    __kmpc_atomic_fixed8u_shr_rev
-@endcode
-
-Functions for floating point
-----------------------------
-There are versions here for floating point numbers of size 4, 8, 10 and 16
-bytes. (Ten byte floats are used by X87, but are now rare).
-@code
-    __kmpc_atomic_float4_add
-    __kmpc_atomic_float4_add_cpt
-    __kmpc_atomic_float4_add_float8
-    __kmpc_atomic_float4_add_fp
-    __kmpc_atomic_float4_div
-    __kmpc_atomic_float4_div_cpt
-    __kmpc_atomic_float4_div_cpt_rev
-    __kmpc_atomic_float4_div_float8
-    __kmpc_atomic_float4_div_fp
-    __kmpc_atomic_float4_div_rev
-    __kmpc_atomic_float4_max
-    __kmpc_atomic_float4_max_cpt
-    __kmpc_atomic_float4_min
-    __kmpc_atomic_float4_min_cpt
-    __kmpc_atomic_float4_mul
-    __kmpc_atomic_float4_mul_cpt
-    __kmpc_atomic_float4_mul_float8
-    __kmpc_atomic_float4_mul_fp
-    __kmpc_atomic_float4_rd
-    __kmpc_atomic_float4_sub
-    __kmpc_atomic_float4_sub_cpt
-    __kmpc_atomic_float4_sub_cpt_rev
-    __kmpc_atomic_float4_sub_float8
-    __kmpc_atomic_float4_sub_fp
-    __kmpc_atomic_float4_sub_rev
-    __kmpc_atomic_float4_swp
-    __kmpc_atomic_float4_wr
-    __kmpc_atomic_float8_add
-    __kmpc_atomic_float8_add_cpt
-    __kmpc_atomic_float8_add_fp
-    __kmpc_atomic_float8_div
-    __kmpc_atomic_float8_div_cpt
-    __kmpc_atomic_float8_div_cpt_rev
-    __kmpc_atomic_float8_div_fp
-    __kmpc_atomic_float8_div_rev
-    __kmpc_atomic_float8_max
-    __kmpc_atomic_float8_max_cpt
-    __kmpc_atomic_float8_min
-    __kmpc_atomic_float8_min_cpt
-    __kmpc_atomic_float8_mul
-    __kmpc_atomic_float8_mul_cpt
-    __kmpc_atomic_float8_mul_fp
-    __kmpc_atomic_float8_rd
-    __kmpc_atomic_float8_sub
-    __kmpc_atomic_float8_sub_cpt
-    __kmpc_atomic_float8_sub_cpt_rev
-    __kmpc_atomic_float8_sub_fp
-    __kmpc_atomic_float8_sub_rev
-    __kmpc_atomic_float8_swp
-    __kmpc_atomic_float8_wr
-    __kmpc_atomic_float10_add
-    __kmpc_atomic_float10_add_cpt
-    __kmpc_atomic_float10_add_fp
-    __kmpc_atomic_float10_div
-    __kmpc_atomic_float10_div_cpt
-    __kmpc_atomic_float10_div_cpt_rev
-    __kmpc_atomic_float10_div_fp
-    __kmpc_atomic_float10_div_rev
-    __kmpc_atomic_float10_mul
-    __kmpc_atomic_float10_mul_cpt
-    __kmpc_atomic_float10_mul_fp
-    __kmpc_atomic_float10_rd
-    __kmpc_atomic_float10_sub
-    __kmpc_atomic_float10_sub_cpt
-    __kmpc_atomic_float10_sub_cpt_rev
-    __kmpc_atomic_float10_sub_fp
-    __kmpc_atomic_float10_sub_rev
-    __kmpc_atomic_float10_swp
-    __kmpc_atomic_float10_wr
-    __kmpc_atomic_float16_add
-    __kmpc_atomic_float16_add_cpt
-    __kmpc_atomic_float16_div
-    __kmpc_atomic_float16_div_cpt
-    __kmpc_atomic_float16_div_cpt_rev
-    __kmpc_atomic_float16_div_rev
-    __kmpc_atomic_float16_max
-    __kmpc_atomic_float16_max_cpt
-    __kmpc_atomic_float16_min
-    __kmpc_atomic_float16_min_cpt
-    __kmpc_atomic_float16_mul
-    __kmpc_atomic_float16_mul_cpt
-    __kmpc_atomic_float16_rd
-    __kmpc_atomic_float16_sub
-    __kmpc_atomic_float16_sub_cpt
-    __kmpc_atomic_float16_sub_cpt_rev
-    __kmpc_atomic_float16_sub_rev
-    __kmpc_atomic_float16_swp
-    __kmpc_atomic_float16_wr
-@endcode
-
-Functions for Complex types
----------------------------
-Functions for complex types whose component floating point variables are of size
-4, 8, 10 or 16 bytes. The names here are based on the size of the component float,
-*not* the size of the complex type. So `__kmpc_atomic_cmplx8_add` is an operation
-on a `complex<double>` or `complex(kind=8)`, *not* `complex<float>`.
- -@code - __kmpc_atomic_cmplx4_add - __kmpc_atomic_cmplx4_add_cmplx8 - __kmpc_atomic_cmplx4_add_cpt - __kmpc_atomic_cmplx4_div - __kmpc_atomic_cmplx4_div_cmplx8 - __kmpc_atomic_cmplx4_div_cpt - __kmpc_atomic_cmplx4_div_cpt_rev - __kmpc_atomic_cmplx4_div_rev - __kmpc_atomic_cmplx4_mul - __kmpc_atomic_cmplx4_mul_cmplx8 - __kmpc_atomic_cmplx4_mul_cpt - __kmpc_atomic_cmplx4_rd - __kmpc_atomic_cmplx4_sub - __kmpc_atomic_cmplx4_sub_cmplx8 - __kmpc_atomic_cmplx4_sub_cpt - __kmpc_atomic_cmplx4_sub_cpt_rev - __kmpc_atomic_cmplx4_sub_rev - __kmpc_atomic_cmplx4_swp - __kmpc_atomic_cmplx4_wr - __kmpc_atomic_cmplx8_add - __kmpc_atomic_cmplx8_add_cpt - __kmpc_atomic_cmplx8_div - __kmpc_atomic_cmplx8_div_cpt - __kmpc_atomic_cmplx8_div_cpt_rev - __kmpc_atomic_cmplx8_div_rev - __kmpc_atomic_cmplx8_mul - __kmpc_atomic_cmplx8_mul_cpt - __kmpc_atomic_cmplx8_rd - __kmpc_atomic_cmplx8_sub - __kmpc_atomic_cmplx8_sub_cpt - __kmpc_atomic_cmplx8_sub_cpt_rev - __kmpc_atomic_cmplx8_sub_rev - __kmpc_atomic_cmplx8_swp - __kmpc_atomic_cmplx8_wr - __kmpc_atomic_cmplx10_add - __kmpc_atomic_cmplx10_add_cpt - __kmpc_atomic_cmplx10_div - __kmpc_atomic_cmplx10_div_cpt - __kmpc_atomic_cmplx10_div_cpt_rev - __kmpc_atomic_cmplx10_div_rev - __kmpc_atomic_cmplx10_mul - __kmpc_atomic_cmplx10_mul_cpt - __kmpc_atomic_cmplx10_rd - __kmpc_atomic_cmplx10_sub - __kmpc_atomic_cmplx10_sub_cpt - __kmpc_atomic_cmplx10_sub_cpt_rev - __kmpc_atomic_cmplx10_sub_rev - __kmpc_atomic_cmplx10_swp - __kmpc_atomic_cmplx10_wr - __kmpc_atomic_cmplx16_add - __kmpc_atomic_cmplx16_add_cpt - __kmpc_atomic_cmplx16_div - __kmpc_atomic_cmplx16_div_cpt - __kmpc_atomic_cmplx16_div_cpt_rev - __kmpc_atomic_cmplx16_div_rev - __kmpc_atomic_cmplx16_mul - __kmpc_atomic_cmplx16_mul_cpt - __kmpc_atomic_cmplx16_rd - __kmpc_atomic_cmplx16_sub - __kmpc_atomic_cmplx16_sub_cpt - __kmpc_atomic_cmplx16_sub_cpt_rev - __kmpc_atomic_cmplx16_swp - __kmpc_atomic_cmplx16_wr -@endcode -*/ - -/*! 
-@ingroup ATOMIC_OPS -@{ -*/ - -/* - * Global vars - */ - -#ifndef KMP_GOMP_COMPAT -int __kmp_atomic_mode = 1; // Intel perf -#else -int __kmp_atomic_mode = 2; // GOMP compatibility -#endif /* KMP_GOMP_COMPAT */ - -KMP_ALIGN(128) - -// Control access to all user coded atomics in Gnu compat mode -kmp_atomic_lock_t __kmp_atomic_lock; -// Control access to all user coded atomics for 1-byte fixed data types -kmp_atomic_lock_t __kmp_atomic_lock_1i; -// Control access to all user coded atomics for 2-byte fixed data types -kmp_atomic_lock_t __kmp_atomic_lock_2i; -// Control access to all user coded atomics for 4-byte fixed data types -kmp_atomic_lock_t __kmp_atomic_lock_4i; -// Control access to all user coded atomics for kmp_real32 data type -kmp_atomic_lock_t __kmp_atomic_lock_4r; -// Control access to all user coded atomics for 8-byte fixed data types -kmp_atomic_lock_t __kmp_atomic_lock_8i; -// Control access to all user coded atomics for kmp_real64 data type -kmp_atomic_lock_t __kmp_atomic_lock_8r; -// Control access to all user coded atomics for complex byte data type -kmp_atomic_lock_t __kmp_atomic_lock_8c; -// Control access to all user coded atomics for long double data type -kmp_atomic_lock_t __kmp_atomic_lock_10r; -// Control access to all user coded atomics for _Quad data type -kmp_atomic_lock_t __kmp_atomic_lock_16r; -// Control access to all user coded atomics for double complex data type -kmp_atomic_lock_t __kmp_atomic_lock_16c; -// Control access to all user coded atomics for long double complex type -kmp_atomic_lock_t __kmp_atomic_lock_20c; -// Control access to all user coded atomics for _Quad complex data type -kmp_atomic_lock_t __kmp_atomic_lock_32c; - -/* 2007-03-02: - Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug - on *_32 and *_32e. This is just a temporary workaround for the problem. It - seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines - in assembler language. 
-*/
-#define KMP_ATOMIC_VOLATILE volatile
-
-#if (KMP_ARCH_X86) && KMP_HAVE_QUAD
-
-static inline void operator+=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
-  lhs.q += rhs.q;
-}
-static inline void operator-=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
-  lhs.q -= rhs.q;
-}
-static inline void operator*=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
-  lhs.q *= rhs.q;
-}
-static inline void operator/=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
-  lhs.q /= rhs.q;
-}
-static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
-  return lhs.q < rhs.q;
-}
-static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
-  return lhs.q > rhs.q;
-}
-
-static inline void operator+=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
-  lhs.q += rhs.q;
-}
-static inline void operator-=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
-  lhs.q -= rhs.q;
-}
-static inline void operator*=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
-  lhs.q *= rhs.q;
-}
-static inline void operator/=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
-  lhs.q /= rhs.q;
-}
-static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
-  return lhs.q < rhs.q;
-}
-static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
-  return lhs.q > rhs.q;
-}
-
-static inline void operator+=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
-  lhs.q += rhs.q;
-}
-static inline void operator-=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
-  lhs.q -= rhs.q;
-}
-static inline void operator*=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
-  lhs.q *= rhs.q;
-}
-static inline void operator/=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
-  lhs.q /= rhs.q;
-}
-
-static inline void operator+=(kmp_cmplx128_a16_t &lhs,
-                              kmp_cmplx128_a16_t &rhs) {
-  lhs.q += rhs.q;
-}
-static inline void operator-=(kmp_cmplx128_a16_t &lhs,
-                              kmp_cmplx128_a16_t &rhs) {
-  lhs.q -= rhs.q;
-}
-static inline void operator*=(kmp_cmplx128_a16_t &lhs,
-                              kmp_cmplx128_a16_t &rhs) {
-  lhs.q *= rhs.q;
-}
-static inline void operator/=(kmp_cmplx128_a16_t &lhs,
-                              kmp_cmplx128_a16_t &rhs) {
-  lhs.q /= rhs.q;
-}
-
-#endif
-
-// ATOMIC implementation routines -----------------------------------------
-// One routine for each operation and operand type.
-// All routine declarations look like
-// void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
-
-#define KMP_CHECK_GTID                                                         \
-  if (gtid == KMP_GTID_UNKNOWN) {                                              \
-    gtid = __kmp_entry_gtid();                                                 \
-  } // check and get gtid when needed
-
-// Beginning of a definition (provides name, parameters, debug trace)
-// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
-// fixed)
-// OP_ID - operation identifier (add, sub, mul, ...)
-// TYPE - operands' type
-#define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
-  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
-                                             TYPE *lhs, TYPE rhs) { \
-    KMP_DEBUG_ASSERT(__kmp_init_serial); \
-    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
-
-// ------------------------------------------------------------------------
-// Lock variables used for critical sections for various size operands
-#define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
-#define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
-#define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
-#define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
-#define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
-#define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
-#define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
-#define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
-#define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
-#define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
-#define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
-#define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
-#define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
-
-// ------------------------------------------------------------------------
-// Operation on *lhs, rhs bound by critical section
-// OP - operator (it's supposed to contain an assignment)
-// LCK_ID - lock identifier
-// Note: don't check gtid as it should always be valid
-// 1, 2-byte - expect valid parameter, other - check before this macro
-#define OP_CRITICAL(OP, LCK_ID) \
-  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
- \
-  (*lhs) OP(rhs); \
- \
-  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
-
-// ------------------------------------------------------------------------
-// For GNU compatibility, we may need to use a critical section,
-// even though it is not required by the ISA.
-//
-// On IA-32 architecture, all atomic operations except for fixed 4 byte add,
-// sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
-// critical section. On Intel(R) 64, all atomic operations are done with fetch
-// and add or compare and exchange. Therefore, the FLAG parameter to this
-// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions
-// which require a critical section and which we predict will be implemented
-// in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
-//
-// When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
-// the FLAG parameter should always be 1. If we know that we will be using
-// a critical section, then we want to make certain that we use the generic
-// lock __kmp_atomic_lock to protect the atomic update, and not one of the
-// locks that are specialized based upon the size or type of the data.
-//
-// If FLAG is 0, then we are relying on dead code elimination by the build
-// compiler to get rid of the useless block of code, and save a needless
-// branch at runtime. 
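-
-// As a concrete (purely illustrative) example of how these pieces compose:
-// assuming KMP_GOMP_COMPAT is disabled, expanding
-//   ATOMIC_BEGIN(float10, add, long double, void)
-// followed by OP_CRITICAL(+=, 10r) yields, roughly:
-//
-//   void __kmpc_atomic_float10_add(ident_t *id_ref, int gtid,
-//                                  long double *lhs, long double rhs) {
-//     KMP_DEBUG_ASSERT(__kmp_init_serial);
-//     KA_TRACE(100, ("__kmpc_atomic_float10_add: T#%d\n", gtid));
-//     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
-//     (*lhs) += (rhs);
-//     __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
-//   }
-//
-// (Hand expansion, not verbatim preprocessor output; shown only to make the
-// macro layering easier to follow.)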
- -#ifdef KMP_GOMP_COMPAT -#define OP_GOMP_CRITICAL(OP, FLAG) \ - if ((FLAG) && (__kmp_atomic_mode == 2)) { \ - KMP_CHECK_GTID; \ - OP_CRITICAL(OP, 0); \ - return; \ - } -#else -#define OP_GOMP_CRITICAL(OP, FLAG) -#endif /* KMP_GOMP_COMPAT */ - -#if KMP_MIC -#define KMP_DO_PAUSE _mm_delay_32(1) -#else -#define KMP_DO_PAUSE KMP_CPU_PAUSE() -#endif /* KMP_MIC */ - -// ------------------------------------------------------------------------ -// Operation on *lhs, rhs using "compare_and_store" routine -// TYPE - operands' type -// BITS - size in bits, used to distinguish low level calls -// OP - operator -#define OP_CMPXCHG(TYPE, BITS, OP) \ - { \ - TYPE old_value, new_value; \ - old_value = *(TYPE volatile *)lhs; \ - new_value = old_value OP rhs; \ - while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ - (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ - *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ - KMP_DO_PAUSE; \ - \ - old_value = *(TYPE volatile *)lhs; \ - new_value = old_value OP rhs; \ - } \ - } - -#if USE_CMPXCHG_FIX -// 2007-06-25: -// workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32 -// and win_32e are affected (I verified the asm). Compiler ignores the volatile -// qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the -// compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of -// the workaround. -#define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \ - { \ - struct _sss { \ - TYPE cmp; \ - kmp_int##BITS *vvv; \ - }; \ - struct _sss old_value, new_value; \ - old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \ - new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \ - *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \ - new_value.cmp = old_value.cmp OP rhs; \ - while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ - (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \ - *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \ - KMP_DO_PAUSE; \ - \ - *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \ - new_value.cmp = old_value.cmp OP rhs; \ - } \ - } -// end of the first part of the workaround for C78287 -#endif // USE_CMPXCHG_FIX - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - -// ------------------------------------------------------------------------ -// X86 or X86_64: no alignment problems ==================================== -#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ - GOMP_FLAG) \ - ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ - OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ - /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ - KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \ - } -// ------------------------------------------------------------------------- -#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ - GOMP_FLAG) \ - ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ - OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ - OP_CMPXCHG(TYPE, BITS, OP) \ - } -#if USE_CMPXCHG_FIX -// ------------------------------------------------------------------------- -// workaround for C78287 (complex(kind=4) data type) -#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \ - MASK, GOMP_FLAG) \ - ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ - OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ - OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \ - } -// end of the second part of the workaround for C78287 -#endif - -#else -// ------------------------------------------------------------------------- -// Code for other architectures that don't handle unaligned accesses. 
-#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ - GOMP_FLAG) \ - ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ - OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ - if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ - /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ - KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \ - } else { \ - KMP_CHECK_GTID; \ - OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \ - } \ - } -// ------------------------------------------------------------------------- -#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ - GOMP_FLAG) \ - ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ - OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ - if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ - OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ - } else { \ - KMP_CHECK_GTID; \ - OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \ - } \ - } -#if USE_CMPXCHG_FIX -// ------------------------------------------------------------------------- -// workaround for C78287 (complex(kind=4) data type) -#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \ - MASK, GOMP_FLAG) \ - ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ - OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ - if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ - OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ - } else { \ - KMP_CHECK_GTID; \ - OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \ - } \ - } -// end of the second part of the workaround for C78287 -#endif // USE_CMPXCHG_FIX -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -// Routines for ATOMIC 4-byte operands addition and subtraction -ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3, - 0) // __kmpc_atomic_fixed4_add -ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3, - 0) // __kmpc_atomic_fixed4_sub - -ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3, - KMP_ARCH_X86) // __kmpc_atomic_float4_add -ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3, - KMP_ARCH_X86) // __kmpc_atomic_float4_sub - -// Routines for ATOMIC 8-byte operands addition and subtraction -ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_add -ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub - -ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7, - KMP_ARCH_X86) // __kmpc_atomic_float8_add -ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7, - KMP_ARCH_X86) // __kmpc_atomic_float8_sub - -// ------------------------------------------------------------------------ -// Entries definition for integer operands -// TYPE_ID - operands type and size (fixed4, float4) -// OP_ID - operation identifier (add, sub, mul, ...) 
-// TYPE - operand type -// BITS - size in bits, used to distinguish low level calls -// OP - operator (used in critical section) -// LCK_ID - lock identifier, used to possibly distinguish lock variable -// MASK - used for alignment check - -// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG -// ------------------------------------------------------------------------ -// Routines for ATOMIC integer operands, other operators -// ------------------------------------------------------------------------ -// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG -ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_add -ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0, - 0) // __kmpc_atomic_fixed1_andb -ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_div -ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div -ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul -ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0, - 0) // __kmpc_atomic_fixed1_orb -ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl -ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr -ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr -ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub -ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0, - 0) // __kmpc_atomic_fixed1_xor -ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_add -ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1, - 0) // __kmpc_atomic_fixed2_andb -ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_div -ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div -ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul -ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1, - 0) // __kmpc_atomic_fixed2_orb -ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl -ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr -ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr -ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub -ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1, - 0) // __kmpc_atomic_fixed2_xor -ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3, - 0) // __kmpc_atomic_fixed4_andb -ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3, - KMP_ARCH_X86) // __kmpc_atomic_fixed4_div -ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3, - KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div -ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3, - KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul -ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3, - 0) // __kmpc_atomic_fixed4_orb -ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3, - KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl -ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3, - KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr -ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3, - KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr -ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3, - 0) // __kmpc_atomic_fixed4_xor -ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 
8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb -ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_div -ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div -ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul -ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb -ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl -ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr -ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr -ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor -ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3, - KMP_ARCH_X86) // __kmpc_atomic_float4_div -ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3, - KMP_ARCH_X86) // __kmpc_atomic_float4_mul -ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7, - KMP_ARCH_X86) // __kmpc_atomic_float8_div -ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7, - KMP_ARCH_X86) // __kmpc_atomic_float8_mul -// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG - -/* ------------------------------------------------------------------------ */ -/* Routines for C/C++ Reduction operators && and || */ - -// ------------------------------------------------------------------------ -// Need separate macros for &&, || because there is no combined assignment -// TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used -#define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ - ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ - OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \ - OP_CRITICAL(= *lhs OP, LCK_ID) \ - } - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - -// ------------------------------------------------------------------------ -// X86 or X86_64: no alignment problems =================================== -#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \ - ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ - OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \ - OP_CMPXCHG(TYPE, BITS, OP) \ - } - -#else -// ------------------------------------------------------------------------ -// Code for other architectures that don't handle unaligned accesses. 
-#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
-  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
-  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
-  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
-    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
-  } else { \
-    KMP_CHECK_GTID; \
-    OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \
-  } \
-  }
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
-ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
-              KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
-ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
-              KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
-ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
-              KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
-ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
-              KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
-ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
-              0) // __kmpc_atomic_fixed4_andl
-ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
-              0) // __kmpc_atomic_fixed4_orl
-ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
-              KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
-ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
-              KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl
-
-/* ------------------------------------------------------------------------- */
-/* Routines for Fortran operators that have no C counterpart: */
-/* MAX, MIN, .EQV., .NEQV. */
-/* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
-/* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */
-
-// -------------------------------------------------------------------------
-// MIN and MAX need separate macros
-// OP - operator used to check whether any action is needed
-#define MIN_MAX_CRITSECT(OP, LCK_ID) \
-  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
- \
-  if (*lhs OP rhs) { /* still need actions? */ \
-    *lhs = rhs; \
-  } \
-  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
-
-// -------------------------------------------------------------------------
-#ifdef KMP_GOMP_COMPAT
-#define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \
-  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
-    KMP_CHECK_GTID; \
-    MIN_MAX_CRITSECT(OP, 0); \
-    return; \
-  }
-#else
-#define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
-#endif /* KMP_GOMP_COMPAT */
-
-// -------------------------------------------------------------------------
-#define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
-  { \
-    TYPE KMP_ATOMIC_VOLATILE temp_val; \
-    TYPE old_value; \
-    temp_val = *lhs; \
-    old_value = temp_val; \
-    while (old_value OP rhs && /* still need actions? */ \
-           !KMP_COMPARE_AND_STORE_ACQ##BITS( \
-               (kmp_int##BITS *)lhs, \
-               *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
-               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
-      KMP_CPU_PAUSE(); \
-      temp_val = *lhs; \
-      old_value = temp_val; \
-    } \
-  }
-
-// -------------------------------------------------------------------------
-// 1-byte, 2-byte operands - use critical section
-#define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
-  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
-  if (*lhs OP rhs) { /* need actions? 
*/ \ - GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \ - MIN_MAX_CRITSECT(OP, LCK_ID) \ - } \ - } - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - -// ------------------------------------------------------------------------- -// X86 or X86_64: no alignment problems ==================================== -#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ - GOMP_FLAG) \ - ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ - if (*lhs OP rhs) { \ - GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \ - MIN_MAX_CMPXCHG(TYPE, BITS, OP) \ - } \ - } - -#else -// ------------------------------------------------------------------------- -// Code for other architectures that don't handle unaligned accesses. -#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ - GOMP_FLAG) \ - ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ - if (*lhs OP rhs) { \ - GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \ - if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ - MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ - } else { \ - KMP_CHECK_GTID; \ - MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \ - } \ - } \ - } -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_max -MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_min -MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_max -MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_min -MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3, - 0) // __kmpc_atomic_fixed4_max -MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3, - 0) // __kmpc_atomic_fixed4_min -MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_max -MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_min -MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3, - KMP_ARCH_X86) // __kmpc_atomic_float4_max -MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3, - KMP_ARCH_X86) // __kmpc_atomic_float4_min -MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7, - KMP_ARCH_X86) // __kmpc_atomic_float8_max -MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7, - KMP_ARCH_X86) // __kmpc_atomic_float8_min -#if KMP_HAVE_QUAD -MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r, - 1) // __kmpc_atomic_float16_max -MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r, - 1) // __kmpc_atomic_float16_min -#if (KMP_ARCH_X86) -MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r, - 1) // __kmpc_atomic_float16_max_a16 -MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r, - 1) // __kmpc_atomic_float16_min_a16 -#endif -#endif -// ------------------------------------------------------------------------ -// Need separate macros for .EQV. 
because the complement (~) is needed
-// OP is ignored for critical sections; ^=~ is used instead
-#define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
-  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
-  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */ \
-  OP_CRITICAL(^= ~, LCK_ID) /* send assignment and complement */ \
-  }
-
-// ------------------------------------------------------------------------
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-// ------------------------------------------------------------------------
-// X86 or X86_64: no alignment problems ===================================
-#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
-                        GOMP_FLAG) \
-  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
-  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */ \
-  OP_CMPXCHG(TYPE, BITS, OP) \
-  }
-// ------------------------------------------------------------------------
-#else
-// ------------------------------------------------------------------------
-// Code for other architectures that don't handle unaligned accesses.
-#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
-                        GOMP_FLAG) \
-  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
-  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) \
-  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
-    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
-  } else { \
-    KMP_CHECK_GTID; \
-    OP_CRITICAL(^= ~, LCK_ID) /* unaligned address - use critical */ \
-  } \
-  }
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
-ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
-               KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
-ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
-               KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
-ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
-               KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
-ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
-               KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
-ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
-                KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
-ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
-                KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
-ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
-                KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
-ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
-                KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv
-
-// ------------------------------------------------------------------------
-// Routines for Extended types: long double, _Quad, complex flavours (use
-// critical section)
-// TYPE_ID, OP_ID, TYPE - detailed above
-// OP - operator
-// LCK_ID - lock identifier, used to possibly distinguish lock variable
-#define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
-  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
-  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */ \
-  OP_CRITICAL(OP## =, LCK_ID) /* send assignment */ \
-  }
-
-/* ------------------------------------------------------------------------- */
-// routines for long double type
-ATOMIC_CRITICAL(float10, add, long double, +, 10r,
-                1) // __kmpc_atomic_float10_add
-ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
-                1) // __kmpc_atomic_float10_sub
-ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
-                1) // __kmpc_atomic_float10_mul
-ATOMIC_CRITICAL(float10, div, long double, /, 10r,
-                1) // __kmpc_atomic_float10_div
-#if KMP_HAVE_QUAD
-// routines for _Quad type
-ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
-                1) // __kmpc_atomic_float16_add
-ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
-                1) // __kmpc_atomic_float16_sub
-ATOMIC_CRITICAL(float16, mul, 
QUAD_LEGACY, *, 16r, - 1) // __kmpc_atomic_float16_mul -ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r, - 1) // __kmpc_atomic_float16_div -#if (KMP_ARCH_X86) -ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r, - 1) // __kmpc_atomic_float16_add_a16 -ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r, - 1) // __kmpc_atomic_float16_sub_a16 -ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r, - 1) // __kmpc_atomic_float16_mul_a16 -ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r, - 1) // __kmpc_atomic_float16_div_a16 -#endif -#endif -// routines for complex types - -#if USE_CMPXCHG_FIX -// workaround for C78287 (complex(kind=4) data type) -ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7, - 1) // __kmpc_atomic_cmplx4_add -ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7, - 1) // __kmpc_atomic_cmplx4_sub -ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7, - 1) // __kmpc_atomic_cmplx4_mul -ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7, - 1) // __kmpc_atomic_cmplx4_div -// end of the workaround for C78287 -#else -ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add -ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub -ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul -ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div -#endif // USE_CMPXCHG_FIX - -ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add -ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub -ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul -ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div -ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c, - 1) // __kmpc_atomic_cmplx10_add -ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c, - 1) // __kmpc_atomic_cmplx10_sub -ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c, - 1) // __kmpc_atomic_cmplx10_mul -ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c, - 1) // __kmpc_atomic_cmplx10_div -#if KMP_HAVE_QUAD -ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c, - 1) // __kmpc_atomic_cmplx16_add -ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c, - 1) // __kmpc_atomic_cmplx16_sub -ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c, - 1) // __kmpc_atomic_cmplx16_mul -ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c, - 1) // __kmpc_atomic_cmplx16_div -#if (KMP_ARCH_X86) -ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c, - 1) // __kmpc_atomic_cmplx16_add_a16 -ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, - 1) // __kmpc_atomic_cmplx16_sub_a16 -ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c, - 1) // __kmpc_atomic_cmplx16_mul_a16 -ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, - 1) // __kmpc_atomic_cmplx16_div_a16 -#endif -#endif - -#if OMP_40_ENABLED - -// OpenMP 4.0: x = expr binop x for non-commutative operations. 
-// Supported only on IA-32 architecture and Intel(R) 64
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-
-// ------------------------------------------------------------------------
-// Operation on *lhs, rhs bound by critical section
-// OP - operator (it's supposed to contain an assignment)
-// LCK_ID - lock identifier
-// Note: don't check gtid as it should always be valid
-// 1, 2-byte - expect valid parameter, other - check before this macro
-#define OP_CRITICAL_REV(OP, LCK_ID) \
-  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
- \
-  (*lhs) = (rhs)OP(*lhs); \
- \
-  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
-
-#ifdef KMP_GOMP_COMPAT
-#define OP_GOMP_CRITICAL_REV(OP, FLAG) \
-  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
-    KMP_CHECK_GTID; \
-    OP_CRITICAL_REV(OP, 0); \
-    return; \
-  }
-#else
-#define OP_GOMP_CRITICAL_REV(OP, FLAG)
-#endif /* KMP_GOMP_COMPAT */
-
-// Beginning of a definition (provides name, parameters, debug trace)
-// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
-// fixed)
-// OP_ID - operation identifier (add, sub, mul, ...)
-// TYPE - operands' type
-#define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
-  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \
-                                                   TYPE *lhs, TYPE rhs) { \
-    KMP_DEBUG_ASSERT(__kmp_init_serial); \
-    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));
-
-// ------------------------------------------------------------------------
-// Operation on *lhs, rhs using "compare_and_store" routine
-// TYPE - operands' type
-// BITS - size in bits, used to distinguish low level calls
-// OP - operator
-// Note: temp_val introduced in order to force the compiler to read
-// *lhs only once (w/o it the compiler reads *lhs twice)
-#define OP_CMPXCHG_REV(TYPE, BITS, OP) \
-  { \
-    TYPE KMP_ATOMIC_VOLATILE temp_val; \
-    TYPE old_value, new_value; \
-    temp_val = *lhs; \
-    old_value = temp_val; \
-    new_value = rhs OP old_value; \
-    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
-        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
-        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
-      KMP_DO_PAUSE; \
- \
-      temp_val = *lhs; \
-      old_value = temp_val; \
-      new_value = rhs OP old_value; \
-    } \
-  }
-
-// -------------------------------------------------------------------------
-#define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \
-  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
-  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
-  OP_CMPXCHG_REV(TYPE, BITS, OP) \
-  }
-
-// ------------------------------------------------------------------------
-// Entries definition for integer operands
-// TYPE_ID - operands type and size (fixed4, float4)
-// OP_ID - operation identifier (add, sub, mul, ...) 
-// TYPE - operand type -// BITS - size in bits, used to distinguish low level calls -// OP - operator (used in critical section) -// LCK_ID - lock identifier, used to possibly distinguish lock variable - -// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG -// ------------------------------------------------------------------------ -// Routines for ATOMIC integer operands, other operators -// ------------------------------------------------------------------------ -// TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG -ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev -ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i, - KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev -ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev -ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev -ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i, - KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev -ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev - -ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev -ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i, - KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev -ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev -ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev -ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i, - KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev -ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev - -ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i, - KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev -ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i, - KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev -ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i, - KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev -ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i, - KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev -ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i, - KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev -ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i, - KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev - -ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev -ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i, - KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev -ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev -ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev -ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i, - KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev -ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev - -ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r, - KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev -ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r, - KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev - -ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r, - KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev -ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r, - KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev -// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG - -// 
------------------------------------------------------------------------ -// Routines for Extended types: long double, _Quad, complex flavours (use -// critical section) -// TYPE_ID, OP_ID, TYPE - detailed above -// OP - operator -// LCK_ID - lock identifier, used to possibly distinguish lock variable -#define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ - ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \ - OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \ - OP_CRITICAL_REV(OP, LCK_ID) \ - } - -/* ------------------------------------------------------------------------- */ -// routines for long double type -ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r, - 1) // __kmpc_atomic_float10_sub_rev -ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r, - 1) // __kmpc_atomic_float10_div_rev -#if KMP_HAVE_QUAD -// routines for _Quad type -ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r, - 1) // __kmpc_atomic_float16_sub_rev -ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r, - 1) // __kmpc_atomic_float16_div_rev -#if (KMP_ARCH_X86) -ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r, - 1) // __kmpc_atomic_float16_sub_a16_rev -ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r, - 1) // __kmpc_atomic_float16_div_a16_rev -#endif -#endif - -// routines for complex types -ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c, - 1) // __kmpc_atomic_cmplx4_sub_rev -ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c, - 1) // __kmpc_atomic_cmplx4_div_rev -ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c, - 1) // __kmpc_atomic_cmplx8_sub_rev -ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c, - 1) // __kmpc_atomic_cmplx8_div_rev -ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c, - 1) // __kmpc_atomic_cmplx10_sub_rev -ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c, - 1) // __kmpc_atomic_cmplx10_div_rev -#if KMP_HAVE_QUAD -ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c, - 1) // __kmpc_atomic_cmplx16_sub_rev -ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c, - 1) // __kmpc_atomic_cmplx16_div_rev -#if (KMP_ARCH_X86) -ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, - 1) // __kmpc_atomic_cmplx16_sub_a16_rev -ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, - 1) // __kmpc_atomic_cmplx16_div_a16_rev -#endif -#endif - -#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 -// End of OpenMP 4.0: x = expr binop x for non-commutative operations. 
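-
-// For reference, a hand expansion (illustrative only; GOMP-compat path and
-// debug preamble omitted) of
-//   ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r, KMP_ARCH_X86)
-// behaves like:
-//
-//   void __kmpc_atomic_float8_sub_rev(ident_t *id_ref, int gtid,
-//                                     kmp_real64 *lhs, kmp_real64 rhs) {
-//     kmp_real64 old_value, new_value;
-//     old_value = *lhs;
-//     new_value = rhs - old_value; // operands swapped vs. the plain sub
-//     while (!KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)lhs,
-//                                         *(kmp_int64 *)&old_value,
-//                                         *(kmp_int64 *)&new_value)) {
-//       KMP_DO_PAUSE;
-//       old_value = *lhs;
-//       new_value = rhs - old_value;
-//     }
-//   }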
-
-#endif // OMP_40_ENABLED
-
-/* ------------------------------------------------------------------------ */
-/* Routines for mixed types of LHS and RHS, when RHS is "larger".            */
-/* Note: in order to reduce the total number of type combinations, it is     */
-/*       assumed that the compiler converts RHS to the longest floating      */
-/*       type, that is _Quad, before the call to any of these routines.      */
-/* Conversion to _Quad is done by the compiler during the calculation, and   */
-/*       conversion back to TYPE happens before the assignment, like:        */
-/*       *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                               */
-/* A performance penalty is expected because of the SW emulation involved.   */
-/* ------------------------------------------------------------------------ */
-
-#define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
-  void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
-      ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \
-    KMP_DEBUG_ASSERT(__kmp_init_serial); \
-    KA_TRACE(100, \
-             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
-              gtid));
-
-// -------------------------------------------------------------------------
-#define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
-                           GOMP_FLAG) \
-  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
-  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */ \
-  OP_CRITICAL(OP## =, LCK_ID) /* send assignment */ \
-  }
-
-// -------------------------------------------------------------------------
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-// -------------------------------------------------------------------------
-// X86 or X86_64: no alignment problems ====================================
-#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
-                           LCK_ID, MASK, GOMP_FLAG) \
-  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
-  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
-  OP_CMPXCHG(TYPE, BITS, OP) \
-  }
-// -------------------------------------------------------------------------
-#else
-// ------------------------------------------------------------------------
-// Code for other architectures that don't handle unaligned accesses. 
-#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ - LCK_ID, MASK, GOMP_FLAG) \ - ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ - OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ - if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ - OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ - } else { \ - KMP_CHECK_GTID; \ - OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \ - } \ - } -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -// ------------------------------------------------------------------------- -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 -// ------------------------------------------------------------------------- -#define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \ - RTYPE, LCK_ID, MASK, GOMP_FLAG) \ - ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ - OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \ - OP_CMPXCHG_REV(TYPE, BITS, OP) \ - } -#define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \ - LCK_ID, GOMP_FLAG) \ - ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ - OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \ - OP_CRITICAL_REV(OP, LCK_ID) \ - } -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -// RHS=float8 -ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8 -ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8 -ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8 -ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8 -ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3, - 0) // __kmpc_atomic_fixed4_mul_float8 -ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3, - 0) // __kmpc_atomic_fixed4_div_float8 -ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8 -ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8 -ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3, - KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8 -ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3, - KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8 -ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, - KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8 -ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, - KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8 - -// RHS=float16 (deprecated, to be removed when we are sure the compiler does not -// use them) -#if KMP_HAVE_QUAD -ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp -ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp -ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp -ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp -ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp -ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp -ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, 
_Quad, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp -ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp - -ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp -ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp -ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp -ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp -ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp -ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp -ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp -ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp - -ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3, - 0) // __kmpc_atomic_fixed4_add_fp -ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3, - 0) // __kmpc_atomic_fixed4u_add_fp -ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3, - 0) // __kmpc_atomic_fixed4_sub_fp -ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3, - 0) // __kmpc_atomic_fixed4u_sub_fp -ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3, - 0) // __kmpc_atomic_fixed4_mul_fp -ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3, - 0) // __kmpc_atomic_fixed4u_mul_fp -ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3, - 0) // __kmpc_atomic_fixed4_div_fp -ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3, - 0) // __kmpc_atomic_fixed4u_div_fp - -ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp -ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp -ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp -ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp -ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp -ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp -ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp -ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp - -ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3, - KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp -ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3, - KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp -ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3, - KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp -ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3, - KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp - -ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7, - KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp -ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7, - KMP_ARCH_X86) 
// __kmpc_atomic_float8_sub_fp -ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, - KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp -ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7, - KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp - -ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r, - 1) // __kmpc_atomic_float10_add_fp -ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r, - 1) // __kmpc_atomic_float10_sub_fp -ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r, - 1) // __kmpc_atomic_float10_mul_fp -ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r, - 1) // __kmpc_atomic_float10_div_fp - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 -// Reverse operations -ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp -ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp -ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp -ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp - -ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp -ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp -ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp -ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp - -ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3, - 0) // __kmpc_atomic_fixed4_sub_rev_fp -ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3, - 0) // __kmpc_atomic_fixed4u_sub_rev_fp -ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3, - 0) // __kmpc_atomic_fixed4_div_rev_fp -ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3, - 0) // __kmpc_atomic_fixed4u_div_rev_fp - -ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp -ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp -ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp -ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp - -ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3, - KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp -ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3, - KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp - -ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7, - KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp -ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7, - KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp - -ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r, - 1) // __kmpc_atomic_float10_sub_rev_fp -ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r, - 1) // __kmpc_atomic_float10_div_rev_fp -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -#endif - -#if 
KMP_ARCH_X86 || KMP_ARCH_X86_64
-// ------------------------------------------------------------------------
-// X86 or X86_64: no alignment problems ====================================
-#if USE_CMPXCHG_FIX
-// workaround for C78287 (complex(kind=4) data type)
-#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
-                             LCK_ID, MASK, GOMP_FLAG) \
-  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
-  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
-  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
-  }
-// end of the second part of the workaround for C78287
-#else
-#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
-                             LCK_ID, MASK, GOMP_FLAG) \
-  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
-  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
-  OP_CMPXCHG(TYPE, BITS, OP) \
-  }
-#endif // USE_CMPXCHG_FIX
-#else
-// ------------------------------------------------------------------------
-// Code for other architectures that don't handle unaligned accesses.
-#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
-                             LCK_ID, MASK, GOMP_FLAG) \
-  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
-  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
-  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
-    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
-  } else { \
-    KMP_CHECK_GTID; \
-    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
-  } \
-  }
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
-ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
-                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
-ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
-                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
-ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
-                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
-ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
-                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
-
-// READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-
-// ------------------------------------------------------------------------
-// Atomic READ routines
-
-// ------------------------------------------------------------------------
-// Beginning of a definition (provides name, parameters, debug trace)
-// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
-// fixed)
-// OP_ID - operation identifier (add, sub, mul, ...) 
-// TYPE - operands' type
-#define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
-  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
-                                             TYPE *loc) { \
-    KMP_DEBUG_ASSERT(__kmp_init_serial); \
-    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
-
-// ------------------------------------------------------------------------
-// Operation on *lhs, rhs using "compare_and_store_ret" routine
-// TYPE - operands' type
-// BITS - size in bits, used to distinguish low level calls
-// OP - operator
-// Note: temp_val introduced in order to force the compiler to read
-// *lhs only once (w/o it the compiler reads *lhs twice)
-// TODO: check if it is still necessary
-// Return old value regardless of the result of "compare & swap" operation
-#define OP_CMPXCHG_READ(TYPE, BITS, OP) \
-  { \
-    TYPE KMP_ATOMIC_VOLATILE temp_val; \
-    union f_i_union { \
-      TYPE f_val; \
-      kmp_int##BITS i_val; \
-    }; \
-    union f_i_union old_value; \
-    temp_val = *loc; \
-    old_value.f_val = temp_val; \
-    old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \
-        (kmp_int##BITS *)loc, \
-        *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \
-        *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \
-    new_value = old_value.f_val; \
-    return new_value; \
-  }
-
-// -------------------------------------------------------------------------
-// Operation on *lhs, rhs bound by critical section
-// OP - operator (it's supposed to contain an assignment)
-// LCK_ID - lock identifier
-// Note: don't check gtid as it should always be valid
-// 1, 2-byte - expect valid parameter, other - check before this macro
-#define OP_CRITICAL_READ(OP, LCK_ID) \
-  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
- \
-  new_value = (*loc); \
- \
-  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
-
-// -------------------------------------------------------------------------
-#ifdef KMP_GOMP_COMPAT
-#define OP_GOMP_CRITICAL_READ(OP, FLAG) \
-  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
-    KMP_CHECK_GTID; \
-    OP_CRITICAL_READ(OP, 0); \
-    return new_value; \
-  }
-#else
-#define OP_GOMP_CRITICAL_READ(OP, FLAG)
-#endif /* KMP_GOMP_COMPAT */
-
-// -------------------------------------------------------------------------
-#define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
-  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
-  TYPE new_value; \
-  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
-  new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \
-  return new_value; \
-  }
-// -------------------------------------------------------------------------
-#define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
-  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
-  TYPE new_value; \
-  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
-  OP_CMPXCHG_READ(TYPE, BITS, OP) \
-  }
-// ------------------------------------------------------------------------
-// Routines for Extended types: long double, _Quad, complex flavours (use
-// critical section)
-// TYPE_ID, OP_ID, TYPE - detailed above
-// OP - operator
-// LCK_ID - lock identifier, used to possibly distinguish lock variable
-#define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
-  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
-  TYPE new_value; \
-  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \
-  OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \
-  return new_value; \
-  }
-
-// ------------------------------------------------------------------------
-// Fix for cmplx4 read (CQ220361) on Windows* OS. 
Regular routine with return -// value doesn't work. -// Let's return the read value through the additional parameter. -#if (KMP_OS_WINDOWS) - -#define OP_CRITICAL_READ_WRK(OP, LCK_ID) \ - __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ - \ - (*out) = (*loc); \ - \ - __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); -// ------------------------------------------------------------------------ -#ifdef KMP_GOMP_COMPAT -#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \ - if ((FLAG) && (__kmp_atomic_mode == 2)) { \ - KMP_CHECK_GTID; \ - OP_CRITICAL_READ_WRK(OP, 0); \ - } -#else -#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) -#endif /* KMP_GOMP_COMPAT */ -// ------------------------------------------------------------------------ -#define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \ - void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \ - TYPE *loc) { \ - KMP_DEBUG_ASSERT(__kmp_init_serial); \ - KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); - -// ------------------------------------------------------------------------ -#define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ - ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \ - OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \ - OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \ - } - -#endif // KMP_OS_WINDOWS - -// ------------------------------------------------------------------------ -// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG -ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd -ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd -ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +, - KMP_ARCH_X86) // __kmpc_atomic_float4_rd -ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +, - KMP_ARCH_X86) // __kmpc_atomic_float8_rd - -// !!! 
TODO: Remove lock operations for "char" since it can't be non-atomic -ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd -ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd - -ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r, - 1) // __kmpc_atomic_float10_rd -#if KMP_HAVE_QUAD -ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r, - 1) // __kmpc_atomic_float16_rd -#endif // KMP_HAVE_QUAD - -// Fix for CQ220361 on Windows* OS -#if (KMP_OS_WINDOWS) -ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c, - 1) // __kmpc_atomic_cmplx4_rd -#else -ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c, - 1) // __kmpc_atomic_cmplx4_rd -#endif -ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c, - 1) // __kmpc_atomic_cmplx8_rd -ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c, - 1) // __kmpc_atomic_cmplx10_rd -#if KMP_HAVE_QUAD -ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c, - 1) // __kmpc_atomic_cmplx16_rd -#if (KMP_ARCH_X86) -ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r, - 1) // __kmpc_atomic_float16_a16_rd -ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, - 1) // __kmpc_atomic_cmplx16_a16_rd -#endif -#endif - -// ------------------------------------------------------------------------ -// Atomic WRITE routines - -#define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ - ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ - OP_GOMP_CRITICAL(OP, GOMP_FLAG) \ - KMP_XCHG_FIXED##BITS(lhs, rhs); \ - } -// ------------------------------------------------------------------------ -#define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ - ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ - OP_GOMP_CRITICAL(OP, GOMP_FLAG) \ - KMP_XCHG_REAL##BITS(lhs, rhs); \ - } - -// ------------------------------------------------------------------------ -// Operation on *lhs, rhs using "compare_and_store" routine -// TYPE - operands' type -// BITS - size in bits, used to distinguish low level calls -// OP - operator -// Note: temp_val introduced in order to force the compiler to read -// *lhs only once (w/o it the compiler reads *lhs twice) -#define OP_CMPXCHG_WR(TYPE, BITS, OP) \ - { \ - TYPE KMP_ATOMIC_VOLATILE temp_val; \ - TYPE old_value, new_value; \ - temp_val = *lhs; \ - old_value = temp_val; \ - new_value = rhs; \ - while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ - (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ - *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ - KMP_CPU_PAUSE(); \ - \ - temp_val = *lhs; \ - old_value = temp_val; \ - new_value = rhs; \ - } \ - } - -// ------------------------------------------------------------------------- -#define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ - ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ - OP_GOMP_CRITICAL(OP, GOMP_FLAG) \ - OP_CMPXCHG_WR(TYPE, BITS, OP) \ - } - -// ------------------------------------------------------------------------ -// Routines for Extended types: long double, _Quad, complex flavours (use -// critical section) -// TYPE_ID, OP_ID, TYPE - detailed above -// OP - operator -// LCK_ID - lock identifier, used to possibly distinguish lock variable -#define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ - ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ - OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \ - OP_CRITICAL(OP, LCK_ID) /* send assignment */ \ - } -// ------------------------------------------------------------------------- - 
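Editorial aside, not part of the original file: an atomic write needs no result value, so where the hardware has a native exchange the macro body is a single KMP_XCHG_*; for widths without one (e.g. 8-byte stores on IA-32), OP_CMPXCHG_WR retries a compare-and-swap until the stored value sticks. A standalone sketch of both strategies with C++11 <atomic>; names are invented:

#include <atomic>
#include <cstdint>

static std::atomic<std::int32_t> shared32{0};

// Native-exchange path (the shape ATOMIC_XCHG_WR instantiates):
static void write_xchg(std::int32_t rhs) {
  shared32.exchange(rhs); // old value is discarded; the store is atomic
}

// CAS-loop path (the shape OP_CMPXCHG_WR expands to):
static void write_cmpxchg(std::int32_t rhs) {
  std::int32_t old_value = shared32.load();
  while (!shared32.compare_exchange_weak(old_value, rhs)) {
    // a failed CAS refreshed old_value; simply retry until the store lands
  }
}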
-ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
-               KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
-ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
-               KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
-ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
-               KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
-#if (KMP_ARCH_X86)
-ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
-                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
-#else
-ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
-               KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
-#endif
-
-ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
-                     KMP_ARCH_X86) // __kmpc_atomic_float4_wr
-#if (KMP_ARCH_X86)
-ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
-                  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
-#else
-ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
-                     KMP_ARCH_X86) // __kmpc_atomic_float8_wr
-#endif
-
-ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
-                   1) // __kmpc_atomic_float10_wr
-#if KMP_HAVE_QUAD
-ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
-                   1) // __kmpc_atomic_float16_wr
-#endif
-ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
-ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
-                   1) // __kmpc_atomic_cmplx8_wr
-ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
-                   1) // __kmpc_atomic_cmplx10_wr
-#if KMP_HAVE_QUAD
-ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
-                   1) // __kmpc_atomic_cmplx16_wr
-#if (KMP_ARCH_X86)
-ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
-                   1) // __kmpc_atomic_float16_a16_wr
-ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
-                   1) // __kmpc_atomic_cmplx16_a16_wr
-#endif
-#endif
-
-// ------------------------------------------------------------------------
-// Atomic CAPTURE routines
-
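Editorial aside, not part of the original file: a "capture" routine updates *lhs atomically and returns either the value before or after the update, selected by the extra 'flag' parameter -- the two OpenMP forms {v = x; x op= expr;} and {x op= expr; v = x;}. A standalone sketch of the CAS loop that OP_CMPXCHG_CPT implements, using C++11 <atomic>; names are invented and '+' stands in for OP:

#include <atomic>

static std::atomic<int> target{0};

static int add_capture(int rhs, bool capture_new) {
  int old_value = target.load();
  int new_value = old_value + rhs;
  // Retry until the snapshot we computed from still matches memory;
  // a failed CAS refreshes old_value with the current contents.
  while (!target.compare_exchange_weak(old_value, new_value)) {
    new_value = old_value + rhs;
  }
  return capture_new ? new_value : old_value;
}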
-// Beginning of a definition (provides name, parameters, debug trace)
-// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
-// fixed)
-// OP_ID - operation identifier (add, sub, mul, ...)
-// TYPE - operands' type
-#define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
-  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
-                                             TYPE *lhs, TYPE rhs, int flag) { \
-    KMP_DEBUG_ASSERT(__kmp_init_serial); \
-    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
-
-// -------------------------------------------------------------------------
-// Operation on *lhs, rhs bound by critical section
-// OP - operator (it's supposed to contain an assignment)
-// LCK_ID - lock identifier
-// Note: don't check gtid as it should always be valid
-// 1, 2-byte - expect valid parameter, other - check before this macro
-#define OP_CRITICAL_CPT(OP, LCK_ID) \
-  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
-  \
-  if (flag) { \
-    (*lhs) OP rhs; \
-    new_value = (*lhs); \
-  } else { \
-    new_value = (*lhs); \
-    (*lhs) OP rhs; \
-  } \
-  \
-  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
-  return new_value;
-
-// ------------------------------------------------------------------------
-#ifdef KMP_GOMP_COMPAT
-#define OP_GOMP_CRITICAL_CPT(OP, FLAG) \
-  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
-    KMP_CHECK_GTID; \
-    OP_CRITICAL_CPT(OP## =, 0); \
-  }
-#else
-#define OP_GOMP_CRITICAL_CPT(OP, FLAG)
-#endif /* KMP_GOMP_COMPAT */
-
-// ------------------------------------------------------------------------
-// Operation on *lhs, rhs using "compare_and_store" routine
-// TYPE - operands' type
-// BITS - size in bits, used to distinguish low level calls
-// OP - operator
-// Note: temp_val introduced in order to force the compiler to read
-// *lhs only once (w/o it the compiler reads *lhs twice)
-#define OP_CMPXCHG_CPT(TYPE, BITS, OP) \
-  { \
-    TYPE KMP_ATOMIC_VOLATILE temp_val; \
-    TYPE old_value, new_value; \
-    temp_val = *lhs; \
-    old_value = temp_val; \
-    new_value = old_value OP rhs; \
-    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
-        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
-        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
-      KMP_CPU_PAUSE(); \
-      \
-      temp_val = *lhs; \
-      old_value = temp_val; \
-      new_value = old_value OP rhs; \
-    } \
-    if (flag) { \
-      return new_value; \
-    } else \
-      return old_value; \
-  }
-
-// -------------------------------------------------------------------------
-#define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
-  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
-  TYPE new_value; \
-  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \
-  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
-  }
-
-// -------------------------------------------------------------------------
-#define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
-  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
-  TYPE old_value, new_value; \
-  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \
-  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
-  old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
-  if (flag) { \
-    return old_value OP rhs; \
-  } else \
-    return old_value; \
-  }
-// -------------------------------------------------------------------------
-
-ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
-                     0) // __kmpc_atomic_fixed4_add_cpt
-ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
-                     0) // __kmpc_atomic_fixed4_sub_cpt
-ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
-                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
-ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
-                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt
-
-ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
-                   KMP_ARCH_X86) // 
__kmpc_atomic_float4_add_cpt -ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -, - KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt -ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +, - KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt -ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -, - KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt - -// ------------------------------------------------------------------------ -// Entries definition for integer operands -// TYPE_ID - operands type and size (fixed4, float4) -// OP_ID - operation identifier (add, sub, mul, ...) -// TYPE - operand type -// BITS - size in bits, used to distinguish low level calls -// OP - operator (used in critical section) -// TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG -// ------------------------------------------------------------------------ -// Routines for ATOMIC integer operands, other operators -// ------------------------------------------------------------------------ -// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG -ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt -ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &, - 0) // __kmpc_atomic_fixed1_andb_cpt -ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt -ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /, - KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt -ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt -ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |, - 0) // __kmpc_atomic_fixed1_orb_cpt -ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt -ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt -ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>, - KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt -ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt -ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^, - 0) // __kmpc_atomic_fixed1_xor_cpt -ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt -ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &, - 0) // __kmpc_atomic_fixed2_andb_cpt -ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt -ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /, - KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt -ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt -ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |, - 0) // __kmpc_atomic_fixed2_orb_cpt -ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt -ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt -ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>, - KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt -ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt -ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^, - 0) // __kmpc_atomic_fixed2_xor_cpt -ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &, - 0) // __kmpc_atomic_fixed4_andb_cpt -ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /, - KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt -ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /, - KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt -ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *, 
-                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
-ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
-                   0) // __kmpc_atomic_fixed4_orb_cpt
-ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
-                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
-ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
-                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
-ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
-                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
-ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
-                   0) // __kmpc_atomic_fixed4_xor_cpt
-ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
-                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
-ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
-                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
-ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
-                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
-ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
-                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
-ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
-                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
-ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
-                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
-ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
-                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
-ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
-                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
-ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
-                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
-ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
-                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
-ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
-                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
-ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
-                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
-ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
-                   KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
-// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
-
-// CAPTURE routines for mixed types RHS=float16
-#if KMP_HAVE_QUAD
-
-// Beginning of a definition (provides name, parameters, debug trace)
-// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
-// fixed)
-// OP_ID - operation identifier (add, sub, mul, ...) 
-// TYPE - operands' type -#define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ - TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \ - ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \ - KMP_DEBUG_ASSERT(__kmp_init_serial); \ - KA_TRACE(100, \ - ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \ - gtid)); - -// ------------------------------------------------------------------------- -#define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \ - RTYPE, LCK_ID, MASK, GOMP_FLAG) \ - ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ - TYPE new_value; \ - OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \ - OP_CMPXCHG_CPT(TYPE, BITS, OP) \ - } - -// ------------------------------------------------------------------------- -#define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \ - LCK_ID, GOMP_FLAG) \ - ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ - TYPE new_value; \ - OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */ \ - OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */ \ - } - -ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp - -ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp - -ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3, - 0) // __kmpc_atomic_fixed4_add_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3, - 0) // __kmpc_atomic_fixed4u_add_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3, - 0) // __kmpc_atomic_fixed4_sub_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3, - 0) // 
__kmpc_atomic_fixed4u_sub_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3, - 0) // __kmpc_atomic_fixed4_mul_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3, - 0) // __kmpc_atomic_fixed4u_mul_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3, - 0) // __kmpc_atomic_fixed4_div_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3, - 0) // __kmpc_atomic_fixed4u_div_cpt_fp - -ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp - -ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3, - KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3, - KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3, - KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3, - KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp - -ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7, - KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7, - KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7, - KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7, - KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp - -ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r, - 1) // __kmpc_atomic_float10_add_cpt_fp -ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r, - 1) // __kmpc_atomic_float10_sub_cpt_fp -ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r, - 1) // __kmpc_atomic_float10_mul_cpt_fp -ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r, - 1) // __kmpc_atomic_float10_div_cpt_fp - -#endif // KMP_HAVE_QUAD - -// ------------------------------------------------------------------------ -// Routines for C/C++ Reduction operators && and || - -// ------------------------------------------------------------------------- -// Operation on *lhs, rhs bound by critical section -// OP - operator (it's supposed to contain an assignment) -// LCK_ID - lock identifier -// Note: don't check gtid as it should always be valid -// 1, 2-byte - expect valid parameter, other - check before this macro 
-#define OP_CRITICAL_L_CPT(OP, LCK_ID) \
-  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
-  \
-  if (flag) { \
-    new_value OP rhs; \
-  } else \
-    new_value = (*lhs); \
-  \
-  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
-
-// ------------------------------------------------------------------------
-#ifdef KMP_GOMP_COMPAT
-#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \
-  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
-    KMP_CHECK_GTID; \
-    OP_CRITICAL_L_CPT(OP, 0); \
-    return new_value; \
-  }
-#else
-#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
-#endif /* KMP_GOMP_COMPAT */
-
-// ------------------------------------------------------------------------
-// Need separate macros for &&, || because there is no combined assignment
-#define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
-  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
-  TYPE new_value; \
-  OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \
-  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
-  }
-
-ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
-                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
-ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
-                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
-ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
-                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
-ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
-                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
-ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
-                  0) // __kmpc_atomic_fixed4_andl_cpt
-ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
-                  0) // __kmpc_atomic_fixed4_orl_cpt
-ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
-                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
-ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
-                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
-
-// -------------------------------------------------------------------------
-// Routines for Fortran operators that have no direct C counterpart:
-// MAX, MIN, .EQV., .NEQV.
-// Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
-// Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
-
-// -------------------------------------------------------------------------
-// MIN and MAX need separate macros
-// OP - comparison operator, used to check whether any action is needed
-#define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
-  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
-  \
-  if (*lhs OP rhs) { /* still need actions? */ \
-    old_value = *lhs; \
-    *lhs = rhs; \
-    if (flag) \
-      new_value = rhs; \
-    else \
-      new_value = old_value; \
-  } \
-  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
-  return new_value;
-
-// -------------------------------------------------------------------------
-#ifdef KMP_GOMP_COMPAT
-#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \
-  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
-    KMP_CHECK_GTID; \
-    MIN_MAX_CRITSECT_CPT(OP, 0); \
-  }
-#else
-#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
-#endif /* KMP_GOMP_COMPAT */
-
-// -------------------------------------------------------------------------
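Editorial aside, not part of the original file: the min/max capture first tests whether the candidate actually improves on *lhs and only then attempts a compare-and-swap; it gives up as soon as another thread has installed a value that makes the update unnecessary. A standalone sketch of the max flavour with C++11 <atomic>; names are invented, and the return expression mirrors the macro below (entered only when an update looked necessary):

#include <atomic>

static std::atomic<int> slot{0};

static int max_capture(int rhs, bool capture_new) {
  int old_value = slot.load();
  // Retry only while rhs would still raise the stored value; a failed CAS
  // refreshes old_value, and the re-test may then show that another thread
  // already stored something at least as large, in which case we stop.
  while (old_value < rhs && !slot.compare_exchange_weak(old_value, rhs)) {
  }
  return capture_new ? rhs : old_value; // as in MIN_MAX_CMPXCHG_CPT
}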
-#define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
-  { \
-    TYPE KMP_ATOMIC_VOLATILE temp_val; \
-    /*TYPE old_value; */ \
-    temp_val = *lhs; \
-    old_value = temp_val; \
-    while (old_value OP rhs && /* still need actions? */ \
-           !KMP_COMPARE_AND_STORE_ACQ##BITS( \
-               (kmp_int##BITS *)lhs, \
-               *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
-               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
-      KMP_CPU_PAUSE(); \
-      temp_val = *lhs; \
-      old_value = temp_val; \
-    } \
-    if (flag) \
-      return rhs; \
-    else \
-      return old_value; \
-  }
-
-// -------------------------------------------------------------------------
-// 1-byte, 2-byte operands - use critical section
-#define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
-  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
-  TYPE new_value, old_value; \
-  if (*lhs OP rhs) { /* need actions? */ \
-    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
-    MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
-  } \
-  return *lhs; \
-  }
-
-#define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
-  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
-  TYPE new_value, old_value; \
-  if (*lhs OP rhs) { \
-    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
-    MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
-  } \
-  return *lhs; \
-  }
-
-MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
-                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
-MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
-                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
-MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
-                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
-MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
-                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
-MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
-                     0) // __kmpc_atomic_fixed4_max_cpt
-MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
-                     0) // __kmpc_atomic_fixed4_min_cpt
-MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
-                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
-MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
-                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
-MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
-                     KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
-MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
-                     KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
-MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
-                     KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
-MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
-                     KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
-#if KMP_HAVE_QUAD
-MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
-                     1) // __kmpc_atomic_float16_max_cpt
-MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
-                     1) // __kmpc_atomic_float16_min_cpt
-#if (KMP_ARCH_X86)
-MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
-                     1) // __kmpc_atomic_float16_max_a16_cpt
-MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
-                     1) // __kmpc_atomic_float16_min_a16_cpt
-#endif
-#endif
-
-// ------------------------------------------------------------------------
-#ifdef KMP_GOMP_COMPAT
-#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \
-  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
-    KMP_CHECK_GTID; \
-    OP_CRITICAL_CPT(OP, 0); \
-  }
-#else
-#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
-#endif /* KMP_GOMP_COMPAT */
-// ------------------------------------------------------------------------
-#define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
-  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
-  TYPE new_value; \
-  OP_GOMP_CRITICAL_EQV_CPT(^= ~, GOMP_FLAG) /* send assignment */ \
-  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
-  }
-
-// ------------------------------------------------------------------------
-
-ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
-                   KMP_ARCH_X86) // 
__kmpc_atomic_fixed1_neqv_cpt -ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt -ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^, - KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt -ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt -ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt -ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt -ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~, - KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt -ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt - -// ------------------------------------------------------------------------ -// Routines for Extended types: long double, _Quad, complex flavours (use -// critical section) -// TYPE_ID, OP_ID, TYPE - detailed above -// OP - operator -// LCK_ID - lock identifier, used to possibly distinguish lock variable -#define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ - ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ - TYPE new_value; \ - OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */ \ - OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */ \ - } - -// ------------------------------------------------------------------------ -// Workaround for cmplx4. Regular routines with return value don't work -// on Win_32e. Let's return captured values through the additional parameter. -#define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \ - __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ - \ - if (flag) { \ - (*lhs) OP rhs; \ - (*out) = (*lhs); \ - } else { \ - (*out) = (*lhs); \ - (*lhs) OP rhs; \ - } \ - \ - __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ - return; -// ------------------------------------------------------------------------ - -#ifdef KMP_GOMP_COMPAT -#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \ - if ((FLAG) && (__kmp_atomic_mode == 2)) { \ - KMP_CHECK_GTID; \ - OP_CRITICAL_CPT_WRK(OP## =, 0); \ - } -#else -#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) -#endif /* KMP_GOMP_COMPAT */ -// ------------------------------------------------------------------------ - -#define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \ - void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \ - TYPE rhs, TYPE *out, int flag) { \ - KMP_DEBUG_ASSERT(__kmp_init_serial); \ - KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); -// ------------------------------------------------------------------------ - -#define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ - ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \ - OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \ - OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \ - } -// The end of workaround for cmplx4 - -/* ------------------------------------------------------------------------- */ -// routines for long double type -ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r, - 1) // __kmpc_atomic_float10_add_cpt -ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r, - 1) // __kmpc_atomic_float10_sub_cpt -ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r, - 1) // __kmpc_atomic_float10_mul_cpt -ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r, - 1) // __kmpc_atomic_float10_div_cpt -#if KMP_HAVE_QUAD -// routines for _Quad type -ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r, - 1) // __kmpc_atomic_float16_add_cpt 
-ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r, - 1) // __kmpc_atomic_float16_sub_cpt -ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r, - 1) // __kmpc_atomic_float16_mul_cpt -ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r, - 1) // __kmpc_atomic_float16_div_cpt -#if (KMP_ARCH_X86) -ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r, - 1) // __kmpc_atomic_float16_add_a16_cpt -ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r, - 1) // __kmpc_atomic_float16_sub_a16_cpt -ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r, - 1) // __kmpc_atomic_float16_mul_a16_cpt -ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r, - 1) // __kmpc_atomic_float16_div_a16_cpt -#endif -#endif - -// routines for complex types - -// cmplx4 routines to return void -ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c, - 1) // __kmpc_atomic_cmplx4_add_cpt -ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c, - 1) // __kmpc_atomic_cmplx4_sub_cpt -ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c, - 1) // __kmpc_atomic_cmplx4_mul_cpt -ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c, - 1) // __kmpc_atomic_cmplx4_div_cpt - -ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c, - 1) // __kmpc_atomic_cmplx8_add_cpt -ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c, - 1) // __kmpc_atomic_cmplx8_sub_cpt -ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c, - 1) // __kmpc_atomic_cmplx8_mul_cpt -ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c, - 1) // __kmpc_atomic_cmplx8_div_cpt -ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c, - 1) // __kmpc_atomic_cmplx10_add_cpt -ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c, - 1) // __kmpc_atomic_cmplx10_sub_cpt -ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c, - 1) // __kmpc_atomic_cmplx10_mul_cpt -ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c, - 1) // __kmpc_atomic_cmplx10_div_cpt -#if KMP_HAVE_QUAD -ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c, - 1) // __kmpc_atomic_cmplx16_add_cpt -ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c, - 1) // __kmpc_atomic_cmplx16_sub_cpt -ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c, - 1) // __kmpc_atomic_cmplx16_mul_cpt -ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c, - 1) // __kmpc_atomic_cmplx16_div_cpt -#if (KMP_ARCH_X86) -ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c, - 1) // __kmpc_atomic_cmplx16_add_a16_cpt -ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c, - 1) // __kmpc_atomic_cmplx16_sub_a16_cpt -ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c, - 1) // __kmpc_atomic_cmplx16_mul_a16_cpt -ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c, - 1) // __kmpc_atomic_cmplx16_div_a16_cpt -#endif -#endif - -#if OMP_40_ENABLED - -// OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr -// binop x; v = x; } for non-commutative operations. 
-// Supported only on IA-32 architecture and Intel(R) 64 - -// ------------------------------------------------------------------------- -// Operation on *lhs, rhs bound by critical section -// OP - operator (it's supposed to contain an assignment) -// LCK_ID - lock identifier -// Note: don't check gtid as it should always be valid -// 1, 2-byte - expect valid parameter, other - check before this macro -#define OP_CRITICAL_CPT_REV(OP, LCK_ID) \ - __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ - \ - if (flag) { \ - /*temp_val = (*lhs);*/ \ - (*lhs) = (rhs)OP(*lhs); \ - new_value = (*lhs); \ - } else { \ - new_value = (*lhs); \ - (*lhs) = (rhs)OP(*lhs); \ - } \ - __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ - return new_value; - -// ------------------------------------------------------------------------ -#ifdef KMP_GOMP_COMPAT -#define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG) \ - if ((FLAG) && (__kmp_atomic_mode == 2)) { \ - KMP_CHECK_GTID; \ - OP_CRITICAL_CPT_REV(OP, 0); \ - } -#else -#define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG) -#endif /* KMP_GOMP_COMPAT */ - -// ------------------------------------------------------------------------ -// Operation on *lhs, rhs using "compare_and_store" routine -// TYPE - operands' type -// BITS - size in bits, used to distinguish low level calls -// OP - operator -// Note: temp_val introduced in order to force the compiler to read -// *lhs only once (w/o it the compiler reads *lhs twice) -#define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \ - { \ - TYPE KMP_ATOMIC_VOLATILE temp_val; \ - TYPE old_value, new_value; \ - temp_val = *lhs; \ - old_value = temp_val; \ - new_value = rhs OP old_value; \ - while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ - (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ - *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ - KMP_CPU_PAUSE(); \ - \ - temp_val = *lhs; \ - old_value = temp_val; \ - new_value = rhs OP old_value; \ - } \ - if (flag) { \ - return new_value; \ - } else \ - return old_value; \ - } - -// ------------------------------------------------------------------------- -#define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ - ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ - TYPE new_value; \ - OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \ - OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \ - } - -ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev -ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /, - KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev -ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev -ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev -ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>, - KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev -ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev -ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev -ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /, - KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev -ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev -ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev -ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>, - 
KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev -ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev -ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /, - KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev -ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /, - KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev -ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<, - KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev -ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>, - KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev -ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>, - KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev -ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -, - KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev -ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev -ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /, - KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev -ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev -ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev -ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>, - KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev -ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev -ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /, - KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev -ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -, - KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev -ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /, - KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev -ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -, - KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev -// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG - -// ------------------------------------------------------------------------ -// Routines for Extended types: long double, _Quad, complex flavours (use -// critical section) -// TYPE_ID, OP_ID, TYPE - detailed above -// OP - operator -// LCK_ID - lock identifier, used to possibly distinguish lock variable -#define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ - ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ - TYPE new_value; \ - /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \ - OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \ - OP_CRITICAL_CPT_REV(OP, LCK_ID) \ - } - -/* ------------------------------------------------------------------------- */ -// routines for long double type -ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r, - 1) // __kmpc_atomic_float10_sub_cpt_rev -ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r, - 1) // __kmpc_atomic_float10_div_cpt_rev -#if KMP_HAVE_QUAD -// routines for _Quad type -ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r, - 1) // __kmpc_atomic_float16_sub_cpt_rev -ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r, - 1) // __kmpc_atomic_float16_div_cpt_rev -#if (KMP_ARCH_X86) -ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r, - 1) // __kmpc_atomic_float16_sub_a16_cpt_rev -ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r, - 1) // __kmpc_atomic_float16_div_a16_cpt_rev -#endif -#endif - -// 
routines for complex types
-
-// ------------------------------------------------------------------------
-// Workaround for cmplx4. Regular routines with return value don't work
-// on Win_32e. Let's return captured values through the additional parameter.
-#define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
-  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
-  \
-  if (flag) { \
-    (*lhs) = (rhs)OP(*lhs); \
-    (*out) = (*lhs); \
-  } else { \
-    (*out) = (*lhs); \
-    (*lhs) = (rhs)OP(*lhs); \
-  } \
-  \
-  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
-  return;
-// ------------------------------------------------------------------------
-
-#ifdef KMP_GOMP_COMPAT
-#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \
-  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
-    KMP_CHECK_GTID; \
-    OP_CRITICAL_CPT_REV_WRK(OP, 0); \
-  }
-#else
-#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
-#endif /* KMP_GOMP_COMPAT */
-// ------------------------------------------------------------------------
-
-#define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \
-                                    GOMP_FLAG) \
-  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
-  OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \
-  OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
-  }
-// The end of workaround for cmplx4
-
-// !!! TODO: check if we need to return void for cmplx4 routines
-// cmplx4 routines to return void
-ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
-                            1) // __kmpc_atomic_cmplx4_sub_cpt_rev
-ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
-                            1) // __kmpc_atomic_cmplx4_div_cpt_rev
-
-ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
-                        1) // __kmpc_atomic_cmplx8_sub_cpt_rev
-ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
-                        1) // __kmpc_atomic_cmplx8_div_cpt_rev
-ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
-                        1) // __kmpc_atomic_cmplx10_sub_cpt_rev
-ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
-                        1) // __kmpc_atomic_cmplx10_div_cpt_rev
-#if KMP_HAVE_QUAD
-ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
-                        1) // __kmpc_atomic_cmplx16_sub_cpt_rev
-ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
-                        1) // __kmpc_atomic_cmplx16_div_cpt_rev
-#if (KMP_ARCH_X86)
-ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
-                        1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
-ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
-                        1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
-#endif
-#endif
-
-// Capture reverse for mixed type: RHS=float16
-#if KMP_HAVE_QUAD
-
-// Beginning of a definition (provides name, parameters, debug trace)
-// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
-// fixed)
-// OP_ID - operation identifier (add, sub, mul, ...) 
-// TYPE - operands' type -// ------------------------------------------------------------------------- -#define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \ - RTYPE, LCK_ID, MASK, GOMP_FLAG) \ - ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ - TYPE new_value; \ - OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \ - OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \ - } - -// ------------------------------------------------------------------------- -#define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \ - LCK_ID, GOMP_FLAG) \ - ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ - TYPE new_value; \ - OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) /* send assignment */ \ - OP_CRITICAL_CPT_REV(OP, LCK_ID) /* send assignment */ \ - } - -ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp -ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp -ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp -ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0, - KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp - -ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp -ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i, - 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp -ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp -ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i, - 1, - KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp - -ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i, - 3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp -ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad, - 4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp -ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i, - 3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp -ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad, - 4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp - -ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i, - 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp -ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad, - 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp -ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i, - 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp -ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad, - 8i, 7, - KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp - -ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad, - 4r, 3, - KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp -ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad, - 4r, 3, - KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp - -ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad, - 8r, 7, - KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp -ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad, - 8r, 7, - KMP_ARCH_X86) // 
__kmpc_atomic_float8_div_cpt_rev_fp - -ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad, - 10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp -ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad, - 10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp - -#endif // KMP_HAVE_QUAD - -// OpenMP 4.0 Capture-write (swap): {v = x; x = expr;} - -#define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ - TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \ - TYPE rhs) { \ - KMP_DEBUG_ASSERT(__kmp_init_serial); \ - KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid)); - -#define CRITICAL_SWP(LCK_ID) \ - __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ - \ - old_value = (*lhs); \ - (*lhs) = rhs; \ - \ - __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ - return old_value; - -// ------------------------------------------------------------------------ -#ifdef KMP_GOMP_COMPAT -#define GOMP_CRITICAL_SWP(FLAG) \ - if ((FLAG) && (__kmp_atomic_mode == 2)) { \ - KMP_CHECK_GTID; \ - CRITICAL_SWP(0); \ - } -#else -#define GOMP_CRITICAL_SWP(FLAG) -#endif /* KMP_GOMP_COMPAT */ - -#define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \ - ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ - TYPE old_value; \ - GOMP_CRITICAL_SWP(GOMP_FLAG) \ - old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \ - return old_value; \ - } -// ------------------------------------------------------------------------ -#define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \ - ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ - TYPE old_value; \ - GOMP_CRITICAL_SWP(GOMP_FLAG) \ - old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \ - return old_value; \ - } - -// ------------------------------------------------------------------------ -#define CMPXCHG_SWP(TYPE, BITS) \ - { \ - TYPE KMP_ATOMIC_VOLATILE temp_val; \ - TYPE old_value, new_value; \ - temp_val = *lhs; \ - old_value = temp_val; \ - new_value = rhs; \ - while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ - (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ - *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ - KMP_CPU_PAUSE(); \ - \ - temp_val = *lhs; \ - old_value = temp_val; \ - new_value = rhs; \ - } \ - return old_value; \ - } - -// ------------------------------------------------------------------------- -#define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \ - ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ - TYPE old_value; \ - GOMP_CRITICAL_SWP(GOMP_FLAG) \ - CMPXCHG_SWP(TYPE, BITS) \ - } - -ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp -ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp -ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp - -ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32, - KMP_ARCH_X86) // __kmpc_atomic_float4_swp - -#if (KMP_ARCH_X86) -ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64, - KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp -ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64, - KMP_ARCH_X86) // __kmpc_atomic_float8_swp -#else -ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp -ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64, - KMP_ARCH_X86) // __kmpc_atomic_float8_swp -#endif - -// ------------------------------------------------------------------------ -// Routines for Extended types: long double, _Quad, complex flavours (use -// critical section) -#define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \ - ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ - TYPE old_value; \ - GOMP_CRITICAL_SWP(GOMP_FLAG) \ - 
CRITICAL_SWP(LCK_ID) \ - } - -// ------------------------------------------------------------------------ -// !!! TODO: check if we need to return void for cmplx4 routines -// Workaround for cmplx4. Regular routines with return value don't work -// on Win_32e. Let's return captured values through the additional parameter. - -#define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \ - void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \ - TYPE rhs, TYPE *out) { \ - KMP_DEBUG_ASSERT(__kmp_init_serial); \ - KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid)); - -#define CRITICAL_SWP_WRK(LCK_ID) \ - __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ - \ - tmp = (*lhs); \ - (*lhs) = (rhs); \ - (*out) = tmp; \ - __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ - return; -// ------------------------------------------------------------------------ - -#ifdef KMP_GOMP_COMPAT -#define GOMP_CRITICAL_SWP_WRK(FLAG) \ - if ((FLAG) && (__kmp_atomic_mode == 2)) { \ - KMP_CHECK_GTID; \ - CRITICAL_SWP_WRK(0); \ - } -#else -#define GOMP_CRITICAL_SWP_WRK(FLAG) -#endif /* KMP_GOMP_COMPAT */ -// ------------------------------------------------------------------------ - -#define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \ - ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \ - TYPE tmp; \ - GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \ - CRITICAL_SWP_WRK(LCK_ID) \ - } -// The end of workaround for cmplx4 - -ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp -#if KMP_HAVE_QUAD -ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp -#endif -// cmplx4 routine to return void -ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp - -// ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) // -// __kmpc_atomic_cmplx4_swp - -ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp -ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp -#if KMP_HAVE_QUAD -ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp -#if (KMP_ARCH_X86) -ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r, - 1) // __kmpc_atomic_float16_a16_swp -ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c, - 1) // __kmpc_atomic_cmplx16_a16_swp -#endif -#endif - -// End of OpenMP 4.0 Capture - -#endif // OMP_40_ENABLED - -#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 - -#undef OP_CRITICAL - -/* ------------------------------------------------------------------------ */ -/* Generic atomic routines */ - -void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs, - void (*f)(void *, void *, void *)) { - KMP_DEBUG_ASSERT(__kmp_init_serial); - - if ( -#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) - FALSE /* must use lock */ -#else - TRUE -#endif - ) { - kmp_int8 old_value, new_value; - - old_value = *(kmp_int8 *)lhs; - (*f)(&new_value, &old_value, rhs); - - /* TODO: Should this be acquire or release? */ - while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value, - *(kmp_int8 *)&new_value)) { - KMP_CPU_PAUSE(); - - old_value = *(kmp_int8 *)lhs; - (*f)(&new_value, &old_value, rhs); - } - - return; - } else { -// All 1-byte data is of integer data type. 
- -#ifdef KMP_GOMP_COMPAT - if (__kmp_atomic_mode == 2) { - __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); - } else -#endif /* KMP_GOMP_COMPAT */ - __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid); - - (*f)(lhs, lhs, rhs); - -#ifdef KMP_GOMP_COMPAT - if (__kmp_atomic_mode == 2) { - __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); - } else -#endif /* KMP_GOMP_COMPAT */ - __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid); - } -} - -void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs, - void (*f)(void *, void *, void *)) { - if ( -#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) - FALSE /* must use lock */ -#elif KMP_ARCH_X86 || KMP_ARCH_X86_64 - TRUE /* no alignment problems */ -#else - !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */ -#endif - ) { - kmp_int16 old_value, new_value; - - old_value = *(kmp_int16 *)lhs; - (*f)(&new_value, &old_value, rhs); - - /* TODO: Should this be acquire or release? */ - while (!KMP_COMPARE_AND_STORE_ACQ16( - (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) { - KMP_CPU_PAUSE(); - - old_value = *(kmp_int16 *)lhs; - (*f)(&new_value, &old_value, rhs); - } - - return; - } else { -// All 2-byte data is of integer data type. - -#ifdef KMP_GOMP_COMPAT - if (__kmp_atomic_mode == 2) { - __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); - } else -#endif /* KMP_GOMP_COMPAT */ - __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid); - - (*f)(lhs, lhs, rhs); - -#ifdef KMP_GOMP_COMPAT - if (__kmp_atomic_mode == 2) { - __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); - } else -#endif /* KMP_GOMP_COMPAT */ - __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid); - } -} - -void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs, - void (*f)(void *, void *, void *)) { - KMP_DEBUG_ASSERT(__kmp_init_serial); - - if ( -// FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints. -// Gomp compatibility is broken if this routine is called for floats. -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - TRUE /* no alignment problems */ -#else - !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */ -#endif - ) { - kmp_int32 old_value, new_value; - - old_value = *(kmp_int32 *)lhs; - (*f)(&new_value, &old_value, rhs); - - /* TODO: Should this be acquire or release? */ - while (!KMP_COMPARE_AND_STORE_ACQ32( - (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) { - KMP_CPU_PAUSE(); - - old_value = *(kmp_int32 *)lhs; - (*f)(&new_value, &old_value, rhs); - } - - return; - } else { -// Use __kmp_atomic_lock_4i for all 4-byte data, -// even if it isn't of integer data type. 
- -#ifdef KMP_GOMP_COMPAT - if (__kmp_atomic_mode == 2) { - __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); - } else -#endif /* KMP_GOMP_COMPAT */ - __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid); - - (*f)(lhs, lhs, rhs); - -#ifdef KMP_GOMP_COMPAT - if (__kmp_atomic_mode == 2) { - __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); - } else -#endif /* KMP_GOMP_COMPAT */ - __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid); - } -} - -void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs, - void (*f)(void *, void *, void *)) { - KMP_DEBUG_ASSERT(__kmp_init_serial); - if ( - -#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) - FALSE /* must use lock */ -#elif KMP_ARCH_X86 || KMP_ARCH_X86_64 - TRUE /* no alignment problems */ -#else - !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */ -#endif - ) { - kmp_int64 old_value, new_value; - - old_value = *(kmp_int64 *)lhs; - (*f)(&new_value, &old_value, rhs); - /* TODO: Should this be acquire or release? */ - while (!KMP_COMPARE_AND_STORE_ACQ64( - (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) { - KMP_CPU_PAUSE(); - - old_value = *(kmp_int64 *)lhs; - (*f)(&new_value, &old_value, rhs); - } - - return; - } else { -// Use __kmp_atomic_lock_8i for all 8-byte data, -// even if it isn't of integer data type. - -#ifdef KMP_GOMP_COMPAT - if (__kmp_atomic_mode == 2) { - __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); - } else -#endif /* KMP_GOMP_COMPAT */ - __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid); - - (*f)(lhs, lhs, rhs); - -#ifdef KMP_GOMP_COMPAT - if (__kmp_atomic_mode == 2) { - __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); - } else -#endif /* KMP_GOMP_COMPAT */ - __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid); - } -} - -void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs, - void (*f)(void *, void *, void *)) { - KMP_DEBUG_ASSERT(__kmp_init_serial); - -#ifdef KMP_GOMP_COMPAT - if (__kmp_atomic_mode == 2) { - __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); - } else -#endif /* KMP_GOMP_COMPAT */ - __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid); - - (*f)(lhs, lhs, rhs); - -#ifdef KMP_GOMP_COMPAT - if (__kmp_atomic_mode == 2) { - __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); - } else -#endif /* KMP_GOMP_COMPAT */ - __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid); -} - -void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs, - void (*f)(void *, void *, void *)) { - KMP_DEBUG_ASSERT(__kmp_init_serial); - -#ifdef KMP_GOMP_COMPAT - if (__kmp_atomic_mode == 2) { - __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); - } else -#endif /* KMP_GOMP_COMPAT */ - __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid); - - (*f)(lhs, lhs, rhs); - -#ifdef KMP_GOMP_COMPAT - if (__kmp_atomic_mode == 2) { - __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); - } else -#endif /* KMP_GOMP_COMPAT */ - __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid); -} - -void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs, - void (*f)(void *, void *, void *)) { - KMP_DEBUG_ASSERT(__kmp_init_serial); - -#ifdef KMP_GOMP_COMPAT - if (__kmp_atomic_mode == 2) { - __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); - } else -#endif /* KMP_GOMP_COMPAT */ - __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid); - - (*f)(lhs, lhs, rhs); - -#ifdef KMP_GOMP_COMPAT - if (__kmp_atomic_mode == 2) { - __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); - } else -#endif /* KMP_GOMP_COMPAT */ - 
__kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid); -} - -void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs, - void (*f)(void *, void *, void *)) { - KMP_DEBUG_ASSERT(__kmp_init_serial); - -#ifdef KMP_GOMP_COMPAT - if (__kmp_atomic_mode == 2) { - __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); - } else -#endif /* KMP_GOMP_COMPAT */ - __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid); - - (*f)(lhs, lhs, rhs); - -#ifdef KMP_GOMP_COMPAT - if (__kmp_atomic_mode == 2) { - __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); - } else -#endif /* KMP_GOMP_COMPAT */ - __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid); -} - -// AC: same two routines as GOMP_atomic_start/end, but will be called by our -// compiler; duplicated in order to not use 3-party names in pure Intel code -// TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin. -void __kmpc_atomic_start(void) { - int gtid = __kmp_entry_gtid(); - KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid)); - __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); -} - -void __kmpc_atomic_end(void) { - int gtid = __kmp_get_gtid(); - KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid)); - __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); -} - -/*! -@} -*/ - -// end of file Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_atomic.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/ompt-specific.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/ompt-specific.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/ompt-specific.h (nonexistent) @@ -1,104 +0,0 @@ -/* - * ompt-specific.h - header of OMPT internal functions implementation - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - -#ifndef OMPT_SPECIFIC_H -#define OMPT_SPECIFIC_H - -#include "kmp.h" - -/***************************************************************************** - * forward declarations - ****************************************************************************/ - -void __ompt_team_assign_id(kmp_team_t *team, ompt_data_t ompt_pid); -void __ompt_thread_assign_wait_id(void *variable); - -void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, - int gtid, ompt_data_t *ompt_pid, void *codeptr); - -void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, - int on_heap); - -void __ompt_lw_taskteam_unlink(kmp_info_t *thr); - -ompt_team_info_t *__ompt_get_teaminfo(int depth, int *size); - -ompt_task_info_t *__ompt_get_task_info_object(int depth); - -int __ompt_get_parallel_info_internal(int ancestor_level, - ompt_data_t **parallel_data, - int *team_size); - -int __ompt_get_task_info_internal(int ancestor_level, int *type, - ompt_data_t **task_data, - ompt_frame_t **task_frame, - ompt_data_t **parallel_data, int *thread_num); - -ompt_data_t *__ompt_get_thread_data_internal(); - -/* - * Unused currently -static uint64_t __ompt_get_get_unique_id_internal(); -*/ - -/***************************************************************************** - * macros - ****************************************************************************/ - -#define OMPT_CUR_TASK_INFO(thr) (&(thr->th.th_current_task->ompt_task_info)) -#define OMPT_CUR_TASK_DATA(thr) \ - (&(thr->th.th_current_task->ompt_task_info.task_data)) -#define OMPT_CUR_TEAM_INFO(thr) (&(thr->th.th_team->t.ompt_team_info)) -#define OMPT_CUR_TEAM_DATA(thr) \ - (&(thr->th.th_team->t.ompt_team_info.parallel_data)) - -#define OMPT_HAVE_WEAK_ATTRIBUTE KMP_HAVE_WEAK_ATTRIBUTE -#define OMPT_HAVE_PSAPI KMP_HAVE_PSAPI -#define OMPT_STR_MATCH(haystack, needle) __kmp_str_match(haystack, 0, needle) - -inline void *__ompt_load_return_address(int gtid) { - kmp_info_t *thr = __kmp_threads[gtid]; - void *return_address = thr->th.ompt_thread_info.return_address; - thr->th.ompt_thread_info.return_address = NULL; - return return_address; -} - -#define OMPT_STORE_RETURN_ADDRESS(gtid) \ - if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads[gtid] && \ - !__kmp_threads[gtid]->th.ompt_thread_info.return_address) \ - __kmp_threads[gtid]->th.ompt_thread_info.return_address = \ - __builtin_return_address(0) -#define OMPT_LOAD_RETURN_ADDRESS(gtid) __ompt_load_return_address(gtid) - -//****************************************************************************** -// inline functions -//****************************************************************************** - -inline kmp_info_t *ompt_get_thread_gtid(int gtid) { - return (gtid >= 0) ? 
__kmp_thread_from_gtid(gtid) : NULL; -} - -inline kmp_info_t *ompt_get_thread() { - int gtid = __kmp_get_gtid(); - return ompt_get_thread_gtid(gtid); -} - -inline void ompt_set_thread_state(kmp_info_t *thread, ompt_state_t state) { - thread->th.ompt_thread_info.state = state; -} - -inline const char *ompt_get_runtime_version() { - return &__kmp_version_lib_ver[KMP_VERSION_MAGIC_LEN]; -} - -#endif Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/ompt-specific.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_safe_c_api.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_safe_c_api.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_safe_c_api.h (nonexistent) @@ -1,75 +0,0 @@ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#ifndef KMP_SAFE_C_API_H -#define KMP_SAFE_C_API_H - -#include "kmp_platform.h" -#include - -// Replacement for banned C API - -// Not every unsafe call listed here is handled now, but keeping everything -// in one place should be handy for future maintenance. -#if KMP_OS_WINDOWS && KMP_MSVC_COMPAT - -#define RSIZE_MAX_STR (4UL << 10) // 4KB - -// _malloca was suggested, but it is not a drop-in replacement for _alloca -#define KMP_ALLOCA _alloca - -#define KMP_MEMCPY_S memcpy_s -#define KMP_SNPRINTF sprintf_s -#define KMP_SSCANF sscanf_s -#define KMP_STRCPY_S strcpy_s -#define KMP_STRNCPY_S strncpy_s - -// Use this only when buffer size is unknown -#define KMP_MEMCPY(dst, src, cnt) memcpy_s(dst, cnt, src, cnt) - -#define KMP_STRLEN(str) strnlen_s(str, RSIZE_MAX_STR) - -// Use this only when buffer size is unknown -#define KMP_STRNCPY(dst, src, cnt) strncpy_s(dst, cnt, src, cnt) - -// _TRUNCATE insures buffer size > max string to print. -#define KMP_VSNPRINTF(dst, cnt, fmt, arg) \ - vsnprintf_s(dst, cnt, _TRUNCATE, fmt, arg) - -#else // KMP_OS_WINDOWS - -// For now, these macros use the existing API. 
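The shim pattern in kmp_safe_c_api.h keeps one spelling per string/memory call and threads the destination buffer size through every call site, so the bounds-checked _s form is used where MSVC provides it while other platforms currently fall through to the classic API. A minimal sketch of the same pattern with a hypothetical MY_STRCPY_S macro (not part of the runtime):

#include <cstdio>
#include <cstring>

// One spelling at the call site; the bsz argument is always passed so
// a checked variant can be enabled per-platform without editing callers.
#if defined(_MSC_VER)
#define MY_STRCPY_S(dst, bsz, src) strcpy_s((dst), (bsz), (src))
#else
#define MY_STRCPY_S(dst, bsz, src) strcpy((dst), (src)) /* bsz unused */
#endif

int main() {
  char name[16];
  MY_STRCPY_S(name, sizeof(name), "libomp");
  std::printf("%s\n", name);
  return 0;
}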
- -#define KMP_ALLOCA alloca -#define KMP_MEMCPY_S(dst, bsz, src, cnt) memcpy(dst, src, cnt) -#define KMP_SNPRINTF snprintf -#define KMP_SSCANF sscanf -#define KMP_STRCPY_S(dst, bsz, src) strcpy(dst, src) -#define KMP_STRNCPY_S(dst, bsz, src, cnt) strncpy(dst, src, cnt) -#define KMP_VSNPRINTF vsnprintf -#define KMP_STRNCPY strncpy -#define KMP_STRLEN strlen -#define KMP_MEMCPY memcpy - -#endif // KMP_OS_WINDOWS - -// Offer truncated version of strncpy -static inline void __kmp_strncpy_truncate(char *buffer, size_t buf_size, - char const *src, size_t src_size) { - if (src_size >= buf_size) { - src_size = buf_size - 1; - KMP_STRNCPY_S(buffer, buf_size, src, src_size); - buffer[buf_size - 1] = '\0'; - } else { - KMP_STRNCPY_S(buffer, buf_size, src, src_size); - } -} - -#endif // KMP_SAFE_C_API_H Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_safe_c_api.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/tsan_annotations.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/tsan_annotations.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/tsan_annotations.cpp (nonexistent) @@ -1,108 +0,0 @@ -/* - * tsan_annotations.cpp -- ThreadSanitizer annotations to support data - * race detection in OpenMP programs. - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - -#include "tsan_annotations.h" - -#include - -typedef unsigned long uptr; -typedef signed long sptr; - -extern "C" __attribute__((weak)) void AnnotateHappensBefore(const char *f, - int l, uptr addr) {} -extern "C" __attribute__((weak)) void AnnotateHappensAfter(const char *f, int l, - uptr addr) {} -extern "C" __attribute__((weak)) void AnnotateCondVarSignal(const char *f, - int l, uptr cv) {} -extern "C" __attribute__((weak)) void AnnotateCondVarSignalAll(const char *f, - int l, uptr cv) { -} -extern "C" __attribute__((weak)) void AnnotateMutexIsNotPHB(const char *f, - int l, uptr mu) {} -extern "C" __attribute__((weak)) void AnnotateCondVarWait(const char *f, int l, - uptr cv, uptr lock) {} -extern "C" __attribute__((weak)) void AnnotateRWLockCreate(const char *f, int l, - uptr m) {} -extern "C" __attribute__((weak)) void -AnnotateRWLockCreateStatic(const char *f, int l, uptr m) {} -extern "C" __attribute__((weak)) void AnnotateRWLockDestroy(const char *f, - int l, uptr m) {} -extern "C" __attribute__((weak)) void -AnnotateRWLockAcquired(const char *f, int l, uptr m, uptr is_w) {} -extern "C" __attribute__((weak)) void -AnnotateRWLockReleased(const char *f, int l, uptr m, uptr is_w) {} -extern "C" __attribute__((weak)) void AnnotateTraceMemory(const char *f, int l, - uptr mem) {} -extern "C" __attribute__((weak)) void AnnotateFlushState(const char *f, int l) { -} -extern "C" __attribute__((weak)) void AnnotateNewMemory(const char *f, int l, - uptr mem, uptr size) {} -extern "C" __attribute__((weak)) void AnnotateNoOp(const char *f, int l, - uptr mem) {} -extern "C" __attribute__((weak)) void AnnotateFlushExpectedRaces(const char *f, - int l) {} -extern "C" __attribute__((weak)) void -AnnotateEnableRaceDetection(const char *f, int l, int enable) {} -extern "C" __attribute__((weak)) void -AnnotateMutexIsUsedAsCondVar(const char *f, int l, uptr mu) {} -extern "C" __attribute__((weak)) void AnnotatePCQGet(const char *f, int l, - uptr pcq) {} -extern "C" __attribute__((weak)) void AnnotatePCQPut(const char *f, int l, - uptr pcq) {} -extern "C" __attribute__((weak)) void AnnotatePCQDestroy(const char *f, int l, - uptr pcq) {} -extern "C" __attribute__((weak)) void AnnotatePCQCreate(const char *f, int l, - uptr pcq) {} -extern "C" __attribute__((weak)) void AnnotateExpectRace(const char *f, int l, - uptr mem, char *desc) { -} -extern "C" __attribute__((weak)) void -AnnotateBenignRaceSized(const char *f, int l, uptr mem, uptr size, char *desc) { -} -extern "C" __attribute__((weak)) void AnnotateBenignRace(const char *f, int l, - uptr mem, char *desc) { -} -extern "C" __attribute__((weak)) void AnnotateIgnoreReadsBegin(const char *f, - int l) {} -extern "C" __attribute__((weak)) void AnnotateIgnoreReadsEnd(const char *f, - int l) {} -extern "C" __attribute__((weak)) void AnnotateIgnoreWritesBegin(const char *f, - int l) {} -extern "C" __attribute__((weak)) void AnnotateIgnoreWritesEnd(const char *f, - int l) {} -extern "C" __attribute__((weak)) void AnnotateIgnoreSyncBegin(const char *f, - int l) {} -extern "C" __attribute__((weak)) void AnnotateIgnoreSyncEnd(const char *f, - int l) {} -extern "C" __attribute__((weak)) void -AnnotatePublishMemoryRange(const char *f, int l, uptr addr, uptr size) {} -extern "C" __attribute__((weak)) void -AnnotateUnpublishMemoryRange(const char *f, int l, uptr addr, uptr size) {} -extern "C" __attribute__((weak)) void AnnotateThreadName(const char *f, int l, - char *name) {} 
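The annotation entry points in tsan_annotations.cpp are defined weak so the runtime can call them unconditionally: in an ordinary build they resolve to these empty stubs, while a run under ThreadSanitizer links strong definitions of the same names, which interpose and record real happens-before events. A minimal demonstration of the weak-default mechanism (AnnotateDemoEvent is a made-up symbol, not part of the TSan interface; GCC/Clang weak attribute assumed):

#include <cstdio>

// Weak default in the style of the stubs above: a no-op that a tool
// runtime may replace with a strong definition of the same name.
extern "C" __attribute__((weak)) void AnnotateDemoEvent(const char *file,
                                                        int line) {
  (void)file;
  (void)line; // default build: nothing to record
}

int main() {
  // Always safe to call: resolves to a strong definition if some other
  // object file provides one, and to the weak stub otherwise.
  AnnotateDemoEvent(__FILE__, __LINE__);
  std::puts("annotation call returned");
  return 0;
}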
-extern "C" __attribute__((weak)) void -WTFAnnotateHappensBefore(const char *f, int l, uptr addr) {} -extern "C" __attribute__((weak)) void -WTFAnnotateHappensAfter(const char *f, int l, uptr addr) {} -extern "C" __attribute__((weak)) void -WTFAnnotateBenignRaceSized(const char *f, int l, uptr mem, uptr sz, - char *desc) {} -extern "C" __attribute__((weak)) int RunningOnValgrind() { return 0; } -extern "C" __attribute__((weak)) double ValgrindSlowdown(void) { return 0; } -extern "C" __attribute__((weak)) const char __attribute__((weak)) * - ThreadSanitizerQuery(const char *query) { - return 0; -} -extern "C" __attribute__((weak)) void -AnnotateMemoryIsInitialized(const char *f, int l, uptr mem, uptr sz) {} Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/tsan_annotations.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_barrier.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_barrier.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_barrier.cpp (nonexistent) @@ -1,2067 +0,0 @@ -/* - * kmp_barrier.cpp - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#include "kmp.h" -#include "kmp_wait_release.h" -#include "kmp_itt.h" -#include "kmp_os.h" -#include "kmp_stats.h" -#if OMPT_SUPPORT -#include "ompt-specific.h" -#endif - -#if KMP_MIC -#include -#define USE_NGO_STORES 1 -#endif // KMP_MIC - -#include "tsan_annotations.h" - -#if KMP_MIC && USE_NGO_STORES -// ICV copying -#define ngo_load(src) __m512d Vt = _mm512_load_pd((void *)(src)) -#define ngo_store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt) -#define ngo_store_go(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt) -#define ngo_sync() __asm__ volatile("lock; addl $0,0(%%rsp)" ::: "memory") -#else -#define ngo_load(src) ((void)0) -#define ngo_store_icvs(dst, src) copy_icvs((dst), (src)) -#define ngo_store_go(dst, src) KMP_MEMCPY((dst), (src), CACHE_LINE) -#define ngo_sync() ((void)0) -#endif /* KMP_MIC && USE_NGO_STORES */ - -void __kmp_print_structure(void); // Forward declaration - -// ---------------------------- Barrier Algorithms ---------------------------- - -// Linear Barrier -static void __kmp_linear_barrier_gather( - enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, - void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) { - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_gather); - kmp_team_t *team = this_thr->th.th_team; - kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; - kmp_info_t **other_threads = team->t.t_threads; - - KA_TRACE( - 20, - ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); - KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]); - -#if USE_ITT_BUILD && USE_ITT_NOTIFY - // Barrier imbalance - save arrive time to the thread - if 
(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { - this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = - __itt_get_timestamp(); - } -#endif - // We now perform a linear reduction to signal that all of the threads have - // arrived. - if (!KMP_MASTER_TID(tid)) { - KA_TRACE(20, - ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)" - "arrived(%p): %llu => %llu\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(0, team), - team->t.t_id, 0, &thr_bar->b_arrived, thr_bar->b_arrived, - thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP)); - // Mark arrival to master thread - /* After performing this write, a worker thread may not assume that the team - is valid any more - it could be deallocated by the master thread at any - time. */ - ANNOTATE_BARRIER_BEGIN(this_thr); - kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[0]); - flag.release(); - } else { - kmp_balign_team_t *team_bar = &team->t.t_bar[bt]; - int nproc = this_thr->th.th_team_nproc; - int i; - // Don't have to worry about sleep bit here or atomic since team setting - kmp_uint64 new_state = team_bar->b_arrived + KMP_BARRIER_STATE_BUMP; - - // Collect all the worker team member threads. - for (i = 1; i < nproc; ++i) { -#if KMP_CACHE_MANAGE - // Prefetch next thread's arrived count - if (i + 1 < nproc) - KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_arrived); -#endif /* KMP_CACHE_MANAGE */ - KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) " - "arrived(%p) == %llu\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team), - team->t.t_id, i, - &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state)); - - // Wait for worker thread to arrive - kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived, - new_state); - flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); - ANNOTATE_BARRIER_END(other_threads[i]); -#if USE_ITT_BUILD && USE_ITT_NOTIFY - // Barrier imbalance - write min of the thread time and the other thread - // time to the thread. 
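The linear gather above is the simplest barrier shape: every worker bumps its own arrived flag and the master waits on each flag in turn, folding in the optional reduction as it goes. A compact standalone model of one generation of that flow (raw yielding stands in for kmp_flag_64's spin/sleep machinery; all names are illustrative):

#include <atomic>
#include <cstdint>
#include <thread>
#include <vector>

constexpr std::uint64_t STATE_BUMP = 1; // KMP_BARRIER_STATE_BUMP stand-in

struct Gather {
  std::vector<std::atomic<std::uint64_t>> arrived; // one flag per thread
  explicit Gather(int n) : arrived(n) {}
};

void gather(Gather &g, int tid, std::uint64_t gen, long *partial,
            long *total) {
  if (tid != 0) {
    // Worker: the release store publishes our earlier writes (e.g.
    // partial[tid]) and signals arrival; after it the master may act.
    g.arrived[tid].store(gen + STATE_BUMP, std::memory_order_release);
  } else {
    for (std::size_t i = 1; i < g.arrived.size(); ++i) {
      while (g.arrived[i].load(std::memory_order_acquire) !=
             gen + STATE_BUMP)
        std::this_thread::yield();  // KMP_CPU_PAUSE() analogue
      *total += partial[i];         // the reduce() hook of the real gather
    }
  }
}

int main() {
  const int n = 4;
  Gather g(n);
  long partial[n] = {1, 2, 3, 4};
  long total = partial[0]; // master's own share
  std::vector<std::thread> workers;
  for (int t = 1; t < n; ++t)
    workers.emplace_back([&, t] { gather(g, t, 0, partial, &total); });
  gather(g, 0, 0, partial, &total);
  for (auto &w : workers) w.join();
  return total == 10 ? 0 : 1;
}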
- if (__kmp_forkjoin_frames_mode == 2) { - this_thr->th.th_bar_min_time = KMP_MIN( - this_thr->th.th_bar_min_time, other_threads[i]->th.th_bar_min_time); - } -#endif - if (reduce) { - KA_TRACE(100, - ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team), - team->t.t_id, i)); - ANNOTATE_REDUCE_AFTER(reduce); - (*reduce)(this_thr->th.th_local.reduce_data, - other_threads[i]->th.th_local.reduce_data); - ANNOTATE_REDUCE_BEFORE(reduce); - ANNOTATE_REDUCE_BEFORE(&team->t.t_bar); - } - } - // Don't have to worry about sleep bit here or atomic since team setting - team_bar->b_arrived = new_state; - KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d " - "arrived(%p) = %llu\n", - gtid, team->t.t_id, tid, team->t.t_id, &team_bar->b_arrived, - new_state)); - } - KA_TRACE( - 20, - ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); -} - -static void __kmp_linear_barrier_release( - enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, - int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) { - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_release); - kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; - kmp_team_t *team; - - if (KMP_MASTER_TID(tid)) { - unsigned int i; - kmp_uint32 nproc = this_thr->th.th_team_nproc; - kmp_info_t **other_threads; - - team = __kmp_threads[gtid]->th.th_team; - KMP_DEBUG_ASSERT(team != NULL); - other_threads = team->t.t_threads; - - KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) master enter for " - "barrier type %d\n", - gtid, team->t.t_id, tid, bt)); - - if (nproc > 1) { -#if KMP_BARRIER_ICV_PUSH - { - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy); - if (propagate_icvs) { - ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs); - for (i = 1; i < nproc; ++i) { - __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i], - team, i, FALSE); - ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs, - &team->t.t_implicit_task_taskdata[0].td_icvs); - } - ngo_sync(); - } - } -#endif // KMP_BARRIER_ICV_PUSH - - // Now, release all of the worker threads - for (i = 1; i < nproc; ++i) { -#if KMP_CACHE_MANAGE - // Prefetch next thread's go flag - if (i + 1 < nproc) - KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_go); -#endif /* KMP_CACHE_MANAGE */ - KA_TRACE( - 20, - ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) " - "go(%p): %u => %u\n", - gtid, team->t.t_id, tid, other_threads[i]->th.th_info.ds.ds_gtid, - team->t.t_id, i, &other_threads[i]->th.th_bar[bt].bb.b_go, - other_threads[i]->th.th_bar[bt].bb.b_go, - other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP)); - ANNOTATE_BARRIER_BEGIN(other_threads[i]); - kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_go, - other_threads[i]); - flag.release(); - } - } - } else { // Wait for the MASTER thread to release us - KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n", - gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP)); - kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); - flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); - ANNOTATE_BARRIER_END(this_thr); -#if USE_ITT_BUILD && USE_ITT_NOTIFY - if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) { - // In a fork barrier; cannot get the object reliably (or ITTNOTIFY is - // disabled) - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1); - // Cancel wait on previous parallel region... 
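The release half inverts the gather: the master bumps each worker's go flag, and each worker waits on its own flag and then re-arms it for the next barrier, which is what the TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE) store above does. A sketch under the same illustrative conventions:

#include <atomic>
#include <cstdint>
#include <thread>

constexpr std::uint32_t INIT_STATE = 0;      // KMP_INIT_BARRIER_STATE stand-in
constexpr std::uint32_t GO = INIT_STATE + 1; // value after one STATE_BUMP

// Master side: bump every worker's go flag (the flag.release() calls).
void master_release(std::atomic<std::uint32_t> *go, int nproc) {
  for (int i = 1; i < nproc; ++i)
    go[i].store(GO, std::memory_order_release);
}

// Worker side: wait on our own flag, then reset it for the next barrier.
void worker_wait(std::atomic<std::uint32_t> *go, int tid) {
  while (go[tid].load(std::memory_order_acquire) != GO)
    std::this_thread::yield();
  go[tid].store(INIT_STATE, std::memory_order_relaxed);
}

int main() {
  std::atomic<std::uint32_t> go[2] = {};
  std::thread w([&] { worker_wait(go, 1); });
  master_release(go, 2);
  w.join();
  return go[1].load() == INIT_STATE ? 0 : 1;
}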
- __kmp_itt_task_starting(itt_sync_obj); - - if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) - return; - - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); - if (itt_sync_obj != NULL) - // Call prepare as early as possible for "new" barrier - __kmp_itt_task_finished(itt_sync_obj); - } else -#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ - // Early exit for reaping threads releasing forkjoin barrier - if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) - return; -// The worker thread may now assume that the team is valid. -#ifdef KMP_DEBUG - tid = __kmp_tid_from_gtid(gtid); - team = __kmp_threads[gtid]->th.th_team; -#endif - KMP_DEBUG_ASSERT(team != NULL); - TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE); - KA_TRACE(20, - ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", - gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE)); - KMP_MB(); // Flush all pending memory write invalidates. - } - KA_TRACE( - 20, - ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); -} - -// Tree barrier -static void -__kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, - int tid, void (*reduce)(void *, void *) - USE_ITT_BUILD_ARG(void *itt_sync_obj)) { - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_gather); - kmp_team_t *team = this_thr->th.th_team; - kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; - kmp_info_t **other_threads = team->t.t_threads; - kmp_uint32 nproc = this_thr->th.th_team_nproc; - kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt]; - kmp_uint32 branch_factor = 1 << branch_bits; - kmp_uint32 child; - kmp_uint32 child_tid; - kmp_uint64 new_state; - - KA_TRACE( - 20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); - KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]); - -#if USE_ITT_BUILD && USE_ITT_NOTIFY - // Barrier imbalance - save arrive time to the thread - if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { - this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = - __itt_get_timestamp(); - } -#endif - // Perform tree gather to wait until all threads have arrived; reduce any - // required data as we go - child_tid = (tid << branch_bits) + 1; - if (child_tid < nproc) { - // Parent threads wait for all their children to arrive - new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP; - child = 1; - do { - kmp_info_t *child_thr = other_threads[child_tid]; - kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; -#if KMP_CACHE_MANAGE - // Prefetch next thread's arrived count - if (child + 1 <= branch_factor && child_tid + 1 < nproc) - KMP_CACHE_PREFETCH( - &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_arrived); -#endif /* KMP_CACHE_MANAGE */ - KA_TRACE(20, - ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " - "arrived(%p) == %llu\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), - team->t.t_id, child_tid, &child_bar->b_arrived, new_state)); - // Wait for child to arrive - kmp_flag_64 flag(&child_bar->b_arrived, new_state); - flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); - ANNOTATE_BARRIER_END(child_thr); -#if USE_ITT_BUILD && USE_ITT_NOTIFY - // Barrier imbalance - write min of the thread time and a child time to - // the thread. 
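The tree barrier derives parents and children purely from thread ids: with branch_bits b, thread tid's children are the consecutive ids starting at (tid << b) + 1 and its parent is (tid - 1) >> b, exactly the expressions in the gather and release loops around this hunk. A short program that prints the implied tree (branch factor 4, 11 threads; parameters chosen only for the demo):

#include <cstdio>

int main() {
  const unsigned branch_bits = 2;                // branch factor 4
  const unsigned branch_factor = 1u << branch_bits;
  const unsigned nproc = 11;
  for (unsigned tid = 0; tid < nproc; ++tid) {
    unsigned first = (tid << branch_bits) + 1;   // first child, as above
    std::printf("tid %2u: parent %2d, children", tid,
                tid == 0 ? -1 : (int)((tid - 1) >> branch_bits));
    for (unsigned c = 0; c < branch_factor && first + c < nproc; ++c)
      std::printf(" %u", first + c);
    std::printf("\n");
  }
  return 0;
}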
- if (__kmp_forkjoin_frames_mode == 2) { - this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time, - child_thr->th.th_bar_min_time); - } -#endif - if (reduce) { - KA_TRACE(100, - ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), - team->t.t_id, child_tid)); - ANNOTATE_REDUCE_AFTER(reduce); - (*reduce)(this_thr->th.th_local.reduce_data, - child_thr->th.th_local.reduce_data); - ANNOTATE_REDUCE_BEFORE(reduce); - ANNOTATE_REDUCE_BEFORE(&team->t.t_bar); - } - child++; - child_tid++; - } while (child <= branch_factor && child_tid < nproc); - } - - if (!KMP_MASTER_TID(tid)) { // Worker threads - kmp_int32 parent_tid = (tid - 1) >> branch_bits; - - KA_TRACE(20, - ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " - "arrived(%p): %llu => %llu\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team), - team->t.t_id, parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived, - thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP)); - - // Mark arrival to parent thread - /* After performing this write, a worker thread may not assume that the team - is valid any more - it could be deallocated by the master thread at any - time. */ - ANNOTATE_BARRIER_BEGIN(this_thr); - kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[parent_tid]); - flag.release(); - } else { - // Need to update the team arrived pointer if we are the master thread - if (nproc > 1) // New value was already computed above - team->t.t_bar[bt].b_arrived = new_state; - else - team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP; - KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d " - "arrived(%p) = %llu\n", - gtid, team->t.t_id, tid, team->t.t_id, - &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived)); - } - KA_TRACE(20, - ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); -} - -static void __kmp_tree_barrier_release( - enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, - int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) { - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_release); - kmp_team_t *team; - kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; - kmp_uint32 nproc; - kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt]; - kmp_uint32 branch_factor = 1 << branch_bits; - kmp_uint32 child; - kmp_uint32 child_tid; - - // Perform a tree release for all of the threads that have been gathered - if (!KMP_MASTER_TID( - tid)) { // Handle fork barrier workers who aren't part of a team yet - KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n", gtid, - &thr_bar->b_go, KMP_BARRIER_STATE_BUMP)); - // Wait for parent thread to release us - kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); - flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); - ANNOTATE_BARRIER_END(this_thr); -#if USE_ITT_BUILD && USE_ITT_NOTIFY - if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) { - // In fork barrier where we could not get the object reliably (or - // ITTNOTIFY is disabled) - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1); - // Cancel wait on previous parallel region... 
- __kmp_itt_task_starting(itt_sync_obj); - - if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) - return; - - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); - if (itt_sync_obj != NULL) - // Call prepare as early as possible for "new" barrier - __kmp_itt_task_finished(itt_sync_obj); - } else -#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ - // Early exit for reaping threads releasing forkjoin barrier - if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) - return; - - // The worker thread may now assume that the team is valid. - team = __kmp_threads[gtid]->th.th_team; - KMP_DEBUG_ASSERT(team != NULL); - tid = __kmp_tid_from_gtid(gtid); - - TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE); - KA_TRACE(20, - ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", gtid, - team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE)); - KMP_MB(); // Flush all pending memory write invalidates. - } else { - team = __kmp_threads[gtid]->th.th_team; - KMP_DEBUG_ASSERT(team != NULL); - KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) master enter for " - "barrier type %d\n", - gtid, team->t.t_id, tid, bt)); - } - nproc = this_thr->th.th_team_nproc; - child_tid = (tid << branch_bits) + 1; - - if (child_tid < nproc) { - kmp_info_t **other_threads = team->t.t_threads; - child = 1; - // Parent threads release all their children - do { - kmp_info_t *child_thr = other_threads[child_tid]; - kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; -#if KMP_CACHE_MANAGE - // Prefetch next thread's go count - if (child + 1 <= branch_factor && child_tid + 1 < nproc) - KMP_CACHE_PREFETCH( - &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_go); -#endif /* KMP_CACHE_MANAGE */ - -#if KMP_BARRIER_ICV_PUSH - { - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy); - if (propagate_icvs) { - __kmp_init_implicit_task(team->t.t_ident, - team->t.t_threads[child_tid], team, - child_tid, FALSE); - copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs, - &team->t.t_implicit_task_taskdata[0].td_icvs); - } - } -#endif // KMP_BARRIER_ICV_PUSH - KA_TRACE(20, - ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" - "go(%p): %u => %u\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), - team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go, - child_bar->b_go + KMP_BARRIER_STATE_BUMP)); - // Release child from barrier - ANNOTATE_BARRIER_BEGIN(child_thr); - kmp_flag_64 flag(&child_bar->b_go, child_thr); - flag.release(); - child++; - child_tid++; - } while (child <= branch_factor && child_tid < nproc); - } - KA_TRACE( - 20, ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); -} - -// Hyper Barrier -static void -__kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, - int tid, void (*reduce)(void *, void *) - USE_ITT_BUILD_ARG(void *itt_sync_obj)) { - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_gather); - kmp_team_t *team = this_thr->th.th_team; - kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; - kmp_info_t **other_threads = team->t.t_threads; - kmp_uint64 new_state = KMP_BARRIER_UNUSED_STATE; - kmp_uint32 num_threads = this_thr->th.th_team_nproc; - kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt]; - kmp_uint32 branch_factor = 1 << branch_bits; - kmp_uint32 offset; - kmp_uint32 level; - - KA_TRACE( - 20, - ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); - KMP_DEBUG_ASSERT(this_thr == 
other_threads[this_thr->th.th_info.ds.ds_tid]); - -#if USE_ITT_BUILD && USE_ITT_NOTIFY - // Barrier imbalance - save arrive time to the thread - if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { - this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = - __itt_get_timestamp(); - } -#endif - /* Perform a hypercube-embedded tree gather to wait until all of the threads - have arrived, and reduce any required data as we go. */ - kmp_flag_64 p_flag(&thr_bar->b_arrived); - for (level = 0, offset = 1; offset < num_threads; - level += branch_bits, offset <<= branch_bits) { - kmp_uint32 child; - kmp_uint32 child_tid; - - if (((tid >> level) & (branch_factor - 1)) != 0) { - kmp_int32 parent_tid = tid & ~((1 << (level + branch_bits)) - 1); - - KA_TRACE(20, - ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " - "arrived(%p): %llu => %llu\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team), - team->t.t_id, parent_tid, &thr_bar->b_arrived, - thr_bar->b_arrived, - thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP)); - // Mark arrival to parent thread - /* After performing this write (in the last iteration of the enclosing for - loop), a worker thread may not assume that the team is valid any more - - it could be deallocated by the master thread at any time. */ - ANNOTATE_BARRIER_BEGIN(this_thr); - p_flag.set_waiter(other_threads[parent_tid]); - p_flag.release(); - break; - } - - // Parent threads wait for children to arrive - if (new_state == KMP_BARRIER_UNUSED_STATE) - new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP; - for (child = 1, child_tid = tid + (1 << level); - child < branch_factor && child_tid < num_threads; - child++, child_tid += (1 << level)) { - kmp_info_t *child_thr = other_threads[child_tid]; - kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; -#if KMP_CACHE_MANAGE - kmp_uint32 next_child_tid = child_tid + (1 << level); - // Prefetch next thread's arrived count - if (child + 1 < branch_factor && next_child_tid < num_threads) - KMP_CACHE_PREFETCH( - &other_threads[next_child_tid]->th.th_bar[bt].bb.b_arrived); -#endif /* KMP_CACHE_MANAGE */ - KA_TRACE(20, - ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " - "arrived(%p) == %llu\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), - team->t.t_id, child_tid, &child_bar->b_arrived, new_state)); - // Wait for child to arrive - kmp_flag_64 c_flag(&child_bar->b_arrived, new_state); - c_flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); - ANNOTATE_BARRIER_END(child_thr); -#if USE_ITT_BUILD && USE_ITT_NOTIFY - // Barrier imbalance - write min of the thread time and a child time to - // the thread. 
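The hyper barrier embeds the tree in a hypercube: at each level a thread either signals a parent obtained by masking off its low id bits and is done, or waits for up to branch_factor - 1 children spaced (1 << level) apart; KMP_REVERSE_HYPER_BAR (defined further below) makes the release walk the same pairing in reverse order. A short enumeration of the gather pairing for 8 threads at branch factor 2 (a classic hypercube), using the same index arithmetic:

#include <cstdio>

int main() {
  const unsigned branch_bits = 1;           // branch factor 2
  const unsigned branch_factor = 1u << branch_bits;
  const unsigned nproc = 8;
  for (unsigned tid = 0; tid < nproc; ++tid) {
    for (unsigned level = 0, offset = 1; offset < nproc;
         level += branch_bits, offset <<= branch_bits) {
      if (((tid >> level) & (branch_factor - 1)) != 0) {
        // Same mask as the gather: parent keeps only the high id bits.
        unsigned parent = tid & ~((1u << (level + branch_bits)) - 1);
        std::printf("tid %u signals parent %u at level %u\n", tid, parent,
                    level);
        break;                              // arrival ends this thread's part
      }
      for (unsigned child = 1, ct = tid + (1u << level);
           child < branch_factor && ct < nproc; ++child, ct += (1u << level))
        std::printf("tid %u waits for child %u at level %u\n", tid, ct,
                    level);
    }
  }
  return 0;
}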
- if (__kmp_forkjoin_frames_mode == 2) { - this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time, - child_thr->th.th_bar_min_time); - } -#endif - if (reduce) { - KA_TRACE(100, - ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), - team->t.t_id, child_tid)); - ANNOTATE_REDUCE_AFTER(reduce); - (*reduce)(this_thr->th.th_local.reduce_data, - child_thr->th.th_local.reduce_data); - ANNOTATE_REDUCE_BEFORE(reduce); - ANNOTATE_REDUCE_BEFORE(&team->t.t_bar); - } - } - } - - if (KMP_MASTER_TID(tid)) { - // Need to update the team arrived pointer if we are the master thread - if (new_state == KMP_BARRIER_UNUSED_STATE) - team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP; - else - team->t.t_bar[bt].b_arrived = new_state; - KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d " - "arrived(%p) = %llu\n", - gtid, team->t.t_id, tid, team->t.t_id, - &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived)); - } - KA_TRACE( - 20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); -} - -// The reverse versions seem to beat the forward versions overall -#define KMP_REVERSE_HYPER_BAR -static void __kmp_hyper_barrier_release( - enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, - int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) { - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_release); - kmp_team_t *team; - kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; - kmp_info_t **other_threads; - kmp_uint32 num_threads; - kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt]; - kmp_uint32 branch_factor = 1 << branch_bits; - kmp_uint32 child; - kmp_uint32 child_tid; - kmp_uint32 offset; - kmp_uint32 level; - - /* Perform a hypercube-embedded tree release for all of the threads that have - been gathered. If KMP_REVERSE_HYPER_BAR is defined (default) the threads - are released in the reverse order of the corresponding gather, otherwise - threads are released in the same order. */ - if (KMP_MASTER_TID(tid)) { // master - team = __kmp_threads[gtid]->th.th_team; - KMP_DEBUG_ASSERT(team != NULL); - KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) master enter for " - "barrier type %d\n", - gtid, team->t.t_id, tid, bt)); -#if KMP_BARRIER_ICV_PUSH - if (propagate_icvs) { // master already has ICVs in final destination; copy - copy_icvs(&thr_bar->th_fixed_icvs, - &team->t.t_implicit_task_taskdata[tid].td_icvs); - } -#endif - } else { // Handle fork barrier workers who aren't part of a team yet - KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n", gtid, - &thr_bar->b_go, KMP_BARRIER_STATE_BUMP)); - // Wait for parent thread to release us - kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); - flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); - ANNOTATE_BARRIER_END(this_thr); -#if USE_ITT_BUILD && USE_ITT_NOTIFY - if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) { - // In fork barrier where we could not get the object reliably - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1); - // Cancel wait on previous parallel region... 
- __kmp_itt_task_starting(itt_sync_obj); - - if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) - return; - - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); - if (itt_sync_obj != NULL) - // Call prepare as early as possible for "new" barrier - __kmp_itt_task_finished(itt_sync_obj); - } else -#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ - // Early exit for reaping threads releasing forkjoin barrier - if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) - return; - - // The worker thread may now assume that the team is valid. - team = __kmp_threads[gtid]->th.th_team; - KMP_DEBUG_ASSERT(team != NULL); - tid = __kmp_tid_from_gtid(gtid); - - TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE); - KA_TRACE(20, - ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", - gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE)); - KMP_MB(); // Flush all pending memory write invalidates. - } - num_threads = this_thr->th.th_team_nproc; - other_threads = team->t.t_threads; - -#ifdef KMP_REVERSE_HYPER_BAR - // Count up to correct level for parent - for (level = 0, offset = 1; - offset < num_threads && (((tid >> level) & (branch_factor - 1)) == 0); - level += branch_bits, offset <<= branch_bits) - ; - - // Now go down from there - for (level -= branch_bits, offset >>= branch_bits; offset != 0; - level -= branch_bits, offset >>= branch_bits) -#else - // Go down the tree, level by level - for (level = 0, offset = 1; offset < num_threads; - level += branch_bits, offset <<= branch_bits) -#endif // KMP_REVERSE_HYPER_BAR - { -#ifdef KMP_REVERSE_HYPER_BAR - /* Now go in reverse order through the children, highest to lowest. - Initial setting of child is conservative here. */ - child = num_threads >> ((level == 0) ? level : level - 1); - for (child = (child < branch_factor - 1) ? 
child : branch_factor - 1, - child_tid = tid + (child << level); - child >= 1; child--, child_tid -= (1 << level)) -#else - if (((tid >> level) & (branch_factor - 1)) != 0) - // No need to go lower than this, since this is the level parent would be - // notified - break; - // Iterate through children on this level of the tree - for (child = 1, child_tid = tid + (1 << level); - child < branch_factor && child_tid < num_threads; - child++, child_tid += (1 << level)) -#endif // KMP_REVERSE_HYPER_BAR - { - if (child_tid >= num_threads) - continue; // Child doesn't exist so keep going - else { - kmp_info_t *child_thr = other_threads[child_tid]; - kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; -#if KMP_CACHE_MANAGE - kmp_uint32 next_child_tid = child_tid - (1 << level); -// Prefetch next thread's go count -#ifdef KMP_REVERSE_HYPER_BAR - if (child - 1 >= 1 && next_child_tid < num_threads) -#else - if (child + 1 < branch_factor && next_child_tid < num_threads) -#endif // KMP_REVERSE_HYPER_BAR - KMP_CACHE_PREFETCH( - &other_threads[next_child_tid]->th.th_bar[bt].bb.b_go); -#endif /* KMP_CACHE_MANAGE */ - -#if KMP_BARRIER_ICV_PUSH - if (propagate_icvs) // push my fixed ICVs to my child - copy_icvs(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs); -#endif // KMP_BARRIER_ICV_PUSH - - KA_TRACE( - 20, - ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" - "go(%p): %u => %u\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), - team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go, - child_bar->b_go + KMP_BARRIER_STATE_BUMP)); - // Release child from barrier - ANNOTATE_BARRIER_BEGIN(child_thr); - kmp_flag_64 flag(&child_bar->b_go, child_thr); - flag.release(); - } - } - } -#if KMP_BARRIER_ICV_PUSH - if (propagate_icvs && - !KMP_MASTER_TID(tid)) { // copy ICVs locally to final dest - __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, - FALSE); - copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, - &thr_bar->th_fixed_icvs); - } -#endif - KA_TRACE( - 20, - ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); -} - -// Hierarchical Barrier - -// Initialize thread barrier data -/* Initializes/re-initializes the hierarchical barrier data stored on a thread. - Performs the minimum amount of initialization required based on how the team - has changed. Returns true if leaf children will require both on-core and - traditional wake-up mechanisms. For example, if the team size increases, - threads already in the team will respond to on-core wakeup on their parent - thread, but threads newly added to the team will only be listening on the - their local b_go. 
*/ -static bool __kmp_init_hierarchical_barrier_thread(enum barrier_type bt, - kmp_bstate_t *thr_bar, - kmp_uint32 nproc, int gtid, - int tid, kmp_team_t *team) { - // Checks to determine if (re-)initialization is needed - bool uninitialized = thr_bar->team == NULL; - bool team_changed = team != thr_bar->team; - bool team_sz_changed = nproc != thr_bar->nproc; - bool tid_changed = tid != thr_bar->old_tid; - bool retval = false; - - if (uninitialized || team_sz_changed) { - __kmp_get_hierarchy(nproc, thr_bar); - } - - if (uninitialized || team_sz_changed || tid_changed) { - thr_bar->my_level = thr_bar->depth - 1; // default for master - thr_bar->parent_tid = -1; // default for master - if (!KMP_MASTER_TID( - tid)) { // if not master, find parent thread in hierarchy - kmp_uint32 d = 0; - while (d < thr_bar->depth) { // find parent based on level of thread in - // hierarchy, and note level - kmp_uint32 rem; - if (d == thr_bar->depth - 2) { // reached level right below the master - thr_bar->parent_tid = 0; - thr_bar->my_level = d; - break; - } else if ((rem = tid % thr_bar->skip_per_level[d + 1]) != - 0) { // TODO: can we make this op faster? - // thread is not a subtree root at next level, so this is max - thr_bar->parent_tid = tid - rem; - thr_bar->my_level = d; - break; - } - ++d; - } - } - thr_bar->offset = 7 - (tid - thr_bar->parent_tid - 1); - thr_bar->old_tid = tid; - thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING; - thr_bar->team = team; - thr_bar->parent_bar = - &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb; - } - if (uninitialized || team_changed || tid_changed) { - thr_bar->team = team; - thr_bar->parent_bar = - &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb; - retval = true; - } - if (uninitialized || team_sz_changed || tid_changed) { - thr_bar->nproc = nproc; - thr_bar->leaf_kids = thr_bar->base_leaf_kids; - if (thr_bar->my_level == 0) - thr_bar->leaf_kids = 0; - if (thr_bar->leaf_kids && (kmp_uint32)tid + thr_bar->leaf_kids + 1 > nproc) - thr_bar->leaf_kids = nproc - tid - 1; - thr_bar->leaf_state = 0; - for (int i = 0; i < thr_bar->leaf_kids; ++i) - ((char *)&(thr_bar->leaf_state))[7 - i] = 1; - } - return retval; -} - -static void __kmp_hierarchical_barrier_gather( - enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, - void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) { - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_gather); - kmp_team_t *team = this_thr->th.th_team; - kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; - kmp_uint32 nproc = this_thr->th.th_team_nproc; - kmp_info_t **other_threads = team->t.t_threads; - kmp_uint64 new_state; - - int level = team->t.t_level; -#if OMP_40_ENABLED - if (other_threads[0] - ->th.th_teams_microtask) // are we inside the teams construct? 
- if (this_thr->th.th_teams_size.nteams > 1) - ++level; // level was not increased in teams construct for team_of_masters -#endif - if (level == 1) - thr_bar->use_oncore_barrier = 1; - else - thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested - - KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for " - "barrier type %d\n", - gtid, team->t.t_id, tid, bt)); - KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]); - -#if USE_ITT_BUILD && USE_ITT_NOTIFY - // Barrier imbalance - save arrive time to the thread - if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { - this_thr->th.th_bar_arrive_time = __itt_get_timestamp(); - } -#endif - - (void)__kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid, - team); - - if (thr_bar->my_level) { // not a leaf (my_level==0 means leaf) - kmp_int32 child_tid; - new_state = - (kmp_uint64)team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP; - if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && - thr_bar->use_oncore_barrier) { - if (thr_bar->leaf_kids) { - // First, wait for leaf children to check-in on my b_arrived flag - kmp_uint64 leaf_state = - KMP_MASTER_TID(tid) - ? thr_bar->b_arrived | thr_bar->leaf_state - : team->t.t_bar[bt].b_arrived | thr_bar->leaf_state; - KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting " - "for leaf kids\n", - gtid, team->t.t_id, tid)); - kmp_flag_64 flag(&thr_bar->b_arrived, leaf_state); - flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); - if (reduce) { - ANNOTATE_REDUCE_AFTER(reduce); - for (child_tid = tid + 1; child_tid <= tid + thr_bar->leaf_kids; - ++child_tid) { - KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " - "T#%d(%d:%d)\n", - gtid, team->t.t_id, tid, - __kmp_gtid_from_tid(child_tid, team), team->t.t_id, - child_tid)); - ANNOTATE_BARRIER_END(other_threads[child_tid]); - (*reduce)(this_thr->th.th_local.reduce_data, - other_threads[child_tid]->th.th_local.reduce_data); - } - ANNOTATE_REDUCE_BEFORE(reduce); - ANNOTATE_REDUCE_BEFORE(&team->t.t_bar); - } - // clear leaf_state bits - KMP_TEST_THEN_AND64(&thr_bar->b_arrived, ~(thr_bar->leaf_state)); - } - // Next, wait for higher level children on each child's b_arrived flag - for (kmp_uint32 d = 1; d < thr_bar->my_level; - ++d) { // gather lowest level threads first, but skip 0 - kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1], - skip = thr_bar->skip_per_level[d]; - if (last > nproc) - last = nproc; - for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) { - kmp_info_t *child_thr = other_threads[child_tid]; - kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; - KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait " - "T#%d(%d:%d) " - "arrived(%p) == %llu\n", - gtid, team->t.t_id, tid, - __kmp_gtid_from_tid(child_tid, team), team->t.t_id, - child_tid, &child_bar->b_arrived, new_state)); - kmp_flag_64 flag(&child_bar->b_arrived, new_state); - flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); - ANNOTATE_BARRIER_END(child_thr); - if (reduce) { - KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " - "T#%d(%d:%d)\n", - gtid, team->t.t_id, tid, - __kmp_gtid_from_tid(child_tid, team), team->t.t_id, - child_tid)); - ANNOTATE_REDUCE_AFTER(reduce); - (*reduce)(this_thr->th.th_local.reduce_data, - child_thr->th.th_local.reduce_data); - ANNOTATE_REDUCE_BEFORE(reduce); - ANNOTATE_REDUCE_BEFORE(&team->t.t_bar); - } - } - } - } else { // Blocktime is not infinite - for 
(kmp_uint32 d = 0; d < thr_bar->my_level; - ++d) { // Gather lowest level threads first - kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1], - skip = thr_bar->skip_per_level[d]; - if (last > nproc) - last = nproc; - for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) { - kmp_info_t *child_thr = other_threads[child_tid]; - kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; - KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait " - "T#%d(%d:%d) " - "arrived(%p) == %llu\n", - gtid, team->t.t_id, tid, - __kmp_gtid_from_tid(child_tid, team), team->t.t_id, - child_tid, &child_bar->b_arrived, new_state)); - kmp_flag_64 flag(&child_bar->b_arrived, new_state); - flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); - ANNOTATE_BARRIER_END(child_thr); - if (reduce) { - KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " - "T#%d(%d:%d)\n", - gtid, team->t.t_id, tid, - __kmp_gtid_from_tid(child_tid, team), team->t.t_id, - child_tid)); - ANNOTATE_REDUCE_AFTER(reduce); - (*reduce)(this_thr->th.th_local.reduce_data, - child_thr->th.th_local.reduce_data); - ANNOTATE_REDUCE_BEFORE(reduce); - ANNOTATE_REDUCE_BEFORE(&team->t.t_bar); - } - } - } - } - } - // All subordinates are gathered; now release parent if not master thread - - if (!KMP_MASTER_TID(tid)) { // worker threads release parent in hierarchy - KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing" - " T#%d(%d:%d) arrived(%p): %llu => %llu\n", - gtid, team->t.t_id, tid, - __kmp_gtid_from_tid(thr_bar->parent_tid, team), team->t.t_id, - thr_bar->parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived, - thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP)); - /* Mark arrival to parent: After performing this write, a worker thread may - not assume that the team is valid any more - it could be deallocated by - the master thread at any time. */ - if (thr_bar->my_level || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME || - !thr_bar->use_oncore_barrier) { // Parent is waiting on my b_arrived - // flag; release it - ANNOTATE_BARRIER_BEGIN(this_thr); - kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[thr_bar->parent_tid]); - flag.release(); - } else { - // Leaf does special release on "offset" bits of parent's b_arrived flag - thr_bar->b_arrived = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP; - kmp_flag_oncore flag(&thr_bar->parent_bar->b_arrived, thr_bar->offset); - flag.set_waiter(other_threads[thr_bar->parent_tid]); - flag.release(); - } - } else { // Master thread needs to update the team's b_arrived value - team->t.t_bar[bt].b_arrived = new_state; - KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d " - "arrived(%p) = %llu\n", - gtid, team->t.t_id, tid, team->t.t_id, - &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived)); - } - // Is the team access below unsafe or just technically invalid? 
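The distinctive trick in the hierarchical gather above is the leaf check-in: up to eight on-core leaf children each write a single byte of the parent's 64-bit b_arrived word (the offset = 7 - (tid - parent_tid - 1) computation), so the parent waits for all of them with one flag comparison and clears them with one KMP_TEST_THEN_AND64. A standalone model of that byte-packing; the shift arithmetic matches the runtime's byte pokes (((char *)&leaf_state)[7 - i] = 1) on the little-endian targets the on-core path targets:

#include <atomic>
#include <cstdint>
#include <cstdio>

int main() {
  std::atomic<std::uint64_t> b_arrived{0};
  const int leaf_kids = 3;

  // Parent precomputes a mask with one flag byte per leaf child.
  std::uint64_t leaf_state = 0;
  for (int i = 0; i < leaf_kids; ++i)
    leaf_state |= std::uint64_t(1) << (8 * (7 - i));

  // Each leaf's release: set our own byte of the parent's word (the
  // kmp_flag_oncore "offset" write).
  for (int i = 0; i < leaf_kids; ++i)
    b_arrived.fetch_or(std::uint64_t(1) << (8 * (7 - i)),
                       std::memory_order_release);

  // Parent's wait condition, then the AND that clears the leaf bits.
  bool all_in = (b_arrived.load(std::memory_order_acquire) & leaf_state) ==
                leaf_state;
  b_arrived.fetch_and(~leaf_state, std::memory_order_relaxed);
  std::printf("all %d leaves arrived: %s\n", leaf_kids,
              all_in ? "yes" : "no");
  return all_in ? 0 : 1;
}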
- KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for " - "barrier type %d\n", - gtid, team->t.t_id, tid, bt)); -} - -static void __kmp_hierarchical_barrier_release( - enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, - int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) { - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_release); - kmp_team_t *team; - kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; - kmp_uint32 nproc; - bool team_change = false; // indicates on-core barrier shouldn't be used - - if (KMP_MASTER_TID(tid)) { - team = __kmp_threads[gtid]->th.th_team; - KMP_DEBUG_ASSERT(team != NULL); - KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) master " - "entered barrier type %d\n", - gtid, team->t.t_id, tid, bt)); - } else { // Worker threads - // Wait for parent thread to release me - if (!thr_bar->use_oncore_barrier || - __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME || thr_bar->my_level != 0 || - thr_bar->team == NULL) { - // Use traditional method of waiting on my own b_go flag - thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG; - kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); - flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); - ANNOTATE_BARRIER_END(this_thr); - TCW_8(thr_bar->b_go, - KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time - } else { // Thread barrier data is initialized, this is a leaf, blocktime is - // infinite, not nested - // Wait on my "offset" bits on parent's b_go flag - thr_bar->wait_flag = KMP_BARRIER_PARENT_FLAG; - kmp_flag_oncore flag(&thr_bar->parent_bar->b_go, KMP_BARRIER_STATE_BUMP, - thr_bar->offset, bt, - this_thr USE_ITT_BUILD_ARG(itt_sync_obj)); - flag.wait(this_thr, TRUE); - if (thr_bar->wait_flag == - KMP_BARRIER_SWITCHING) { // Thread was switched to own b_go - TCW_8(thr_bar->b_go, - KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time - } else { // Reset my bits on parent's b_go flag - (RCAST(volatile char *, - &(thr_bar->parent_bar->b_go)))[thr_bar->offset] = 0; - } - } - thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING; - // Early exit for reaping threads releasing forkjoin barrier - if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) - return; - // The worker thread may now assume that the team is valid. - team = __kmp_threads[gtid]->th.th_team; - KMP_DEBUG_ASSERT(team != NULL); - tid = __kmp_tid_from_gtid(gtid); - - KA_TRACE( - 20, - ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", - gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE)); - KMP_MB(); // Flush all pending memory write invalidates. - } - - nproc = this_thr->th.th_team_nproc; - int level = team->t.t_level; -#if OMP_40_ENABLED - if (team->t.t_threads[0] - ->th.th_teams_microtask) { // are we inside the teams construct? - if (team->t.t_pkfn != (microtask_t)__kmp_teams_master && - this_thr->th.th_teams_level == level) - ++level; // level was not increased in teams construct for team_of_workers - if (this_thr->th.th_teams_size.nteams > 1) - ++level; // level was not increased in teams construct for team_of_masters - } -#endif - if (level == 1) - thr_bar->use_oncore_barrier = 1; - else - thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested - - // If the team size has increased, we still communicate with old leaves via - // oncore barrier. 
- unsigned short int old_leaf_kids = thr_bar->leaf_kids; - kmp_uint64 old_leaf_state = thr_bar->leaf_state; - team_change = __kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, - tid, team); - // But if the entire team changes, we won't use oncore barrier at all - if (team_change) - old_leaf_kids = 0; - -#if KMP_BARRIER_ICV_PUSH - if (propagate_icvs) { - __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, - FALSE); - if (KMP_MASTER_TID( - tid)) { // master already has copy in final destination; copy - copy_icvs(&thr_bar->th_fixed_icvs, - &team->t.t_implicit_task_taskdata[tid].td_icvs); - } else if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && - thr_bar->use_oncore_barrier) { // optimization for inf blocktime - if (!thr_bar->my_level) // I'm a leaf in the hierarchy (my_level==0) - // leaves (on-core children) pull parent's fixed ICVs directly to local - // ICV store - copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, - &thr_bar->parent_bar->th_fixed_icvs); - // non-leaves will get ICVs piggybacked with b_go via NGO store - } else { // blocktime is not infinite; pull ICVs from parent's fixed ICVs - if (thr_bar->my_level) // not a leaf; copy ICVs to my fixed ICVs child can - // access - copy_icvs(&thr_bar->th_fixed_icvs, &thr_bar->parent_bar->th_fixed_icvs); - else // leaves copy parent's fixed ICVs directly to local ICV store - copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, - &thr_bar->parent_bar->th_fixed_icvs); - } - } -#endif // KMP_BARRIER_ICV_PUSH - - // Now, release my children - if (thr_bar->my_level) { // not a leaf - kmp_int32 child_tid; - kmp_uint32 last; - if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && - thr_bar->use_oncore_barrier) { - if (KMP_MASTER_TID(tid)) { // do a flat release - // Set local b_go to bump children via NGO store of the cache line - // containing ICVs and b_go. - thr_bar->b_go = KMP_BARRIER_STATE_BUMP; - // Use ngo stores if available; b_go piggybacks in the last 8 bytes of - // the cache line - ngo_load(&thr_bar->th_fixed_icvs); - // This loops over all the threads skipping only the leaf nodes in the - // hierarchy - for (child_tid = thr_bar->skip_per_level[1]; child_tid < (int)nproc; - child_tid += thr_bar->skip_per_level[1]) { - kmp_bstate_t *child_bar = - &team->t.t_threads[child_tid]->th.th_bar[bt].bb; - KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " - "releasing T#%d(%d:%d)" - " go(%p): %u => %u\n", - gtid, team->t.t_id, tid, - __kmp_gtid_from_tid(child_tid, team), team->t.t_id, - child_tid, &child_bar->b_go, child_bar->b_go, - child_bar->b_go + KMP_BARRIER_STATE_BUMP)); - // Use ngo store (if available) to both store ICVs and release child - // via child's b_go - ngo_store_go(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs); - } - ngo_sync(); - } - TCW_8(thr_bar->b_go, - KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time - // Now, release leaf children - if (thr_bar->leaf_kids) { // if there are any - // We test team_change on the off-chance that the level 1 team changed. 
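// A minimal sketch of the single-store fast path taken below when no leaves
// changed (the byte values are hypothetical):
//
//   // leaf_state holds one nonzero byte per leaf child of this thread
//   thr_bar->b_go |= thr_bar->leaf_state; // one store releases every leaf
//
// versus one kmp_flag_64 release per child on the mixed old/new path.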
- if (team_change || - old_leaf_kids < thr_bar->leaf_kids) { // some old, some new - if (old_leaf_kids) { // release old leaf kids - thr_bar->b_go |= old_leaf_state; - } - // Release new leaf kids - last = tid + thr_bar->skip_per_level[1]; - if (last > nproc) - last = nproc; - for (child_tid = tid + 1 + old_leaf_kids; child_tid < (int)last; - ++child_tid) { // skip_per_level[0]=1 - kmp_info_t *child_thr = team->t.t_threads[child_tid]; - kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; - KA_TRACE( - 20, - ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing" - " T#%d(%d:%d) go(%p): %u => %u\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), - team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go, - child_bar->b_go + KMP_BARRIER_STATE_BUMP)); - // Release child using child's b_go flag - ANNOTATE_BARRIER_BEGIN(child_thr); - kmp_flag_64 flag(&child_bar->b_go, child_thr); - flag.release(); - } - } else { // Release all children at once with leaf_state bits on my own - // b_go flag - thr_bar->b_go |= thr_bar->leaf_state; - } - } - } else { // Blocktime is not infinite; do a simple hierarchical release - for (int d = thr_bar->my_level - 1; d >= 0; - --d) { // Release highest level threads first - last = tid + thr_bar->skip_per_level[d + 1]; - kmp_uint32 skip = thr_bar->skip_per_level[d]; - if (last > nproc) - last = nproc; - for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) { - kmp_info_t *child_thr = team->t.t_threads[child_tid]; - kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; - KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " - "releasing T#%d(%d:%d) go(%p): %u => %u\n", - gtid, team->t.t_id, tid, - __kmp_gtid_from_tid(child_tid, team), team->t.t_id, - child_tid, &child_bar->b_go, child_bar->b_go, - child_bar->b_go + KMP_BARRIER_STATE_BUMP)); - // Release child using child's b_go flag - ANNOTATE_BARRIER_BEGIN(child_thr); - kmp_flag_64 flag(&child_bar->b_go, child_thr); - flag.release(); - } - } - } -#if KMP_BARRIER_ICV_PUSH - if (propagate_icvs && !KMP_MASTER_TID(tid)) - // non-leaves copy ICVs from fixed ICVs to local dest - copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, - &thr_bar->th_fixed_icvs); -#endif // KMP_BARRIER_ICV_PUSH - } - KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for " - "barrier type %d\n", - gtid, team->t.t_id, tid, bt)); -} - -// End of Barrier Algorithms - -// Internal function to do a barrier. -/* If is_split is true, do a split barrier, otherwise, do a plain barrier - If reduce is non-NULL, do a split reduction barrier, otherwise, do a split - barrier - Returns 0 if master thread, 1 if worker thread. 
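   A hedged usage sketch (simplified, names as used in this file): the split
   form lets the master run code between the gather and release phases, e.g.
       if (__kmp_barrier(bs_plain_barrier, gtid, TRUE, size, data, func) == 0) {
         // master: finish the reduction while the workers are still held
         __kmp_end_split_barrier(bs_plain_barrier, gtid);
       }
   which is roughly the shape driven by the __kmpc_reduce/__kmpc_end_reduce
   entry points.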
*/ -int __kmp_barrier(enum barrier_type bt, int gtid, int is_split, - size_t reduce_size, void *reduce_data, - void (*reduce)(void *, void *)) { - KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier); - KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER); - int tid = __kmp_tid_from_gtid(gtid); - kmp_info_t *this_thr = __kmp_threads[gtid]; - kmp_team_t *team = this_thr->th.th_team; - int status = 0; -#if OMPT_SUPPORT && OMPT_OPTIONAL - ompt_data_t *my_task_data; - ompt_data_t *my_parallel_data; - void *return_address; -#endif - - KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) has arrived\n", gtid, - __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid))); - - ANNOTATE_BARRIER_BEGIN(&team->t.t_bar); -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { -#if OMPT_OPTIONAL - my_task_data = OMPT_CUR_TASK_DATA(this_thr); - my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr); - return_address = OMPT_LOAD_RETURN_ADDRESS(gtid); - if (ompt_enabled.ompt_callback_sync_region) { - ompt_callbacks.ompt_callback(ompt_callback_sync_region)( - ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data, - my_task_data, return_address); - } - if (ompt_enabled.ompt_callback_sync_region_wait) { - ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( - ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data, - my_task_data, return_address); - } -#endif - // It is OK to report the barrier state after the barrier begin callback. - // According to the OMPT specification, a compliant implementation may - // even delay reporting this state until the barrier begins to wait. - this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier; - } -#endif - - if (!team->t.t_serialized) { -#if USE_ITT_BUILD - // This value will be used in itt notify events below. - void *itt_sync_obj = NULL; -#if USE_ITT_NOTIFY - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) - itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1); -#endif -#endif /* USE_ITT_BUILD */ - if (__kmp_tasking_mode == tskm_extra_barrier) { - __kmp_tasking_barrier(team, this_thr, gtid); - KA_TRACE(15, - ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n", gtid, - __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid))); - } - - /* Copy the blocktime info to the thread, where __kmp_wait_template() can - access it when the team struct is not guaranteed to exist. */ - // See note about the corresponding code in __kmp_join_barrier() being - // performance-critical. - if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { -#if KMP_USE_MONITOR - this_thr->th.th_team_bt_intervals = - team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals; - this_thr->th.th_team_bt_set = - team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set; -#else - this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid); -#endif - } - -#if USE_ITT_BUILD - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) - __kmp_itt_barrier_starting(gtid, itt_sync_obj); -#endif /* USE_ITT_BUILD */ -#if USE_DEBUGGER - // Let the debugger know: the thread arrived to the barrier and waiting. - if (KMP_MASTER_TID(tid)) { // Master counter is stored in team structure. 
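 // (These counters exist solely so an attached debugger can observe barrier
 // progress; the runtime itself only ever increments them.)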
- team->t.t_bar[bt].b_master_arrived += 1; - } else { - this_thr->th.th_bar[bt].bb.b_worker_arrived += 1; - } // if -#endif /* USE_DEBUGGER */ - if (reduce != NULL) { - // KMP_DEBUG_ASSERT( is_split == TRUE ); // #C69956 - this_thr->th.th_local.reduce_data = reduce_data; - } - - if (KMP_MASTER_TID(tid) && __kmp_tasking_mode != tskm_immediate_exec) - __kmp_task_team_setup( - this_thr, team, - 0); // use 0 to only setup the current team if nthreads > 1 - - switch (__kmp_barrier_gather_pattern[bt]) { - case bp_hyper_bar: { - KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); // don't set branch bits - // to 0; use linear - __kmp_hyper_barrier_gather(bt, this_thr, gtid, tid, - reduce USE_ITT_BUILD_ARG(itt_sync_obj)); - break; - } - case bp_hierarchical_bar: { - __kmp_hierarchical_barrier_gather(bt, this_thr, gtid, tid, - reduce USE_ITT_BUILD_ARG(itt_sync_obj)); - break; - } - case bp_tree_bar: { - KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); // don't set branch bits - // to 0; use linear - __kmp_tree_barrier_gather(bt, this_thr, gtid, tid, - reduce USE_ITT_BUILD_ARG(itt_sync_obj)); - break; - } - default: { - __kmp_linear_barrier_gather(bt, this_thr, gtid, tid, - reduce USE_ITT_BUILD_ARG(itt_sync_obj)); - } - } - - KMP_MB(); - - if (KMP_MASTER_TID(tid)) { - status = 0; - if (__kmp_tasking_mode != tskm_immediate_exec) { - __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj)); - } -#if USE_DEBUGGER - // Let the debugger know: All threads have arrived and are starting to - // leave the barrier. - team->t.t_bar[bt].b_team_arrived += 1; -#endif - -#if OMP_40_ENABLED - kmp_int32 cancel_request = KMP_ATOMIC_LD_RLX(&team->t.t_cancel_request); - // Reset cancellation flag for worksharing constructs - if (cancel_request == cancel_loop || cancel_request == cancel_sections) { - KMP_ATOMIC_ST_RLX(&team->t.t_cancel_request, cancel_noreq); - } -#endif -#if USE_ITT_BUILD - /* TODO: In case of split reduction barrier, master thread may send - acquired event early, before the final summation into the shared - variable is done (final summation can be a long operation for array - reductions). 
*/ - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) - __kmp_itt_barrier_middle(gtid, itt_sync_obj); -#endif /* USE_ITT_BUILD */ -#if USE_ITT_BUILD && USE_ITT_NOTIFY - // Barrier - report frame end (only if active_level == 1) - if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && - __kmp_forkjoin_frames_mode && -#if OMP_40_ENABLED - this_thr->th.th_teams_microtask == NULL && -#endif - team->t.t_active_level == 1) { - ident_t *loc = __kmp_threads[gtid]->th.th_ident; - kmp_uint64 cur_time = __itt_get_timestamp(); - kmp_info_t **other_threads = team->t.t_threads; - int nproc = this_thr->th.th_team_nproc; - int i; - switch (__kmp_forkjoin_frames_mode) { - case 1: - __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, - loc, nproc); - this_thr->th.th_frame_time = cur_time; - break; - case 2: // AC 2015-01-19: currently does not work for hierarchical (to - // be fixed) - __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, - 1, loc, nproc); - break; - case 3: - if (__itt_metadata_add_ptr) { - // Initialize with master's wait time - kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time; - // Set arrive time to zero to be able to check it in - // __kmp_invoke_task(); the same is done inside the loop below - this_thr->th.th_bar_arrive_time = 0; - for (i = 1; i < nproc; ++i) { - delta += (cur_time - other_threads[i]->th.th_bar_arrive_time); - other_threads[i]->th.th_bar_arrive_time = 0; - } - __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time, - cur_time, delta, - (kmp_uint64)(reduce != NULL)); - } - __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, - loc, nproc); - this_thr->th.th_frame_time = cur_time; - break; - } - } -#endif /* USE_ITT_BUILD */ - } else { - status = 1; -#if USE_ITT_BUILD - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) - __kmp_itt_barrier_middle(gtid, itt_sync_obj); -#endif /* USE_ITT_BUILD */ - } - if (status == 1 || !is_split) { - switch (__kmp_barrier_release_pattern[bt]) { - case bp_hyper_bar: { - KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]); - __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, - FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); - break; - } - case bp_hierarchical_bar: { - __kmp_hierarchical_barrier_release( - bt, this_thr, gtid, tid, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); - break; - } - case bp_tree_bar: { - KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]); - __kmp_tree_barrier_release(bt, this_thr, gtid, tid, - FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); - break; - } - default: { - __kmp_linear_barrier_release(bt, this_thr, gtid, tid, - FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); - } - } - if (__kmp_tasking_mode != tskm_immediate_exec) { - __kmp_task_team_sync(this_thr, team); - } - } - -#if USE_ITT_BUILD - /* GEH: TODO: Move this under if-condition above and also include in - __kmp_end_split_barrier(). This will more accurately represent the actual - release time of the threads for split barriers. */ - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) - __kmp_itt_barrier_finished(gtid, itt_sync_obj); -#endif /* USE_ITT_BUILD */ - } else { // Team is serialized. 
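    // Serialized path: the region never forked (team of one, or nested
    // parallelism disabled), so no gather/release is required; the only real
    // work below is draining proxy tasks that may still be in flight under
    // OMP 4.5.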
- status = 0; - if (__kmp_tasking_mode != tskm_immediate_exec) { -#if OMP_45_ENABLED - if (this_thr->th.th_task_team != NULL) { -#if USE_ITT_NOTIFY - void *itt_sync_obj = NULL; - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) { - itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1); - __kmp_itt_barrier_starting(gtid, itt_sync_obj); - } -#endif - - KMP_DEBUG_ASSERT(this_thr->th.th_task_team->tt.tt_found_proxy_tasks == - TRUE); - __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj)); - __kmp_task_team_setup(this_thr, team, 0); - -#if USE_ITT_BUILD - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) - __kmp_itt_barrier_finished(gtid, itt_sync_obj); -#endif /* USE_ITT_BUILD */ - } -#else - // The task team should be NULL for serialized code (tasks will be - // executed immediately) - KMP_DEBUG_ASSERT(team->t.t_task_team[this_thr->th.th_task_state] == NULL); - KMP_DEBUG_ASSERT(this_thr->th.th_task_team == NULL); -#endif - } - } - KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n", - gtid, __kmp_team_from_gtid(gtid)->t.t_id, - __kmp_tid_from_gtid(gtid), status)); - -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { -#if OMPT_OPTIONAL - if (ompt_enabled.ompt_callback_sync_region_wait) { - ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( - ompt_sync_region_barrier, ompt_scope_end, my_parallel_data, - my_task_data, return_address); - } - if (ompt_enabled.ompt_callback_sync_region) { - ompt_callbacks.ompt_callback(ompt_callback_sync_region)( - ompt_sync_region_barrier, ompt_scope_end, my_parallel_data, - my_task_data, return_address); - } -#endif - this_thr->th.ompt_thread_info.state = ompt_state_work_parallel; - } -#endif - ANNOTATE_BARRIER_END(&team->t.t_bar); - - return status; -} - -void __kmp_end_split_barrier(enum barrier_type bt, int gtid) { - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_end_split_barrier); - KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER); - int tid = __kmp_tid_from_gtid(gtid); - kmp_info_t *this_thr = __kmp_threads[gtid]; - kmp_team_t *team = this_thr->th.th_team; - - ANNOTATE_BARRIER_BEGIN(&team->t.t_bar); - if (!team->t.t_serialized) { - if (KMP_MASTER_GTID(gtid)) { - switch (__kmp_barrier_release_pattern[bt]) { - case bp_hyper_bar: { - KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]); - __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, - FALSE USE_ITT_BUILD_ARG(NULL)); - break; - } - case bp_hierarchical_bar: { - __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid, - FALSE USE_ITT_BUILD_ARG(NULL)); - break; - } - case bp_tree_bar: { - KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]); - __kmp_tree_barrier_release(bt, this_thr, gtid, tid, - FALSE USE_ITT_BUILD_ARG(NULL)); - break; - } - default: { - __kmp_linear_barrier_release(bt, this_thr, gtid, tid, - FALSE USE_ITT_BUILD_ARG(NULL)); - } - } - if (__kmp_tasking_mode != tskm_immediate_exec) { - __kmp_task_team_sync(this_thr, team); - } // if - } - } - ANNOTATE_BARRIER_END(&team->t.t_bar); -} - -void __kmp_join_barrier(int gtid) { - KMP_TIME_PARTITIONED_BLOCK(OMP_join_barrier); - KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER); - kmp_info_t *this_thr = __kmp_threads[gtid]; - kmp_team_t *team; - kmp_uint nproc; - kmp_info_t *master_thread; - int tid; -#ifdef KMP_DEBUG - int team_id; -#endif /* KMP_DEBUG */ -#if USE_ITT_BUILD - void *itt_sync_obj = NULL; -#if USE_ITT_NOTIFY - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) // Don't call routine without need - // Get object created at fork_barrier - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); -#endif -#endif /* 
USE_ITT_BUILD */ - KMP_MB(); - - // Get current info - team = this_thr->th.th_team; - nproc = this_thr->th.th_team_nproc; - KMP_DEBUG_ASSERT((int)nproc == team->t.t_nproc); - tid = __kmp_tid_from_gtid(gtid); -#ifdef KMP_DEBUG - team_id = team->t.t_id; -#endif /* KMP_DEBUG */ - master_thread = this_thr->th.th_team_master; -#ifdef KMP_DEBUG - if (master_thread != team->t.t_threads[0]) { - __kmp_print_structure(); - } -#endif /* KMP_DEBUG */ - KMP_DEBUG_ASSERT(master_thread == team->t.t_threads[0]); - KMP_MB(); - - // Verify state - KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]); - KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_team)); - KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_root)); - KMP_DEBUG_ASSERT(this_thr == team->t.t_threads[tid]); - KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n", - gtid, team_id, tid)); - - ANNOTATE_BARRIER_BEGIN(&team->t.t_bar); -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { -#if OMPT_OPTIONAL - ompt_data_t *my_task_data; - ompt_data_t *my_parallel_data; - void *codeptr = NULL; - int ds_tid = this_thr->th.th_info.ds.ds_tid; - if (KMP_MASTER_TID(ds_tid) && - (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) || - ompt_callbacks.ompt_callback(ompt_callback_sync_region))) - codeptr = team->t.ompt_team_info.master_return_address; - my_task_data = OMPT_CUR_TASK_DATA(this_thr); - my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr); - if (ompt_enabled.ompt_callback_sync_region) { - ompt_callbacks.ompt_callback(ompt_callback_sync_region)( - ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data, - my_task_data, codeptr); - } - if (ompt_enabled.ompt_callback_sync_region_wait) { - ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( - ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data, - my_task_data, codeptr); - } - if (!KMP_MASTER_TID(ds_tid)) - this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr); -#endif - this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier_implicit; - } -#endif - - if (__kmp_tasking_mode == tskm_extra_barrier) { - __kmp_tasking_barrier(team, this_thr, gtid); - KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n", gtid, - team_id, tid)); - } -#ifdef KMP_DEBUG - if (__kmp_tasking_mode != tskm_immediate_exec) { - KA_TRACE(20, ("__kmp_join_barrier: T#%d, old team = %d, old task_team = " - "%p, th_task_team = %p\n", - __kmp_gtid_from_thread(this_thr), team_id, - team->t.t_task_team[this_thr->th.th_task_state], - this_thr->th.th_task_team)); - KMP_DEBUG_ASSERT(this_thr->th.th_task_team == - team->t.t_task_team[this_thr->th.th_task_state]); - } -#endif /* KMP_DEBUG */ - - /* Copy the blocktime info to the thread, where __kmp_wait_template() can - access it when the team struct is not guaranteed to exist. Doing these - loads causes a cache miss that slows down EPCC parallel by 2x. As a - workaround, we do not perform the copy if blocktime=infinite, since the - values are not used by __kmp_wait_template() in that case. 
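   (For scale: blocktime is expressed in milliseconds, with a default on the
   order of 200 ms; KMP_MAX_BLOCKTIME denotes "infinite", meaning threads spin
   without sleeping, so the cached values would never be consulted.)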
*/ - if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { -#if KMP_USE_MONITOR - this_thr->th.th_team_bt_intervals = - team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals; - this_thr->th.th_team_bt_set = - team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set; -#else - this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid); -#endif - } - -#if USE_ITT_BUILD - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) - __kmp_itt_barrier_starting(gtid, itt_sync_obj); -#endif /* USE_ITT_BUILD */ - - switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) { - case bp_hyper_bar: { - KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]); - __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, - NULL USE_ITT_BUILD_ARG(itt_sync_obj)); - break; - } - case bp_hierarchical_bar: { - __kmp_hierarchical_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, - NULL USE_ITT_BUILD_ARG(itt_sync_obj)); - break; - } - case bp_tree_bar: { - KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]); - __kmp_tree_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, - NULL USE_ITT_BUILD_ARG(itt_sync_obj)); - break; - } - default: { - __kmp_linear_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, - NULL USE_ITT_BUILD_ARG(itt_sync_obj)); - } - } - - /* From this point on, the team data structure may be deallocated at any time - by the master thread - it is unsafe to reference it in any of the worker - threads. Any per-team data items that need to be referenced before the - end of the barrier should be moved to the kmp_task_team_t structs. */ - if (KMP_MASTER_TID(tid)) { - if (__kmp_tasking_mode != tskm_immediate_exec) { - __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj)); - } -#if OMP_50_ENABLED - if (__kmp_display_affinity) { - KMP_CHECK_UPDATE(team->t.t_display_affinity, 0); - } -#endif -#if KMP_STATS_ENABLED - // Have master thread flag the workers to indicate they are now waiting for - // next parallel region. Also wake them up so they switch their timers to - // idle. 
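  // (Best-effort wakeup: a worker parked in __kmp_wait_template with a
  // registered th_sleep_loc is nudged only so its stats timer flips to idle
  // promptly; barrier correctness does not depend on this loop.)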
- for (int i = 0; i < team->t.t_nproc; ++i) { - kmp_info_t *team_thread = team->t.t_threads[i]; - if (team_thread == this_thr) - continue; - team_thread->th.th_stats->setIdleFlag(); - if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && - team_thread->th.th_sleep_loc != NULL) - __kmp_null_resume_wrapper(__kmp_gtid_from_thread(team_thread), - team_thread->th.th_sleep_loc); - } -#endif -#if USE_ITT_BUILD - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) - __kmp_itt_barrier_middle(gtid, itt_sync_obj); -#endif /* USE_ITT_BUILD */ - -#if USE_ITT_BUILD && USE_ITT_NOTIFY - // Join barrier - report frame end - if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && - __kmp_forkjoin_frames_mode && -#if OMP_40_ENABLED - this_thr->th.th_teams_microtask == NULL && -#endif - team->t.t_active_level == 1) { - kmp_uint64 cur_time = __itt_get_timestamp(); - ident_t *loc = team->t.t_ident; - kmp_info_t **other_threads = team->t.t_threads; - int nproc = this_thr->th.th_team_nproc; - int i; - switch (__kmp_forkjoin_frames_mode) { - case 1: - __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, - loc, nproc); - break; - case 2: - __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1, - loc, nproc); - break; - case 3: - if (__itt_metadata_add_ptr) { - // Initialize with master's wait time - kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time; - // Set arrive time to zero to be able to check it in - // __kmp_invoke_task(); the same is done inside the loop below - this_thr->th.th_bar_arrive_time = 0; - for (i = 1; i < nproc; ++i) { - delta += (cur_time - other_threads[i]->th.th_bar_arrive_time); - other_threads[i]->th.th_bar_arrive_time = 0; - } - __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time, - cur_time, delta, 0); - } - __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, - loc, nproc); - this_thr->th.th_frame_time = cur_time; - break; - } - } -#endif /* USE_ITT_BUILD */ - } -#if USE_ITT_BUILD - else { - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) - __kmp_itt_barrier_middle(gtid, itt_sync_obj); - } -#endif /* USE_ITT_BUILD */ - -#if KMP_DEBUG - if (KMP_MASTER_TID(tid)) { - KA_TRACE( - 15, - ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n", - gtid, team_id, tid, nproc)); - } -#endif /* KMP_DEBUG */ - - // TODO now, mark worker threads as done so they may be disbanded - KMP_MB(); // Flush all pending memory write invalidates. - KA_TRACE(10, - ("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid)); - - ANNOTATE_BARRIER_END(&team->t.t_bar); -} - -// TODO release worker threads' fork barriers as we are ready instead of all at -// once -void __kmp_fork_barrier(int gtid, int tid) { - KMP_TIME_PARTITIONED_BLOCK(OMP_fork_barrier); - KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER); - kmp_info_t *this_thr = __kmp_threads[gtid]; - kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL; -#if USE_ITT_BUILD - void *itt_sync_obj = NULL; -#endif /* USE_ITT_BUILD */ - if (team) - ANNOTATE_BARRIER_END(&team->t.t_bar); - - KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n", gtid, - (team != NULL) ? 
team->t.t_id : -1, tid)); - - // th_team pointer only valid for master thread here - if (KMP_MASTER_TID(tid)) { -#if USE_ITT_BUILD && USE_ITT_NOTIFY - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) { - // Create itt barrier object - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 1); - __kmp_itt_barrier_middle(gtid, itt_sync_obj); // Call acquired/releasing - } -#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ - -#ifdef KMP_DEBUG - kmp_info_t **other_threads = team->t.t_threads; - int i; - - // Verify state - KMP_MB(); - - for (i = 1; i < team->t.t_nproc; ++i) { - KA_TRACE(500, - ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go " - "== %u.\n", - gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid, - team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid, - other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go)); - KMP_DEBUG_ASSERT( - (TCR_4(other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) & - ~(KMP_BARRIER_SLEEP_STATE)) == KMP_INIT_BARRIER_STATE); - KMP_DEBUG_ASSERT(other_threads[i]->th.th_team == team); - } -#endif - - if (__kmp_tasking_mode != tskm_immediate_exec) { - // 0 indicates setup current task team if nthreads > 1 - __kmp_task_team_setup(this_thr, team, 0); - } - - /* The master thread may have changed its blocktime between the join barrier - and the fork barrier. Copy the blocktime info to the thread, where - __kmp_wait_template() can access it when the team struct is not - guaranteed to exist. */ - // See note about the corresponding code in __kmp_join_barrier() being - // performance-critical - if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { -#if KMP_USE_MONITOR - this_thr->th.th_team_bt_intervals = - team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals; - this_thr->th.th_team_bt_set = - team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set; -#else - this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid); -#endif - } - } // master - - switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) { - case bp_hyper_bar: { - KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]); - __kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, - TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); - break; - } - case bp_hierarchical_bar: { - __kmp_hierarchical_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, - TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); - break; - } - case bp_tree_bar: { - KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]); - __kmp_tree_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, - TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); - break; - } - default: { - __kmp_linear_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, - TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); - } - } - -#if OMPT_SUPPORT - if (ompt_enabled.enabled && - this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) { - int ds_tid = this_thr->th.th_info.ds.ds_tid; - ompt_data_t *task_data = (team) - ? 
OMPT_CUR_TASK_DATA(this_thr) - : &(this_thr->th.ompt_thread_info.task_data); - this_thr->th.ompt_thread_info.state = ompt_state_overhead; -#if OMPT_OPTIONAL - void *codeptr = NULL; - if (KMP_MASTER_TID(ds_tid) && - (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) || - ompt_callbacks.ompt_callback(ompt_callback_sync_region))) - codeptr = team->t.ompt_team_info.master_return_address; - if (ompt_enabled.ompt_callback_sync_region_wait) { - ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( - ompt_sync_region_barrier, ompt_scope_end, NULL, task_data, codeptr); - } - if (ompt_enabled.ompt_callback_sync_region) { - ompt_callbacks.ompt_callback(ompt_callback_sync_region)( - ompt_sync_region_barrier, ompt_scope_end, NULL, task_data, codeptr); - } -#endif - if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) { - ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( - ompt_scope_end, NULL, task_data, 0, ds_tid, ompt_task_implicit); // TODO: Can this be ompt_task_initial? - } - } -#endif - - // Early exit for reaping threads releasing forkjoin barrier - if (TCR_4(__kmp_global.g.g_done)) { - this_thr->th.th_task_team = NULL; - -#if USE_ITT_BUILD && USE_ITT_NOTIFY - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) { - if (!KMP_MASTER_TID(tid)) { - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); - if (itt_sync_obj) - __kmp_itt_barrier_finished(gtid, itt_sync_obj); - } - } -#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ - KA_TRACE(10, ("__kmp_fork_barrier: T#%d is leaving early\n", gtid)); - return; - } - - /* We can now assume that a valid team structure has been allocated by the - master and propagated to all worker threads. The current thread, however, - may not be part of the team, so we can't blindly assume that the team - pointer is non-null. */ - team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team); - KMP_DEBUG_ASSERT(team != NULL); - tid = __kmp_tid_from_gtid(gtid); - -#if KMP_BARRIER_ICV_PULL - /* Master thread's copy of the ICVs was set up on the implicit taskdata in - __kmp_reinitialize_team. __kmp_fork_call() assumes the master thread's - implicit task has this data before this function is called. We cannot - modify __kmp_fork_call() to look at the fixed ICVs in the master's thread - struct, because it is not always the case that the threads arrays have - been allocated when __kmp_fork_call() is executed. */ - { - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy); - if (!KMP_MASTER_TID(tid)) { // master thread already has ICVs - // Copy the initial ICVs from the master's thread struct to the implicit - // task for this tid. 
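      // (Pull model: each worker copies the master's th_fixed_icvs during its
      // own pass through the fork barrier, so the copies proceed in parallel
      // and the master does O(1) work at fork time.)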
- KA_TRACE(10, - ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid)); - __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, - tid, FALSE); - copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, - &team->t.t_threads[0] - ->th.th_bar[bs_forkjoin_barrier] - .bb.th_fixed_icvs); - } - } -#endif // KMP_BARRIER_ICV_PULL - - if (__kmp_tasking_mode != tskm_immediate_exec) { - __kmp_task_team_sync(this_thr, team); - } - -#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED - kmp_proc_bind_t proc_bind = team->t.t_proc_bind; - if (proc_bind == proc_bind_intel) { -#endif -#if KMP_AFFINITY_SUPPORTED - // Call dynamic affinity settings - if (__kmp_affinity_type == affinity_balanced && team->t.t_size_changed) { - __kmp_balanced_affinity(this_thr, team->t.t_nproc); - } -#endif // KMP_AFFINITY_SUPPORTED -#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED - } else if (proc_bind != proc_bind_false) { - if (this_thr->th.th_new_place == this_thr->th.th_current_place) { - KA_TRACE(100, ("__kmp_fork_barrier: T#%d already in correct place %d\n", - __kmp_gtid_from_thread(this_thr), - this_thr->th.th_current_place)); - } else { - __kmp_affinity_set_place(gtid); - } - } -#endif -#if OMP_50_ENABLED - // Perform the display affinity functionality - if (__kmp_display_affinity) { - if (team->t.t_display_affinity -#if KMP_AFFINITY_SUPPORTED - || (__kmp_affinity_type == affinity_balanced && team->t.t_size_changed) -#endif - ) { - // NULL means use the affinity-format-var ICV - __kmp_aux_display_affinity(gtid, NULL); - this_thr->th.th_prev_num_threads = team->t.t_nproc; - this_thr->th.th_prev_level = team->t.t_level; - } - } - if (!KMP_MASTER_TID(tid)) - KMP_CHECK_UPDATE(this_thr->th.th_def_allocator, team->t.t_def_allocator); -#endif - -#if USE_ITT_BUILD && USE_ITT_NOTIFY - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) { - if (!KMP_MASTER_TID(tid)) { - // Get correct barrier object - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); - __kmp_itt_barrier_finished(gtid, itt_sync_obj); // Workers call acquired - } // (prepare called inside barrier_release) - } -#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ - ANNOTATE_BARRIER_END(&team->t.t_bar); - KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n", gtid, - team->t.t_id, tid)); -} - -void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, - kmp_internal_control_t *new_icvs, ident_t *loc) { - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_setup_icv_copy); - - KMP_DEBUG_ASSERT(team && new_nproc && new_icvs); - KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc); - -/* Master thread's copy of the ICVs was set up on the implicit taskdata in - __kmp_reinitialize_team. __kmp_fork_call() assumes the master thread's - implicit task has this data before this function is called. */ -#if KMP_BARRIER_ICV_PULL - /* Copy ICVs to master's thread structure into th_fixed_icvs (which remains - untouched), where all of the worker threads can access them and make their - own copies after the barrier. */ - KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be - // allocated at this point - copy_icvs( - &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs, - new_icvs); - KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n", 0, - team->t.t_threads[0], team)); -#elif KMP_BARRIER_ICV_PUSH - // The ICVs will be propagated in the fork barrier, so nothing needs to be - // done here. 
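// (Push model: the master primes th_fixed_icvs and the fork-barrier release
// path forwards the ICVs to the workers; for the hierarchical barrier this
// rides along with b_go via ngo_store_go(). No per-thread copy loop is
// needed here, unlike the linear fallback in the #else branch below.)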
- KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n", 0, - team->t.t_threads[0], team)); -#else - // Copy the ICVs to each of the non-master threads. This takes O(nthreads) - // time. - ngo_load(new_icvs); - KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be - // allocated at this point - for (int f = 1; f < new_nproc; ++f) { // Skip the master thread - // TODO: GEH - pass in better source location info since usually NULL here - KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n", - f, team->t.t_threads[f], team)); - __kmp_init_implicit_task(loc, team->t.t_threads[f], team, f, FALSE); - ngo_store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs); - KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n", - f, team->t.t_threads[f], team)); - } - ngo_sync(); -#endif // KMP_BARRIER_ICV_PULL -} Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_barrier.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_wait_release.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_wait_release.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_wait_release.cpp (nonexistent) @@ -1,26 +0,0 @@ -/* - * kmp_wait_release.cpp -- Wait/Release implementation - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#include "kmp_wait_release.h" - -void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag, - int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) { - if (final_spin) - __kmp_wait_template( - this_thr, flag USE_ITT_BUILD_ARG(itt_sync_obj)); - else - __kmp_wait_template( - this_thr, flag USE_ITT_BUILD_ARG(itt_sync_obj)); -} - -void __kmp_release_64(kmp_flag_64 *flag) { __kmp_release_template(flag); } Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_wait_release.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/ompt-specific.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/ompt-specific.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/ompt-specific.cpp (nonexistent) @@ -1,451 +0,0 @@ -/* - * ompt-specific.cpp -- OMPT internal functions - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. 
See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -//****************************************************************************** -// include files -//****************************************************************************** - -#include "kmp.h" -#include "ompt-specific.h" - -#if KMP_OS_UNIX -#include -#endif - -#if KMP_OS_WINDOWS -#define THREAD_LOCAL __declspec(thread) -#else -#define THREAD_LOCAL __thread -#endif - -#define OMPT_WEAK_ATTRIBUTE KMP_WEAK_ATTRIBUTE - -//****************************************************************************** -// macros -//****************************************************************************** - -#define LWT_FROM_TEAM(team) (team)->t.ompt_serialized_team_info - -#define OMPT_THREAD_ID_BITS 16 - -//****************************************************************************** -// private operations -//****************************************************************************** - -//---------------------------------------------------------- -// traverse the team and task hierarchy -// note: __ompt_get_teaminfo and __ompt_get_task_info_object -// traverse the hierarchy similarly and need to be -// kept consistent -//---------------------------------------------------------- - -ompt_team_info_t *__ompt_get_teaminfo(int depth, int *size) { - kmp_info_t *thr = ompt_get_thread(); - - if (thr) { - kmp_team *team = thr->th.th_team; - if (team == NULL) - return NULL; - - ompt_lw_taskteam_t *next_lwt = LWT_FROM_TEAM(team), *lwt = NULL; - - while (depth > 0) { - // next lightweight team (if any) - if (lwt) - lwt = lwt->parent; - - // next heavyweight team (if any) after - // lightweight teams are exhausted - if (!lwt && team) { - if (next_lwt) { - lwt = next_lwt; - next_lwt = NULL; - } else { - team = team->t.t_parent; - if (team) { - next_lwt = LWT_FROM_TEAM(team); - } - } - } - - depth--; - } - - if (lwt) { - // lightweight teams have one task - if (size) - *size = 1; - - // return team info for lightweight team - return &lwt->ompt_team_info; - } else if (team) { - // extract size from heavyweight team - if (size) - *size = team->t.t_nproc; - - // return team info for heavyweight team - return &team->t.ompt_team_info; - } - } - - return NULL; -} - -ompt_task_info_t *__ompt_get_task_info_object(int depth) { - ompt_task_info_t *info = NULL; - kmp_info_t *thr = ompt_get_thread(); - - if (thr) { - kmp_taskdata_t *taskdata = thr->th.th_current_task; - ompt_lw_taskteam_t *lwt = NULL, - *next_lwt = LWT_FROM_TEAM(taskdata->td_team); - - while (depth > 0) { - // next lightweight team (if any) - if (lwt) - lwt = lwt->parent; - - // next heavyweight team (if any) after - // lightweight teams are exhausted - if (!lwt && taskdata) { - if (next_lwt) { - lwt = next_lwt; - next_lwt = NULL; - } else { - taskdata = taskdata->td_parent; - if (taskdata) { - next_lwt = LWT_FROM_TEAM(taskdata->td_team); - } - } - } - depth--; - } - - if (lwt) { - info = &lwt->ompt_task_info; - } else if (taskdata) { - info = &taskdata->ompt_task_info; - } - } - - return info; -} - -ompt_task_info_t *__ompt_get_scheduling_taskinfo(int depth) { - ompt_task_info_t *info = NULL; - kmp_info_t *thr = ompt_get_thread(); - - if (thr) { - kmp_taskdata_t *taskdata = thr->th.th_current_task; - - ompt_lw_taskteam_t *lwt = NULL, - *next_lwt = LWT_FROM_TEAM(taskdata->td_team); - - while (depth > 0) { - // next lightweight team (if any) - if (lwt) - lwt = lwt->parent; - - // next heavyweight team (if any) after - // lightweight teams 
are exhausted - if (!lwt && taskdata) { - // first try scheduling parent (for explicit task scheduling) - if (taskdata->ompt_task_info.scheduling_parent) { - taskdata = taskdata->ompt_task_info.scheduling_parent; - } else if (next_lwt) { - lwt = next_lwt; - next_lwt = NULL; - } else { - // then go for implicit tasks - taskdata = taskdata->td_parent; - if (taskdata) { - next_lwt = LWT_FROM_TEAM(taskdata->td_team); - } - } - } - depth--; - } - - if (lwt) { - info = &lwt->ompt_task_info; - } else if (taskdata) { - info = &taskdata->ompt_task_info; - } - } - - return info; -} - -//****************************************************************************** -// interface operations -//****************************************************************************** - -//---------------------------------------------------------- -// thread support -//---------------------------------------------------------- - -ompt_data_t *__ompt_get_thread_data_internal() { - if (__kmp_get_gtid() >= 0) { - kmp_info_t *thread = ompt_get_thread(); - if (thread == NULL) - return NULL; - return &(thread->th.ompt_thread_info.thread_data); - } - return NULL; -} - -//---------------------------------------------------------- -// state support -//---------------------------------------------------------- - -void __ompt_thread_assign_wait_id(void *variable) { - kmp_info_t *ti = ompt_get_thread(); - - ti->th.ompt_thread_info.wait_id = (ompt_wait_id_t)variable; -} - -int __ompt_get_state_internal(ompt_wait_id_t *omp_wait_id) { - kmp_info_t *ti = ompt_get_thread(); - - if (ti) { - if (omp_wait_id) - *omp_wait_id = ti->th.ompt_thread_info.wait_id; - return ti->th.ompt_thread_info.state; - } - return ompt_state_undefined; -} - -//---------------------------------------------------------- -// parallel region support -//---------------------------------------------------------- - -int __ompt_get_parallel_info_internal(int ancestor_level, - ompt_data_t **parallel_data, - int *team_size) { - if (__kmp_get_gtid() >= 0) { - ompt_team_info_t *info; - if (team_size) { - info = __ompt_get_teaminfo(ancestor_level, team_size); - } else { - info = __ompt_get_teaminfo(ancestor_level, NULL); - } - if (parallel_data) { - *parallel_data = info ? &(info->parallel_data) : NULL; - } - return info ? 
2 : 0; - } else { - return 0; - } -} - -//---------------------------------------------------------- -// lightweight task team support -//---------------------------------------------------------- - -void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, int gtid, - ompt_data_t *ompt_pid, void *codeptr) { - // initialize parallel_data with input, return address to parallel_data on - // exit - lwt->ompt_team_info.parallel_data = *ompt_pid; - lwt->ompt_team_info.master_return_address = codeptr; - lwt->ompt_task_info.task_data.value = 0; - lwt->ompt_task_info.frame.enter_frame = ompt_data_none; - lwt->ompt_task_info.frame.exit_frame = ompt_data_none; - lwt->ompt_task_info.scheduling_parent = NULL; - lwt->ompt_task_info.deps = NULL; - lwt->ompt_task_info.ndeps = 0; - lwt->heap = 0; - lwt->parent = 0; -} - -void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, - int on_heap) { - ompt_lw_taskteam_t *link_lwt = lwt; - if (thr->th.th_team->t.t_serialized > - 1) { // we already have a team, so link the new team and swap values - if (on_heap) { // the lw_taskteam cannot stay on stack, allocate it on heap - link_lwt = - (ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t)); - } - link_lwt->heap = on_heap; - - // would be swap in the (on_stack) case. - ompt_team_info_t tmp_team = lwt->ompt_team_info; - link_lwt->ompt_team_info = *OMPT_CUR_TEAM_INFO(thr); - *OMPT_CUR_TEAM_INFO(thr) = tmp_team; - - ompt_task_info_t tmp_task = lwt->ompt_task_info; - link_lwt->ompt_task_info = *OMPT_CUR_TASK_INFO(thr); - *OMPT_CUR_TASK_INFO(thr) = tmp_task; - - // link the taskteam into the list of taskteams: - ompt_lw_taskteam_t *my_parent = - thr->th.th_team->t.ompt_serialized_team_info; - link_lwt->parent = my_parent; - thr->th.th_team->t.ompt_serialized_team_info = link_lwt; - } else { - // this is the first serialized team, so we just store the values in the - // team and drop the taskteam-object - *OMPT_CUR_TEAM_INFO(thr) = lwt->ompt_team_info; - *OMPT_CUR_TASK_INFO(thr) = lwt->ompt_task_info; - } -} - -void __ompt_lw_taskteam_unlink(kmp_info_t *thr) { - ompt_lw_taskteam_t *lwtask = thr->th.th_team->t.ompt_serialized_team_info; - if (lwtask) { - thr->th.th_team->t.ompt_serialized_team_info = lwtask->parent; - - ompt_team_info_t tmp_team = lwtask->ompt_team_info; - lwtask->ompt_team_info = *OMPT_CUR_TEAM_INFO(thr); - *OMPT_CUR_TEAM_INFO(thr) = tmp_team; - - ompt_task_info_t tmp_task = lwtask->ompt_task_info; - lwtask->ompt_task_info = *OMPT_CUR_TASK_INFO(thr); - *OMPT_CUR_TASK_INFO(thr) = tmp_task; - - if (lwtask->heap) { - __kmp_free(lwtask); - lwtask = NULL; - } - } - // return lwtask; -} - -//---------------------------------------------------------- -// task support -//---------------------------------------------------------- - -int __ompt_get_task_info_internal(int ancestor_level, int *type, - ompt_data_t **task_data, - ompt_frame_t **task_frame, - ompt_data_t **parallel_data, - int *thread_num) { - if (__kmp_get_gtid() < 0) - return 0; - - if (ancestor_level < 0) - return 0; - - // copied from __ompt_get_scheduling_taskinfo - ompt_task_info_t *info = NULL; - ompt_team_info_t *team_info = NULL; - kmp_info_t *thr = ompt_get_thread(); - int level = ancestor_level; - - if (thr) { - kmp_taskdata_t *taskdata = thr->th.th_current_task; - if (taskdata == NULL) - return 0; - kmp_team *team = thr->th.th_team, *prev_team = NULL; - if (team == NULL) - return 0; - ompt_lw_taskteam_t *lwt = NULL, - *next_lwt = LWT_FROM_TEAM(taskdata->td_team), - *prev_lwt = NULL; - - while 
(ancestor_level > 0) { - // needed for thread_num - prev_team = team; - prev_lwt = lwt; - // next lightweight team (if any) - if (lwt) - lwt = lwt->parent; - - // next heavyweight team (if any) after - // lightweight teams are exhausted - if (!lwt && taskdata) { - // first try scheduling parent (for explicit task scheduling) - if (taskdata->ompt_task_info.scheduling_parent) { - taskdata = taskdata->ompt_task_info.scheduling_parent; - } else if (next_lwt) { - lwt = next_lwt; - next_lwt = NULL; - } else { - // then go for implicit tasks - taskdata = taskdata->td_parent; - if (team == NULL) - return 0; - team = team->t.t_parent; - if (taskdata) { - next_lwt = LWT_FROM_TEAM(taskdata->td_team); - } - } - } - ancestor_level--; - } - - if (lwt) { - info = &lwt->ompt_task_info; - team_info = &lwt->ompt_team_info; - if (type) { - *type = ompt_task_implicit; - } - } else if (taskdata) { - info = &taskdata->ompt_task_info; - team_info = &team->t.ompt_team_info; - if (type) { - if (taskdata->td_parent) { - *type = (taskdata->td_flags.tasktype ? ompt_task_explicit - : ompt_task_implicit) | - TASK_TYPE_DETAILS_FORMAT(taskdata); - } else { - *type = ompt_task_initial; - } - } - } - if (task_data) { - *task_data = info ? &info->task_data : NULL; - } - if (task_frame) { - // OpenMP spec asks for the scheduling task to be returned. - *task_frame = info ? &info->frame : NULL; - } - if (parallel_data) { - *parallel_data = team_info ? &(team_info->parallel_data) : NULL; - } - if (thread_num) { - if (level == 0) - *thread_num = __kmp_get_tid(); - else if (prev_lwt) - *thread_num = 0; - else - *thread_num = prev_team->t.t_master_tid; - // *thread_num = team->t.t_master_tid; - } - return info ? 2 : 0; - } - return 0; -} - -//---------------------------------------------------------- -// team support -//---------------------------------------------------------- - -void __ompt_team_assign_id(kmp_team_t *team, ompt_data_t ompt_pid) { - team->t.ompt_team_info.parallel_data = ompt_pid; -} - -//---------------------------------------------------------- -// misc -//---------------------------------------------------------- - -static uint64_t __ompt_get_unique_id_internal() { - static uint64_t thread = 1; - static THREAD_LOCAL uint64_t ID = 0; - if (ID == 0) { - uint64_t new_thread = KMP_TEST_THEN_INC64((kmp_int64 *)&thread); - ID = new_thread << (sizeof(uint64_t) * 8 - OMPT_THREAD_ID_BITS); - } - return ++ID; -} Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/ompt-specific.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_debugger.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_debugger.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_debugger.h (nonexistent) @@ -1,49 +0,0 @@ -#if USE_DEBUGGER -/* - * kmp_debugger.h -- debugger support. - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - -#ifndef KMP_DEBUGGER_H -#define KMP_DEBUGGER_H - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -/* This external variable can be set by any debugger to flag to the runtime - that we are currently executing inside a debugger. This will allow the - debugger to override the number of threads spawned in a parallel region by - using __kmp_omp_num_threads() (below). - * When __kmp_debugging is TRUE, each team and each task gets a unique integer - identifier that can be used by debugger to conveniently identify teams and - tasks. - * The debugger has access to __kmp_omp_debug_struct_info which contains - information about the OpenMP library's important internal structures. This - access will allow the debugger to read detailed information from the typical - OpenMP constructs (teams, threads, tasking, etc. ) during a debugging - session and offer detailed and useful information which the user can probe - about the OpenMP portion of their code. */ -extern int __kmp_debugging; /* Boolean whether currently debugging OpenMP RTL */ -// Return number of threads specified by the debugger for given parallel region. -/* The ident field, which represents a source file location, is used to check if - the debugger has changed the number of threads for the parallel region at - source file location ident. This way, specific parallel regions' number of - threads can be changed at the debugger's request. */ -int __kmp_omp_num_threads(ident_t const *ident); - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif // KMP_DEBUGGER_H - -#endif // USE_DEBUGGER Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_debugger.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_error.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_error.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_error.h (nonexistent) @@ -1,61 +0,0 @@ -/* - * kmp_error.h -- PTS functions for error checking at runtime. - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - -#ifndef KMP_ERROR_H -#define KMP_ERROR_H - -#include "kmp_i18n.h" - -/* ------------------------------------------------------------------------ */ -#ifdef __cplusplus -extern "C" { -#endif - -void __kmp_error_construct(kmp_i18n_id_t id, enum cons_type ct, - ident_t const *ident); -void __kmp_error_construct2(kmp_i18n_id_t id, enum cons_type ct, - ident_t const *ident, struct cons_data const *cons); - -struct cons_header *__kmp_allocate_cons_stack(int gtid); -void __kmp_free_cons_stack(void *ptr); - -void __kmp_push_parallel(int gtid, ident_t const *ident); -void __kmp_push_workshare(int gtid, enum cons_type ct, ident_t const *ident); -#if KMP_USE_DYNAMIC_LOCK -void __kmp_push_sync(int gtid, enum cons_type ct, ident_t const *ident, - kmp_user_lock_p name, kmp_uint32); -#else -void __kmp_push_sync(int gtid, enum cons_type ct, ident_t const *ident, - kmp_user_lock_p name); -#endif - -void __kmp_check_workshare(int gtid, enum cons_type ct, ident_t const *ident); -#if KMP_USE_DYNAMIC_LOCK -void __kmp_check_sync(int gtid, enum cons_type ct, ident_t const *ident, - kmp_user_lock_p name, kmp_uint32); -#else -void __kmp_check_sync(int gtid, enum cons_type ct, ident_t const *ident, - kmp_user_lock_p name); -#endif - -void __kmp_pop_parallel(int gtid, ident_t const *ident); -enum cons_type __kmp_pop_workshare(int gtid, enum cons_type ct, - ident_t const *ident); -void __kmp_pop_sync(int gtid, enum cons_type ct, ident_t const *ident); -void __kmp_check_barrier(int gtid, enum cons_type ct, ident_t const *ident); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // KMP_ERROR_H Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_error.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp.h (nonexistent) @@ -1,4014 +0,0 @@ -/*! \file */ -/* - * kmp.h -- KPTS runtime header file. - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#ifndef KMP_H -#define KMP_H - -#include "kmp_config.h" - -/* #define BUILD_PARALLEL_ORDERED 1 */ - -/* This fix replaces gettimeofday with clock_gettime for better scalability on - the Altix. Requires user code to be linked with -lrt. 
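   A sketch of the substitution (illustrative only):
       struct timespec ts;
       clock_gettime(CLOCK_REALTIME, &ts); // nanosecond-resolution interface
   in place of gettimeofday(), with -lrt required only on older glibc where
   clock_gettime still lives in librt.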
*/ -//#define FIX_SGI_CLOCK - -/* Defines for OpenMP 3.0 tasking and auto scheduling */ - -#ifndef KMP_STATIC_STEAL_ENABLED -#define KMP_STATIC_STEAL_ENABLED 1 -#endif - -#define TASK_CURRENT_NOT_QUEUED 0 -#define TASK_CURRENT_QUEUED 1 - -#ifdef BUILD_TIED_TASK_STACK -#define TASK_STACK_EMPTY 0 // entries when the stack is empty -#define TASK_STACK_BLOCK_BITS 5 // Used in TASK_STACK_SIZE and TASK_STACK_MASK -// Number of entries in each task stack array -#define TASK_STACK_BLOCK_SIZE (1 << TASK_STACK_BLOCK_BITS) -// Mask for determining index into stack block -#define TASK_STACK_INDEX_MASK (TASK_STACK_BLOCK_SIZE - 1) -#endif // BUILD_TIED_TASK_STACK - -#define TASK_NOT_PUSHED 1 -#define TASK_SUCCESSFULLY_PUSHED 0 -#define TASK_TIED 1 -#define TASK_UNTIED 0 -#define TASK_EXPLICIT 1 -#define TASK_IMPLICIT 0 -#define TASK_PROXY 1 -#define TASK_FULL 0 - -#define KMP_CANCEL_THREADS -#define KMP_THREAD_ATTR - -// Android does not have pthread_cancel. Undefine KMP_CANCEL_THREADS if being -// built on Android -#if defined(__ANDROID__) -#undef KMP_CANCEL_THREADS -#endif - -#include -#include -#include -#include -#include -#include -/* include don't use; problems with /MD on Windows* OS NT due to bad - Microsoft library. Some macros provided below to replace these functions */ -#ifndef __ABSOFT_WIN -#include -#endif -#include -#include - -#include - -#include "kmp_os.h" - -#include "kmp_safe_c_api.h" - -#if KMP_STATS_ENABLED -class kmp_stats_list; -#endif - -#if KMP_USE_HIER_SCHED -// Only include hierarchical scheduling if affinity is supported -#undef KMP_USE_HIER_SCHED -#define KMP_USE_HIER_SCHED KMP_AFFINITY_SUPPORTED -#endif - -#if KMP_USE_HWLOC && KMP_AFFINITY_SUPPORTED -#include "hwloc.h" -#ifndef HWLOC_OBJ_NUMANODE -#define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE -#endif -#ifndef HWLOC_OBJ_PACKAGE -#define HWLOC_OBJ_PACKAGE HWLOC_OBJ_SOCKET -#endif -#endif - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 -#include -#endif - -#include "kmp_debug.h" -#include "kmp_lock.h" -#include "kmp_version.h" -#if USE_DEBUGGER -#include "kmp_debugger.h" -#endif -#include "kmp_i18n.h" - -#define KMP_HANDLE_SIGNALS (KMP_OS_UNIX || KMP_OS_WINDOWS) - -#include "kmp_wrapper_malloc.h" -#if KMP_OS_UNIX -#include -#if !defined NSIG && defined _NSIG -#define NSIG _NSIG -#endif -#endif - -#if KMP_OS_LINUX -#pragma weak clock_gettime -#endif - -#if OMPT_SUPPORT -#include "ompt-internal.h" -#endif - -#if OMP_50_ENABLED -// Affinity format function -#include "kmp_str.h" -#endif - -// 0 - no fast memory allocation, alignment: 8-byte on x86, 16-byte on x64. -// 3 - fast allocation using sync, non-sync free lists of any size, non-self -// free lists of limited size. -#ifndef USE_FAST_MEMORY -#define USE_FAST_MEMORY 3 -#endif - -#ifndef KMP_NESTED_HOT_TEAMS -#define KMP_NESTED_HOT_TEAMS 0 -#define USE_NESTED_HOT_ARG(x) -#else -#if KMP_NESTED_HOT_TEAMS -#if OMP_40_ENABLED -#define USE_NESTED_HOT_ARG(x) , x -#else -// Nested hot teams feature depends on omp 4.0, disable it for earlier versions -#undef KMP_NESTED_HOT_TEAMS -#define KMP_NESTED_HOT_TEAMS 0 -#define USE_NESTED_HOT_ARG(x) -#endif -#else -#define USE_NESTED_HOT_ARG(x) -#endif -#endif - -// Assume using BGET compare_exchange instruction instead of lock by default. -#ifndef USE_CMP_XCHG_FOR_BGET -#define USE_CMP_XCHG_FOR_BGET 1 -#endif - -// Test to see if queuing lock is better than bootstrap lock for bget -// #ifndef USE_QUEUING_LOCK_FOR_BGET -// #define USE_QUEUING_LOCK_FOR_BGET -// #endif - -#define KMP_NSEC_PER_SEC 1000000000L -#define KMP_USEC_PER_SEC 1000000L - -/*! 
-@ingroup BASIC_TYPES
-@{
-*/
-
-/*!
-Values for bit flags used in the ident_t to describe the fields.
-*/
-enum {
-  /*! Use trampoline for internal microtasks */
-  KMP_IDENT_IMB = 0x01,
-  /*! Use c-style ident structure */
-  KMP_IDENT_KMPC = 0x02,
-  /* 0x04 is no longer used */
-  /*! Entry point generated by auto-parallelization */
-  KMP_IDENT_AUTOPAR = 0x08,
-  /*! Compiler generates atomic reduction option for kmpc_reduce* */
-  KMP_IDENT_ATOMIC_REDUCE = 0x10,
-  /*! To mark a 'barrier' directive in user code */
-  KMP_IDENT_BARRIER_EXPL = 0x20,
-  /*! To mark implicit barriers. */
-  KMP_IDENT_BARRIER_IMPL = 0x0040,
-  KMP_IDENT_BARRIER_IMPL_MASK = 0x01C0,
-  KMP_IDENT_BARRIER_IMPL_FOR = 0x0040,
-  KMP_IDENT_BARRIER_IMPL_SECTIONS = 0x00C0,
-
-  KMP_IDENT_BARRIER_IMPL_SINGLE = 0x0140,
-  KMP_IDENT_BARRIER_IMPL_WORKSHARE = 0x01C0,
-
-  /*! To mark a static loop in OMPT callbacks */
-  KMP_IDENT_WORK_LOOP = 0x200,
-  /*! To mark a sections directive in OMPT callbacks */
-  KMP_IDENT_WORK_SECTIONS = 0x400,
-  /*! To mark a distribute construct in OMPT callbacks */
-  KMP_IDENT_WORK_DISTRIBUTE = 0x800,
-  /*! Atomic hint; bottom four bits as omp_sync_hint_t. Top four reserved and
-      not currently used. If one day we need more bits, then we can use
-      an invalid combination of hints to mean that another, larger field
-      should be used in a different flag. */
-  KMP_IDENT_ATOMIC_HINT_MASK = 0xFF0000,
-  KMP_IDENT_ATOMIC_HINT_UNCONTENDED = 0x010000,
-  KMP_IDENT_ATOMIC_HINT_CONTENDED = 0x020000,
-  KMP_IDENT_ATOMIC_HINT_NONSPECULATIVE = 0x040000,
-  KMP_IDENT_ATOMIC_HINT_SPECULATIVE = 0x080000,
-};
-
-/*!
- * The ident structure that describes a source location.
- */
-typedef struct ident {
-  kmp_int32 reserved_1; /**< might be used in Fortran; see above */
-  kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC
-                      identifies this union member */
-  kmp_int32 reserved_2; /**< not really used in Fortran any more; see above */
-#if USE_ITT_BUILD
-/* but currently used for storing region-specific ITT */
-/* contextual information. */
-#endif /* USE_ITT_BUILD */
-  kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++ */
-  char const *psource; /**< String describing the source location.
-                          The string is composed of semi-colon separated fields
-                          which describe the source file, the function and a
-                          pair of line numbers that delimit the construct. */
-} ident_t;
-/*!
-@}
-*/
-
-// Some forward declarations.
-typedef union kmp_team kmp_team_t;
-typedef struct kmp_taskdata kmp_taskdata_t;
-typedef union kmp_task_team kmp_task_team_t;
-typedef union kmp_team kmp_team_p;
-typedef union kmp_info kmp_info_p;
-typedef union kmp_root kmp_root_p;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* ------------------------------------------------------------------------ */
-
-/* Pack two 32-bit signed integers into a 64-bit signed integer */
-/* ToDo: Fix word ordering for big-endian machines. */
-#define KMP_PACK_64(HIGH_32, LOW_32) \
-  ((kmp_int64)((((kmp_uint64)(HIGH_32)) << 32) | (kmp_uint64)(LOW_32)))
-
-// Generic string manipulation macros.
Assume that _x is of type char * -#define SKIP_WS(_x) \ - { \ - while (*(_x) == ' ' || *(_x) == '\t') \ - (_x)++; \ - } -#define SKIP_DIGITS(_x) \ - { \ - while (*(_x) >= '0' && *(_x) <= '9') \ - (_x)++; \ - } -#define SKIP_TOKEN(_x) \ - { \ - while ((*(_x) >= '0' && *(_x) <= '9') || (*(_x) >= 'a' && *(_x) <= 'z') || \ - (*(_x) >= 'A' && *(_x) <= 'Z') || *(_x) == '_') \ - (_x)++; \ - } -#define SKIP_TO(_x, _c) \ - { \ - while (*(_x) != '\0' && *(_x) != (_c)) \ - (_x)++; \ - } - -/* ------------------------------------------------------------------------ */ - -#define KMP_MAX(x, y) ((x) > (y) ? (x) : (y)) -#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y)) - -/* ------------------------------------------------------------------------ */ -/* Enumeration types */ - -enum kmp_state_timer { - ts_stop, - ts_start, - ts_pause, - - ts_last_state -}; - -enum dynamic_mode { - dynamic_default, -#ifdef USE_LOAD_BALANCE - dynamic_load_balance, -#endif /* USE_LOAD_BALANCE */ - dynamic_random, - dynamic_thread_limit, - dynamic_max -}; - -/* external schedule constants, duplicate enum omp_sched in omp.h in order to - * not include it here */ -#ifndef KMP_SCHED_TYPE_DEFINED -#define KMP_SCHED_TYPE_DEFINED -typedef enum kmp_sched { - kmp_sched_lower = 0, // lower and upper bounds are for routine parameter check - // Note: need to adjust __kmp_sch_map global array in case enum is changed - kmp_sched_static = 1, // mapped to kmp_sch_static_chunked (33) - kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked (35) - kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked (36) - kmp_sched_auto = 4, // mapped to kmp_sch_auto (38) - kmp_sched_upper_std = 5, // upper bound for standard schedules - kmp_sched_lower_ext = 100, // lower bound of Intel extension schedules - kmp_sched_trapezoidal = 101, // mapped to kmp_sch_trapezoidal (39) -#if KMP_STATIC_STEAL_ENABLED - kmp_sched_static_steal = 102, // mapped to kmp_sch_static_steal (44) -#endif - kmp_sched_upper, - kmp_sched_default = kmp_sched_static // default scheduling -} kmp_sched_t; -#endif - -/*! - @ingroup WORK_SHARING - * Describes the loop schedule to be used for a parallel for loop. 
- */ -enum sched_type : kmp_int32 { - kmp_sch_lower = 32, /**< lower bound for unordered values */ - kmp_sch_static_chunked = 33, - kmp_sch_static = 34, /**< static unspecialized */ - kmp_sch_dynamic_chunked = 35, - kmp_sch_guided_chunked = 36, /**< guided unspecialized */ - kmp_sch_runtime = 37, - kmp_sch_auto = 38, /**< auto */ - kmp_sch_trapezoidal = 39, - - /* accessible only through KMP_SCHEDULE environment variable */ - kmp_sch_static_greedy = 40, - kmp_sch_static_balanced = 41, - /* accessible only through KMP_SCHEDULE environment variable */ - kmp_sch_guided_iterative_chunked = 42, - kmp_sch_guided_analytical_chunked = 43, - /* accessible only through KMP_SCHEDULE environment variable */ - kmp_sch_static_steal = 44, - -#if OMP_45_ENABLED - /* static with chunk adjustment (e.g., simd) */ - kmp_sch_static_balanced_chunked = 45, - kmp_sch_guided_simd = 46, /**< guided with chunk adjustment */ - kmp_sch_runtime_simd = 47, /**< runtime with chunk adjustment */ -#endif - - /* accessible only through KMP_SCHEDULE environment variable */ - kmp_sch_upper, /**< upper bound for unordered values */ - - kmp_ord_lower = 64, /**< lower bound for ordered values, must be power of 2 */ - kmp_ord_static_chunked = 65, - kmp_ord_static = 66, /**< ordered static unspecialized */ - kmp_ord_dynamic_chunked = 67, - kmp_ord_guided_chunked = 68, - kmp_ord_runtime = 69, - kmp_ord_auto = 70, /**< ordered auto */ - kmp_ord_trapezoidal = 71, - kmp_ord_upper, /**< upper bound for ordered values */ - -#if OMP_40_ENABLED - /* Schedules for Distribute construct */ - kmp_distribute_static_chunked = 91, /**< distribute static chunked */ - kmp_distribute_static = 92, /**< distribute static unspecialized */ -#endif - - /* For the "nomerge" versions, kmp_dispatch_next*() will always return a - single iteration/chunk, even if the loop is serialized. For the schedule - types listed above, the entire iteration vector is returned if the loop is - serialized. This doesn't work for gcc/gcomp sections. */ - kmp_nm_lower = 160, /**< lower bound for nomerge values */ - - kmp_nm_static_chunked = - (kmp_sch_static_chunked - kmp_sch_lower + kmp_nm_lower), - kmp_nm_static = 162, /**< static unspecialized */ - kmp_nm_dynamic_chunked = 163, - kmp_nm_guided_chunked = 164, /**< guided unspecialized */ - kmp_nm_runtime = 165, - kmp_nm_auto = 166, /**< auto */ - kmp_nm_trapezoidal = 167, - - /* accessible only through KMP_SCHEDULE environment variable */ - kmp_nm_static_greedy = 168, - kmp_nm_static_balanced = 169, - /* accessible only through KMP_SCHEDULE environment variable */ - kmp_nm_guided_iterative_chunked = 170, - kmp_nm_guided_analytical_chunked = 171, - kmp_nm_static_steal = - 172, /* accessible only through OMP_SCHEDULE environment variable */ - - kmp_nm_ord_static_chunked = 193, - kmp_nm_ord_static = 194, /**< ordered static unspecialized */ - kmp_nm_ord_dynamic_chunked = 195, - kmp_nm_ord_guided_chunked = 196, - kmp_nm_ord_runtime = 197, - kmp_nm_ord_auto = 198, /**< auto */ - kmp_nm_ord_trapezoidal = 199, - kmp_nm_upper, /**< upper bound for nomerge values */ - -#if OMP_45_ENABLED - /* Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. Since - we need to distinguish the three possible cases (no modifier, monotonic - modifier, nonmonotonic modifier), we need separate bits for each modifier. - The absence of monotonic does not imply nonmonotonic, especially since 4.5 - says that the behaviour of the "no modifier" case is implementation defined - in 4.5, but will become "nonmonotonic" in 5.0. 
- - Since we're passing a full 32 bit value, we can use a couple of high bits - for these flags; out of paranoia we avoid the sign bit. - - These modifiers can be or-ed into non-static schedules by the compiler to - pass the additional information. They will be stripped early in the - processing in __kmp_dispatch_init when setting up schedules, so most of the - code won't ever see schedules with these bits set. */ - kmp_sch_modifier_monotonic = - (1 << 29), /**< Set if the monotonic schedule modifier was present */ - kmp_sch_modifier_nonmonotonic = - (1 << 30), /**< Set if the nonmonotonic schedule modifier was present */ - -#define SCHEDULE_WITHOUT_MODIFIERS(s) \ - (enum sched_type)( \ - (s) & ~(kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)) -#define SCHEDULE_HAS_MONOTONIC(s) (((s)&kmp_sch_modifier_monotonic) != 0) -#define SCHEDULE_HAS_NONMONOTONIC(s) (((s)&kmp_sch_modifier_nonmonotonic) != 0) -#define SCHEDULE_HAS_NO_MODIFIERS(s) \ - (((s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)) == 0) -#else -/* By doing this we hope to avoid multiple tests on OMP_45_ENABLED. Compilers - can now eliminate tests on compile time constants and dead code that results - from them, so we can leave code guarded by such an if in place. */ -#define SCHEDULE_WITHOUT_MODIFIERS(s) (s) -#define SCHEDULE_HAS_MONOTONIC(s) false -#define SCHEDULE_HAS_NONMONOTONIC(s) false -#define SCHEDULE_HAS_NO_MODIFIERS(s) true -#endif - - kmp_sch_default = kmp_sch_static /**< default scheduling algorithm */ -}; - -/* Type to keep runtime schedule set via OMP_SCHEDULE or omp_set_schedule() */ -typedef union kmp_r_sched { - struct { - enum sched_type r_sched_type; - int chunk; - }; - kmp_int64 sched; -} kmp_r_sched_t; - -extern enum sched_type __kmp_sch_map[]; // map OMP 3.0 schedule types with our -// internal schedule types - -enum library_type { - library_none, - library_serial, - library_turnaround, - library_throughput -}; - -#if KMP_OS_LINUX -enum clock_function_type { - clock_function_gettimeofday, - clock_function_clock_gettime -}; -#endif /* KMP_OS_LINUX */ - -#if KMP_MIC_SUPPORTED -enum mic_type { non_mic, mic1, mic2, mic3, dummy }; -#endif - -/* -- fast reduction stuff ------------------------------------------------ */ - -#undef KMP_FAST_REDUCTION_BARRIER -#define KMP_FAST_REDUCTION_BARRIER 1 - -#undef KMP_FAST_REDUCTION_CORE_DUO -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 -#define KMP_FAST_REDUCTION_CORE_DUO 1 -#endif - -enum _reduction_method { - reduction_method_not_defined = 0, - critical_reduce_block = (1 << 8), - atomic_reduce_block = (2 << 8), - tree_reduce_block = (3 << 8), - empty_reduce_block = (4 << 8) -}; - -// Description of the packed_reduction_method variable: -// The packed_reduction_method variable consists of two enum types variables -// that are packed together into 0-th byte and 1-st byte: -// 0: (packed_reduction_method & 0x000000FF) is a 'enum barrier_type' value of -// barrier that will be used in fast reduction: bs_plain_barrier or -// bs_reduction_barrier -// 1: (packed_reduction_method & 0x0000FF00) is a reduction method that will -// be used in fast reduction; -// Reduction method is of 'enum _reduction_method' type and it's defined the way -// so that the bits of 0-th byte are empty, so no need to execute a shift -// instruction while packing/unpacking - -#if KMP_FAST_REDUCTION_BARRIER -#define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method, barrier_type) \ - ((reduction_method) | (barrier_type)) - -#define UNPACK_REDUCTION_METHOD(packed_reduction_method) \ - 
((enum _reduction_method)((packed_reduction_method) & (0x0000FF00))) - -#define UNPACK_REDUCTION_BARRIER(packed_reduction_method) \ - ((enum barrier_type)((packed_reduction_method) & (0x000000FF))) -#else -#define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method, barrier_type) \ - (reduction_method) - -#define UNPACK_REDUCTION_METHOD(packed_reduction_method) \ - (packed_reduction_method) - -#define UNPACK_REDUCTION_BARRIER(packed_reduction_method) (bs_plain_barrier) -#endif - -#define TEST_REDUCTION_METHOD(packed_reduction_method, which_reduction_block) \ - ((UNPACK_REDUCTION_METHOD(packed_reduction_method)) == \ - (which_reduction_block)) - -#if KMP_FAST_REDUCTION_BARRIER -#define TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER \ - (PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_reduction_barrier)) - -#define TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER \ - (PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_plain_barrier)) -#endif - -typedef int PACKED_REDUCTION_METHOD_T; - -/* -- end of fast reduction stuff ----------------------------------------- */ - -#if KMP_OS_WINDOWS -#define USE_CBLKDATA -#if KMP_MSVC_COMPAT -#pragma warning(push) -#pragma warning(disable : 271 310) -#endif -#include -#if KMP_MSVC_COMPAT -#pragma warning(pop) -#endif -#endif - -#if KMP_OS_UNIX -#include -#include -#endif - -/* Only Linux* OS and Windows* OS support thread affinity. */ -#if KMP_AFFINITY_SUPPORTED - -// GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later). -#if KMP_OS_WINDOWS -#if _MSC_VER < 1600 && KMP_MSVC_COMPAT -typedef struct GROUP_AFFINITY { - KAFFINITY Mask; - WORD Group; - WORD Reserved[3]; -} GROUP_AFFINITY; -#endif /* _MSC_VER < 1600 */ -#if KMP_GROUP_AFFINITY -extern int __kmp_num_proc_groups; -#else -static const int __kmp_num_proc_groups = 1; -#endif /* KMP_GROUP_AFFINITY */ -typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD); -extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount; - -typedef WORD (*kmp_GetActiveProcessorGroupCount_t)(void); -extern kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount; - -typedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *); -extern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity; - -typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *, - GROUP_AFFINITY *); -extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity; -#endif /* KMP_OS_WINDOWS */ - -#if KMP_USE_HWLOC -extern hwloc_topology_t __kmp_hwloc_topology; -extern int __kmp_hwloc_error; -extern int __kmp_numa_detected; -extern int __kmp_tile_depth; -#endif - -extern size_t __kmp_affin_mask_size; -#define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0) -#define KMP_AFFINITY_DISABLE() (__kmp_affin_mask_size = 0) -#define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size) -#define KMP_CPU_SET_ITERATE(i, mask) \ - for (i = (mask)->begin(); (int)i != (mask)->end(); i = (mask)->next(i)) -#define KMP_CPU_SET(i, mask) (mask)->set(i) -#define KMP_CPU_ISSET(i, mask) (mask)->is_set(i) -#define KMP_CPU_CLR(i, mask) (mask)->clear(i) -#define KMP_CPU_ZERO(mask) (mask)->zero() -#define KMP_CPU_COPY(dest, src) (dest)->copy(src) -#define KMP_CPU_AND(dest, src) (dest)->bitwise_and(src) -#define KMP_CPU_COMPLEMENT(max_bit_number, mask) (mask)->bitwise_not() -#define KMP_CPU_UNION(dest, src) (dest)->bitwise_or(src) -#define KMP_CPU_ALLOC(ptr) (ptr = __kmp_affinity_dispatch->allocate_mask()) -#define KMP_CPU_FREE(ptr) __kmp_affinity_dispatch->deallocate_mask(ptr) -#define 
KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr) -#define KMP_CPU_FREE_FROM_STACK(ptr) KMP_CPU_FREE(ptr) -#define KMP_CPU_INTERNAL_ALLOC(ptr) KMP_CPU_ALLOC(ptr) -#define KMP_CPU_INTERNAL_FREE(ptr) KMP_CPU_FREE(ptr) -#define KMP_CPU_INDEX(arr, i) __kmp_affinity_dispatch->index_mask_array(arr, i) -#define KMP_CPU_ALLOC_ARRAY(arr, n) \ - (arr = __kmp_affinity_dispatch->allocate_mask_array(n)) -#define KMP_CPU_FREE_ARRAY(arr, n) \ - __kmp_affinity_dispatch->deallocate_mask_array(arr) -#define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) KMP_CPU_ALLOC_ARRAY(arr, n) -#define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_CPU_FREE_ARRAY(arr, n) -#define __kmp_get_system_affinity(mask, abort_bool) \ - (mask)->get_system_affinity(abort_bool) -#define __kmp_set_system_affinity(mask, abort_bool) \ - (mask)->set_system_affinity(abort_bool) -#define __kmp_get_proc_group(mask) (mask)->get_proc_group() - -class KMPAffinity { -public: - class Mask { - public: - void *operator new(size_t n); - void operator delete(void *p); - void *operator new[](size_t n); - void operator delete[](void *p); - virtual ~Mask() {} - // Set bit i to 1 - virtual void set(int i) {} - // Return bit i - virtual bool is_set(int i) const { return false; } - // Set bit i to 0 - virtual void clear(int i) {} - // Zero out entire mask - virtual void zero() {} - // Copy src into this mask - virtual void copy(const Mask *src) {} - // this &= rhs - virtual void bitwise_and(const Mask *rhs) {} - // this |= rhs - virtual void bitwise_or(const Mask *rhs) {} - // this = ~this - virtual void bitwise_not() {} - // API for iterating over an affinity mask - // for (int i = mask->begin(); i != mask->end(); i = mask->next(i)) - virtual int begin() const { return 0; } - virtual int end() const { return 0; } - virtual int next(int previous) const { return 0; } - // Set the system's affinity to this affinity mask's value - virtual int set_system_affinity(bool abort_on_error) const { return -1; } - // Set this affinity mask to the current system affinity - virtual int get_system_affinity(bool abort_on_error) { return -1; } - // Only 1 DWORD in the mask should have any procs set. - // Return the appropriate index, or -1 for an invalid mask. - virtual int get_proc_group() const { return -1; } - }; - void *operator new(size_t n); - void operator delete(void *p); - // Need virtual destructor - virtual ~KMPAffinity() = default; - // Determine if affinity is capable - virtual void determine_capable(const char *env_var) {} - // Bind the current thread to os proc - virtual void bind_thread(int proc) {} - // Factory functions to allocate/deallocate a mask - virtual Mask *allocate_mask() { return nullptr; } - virtual void deallocate_mask(Mask *m) {} - virtual Mask *allocate_mask_array(int num) { return nullptr; } - virtual void deallocate_mask_array(Mask *m) {} - virtual Mask *index_mask_array(Mask *m, int index) { return nullptr; } - static void pick_api(); - static void destroy_api(); - enum api_type { - NATIVE_OS -#if KMP_USE_HWLOC - , - HWLOC -#endif - }; - virtual api_type get_api_type() const { - KMP_ASSERT(0); - return NATIVE_OS; - } - -private: - static bool picked_api; -}; - -typedef KMPAffinity::Mask kmp_affin_mask_t; -extern KMPAffinity *__kmp_affinity_dispatch; - -// Declare local char buffers with this size for printing debug and info -// messages, using __kmp_affinity_print_mask(). 
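[Editorial note] Taken together, the KMP_CPU_* veneer and the KMPAffinity::Mask interface deleted above let the rest of the runtime stay agnostic of whether the native OS API or hwloc sits underneath. A minimal usage sketch (the variable names and the assumption of at least 8 procs are illustrative):

    /* Sketch only: build a mask for even-numbered procs and apply it. */
    kmp_affin_mask_t *mask;
    KMP_CPU_ALLOC(mask);
    KMP_CPU_ZERO(mask);
    for (int proc = 0; proc < 8; proc += 2) /* assume >= 8 procs here */
      KMP_CPU_SET(proc, mask);
    __kmp_set_system_affinity(mask, /*abort_on_error=*/true);
    KMP_CPU_FREE(mask);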
-#define KMP_AFFIN_MASK_PRINT_LEN 1024
-
-enum affinity_type {
-  affinity_none = 0,
-  affinity_physical,
-  affinity_logical,
-  affinity_compact,
-  affinity_scatter,
-  affinity_explicit,
-  affinity_balanced,
-  affinity_disabled, // not used outside the env var parser
-  affinity_default
-};
-
-enum affinity_gran {
-  affinity_gran_fine = 0,
-  affinity_gran_thread,
-  affinity_gran_core,
-  affinity_gran_tile,
-  affinity_gran_numa,
-  affinity_gran_package,
-  affinity_gran_node,
-#if KMP_GROUP_AFFINITY
-  // The "group" granularity isn't necessarily coarser than all of the
-  // other levels, but we put it last in the enum.
-  affinity_gran_group,
-#endif /* KMP_GROUP_AFFINITY */
-  affinity_gran_default
-};
-
-enum affinity_top_method {
-  affinity_top_method_all = 0, // try all (supported) methods, in order
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-  affinity_top_method_apicid,
-  affinity_top_method_x2apicid,
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-  affinity_top_method_cpuinfo, // KMP_CPUINFO_FILE is usable on Windows* OS, too
-#if KMP_GROUP_AFFINITY
-  affinity_top_method_group,
-#endif /* KMP_GROUP_AFFINITY */
-  affinity_top_method_flat,
-#if KMP_USE_HWLOC
-  affinity_top_method_hwloc,
-#endif
-  affinity_top_method_default
-};
-
-#define affinity_respect_mask_default (-1)
-
-extern enum affinity_type __kmp_affinity_type; /* Affinity type */
-extern enum affinity_gran __kmp_affinity_gran; /* Affinity granularity */
-extern int __kmp_affinity_gran_levels; /* corresponding int value */
-extern int __kmp_affinity_dups; /* Affinity duplicate masks */
-extern enum affinity_top_method __kmp_affinity_top_method;
-extern int __kmp_affinity_compact; /* Affinity 'compact' value */
-extern int __kmp_affinity_offset; /* Affinity offset value */
-extern int __kmp_affinity_verbose; /* Was verbose specified for KMP_AFFINITY? */
-extern int __kmp_affinity_warnings; /* KMP_AFFINITY warnings enabled ? */
-extern int __kmp_affinity_respect_mask; // Respect process' init affinity mask?
-extern char *__kmp_affinity_proclist; /* proc ID list */
-extern kmp_affin_mask_t *__kmp_affinity_masks;
-extern unsigned __kmp_affinity_num_masks;
-extern void __kmp_affinity_bind_thread(int which);
-
-extern kmp_affin_mask_t *__kmp_affin_fullMask;
-extern char *__kmp_cpuinfo_file;
-
-#endif /* KMP_AFFINITY_SUPPORTED */
-
-#if OMP_40_ENABLED
-
-// This needs to be kept in sync with the values in omp.h !!!
-typedef enum kmp_proc_bind_t {
-  proc_bind_false = 0,
-  proc_bind_true,
-  proc_bind_master,
-  proc_bind_close,
-  proc_bind_spread,
-  proc_bind_intel, // use KMP_AFFINITY interface
-  proc_bind_default
-} kmp_proc_bind_t;
-
-typedef struct kmp_nested_proc_bind_t {
-  kmp_proc_bind_t *bind_types;
-  int size;
-  int used;
-} kmp_nested_proc_bind_t;
-
-extern kmp_nested_proc_bind_t __kmp_nested_proc_bind;
-
-#endif /* OMP_40_ENABLED */
-
-#if OMP_50_ENABLED
-extern int __kmp_display_affinity;
-extern char *__kmp_affinity_format;
-static const size_t KMP_AFFINITY_FORMAT_SIZE = 512;
-#endif // OMP_50_ENABLED
-
-#if KMP_AFFINITY_SUPPORTED
-#define KMP_PLACE_ALL (-1)
-#define KMP_PLACE_UNDEFINED (-2)
-// Is KMP_AFFINITY being used instead of OMP_PROC_BIND/OMP_PLACES?
-#define KMP_AFFINITY_NON_PROC_BIND \ - ((__kmp_nested_proc_bind.bind_types[0] == proc_bind_false || \ - __kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) && \ - (__kmp_affinity_num_masks > 0 || __kmp_affinity_type == affinity_balanced)) -#endif /* KMP_AFFINITY_SUPPORTED */ - -extern int __kmp_affinity_num_places; - -#if OMP_40_ENABLED -typedef enum kmp_cancel_kind_t { - cancel_noreq = 0, - cancel_parallel = 1, - cancel_loop = 2, - cancel_sections = 3, - cancel_taskgroup = 4 -} kmp_cancel_kind_t; -#endif // OMP_40_ENABLED - -// KMP_HW_SUBSET support: -typedef struct kmp_hws_item { - int num; - int offset; -} kmp_hws_item_t; - -extern kmp_hws_item_t __kmp_hws_socket; -extern kmp_hws_item_t __kmp_hws_node; -extern kmp_hws_item_t __kmp_hws_tile; -extern kmp_hws_item_t __kmp_hws_core; -extern kmp_hws_item_t __kmp_hws_proc; -extern int __kmp_hws_requested; -extern int __kmp_hws_abs_flag; // absolute or per-item number requested - -/* ------------------------------------------------------------------------ */ - -#define KMP_PAD(type, sz) \ - (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1)) - -// We need to avoid using -1 as a GTID as +1 is added to the gtid -// when storing it in a lock, and the value 0 is reserved. -#define KMP_GTID_DNE (-2) /* Does not exist */ -#define KMP_GTID_SHUTDOWN (-3) /* Library is shutting down */ -#define KMP_GTID_MONITOR (-4) /* Monitor thread ID */ -#define KMP_GTID_UNKNOWN (-5) /* Is not known */ -#define KMP_GTID_MIN (-6) /* Minimal gtid for low bound check in DEBUG */ - -#if OMP_50_ENABLED -/* OpenMP 5.0 Memory Management support */ -extern int __kmp_memkind_available; -extern int __kmp_hbw_mem_available; -typedef void *omp_allocator_t; -extern const omp_allocator_t *OMP_NULL_ALLOCATOR; -extern const omp_allocator_t *omp_default_mem_alloc; -extern const omp_allocator_t *omp_large_cap_mem_alloc; -extern const omp_allocator_t *omp_const_mem_alloc; -extern const omp_allocator_t *omp_high_bw_mem_alloc; -extern const omp_allocator_t *omp_low_lat_mem_alloc; -extern const omp_allocator_t *omp_cgroup_mem_alloc; -extern const omp_allocator_t *omp_pteam_mem_alloc; -extern const omp_allocator_t *omp_thread_mem_alloc; -extern const omp_allocator_t *__kmp_def_allocator; - -extern void __kmpc_set_default_allocator(int gtid, const omp_allocator_t *al); -extern const omp_allocator_t *__kmpc_get_default_allocator(int gtid); -extern void *__kmpc_alloc(int gtid, size_t sz, const omp_allocator_t *al); -extern void __kmpc_free(int gtid, void *ptr, const omp_allocator_t *al); - -extern void __kmp_init_memkind(); -extern void __kmp_fini_memkind(); -#endif // OMP_50_ENABLED - -/* ------------------------------------------------------------------------ */ - -#define KMP_UINT64_MAX \ - (~((kmp_uint64)1 << ((sizeof(kmp_uint64) * (1 << 3)) - 1))) - -#define KMP_MIN_NTH 1 - -#ifndef KMP_MAX_NTH -#if defined(PTHREAD_THREADS_MAX) && PTHREAD_THREADS_MAX < INT_MAX -#define KMP_MAX_NTH PTHREAD_THREADS_MAX -#else -#define KMP_MAX_NTH INT_MAX -#endif -#endif /* KMP_MAX_NTH */ - -#ifdef PTHREAD_STACK_MIN -#define KMP_MIN_STKSIZE PTHREAD_STACK_MIN -#else -#define KMP_MIN_STKSIZE ((size_t)(32 * 1024)) -#endif - -#define KMP_MAX_STKSIZE (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1))) - -#if KMP_ARCH_X86 -#define KMP_DEFAULT_STKSIZE ((size_t)(2 * 1024 * 1024)) -#elif KMP_ARCH_X86_64 -#define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024)) -#define KMP_BACKUP_STKSIZE ((size_t)(2 * 1024 * 1024)) -#else -#define KMP_DEFAULT_STKSIZE ((size_t)(1024 * 1024)) -#endif - -#define 
KMP_DEFAULT_MALLOC_POOL_INCR ((size_t)(1024 * 1024)) -#define KMP_MIN_MALLOC_POOL_INCR ((size_t)(4 * 1024)) -#define KMP_MAX_MALLOC_POOL_INCR \ - (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1))) - -#define KMP_MIN_STKOFFSET (0) -#define KMP_MAX_STKOFFSET KMP_MAX_STKSIZE -#if KMP_OS_DARWIN -#define KMP_DEFAULT_STKOFFSET KMP_MIN_STKOFFSET -#else -#define KMP_DEFAULT_STKOFFSET CACHE_LINE -#endif - -#define KMP_MIN_STKPADDING (0) -#define KMP_MAX_STKPADDING (2 * 1024 * 1024) - -#define KMP_BLOCKTIME_MULTIPLIER \ - (1000) /* number of blocktime units per second */ -#define KMP_MIN_BLOCKTIME (0) -#define KMP_MAX_BLOCKTIME \ - (INT_MAX) /* Must be this for "infinite" setting the work */ -#define KMP_DEFAULT_BLOCKTIME (200) /* __kmp_blocktime is in milliseconds */ - -#if KMP_USE_MONITOR -#define KMP_DEFAULT_MONITOR_STKSIZE ((size_t)(64 * 1024)) -#define KMP_MIN_MONITOR_WAKEUPS (1) // min times monitor wakes up per second -#define KMP_MAX_MONITOR_WAKEUPS (1000) // max times monitor can wake up per sec - -/* Calculate new number of monitor wakeups for a specific block time based on - previous monitor_wakeups. Only allow increasing number of wakeups */ -#define KMP_WAKEUPS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \ - (((blocktime) == KMP_MAX_BLOCKTIME) \ - ? (monitor_wakeups) \ - : ((blocktime) == KMP_MIN_BLOCKTIME) \ - ? KMP_MAX_MONITOR_WAKEUPS \ - : ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER / (blocktime))) \ - ? (monitor_wakeups) \ - : (KMP_BLOCKTIME_MULTIPLIER) / (blocktime)) - -/* Calculate number of intervals for a specific block time based on - monitor_wakeups */ -#define KMP_INTERVALS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \ - (((blocktime) + (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) - 1) / \ - (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups))) -#else -#define KMP_BLOCKTIME(team, tid) \ - (get__bt_set(team, tid) ? get__blocktime(team, tid) : __kmp_dflt_blocktime) -#if KMP_OS_UNIX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) -// HW TSC is used to reduce overhead (clock tick instead of nanosecond). -extern kmp_uint64 __kmp_ticks_per_msec; -#if KMP_COMPILER_ICC -#define KMP_NOW() ((kmp_uint64)_rdtsc()) -#else -#define KMP_NOW() __kmp_hardware_timestamp() -#endif -#define KMP_NOW_MSEC() (KMP_NOW() / __kmp_ticks_per_msec) -#define KMP_BLOCKTIME_INTERVAL(team, tid) \ - (KMP_BLOCKTIME(team, tid) * __kmp_ticks_per_msec) -#define KMP_BLOCKING(goal, count) ((goal) > KMP_NOW()) -#else -// System time is retrieved sporadically while blocking. 
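[Editorial note] To make the monitor arithmetic above concrete: with the default KMP_BLOCKTIME of 200 ms and an initial monitor_wakeups of 1, KMP_WAKEUPS_FROM_BLOCKTIME yields 1000/200 = 5 wakeups per second, and KMP_INTERVALS_FROM_BLOCKTIME then charges the 200 ms budget as (200 + 200 - 1) / 200 = 1 monitor interval. A self-contained check of that arithmetic, with the macros rewritten as functions purely for clarity (names here are illustrative):

    /* Sketch only: the monitor-wakeup arithmetic, expressed as functions. */
    #include <limits.h>
    #include <stdio.h>
    #define MULT 1000 /* KMP_BLOCKTIME_MULTIPLIER */
    static int wakeups(int bt, int mw) {
      if (bt == INT_MAX) return mw;             /* "infinite" blocktime */
      if (bt == 0) return 1000;                 /* KMP_MAX_MONITOR_WAKEUPS */
      return (mw > MULT / bt) ? mw : MULT / bt; /* only allow increases */
    }
    static int intervals(int bt, int mw) {
      return (bt + MULT / mw - 1) / (MULT / mw); /* ceiling division */
    }
    int main(void) {
      int mw = wakeups(200, 1);
      printf("%d wakeups/s, %d interval(s)\n", mw, intervals(200, mw)); /* 5, 1 */
      return 0;
    }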
-extern kmp_uint64 __kmp_now_nsec();
-#define KMP_NOW() __kmp_now_nsec()
-#define KMP_NOW_MSEC() (KMP_NOW() / KMP_USEC_PER_SEC)
-#define KMP_BLOCKTIME_INTERVAL(team, tid) \
-  (KMP_BLOCKTIME(team, tid) * KMP_USEC_PER_SEC)
-#define KMP_BLOCKING(goal, count) ((count) % 1000 != 0 || (goal) > KMP_NOW())
-#endif
-#define KMP_YIELD_NOW() \
-  (KMP_NOW_MSEC() / KMP_MAX(__kmp_dflt_blocktime, 1) % \
-       (__kmp_yield_on_count + __kmp_yield_off_count) < \
-   (kmp_uint32)__kmp_yield_on_count)
-#endif // KMP_USE_MONITOR
-
-#define KMP_MIN_STATSCOLS 40
-#define KMP_MAX_STATSCOLS 4096
-#define KMP_DEFAULT_STATSCOLS 80
-
-#define KMP_MIN_INTERVAL 0
-#define KMP_MAX_INTERVAL (INT_MAX - 1)
-#define KMP_DEFAULT_INTERVAL 0
-
-#define KMP_MIN_CHUNK 1
-#define KMP_MAX_CHUNK (INT_MAX - 1)
-#define KMP_DEFAULT_CHUNK 1
-
-#define KMP_MIN_INIT_WAIT 1
-#define KMP_MAX_INIT_WAIT (INT_MAX / 2)
-#define KMP_DEFAULT_INIT_WAIT 2048U
-
-#define KMP_MIN_NEXT_WAIT 1
-#define KMP_MAX_NEXT_WAIT (INT_MAX / 2)
-#define KMP_DEFAULT_NEXT_WAIT 1024U
-
-#define KMP_DFLT_DISP_NUM_BUFF 7
-#define KMP_MAX_ORDERED 8
-
-#define KMP_MAX_FIELDS 32
-
-#define KMP_MAX_BRANCH_BITS 31
-
-#define KMP_MAX_ACTIVE_LEVELS_LIMIT INT_MAX
-
-#define KMP_MAX_DEFAULT_DEVICE_LIMIT INT_MAX
-
-#define KMP_MAX_TASK_PRIORITY_LIMIT INT_MAX
-
-/* Minimum number of threads before switch to TLS gtid (experimentally
-   determined) */
-/* josh TODO: what about OS X* tuning? */
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-#define KMP_TLS_GTID_MIN 5
-#else
-#define KMP_TLS_GTID_MIN INT_MAX
-#endif
-
-#define KMP_MASTER_TID(tid) ((tid) == 0)
-#define KMP_WORKER_TID(tid) ((tid) != 0)
-
-#define KMP_MASTER_GTID(gtid) (__kmp_tid_from_gtid((gtid)) == 0)
-#define KMP_WORKER_GTID(gtid) (__kmp_tid_from_gtid((gtid)) != 0)
-#define KMP_INITIAL_GTID(gtid) ((gtid) == 0)
-
-#ifndef TRUE
-#define FALSE 0
-#define TRUE (!FALSE)
-#endif
-
-/* NOTE: all of the following constants must be even */
-
-#if KMP_OS_WINDOWS
-#define KMP_INIT_WAIT 64U /* initial number of spin-tests */
-#define KMP_NEXT_WAIT 32U /* subsequent number of spin-tests */
-#elif KMP_OS_CNK
-#define KMP_INIT_WAIT 16U /* initial number of spin-tests */
-#define KMP_NEXT_WAIT 8U /* subsequent number of spin-tests */
-#elif KMP_OS_LINUX
-#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
-#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
-#elif KMP_OS_DARWIN
-/* TODO: tune for KMP_OS_DARWIN */
-#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
-#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
-#elif KMP_OS_DRAGONFLY
-/* TODO: tune for KMP_OS_DRAGONFLY */
-#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
-#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
-#elif KMP_OS_FREEBSD
-/* TODO: tune for KMP_OS_FREEBSD */
-#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
-#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
-#elif KMP_OS_NETBSD
-/* TODO: tune for KMP_OS_NETBSD */
-#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
-#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
-#elif KMP_OS_HURD
-/* TODO: tune for KMP_OS_HURD */
-#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
-#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
-#elif KMP_OS_OPENBSD
-/* TODO: tune for KMP_OS_OPENBSD */
-#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
-#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
-#endif
-
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-typedef struct 
kmp_cpuid { - kmp_uint32 eax; - kmp_uint32 ebx; - kmp_uint32 ecx; - kmp_uint32 edx; -} kmp_cpuid_t; -extern void __kmp_x86_cpuid(int mode, int mode2, struct kmp_cpuid *p); -#if KMP_ARCH_X86 -extern void __kmp_x86_pause(void); -#elif KMP_MIC -// Performance testing on KNC (C0QS-7120 P/A/X/D, 61-core, 16 GB Memory) showed -// regression after removal of extra PAUSE from KMP_YIELD_SPIN(). Changing -// the delay from 100 to 300 showed even better performance than double PAUSE -// on Spec OMP2001 and LCPC tasking tests, no regressions on EPCC. -static inline void __kmp_x86_pause(void) { _mm_delay_32(300); } -#else -static inline void __kmp_x86_pause(void) { _mm_pause(); } -#endif -#define KMP_CPU_PAUSE() __kmp_x86_pause() -#elif KMP_ARCH_PPC64 -#define KMP_PPC64_PRI_LOW() __asm__ volatile("or 1, 1, 1") -#define KMP_PPC64_PRI_MED() __asm__ volatile("or 2, 2, 2") -#define KMP_PPC64_PRI_LOC_MB() __asm__ volatile("" : : : "memory") -#define KMP_CPU_PAUSE() \ - do { \ - KMP_PPC64_PRI_LOW(); \ - KMP_PPC64_PRI_MED(); \ - KMP_PPC64_PRI_LOC_MB(); \ - } while (0) -#else -#define KMP_CPU_PAUSE() /* nothing to do */ -#endif - -#define KMP_INIT_YIELD(count) \ - { (count) = __kmp_yield_init; } - -#define KMP_YIELD(cond) \ - { \ - KMP_CPU_PAUSE(); \ - __kmp_yield((cond)); \ - } - -// Note the decrement of 2 in the following Macros. With KMP_LIBRARY=turnaround, -// there should be no yielding since initial value from KMP_INIT_YIELD() is odd. - -#define KMP_YIELD_WHEN(cond, count) \ - { \ - KMP_CPU_PAUSE(); \ - (count) -= 2; \ - if (!(count)) { \ - __kmp_yield(cond); \ - (count) = __kmp_yield_next; \ - } \ - } -#define KMP_YIELD_SPIN(count) \ - { \ - KMP_CPU_PAUSE(); \ - (count) -= 2; \ - if (!(count)) { \ - __kmp_yield(1); \ - (count) = __kmp_yield_next; \ - } \ - } - -/* ------------------------------------------------------------------------ */ -/* Support datatypes for the orphaned construct nesting checks. 
*/ -/* ------------------------------------------------------------------------ */ - -enum cons_type { - ct_none, - ct_parallel, - ct_pdo, - ct_pdo_ordered, - ct_psections, - ct_psingle, - - /* the following must be left in order and not split up */ - ct_taskq, - ct_task, // really task inside non-ordered taskq, considered worksharing type - ct_task_ordered, /* really task inside ordered taskq, considered a worksharing - type */ - /* the preceding must be left in order and not split up */ - - ct_critical, - ct_ordered_in_parallel, - ct_ordered_in_pdo, - ct_ordered_in_taskq, - ct_master, - ct_reduce, - ct_barrier -}; - -/* test to see if we are in a taskq construct */ -#define IS_CONS_TYPE_TASKQ(ct) \ - (((int)(ct)) >= ((int)ct_taskq) && ((int)(ct)) <= ((int)ct_task_ordered)) -#define IS_CONS_TYPE_ORDERED(ct) \ - ((ct) == ct_pdo_ordered || (ct) == ct_task_ordered) - -struct cons_data { - ident_t const *ident; - enum cons_type type; - int prev; - kmp_user_lock_p - name; /* address exclusively for critical section name comparison */ -}; - -struct cons_header { - int p_top, w_top, s_top; - int stack_size, stack_top; - struct cons_data *stack_data; -}; - -struct kmp_region_info { - char *text; - int offset[KMP_MAX_FIELDS]; - int length[KMP_MAX_FIELDS]; -}; - -/* ---------------------------------------------------------------------- */ -/* ---------------------------------------------------------------------- */ - -#if KMP_OS_WINDOWS -typedef HANDLE kmp_thread_t; -typedef DWORD kmp_key_t; -#endif /* KMP_OS_WINDOWS */ - -#if KMP_OS_UNIX -typedef pthread_t kmp_thread_t; -typedef pthread_key_t kmp_key_t; -#endif - -extern kmp_key_t __kmp_gtid_threadprivate_key; - -typedef struct kmp_sys_info { - long maxrss; /* the maximum resident set size utilized (in kilobytes) */ - long minflt; /* the number of page faults serviced without any I/O */ - long majflt; /* the number of page faults serviced that required I/O */ - long nswap; /* the number of times a process was "swapped" out of memory */ - long inblock; /* the number of times the file system had to perform input */ - long oublock; /* the number of times the file system had to perform output */ - long nvcsw; /* the number of times a context switch was voluntarily */ - long nivcsw; /* the number of times a context switch was forced */ -} kmp_sys_info_t; - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 -typedef struct kmp_cpuinfo { - int initialized; // If 0, other fields are not initialized. - int signature; // CPUID(1).EAX - int family; // CPUID(1).EAX[27:20]+CPUID(1).EAX[11:8] (Extended Family+Family) - int model; // ( CPUID(1).EAX[19:16] << 4 ) + CPUID(1).EAX[7:4] ( ( Extended - // Model << 4 ) + Model) - int stepping; // CPUID(1).EAX[3:0] ( Stepping ) - int sse2; // 0 if SSE2 instructions are not supported, 1 otherwise. - int rtm; // 0 if RTM instructions are not supported, 1 otherwise. - int cpu_stackoffset; - int apic_id; - int physical_id; - int logical_id; - kmp_uint64 frequency; // Nominal CPU frequency in Hz. - char name[3 * sizeof(kmp_cpuid_t)]; // CPUID(0x80000002,0x80000003,0x80000004) -} kmp_cpuinfo_t; -#endif - -#if USE_ITT_BUILD -// We cannot include "kmp_itt.h" due to circular dependency. Declare the only -// required type here. Later we will check the type meets requirements. 
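[Editorial note] In the cons_header deleted above, the three *_top cursors (p_top, w_top, s_top) are all indices into the same stack_data array, each tracking the innermost entry of its kind (parallel, worksharing, synchronization); the prev field chains entries of one kind through the shared stack. A minimal sketch of how a push might maintain those invariants (illustrative only; stack growth and locking are elided, and the real logic lives in kmp_error.cpp):

    /* Sketch only: pushing a worksharing entry onto the shared cons stack. */
    static void push_ws(struct cons_header *p, enum cons_type ct,
                        ident_t const *ident) {
      int tos = ++p->stack_top;           /* grow the shared stack */
      p->stack_data[tos].type = ct;
      p->stack_data[tos].ident = ident;
      p->stack_data[tos].prev = p->w_top; /* chain to previous ws entry */
      p->w_top = tos;                     /* new innermost ws construct */
    }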
-typedef int kmp_itt_mark_t; -#define KMP_ITT_DEBUG 0 -#endif /* USE_ITT_BUILD */ - -/* Taskq data structures */ - -#define HIGH_WATER_MARK(nslots) (((nslots)*3) / 4) -// num thunks that each thread can simultaneously execute from a task queue -#define __KMP_TASKQ_THUNKS_PER_TH 1 - -/* flags for taskq_global_flags, kmp_task_queue_t tq_flags, kmpc_thunk_t - th_flags */ - -#define TQF_IS_ORDERED 0x0001 // __kmpc_taskq interface, taskq ordered -// __kmpc_taskq interface, taskq with lastprivate list -#define TQF_IS_LASTPRIVATE 0x0002 -#define TQF_IS_NOWAIT 0x0004 // __kmpc_taskq interface, end taskq nowait -// __kmpc_taskq interface, use heuristics to decide task queue size -#define TQF_HEURISTICS 0x0008 - -// __kmpc_taskq interface, reserved for future use -#define TQF_INTERFACE_RESERVED1 0x0010 -// __kmpc_taskq interface, reserved for future use -#define TQF_INTERFACE_RESERVED2 0x0020 -// __kmpc_taskq interface, reserved for future use -#define TQF_INTERFACE_RESERVED3 0x0040 -// __kmpc_taskq interface, reserved for future use -#define TQF_INTERFACE_RESERVED4 0x0080 - -#define TQF_INTERFACE_FLAGS 0x00ff // all the __kmpc_taskq interface flags -// internal/read by instrumentation; only used with TQF_IS_LASTPRIVATE -#define TQF_IS_LAST_TASK 0x0100 -// internal use only; this thunk->th_task is the taskq_task -#define TQF_TASKQ_TASK 0x0200 -// internal use only; must release worker threads once ANY queued task -// exists (global) -#define TQF_RELEASE_WORKERS 0x0400 -// internal use only; notify workers that master has finished enqueuing tasks -#define TQF_ALL_TASKS_QUEUED 0x0800 -// internal use only: this queue encountered in parallel context: not serialized -#define TQF_PARALLEL_CONTEXT 0x1000 -// internal use only; this queue is on the freelist and not in use -#define TQF_DEALLOCATED 0x2000 - -#define TQF_INTERNAL_FLAGS 0x3f00 // all the internal use only flags - -typedef struct KMP_ALIGN_CACHE kmpc_aligned_int32_t { - kmp_int32 ai_data; -} kmpc_aligned_int32_t; - -typedef struct KMP_ALIGN_CACHE kmpc_aligned_queue_slot_t { - struct kmpc_thunk_t *qs_thunk; -} kmpc_aligned_queue_slot_t; - -typedef struct kmpc_task_queue_t { - /* task queue linkage fields for n-ary tree of queues (locked with global - taskq_tree_lck) */ - kmp_lock_t tq_link_lck; /* lock for child link, child next/prev links and - child ref counts */ - union { - struct kmpc_task_queue_t *tq_parent; // pointer to parent taskq, not locked - // for taskq internal freelists, locked with global taskq_freelist_lck - struct kmpc_task_queue_t *tq_next_free; - } tq; - // pointer to linked-list of children, locked by tq's tq_link_lck - volatile struct kmpc_task_queue_t *tq_first_child; - // next child in linked-list, locked by parent tq's tq_link_lck - struct kmpc_task_queue_t *tq_next_child; - // previous child in linked-list, locked by parent tq's tq_link_lck - struct kmpc_task_queue_t *tq_prev_child; - // reference count of threads with access to this task queue - volatile kmp_int32 tq_ref_count; - /* (other than the thread executing the kmpc_end_taskq call) */ - /* locked by parent tq's tq_link_lck */ - - /* shared data for task queue */ - /* per-thread array of pointers to shared variable structures */ - struct kmpc_aligned_shared_vars_t *tq_shareds; - /* only one array element exists for all but outermost taskq */ - - /* bookkeeping for ordered task queue */ - kmp_uint32 tq_tasknum_queuing; // ordered task # assigned while queuing tasks - // ordered number of next task to be served (executed) - volatile kmp_uint32 tq_tasknum_serving; 
- - /* thunk storage management for task queue */ - kmp_lock_t tq_free_thunks_lck; /* lock for thunk freelist manipulation */ - // thunk freelist, chained via th.th_next_free - struct kmpc_thunk_t *tq_free_thunks; - // space allocated for thunks for this task queue - struct kmpc_thunk_t *tq_thunk_space; - - /* data fields for queue itself */ - kmp_lock_t tq_queue_lck; /* lock for [de]enqueue operations: tq_queue, - tq_head, tq_tail, tq_nfull */ - /* array of queue slots to hold thunks for tasks */ - kmpc_aligned_queue_slot_t *tq_queue; - volatile struct kmpc_thunk_t *tq_taskq_slot; /* special slot for taskq task - thunk, occupied if not NULL */ - kmp_int32 tq_nslots; /* # of tq_thunk_space thunks alloc'd (not incl. - tq_taskq_slot space) */ - kmp_int32 tq_head; // enqueue puts item here (index into tq_queue array) - kmp_int32 tq_tail; // dequeue takes item from here (index into tq_queue array) - volatile kmp_int32 tq_nfull; // # of occupied entries in task queue right now - kmp_int32 tq_hiwat; /* high-water mark for tq_nfull and queue scheduling */ - volatile kmp_int32 tq_flags; /* TQF_xxx */ - - /* bookkeeping for outstanding thunks */ - - /* per-thread array for # of regular thunks currently being executed */ - struct kmpc_aligned_int32_t *tq_th_thunks; - kmp_int32 tq_nproc; /* number of thunks in the th_thunks array */ - - /* statistics library bookkeeping */ - ident_t *tq_loc; /* source location information for taskq directive */ -} kmpc_task_queue_t; - -typedef void (*kmpc_task_t)(kmp_int32 global_tid, struct kmpc_thunk_t *thunk); - -/* sizeof_shareds passed as arg to __kmpc_taskq call */ -typedef struct kmpc_shared_vars_t { /* aligned during dynamic allocation */ - kmpc_task_queue_t *sv_queue; /* (pointers to) shared vars */ -} kmpc_shared_vars_t; - -typedef struct KMP_ALIGN_CACHE kmpc_aligned_shared_vars_t { - volatile struct kmpc_shared_vars_t *ai_data; -} kmpc_aligned_shared_vars_t; - -/* sizeof_thunk passed as arg to kmpc_taskq call */ -typedef struct kmpc_thunk_t { /* aligned during dynamic allocation */ - union { /* field used for internal freelists too */ - kmpc_shared_vars_t *th_shareds; - struct kmpc_thunk_t *th_next_free; /* freelist of individual thunks within - queue, head at tq_free_thunks */ - } th; - kmpc_task_t th_task; /* taskq_task if flags & TQF_TASKQ_TASK */ - struct kmpc_thunk_t *th_encl_thunk; /* pointer to dynamically enclosing thunk - on this thread's call stack */ - // TQF_xxx(tq_flags interface plus possible internal flags) - kmp_int32 th_flags; - - kmp_int32 th_status; - kmp_uint32 th_tasknum; /* task number assigned in order of queuing, used for - ordered sections */ - /* private vars */ -} kmpc_thunk_t; - -typedef struct KMP_ALIGN_CACHE kmp_taskq { - int tq_curr_thunk_capacity; - - kmpc_task_queue_t *tq_root; - kmp_int32 tq_global_flags; - - kmp_lock_t tq_freelist_lck; - kmpc_task_queue_t *tq_freelist; - - kmpc_thunk_t **tq_curr_thunk; -} kmp_taskq_t; - -/* END Taskq data structures */ - -typedef kmp_int32 kmp_critical_name[8]; - -/*! -@ingroup PARALLEL -The type for a microtask which gets passed to @ref __kmpc_fork_call(). -The arguments to the outlined function are -@param global_tid the global thread identity of the thread executing the -function. -@param bound_tid the local identitiy of the thread executing the function -@param ... pointers to shared variables accessed by the function. 
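A minimal sketch of the outlined function shape the compiler generates for this
signature (the function name, the do_work helper and the shared variable are
illustrative, not part of this header):
@code
// Sketch only: outlined body of "#pragma omp parallel shared(n)".
void outlined_region(kmp_int32 *global_tid, kmp_int32 *bound_tid, int *n) {
  // Shared variables arrive as trailing pointer arguments.
  do_work(*global_tid, *n); // do_work is a stand-in for the region body
}
// Invoked roughly as: __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined_region, &n);
@endcode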
-*/ -typedef void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...); -typedef void (*kmpc_micro_bound)(kmp_int32 *bound_tid, kmp_int32 *bound_nth, - ...); - -/*! -@ingroup THREADPRIVATE -@{ -*/ -/* --------------------------------------------------------------------------- - */ -/* Threadprivate initialization/finalization function declarations */ - -/* for non-array objects: __kmpc_threadprivate_register() */ - -/*! - Pointer to the constructor function. - The first argument is the this pointer -*/ -typedef void *(*kmpc_ctor)(void *); - -/*! - Pointer to the destructor function. - The first argument is the this pointer -*/ -typedef void (*kmpc_dtor)( - void * /*, size_t */); /* 2nd arg: magic number for KCC unused by Intel - compiler */ -/*! - Pointer to an alternate constructor. - The first argument is the this pointer. -*/ -typedef void *(*kmpc_cctor)(void *, void *); - -/* for array objects: __kmpc_threadprivate_register_vec() */ -/* First arg: "this" pointer */ -/* Last arg: number of array elements */ -/*! - Array constructor. - First argument is the this pointer - Second argument the number of array elements. -*/ -typedef void *(*kmpc_ctor_vec)(void *, size_t); -/*! - Pointer to the array destructor function. - The first argument is the this pointer - Second argument the number of array elements. -*/ -typedef void (*kmpc_dtor_vec)(void *, size_t); -/*! - Array constructor. - First argument is the this pointer - Third argument the number of array elements. -*/ -typedef void *(*kmpc_cctor_vec)(void *, void *, - size_t); /* function unused by compiler */ - -/*! -@} -*/ - -/* keeps tracked of threadprivate cache allocations for cleanup later */ -typedef struct kmp_cached_addr { - void **addr; /* address of allocated cache */ - void ***compiler_cache; /* pointer to compiler's cache */ - void *data; /* pointer to global data */ - struct kmp_cached_addr *next; /* pointer to next cached address */ -} kmp_cached_addr_t; - -struct private_data { - struct private_data *next; /* The next descriptor in the list */ - void *data; /* The data buffer for this descriptor */ - int more; /* The repeat count for this descriptor */ - size_t size; /* The data size for this descriptor */ -}; - -struct private_common { - struct private_common *next; - struct private_common *link; - void *gbl_addr; - void *par_addr; /* par_addr == gbl_addr for MASTER thread */ - size_t cmn_size; -}; - -struct shared_common { - struct shared_common *next; - struct private_data *pod_init; - void *obj_init; - void *gbl_addr; - union { - kmpc_ctor ctor; - kmpc_ctor_vec ctorv; - } ct; - union { - kmpc_cctor cctor; - kmpc_cctor_vec cctorv; - } cct; - union { - kmpc_dtor dtor; - kmpc_dtor_vec dtorv; - } dt; - size_t vec_len; - int is_vec; - size_t cmn_size; -}; - -#define KMP_HASH_TABLE_LOG2 9 /* log2 of the hash table size */ -#define KMP_HASH_TABLE_SIZE \ - (1 << KMP_HASH_TABLE_LOG2) /* size of the hash table */ -#define KMP_HASH_SHIFT 3 /* throw away this many low bits from the address */ -#define KMP_HASH(x) \ - ((((kmp_uintptr_t)x) >> KMP_HASH_SHIFT) & (KMP_HASH_TABLE_SIZE - 1)) - -struct common_table { - struct private_common *data[KMP_HASH_TABLE_SIZE]; -}; - -struct shared_table { - struct shared_common *data[KMP_HASH_TABLE_SIZE]; -}; - -/* ------------------------------------------------------------------------ */ - -#if KMP_USE_HIER_SCHED -// Shared barrier data that exists inside a single unit of the scheduling -// hierarchy -typedef struct kmp_hier_private_bdata_t { - kmp_int32 num_active; - kmp_uint64 
index; - kmp_uint64 wait_val[2]; -} kmp_hier_private_bdata_t; -#endif - -typedef struct kmp_sched_flags { - unsigned ordered : 1; - unsigned nomerge : 1; - unsigned contains_last : 1; -#if KMP_USE_HIER_SCHED - unsigned use_hier : 1; - unsigned unused : 28; -#else - unsigned unused : 29; -#endif -} kmp_sched_flags_t; - -KMP_BUILD_ASSERT(sizeof(kmp_sched_flags_t) == 4); - -#if KMP_STATIC_STEAL_ENABLED -typedef struct KMP_ALIGN_CACHE dispatch_private_info32 { - kmp_int32 count; - kmp_int32 ub; - /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */ - kmp_int32 lb; - kmp_int32 st; - kmp_int32 tc; - kmp_int32 static_steal_counter; /* for static_steal only; maybe better to put - after ub */ - - // KMP_ALIGN( 16 ) ensures ( if the KMP_ALIGN macro is turned on ) - // a) parm3 is properly aligned and - // b) all parm1-4 are in the same cache line. - // Because of parm1-4 are used together, performance seems to be better - // if they are in the same line (not measured though). - - struct KMP_ALIGN(32) { // AC: changed 16 to 32 in order to simplify template - kmp_int32 parm1; // structures in kmp_dispatch.cpp. This should - kmp_int32 parm2; // make no real change at least while padding is off. - kmp_int32 parm3; - kmp_int32 parm4; - }; - - kmp_uint32 ordered_lower; - kmp_uint32 ordered_upper; -#if KMP_OS_WINDOWS - // This var can be placed in the hole between 'tc' and 'parm1', instead of - // 'static_steal_counter'. It would be nice to measure execution times. - // Conditional if/endif can be removed at all. - kmp_int32 last_upper; -#endif /* KMP_OS_WINDOWS */ -} dispatch_private_info32_t; - -typedef struct KMP_ALIGN_CACHE dispatch_private_info64 { - kmp_int64 count; // current chunk number for static & static-steal scheduling - kmp_int64 ub; /* upper-bound */ - /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */ - kmp_int64 lb; /* lower-bound */ - kmp_int64 st; /* stride */ - kmp_int64 tc; /* trip count (number of iterations) */ - kmp_int64 static_steal_counter; /* for static_steal only; maybe better to put - after ub */ - - /* parm[1-4] are used in different ways by different scheduling algorithms */ - - // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on ) - // a) parm3 is properly aligned and - // b) all parm1-4 are in the same cache line. - // Because of parm1-4 are used together, performance seems to be better - // if they are in the same line (not measured though). - - struct KMP_ALIGN(32) { - kmp_int64 parm1; - kmp_int64 parm2; - kmp_int64 parm3; - kmp_int64 parm4; - }; - - kmp_uint64 ordered_lower; - kmp_uint64 ordered_upper; -#if KMP_OS_WINDOWS - // This var can be placed in the hole between 'tc' and 'parm1', instead of - // 'static_steal_counter'. It would be nice to measure execution times. - // Conditional if/endif can be removed at all. 
- kmp_int64 last_upper; -#endif /* KMP_OS_WINDOWS */ -} dispatch_private_info64_t; -#else /* KMP_STATIC_STEAL_ENABLED */ -typedef struct KMP_ALIGN_CACHE dispatch_private_info32 { - kmp_int32 lb; - kmp_int32 ub; - kmp_int32 st; - kmp_int32 tc; - - kmp_int32 parm1; - kmp_int32 parm2; - kmp_int32 parm3; - kmp_int32 parm4; - - kmp_int32 count; - - kmp_uint32 ordered_lower; - kmp_uint32 ordered_upper; -#if KMP_OS_WINDOWS - kmp_int32 last_upper; -#endif /* KMP_OS_WINDOWS */ -} dispatch_private_info32_t; - -typedef struct KMP_ALIGN_CACHE dispatch_private_info64 { - kmp_int64 lb; /* lower-bound */ - kmp_int64 ub; /* upper-bound */ - kmp_int64 st; /* stride */ - kmp_int64 tc; /* trip count (number of iterations) */ - - /* parm[1-4] are used in different ways by different scheduling algorithms */ - kmp_int64 parm1; - kmp_int64 parm2; - kmp_int64 parm3; - kmp_int64 parm4; - - kmp_int64 count; /* current chunk number for static scheduling */ - - kmp_uint64 ordered_lower; - kmp_uint64 ordered_upper; -#if KMP_OS_WINDOWS - kmp_int64 last_upper; -#endif /* KMP_OS_WINDOWS */ -} dispatch_private_info64_t; -#endif /* KMP_STATIC_STEAL_ENABLED */ - -typedef struct KMP_ALIGN_CACHE dispatch_private_info { - union private_info { - dispatch_private_info32_t p32; - dispatch_private_info64_t p64; - } u; - enum sched_type schedule; /* scheduling algorithm */ - kmp_sched_flags_t flags; /* flags (e.g., ordered, nomerge, etc.) */ - kmp_int32 ordered_bumped; - // To retain the structure size after making ordered_iteration scalar - kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 3]; - // Stack of buffers for nest of serial regions - struct dispatch_private_info *next; - kmp_int32 type_size; /* the size of types in private_info */ -#if KMP_USE_HIER_SCHED - kmp_int32 hier_id; - void *parent; /* hierarchical scheduling parent pointer */ -#endif - enum cons_type pushed_ws; -} dispatch_private_info_t; - -typedef struct dispatch_shared_info32 { - /* chunk index under dynamic, number of idle threads under static-steal; - iteration index otherwise */ - volatile kmp_uint32 iteration; - volatile kmp_uint32 num_done; - volatile kmp_uint32 ordered_iteration; - // Dummy to retain the structure size after making ordered_iteration scalar - kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 1]; -} dispatch_shared_info32_t; - -typedef struct dispatch_shared_info64 { - /* chunk index under dynamic, number of idle threads under static-steal; - iteration index otherwise */ - volatile kmp_uint64 iteration; - volatile kmp_uint64 num_done; - volatile kmp_uint64 ordered_iteration; - // Dummy to retain the structure size after making ordered_iteration scalar - kmp_int64 ordered_dummy[KMP_MAX_ORDERED - 3]; -} dispatch_shared_info64_t; - -typedef struct dispatch_shared_info { - union shared_info { - dispatch_shared_info32_t s32; - dispatch_shared_info64_t s64; - } u; - volatile kmp_uint32 buffer_index; -#if OMP_45_ENABLED - volatile kmp_int32 doacross_buf_idx; // teamwise index - volatile kmp_uint32 *doacross_flags; // shared array of iteration flags (0/1) - kmp_int32 doacross_num_done; // count finished threads -#endif -#if KMP_USE_HIER_SCHED - void *hier; -#endif -#if KMP_USE_HWLOC - // When linking with libhwloc, the ORDERED EPCC test slows down on big - // machines (> 48 cores). Performance analysis showed that a cache thrash - // was occurring and this padding helps alleviate the problem. 
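[Editorial note] dispatch_shared_info above keeps one doacross flag per iteration (described as 0/1), which the OMP 4.5 doacross entry points spin on. A heavily simplified sketch of that signaling pattern (field access, bit packing, and atomicity are elided; the function names here are illustrative stand-ins, not the runtime's __kmpc_doacross_* entry points):

    /* Sketch only: post/wait on a shared array of per-iteration flags. */
    static void doacross_post(volatile kmp_uint32 *flags, kmp_int64 iter) {
      flags[iter] = 1; /* mark iteration finished (real code packs bits) */
    }
    static void doacross_wait(volatile kmp_uint32 *flags, kmp_int64 iter) {
      while (flags[iter] == 0) /* predecessor not done yet */
        KMP_CPU_PAUSE();       /* spin politely */
    }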
- char padding[64]; -#endif -} dispatch_shared_info_t; - -typedef struct kmp_disp { - /* Vector for ORDERED SECTION */ - void (*th_deo_fcn)(int *gtid, int *cid, ident_t *); - /* Vector for END ORDERED SECTION */ - void (*th_dxo_fcn)(int *gtid, int *cid, ident_t *); - - dispatch_shared_info_t *th_dispatch_sh_current; - dispatch_private_info_t *th_dispatch_pr_current; - - dispatch_private_info_t *th_disp_buffer; - kmp_int32 th_disp_index; -#if OMP_45_ENABLED - kmp_int32 th_doacross_buf_idx; // thread's doacross buffer index - volatile kmp_uint32 *th_doacross_flags; // pointer to shared array of flags - union { // we can use union here because doacross cannot be used in - // nonmonotonic loops - kmp_int64 *th_doacross_info; // info on loop bounds - kmp_lock_t *th_steal_lock; // lock used for chunk stealing (8-byte variable) - }; -#else -#if KMP_STATIC_STEAL_ENABLED - kmp_lock_t *th_steal_lock; // lock used for chunk stealing (8-byte variable) - void *dummy_padding[1]; // make it 64 bytes on Intel(R) 64 -#else - void *dummy_padding[2]; // make it 64 bytes on Intel(R) 64 -#endif -#endif -#if KMP_USE_INTERNODE_ALIGNMENT - char more_padding[INTERNODE_CACHE_LINE]; -#endif -} kmp_disp_t; - -/* ------------------------------------------------------------------------ */ -/* Barrier stuff */ - -/* constants for barrier state update */ -#define KMP_INIT_BARRIER_STATE 0 /* should probably start from zero */ -#define KMP_BARRIER_SLEEP_BIT 0 /* bit used for suspend/sleep part of state */ -#define KMP_BARRIER_UNUSED_BIT 1 // bit that must never be set for valid state -#define KMP_BARRIER_BUMP_BIT 2 /* lsb used for bump of go/arrived state */ - -#define KMP_BARRIER_SLEEP_STATE (1 << KMP_BARRIER_SLEEP_BIT) -#define KMP_BARRIER_UNUSED_STATE (1 << KMP_BARRIER_UNUSED_BIT) -#define KMP_BARRIER_STATE_BUMP (1 << KMP_BARRIER_BUMP_BIT) - -#if (KMP_BARRIER_SLEEP_BIT >= KMP_BARRIER_BUMP_BIT) -#error "Barrier sleep bit must be smaller than barrier bump bit" -#endif -#if (KMP_BARRIER_UNUSED_BIT >= KMP_BARRIER_BUMP_BIT) -#error "Barrier unused bit must be smaller than barrier bump bit" -#endif - -// Constants for release barrier wait state: currently, hierarchical only -#define KMP_BARRIER_NOT_WAITING 0 // Normal state; worker not in wait_sleep -#define KMP_BARRIER_OWN_FLAG \ - 1 // Normal state; worker waiting on own b_go flag in release -#define KMP_BARRIER_PARENT_FLAG \ - 2 // Special state; worker waiting on parent's b_go flag in release -#define KMP_BARRIER_SWITCH_TO_OWN_FLAG \ - 3 // Special state; tells worker to shift from parent to own b_go -#define KMP_BARRIER_SWITCHING \ - 4 // Special state; worker resets appropriate flag on wake-up - -#define KMP_NOT_SAFE_TO_REAP \ - 0 // Thread th_reap_state: not safe to reap (tasking) -#define KMP_SAFE_TO_REAP 1 // Thread th_reap_state: safe to reap (not tasking) - -enum barrier_type { - bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction - barriers if enabled) */ - bs_forkjoin_barrier, /* 1, All fork/join (parallel region) barriers */ -#if KMP_FAST_REDUCTION_BARRIER - bs_reduction_barrier, /* 2, All barriers that are used in reduction */ -#endif // KMP_FAST_REDUCTION_BARRIER - bs_last_barrier /* Just a placeholder to mark the end */ -}; - -// to work with reduction barriers just like with plain barriers -#if !KMP_FAST_REDUCTION_BARRIER -#define bs_reduction_barrier bs_plain_barrier -#endif // KMP_FAST_REDUCTION_BARRIER - -typedef enum kmp_bar_pat { /* Barrier communication patterns */ - bp_linear_bar = - 0, /* Single level (degenerate) tree */ - 
bp_tree_bar =
-      1, /* Balanced tree with branching factor 2^n */
-  bp_hyper_bar =
-      2, /* Hypercube-embedded tree with min branching
-            factor 2^n */
-  bp_hierarchical_bar = 3, /* Machine hierarchy tree */
-  bp_last_bar /* Placeholder to mark the end */
-} kmp_bar_pat_e;
-
-#define KMP_BARRIER_ICV_PUSH 1
-
-/* Record for holding the values of the internal controls stack records */
-typedef struct kmp_internal_control {
-  int serial_nesting_level; /* corresponds to the value of the
-                               th_team_serialized field */
-  kmp_int8 nested; /* internal control for nested parallelism (per thread) */
-  kmp_int8 dynamic; /* internal control for dynamic adjustment of threads (per
-                       thread) */
-  kmp_int8
-      bt_set; /* internal control for whether blocktime is explicitly set */
-  int blocktime; /* internal control for blocktime */
-#if KMP_USE_MONITOR
-  int bt_intervals; /* internal control for blocktime intervals */
-#endif
-  int nproc; /* internal control for #threads for next parallel region (per
-                thread) */
-  int max_active_levels; /* internal control for max_active_levels */
-  kmp_r_sched_t
-      sched; /* internal control for runtime schedule {sched,chunk} pair */
-#if OMP_40_ENABLED
-  kmp_proc_bind_t proc_bind; /* internal control for affinity */
-  kmp_int32 default_device; /* internal control for default device */
-#endif // OMP_40_ENABLED
-  struct kmp_internal_control *next;
-} kmp_internal_control_t;
-
-static inline void copy_icvs(kmp_internal_control_t *dst,
-                             kmp_internal_control_t *src) {
-  *dst = *src;
-}
-
-/* Thread barrier needs volatile barrier fields */
-typedef struct KMP_ALIGN_CACHE kmp_bstate {
-  // th_fixed_icvs is aligned by virtue of kmp_bstate being aligned (and all
-  // uses of it). It is not explicitly aligned below, because we *don't* want
-  // it to be padded -- instead, we fit b_go into the same cache line with
-  // th_fixed_icvs, enabling NGO cache-line stores in the hierarchical barrier.
-  kmp_internal_control_t th_fixed_icvs; // Initial ICVs for the thread
-  // Tuck b_go into the end of the th_fixed_icvs cache line, so it can be
-  // stored with the same NGO store
-  volatile kmp_uint64 b_go; // STATE => task should proceed (hierarchical)
-  KMP_ALIGN_CACHE volatile kmp_uint64
-      b_arrived; // STATE => task reached synch point.
-  kmp_uint32 *skip_per_level;
-  kmp_uint32 my_level;
-  kmp_int32 parent_tid;
-  kmp_int32 old_tid;
-  kmp_uint32 depth;
-  struct kmp_bstate *parent_bar;
-  kmp_team_t *team;
-  kmp_uint64 leaf_state;
-  kmp_uint32 nproc;
-  kmp_uint8 base_leaf_kids;
-  kmp_uint8 leaf_kids;
-  kmp_uint8 offset;
-  kmp_uint8 wait_flag;
-  kmp_uint8 use_oncore_barrier;
-#if USE_DEBUGGER
-  // The following field is intended for the debugger solely. Only the worker
-  // thread itself accesses this field: the worker increases it by 1 when it
-  // arrives at a barrier.
-  KMP_ALIGN_CACHE kmp_uint b_worker_arrived;
-#endif /* USE_DEBUGGER */
-} kmp_bstate_t;
-
-union KMP_ALIGN_CACHE kmp_barrier_union {
-  double b_align; /* use worst case alignment */
-  char b_pad[KMP_PAD(kmp_bstate_t, CACHE_LINE)];
-  kmp_bstate_t bb;
-};
-
-typedef union kmp_barrier_union kmp_balign_t;
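Note: copy_icvs above is a plain struct assignment; the internal-control "stack" works by chaining kmp_internal_control records through their next pointers. A minimal sketch of that push/pop pattern follows; push_icvs/pop_icvs are hypothetical helper names for illustration (the runtime's real push/pop logic lives elsewhere), and the sketch assumes <cstdlib> for malloc/free.

// Sketch only: how ICV records chain through 'next'.
static kmp_internal_control_t *push_icvs(kmp_internal_control_t **stack,
                                         kmp_internal_control_t *cur) {
  kmp_internal_control_t *saved =
      (kmp_internal_control_t *)malloc(sizeof(*saved));
  copy_icvs(saved, cur); // snapshot the current ICVs
  saved->next = *stack;  // link on top of the stack
  *stack = saved;
  return saved;
}

static void pop_icvs(kmp_internal_control_t **stack,
                     kmp_internal_control_t *cur) {
  kmp_internal_control_t *top = *stack;
  *stack = top->next;
  copy_icvs(cur, top); // restore the saved snapshot
  free(top);
}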
-
-/* Team barrier needs only non-volatile arrived counter */
-union KMP_ALIGN_CACHE kmp_barrier_team_union {
-  double b_align; /* use worst case alignment */
-  char b_pad[CACHE_LINE];
-  struct {
-    kmp_uint64 b_arrived; /* STATE => task reached synch point. */
-#if USE_DEBUGGER
-    // The following two fields are intended for the debugger solely. Only the
-    // master of the team accesses these fields: the first one is increased by
-    // 1 when the master arrives at a barrier, the second one is increased by
-    // one when all the threads have arrived.
-    kmp_uint b_master_arrived;
-    kmp_uint b_team_arrived;
-#endif
-  };
-};
-
-typedef union kmp_barrier_team_union kmp_balign_team_t;
-
-/* Padding for Linux* OS pthreads condition variables and mutexes used to
-   signal threads when a condition changes. This is to work around an NPTL bug
-   where padding was added to pthread_cond_t, which caused the initialization
-   routine to write outside of the structure if compiled on pre-NPTL threads. */
-#if KMP_OS_WINDOWS
-typedef struct kmp_win32_mutex {
-  /* The Lock */
-  CRITICAL_SECTION cs;
-} kmp_win32_mutex_t;
-
-typedef struct kmp_win32_cond {
-  /* Count of the number of waiters. */
-  int waiters_count_;
-
-  /* Serialize access to waiters_count_. */
-  kmp_win32_mutex_t waiters_count_lock_;
-
-  /* Number of threads to release via a broadcast or a signal. */
-  int release_count_;
-
-  /* Keeps track of the current "generation" so that we don't allow */
-  /* one thread to steal all the "releases" from the broadcast. */
-  int wait_generation_count_;
-
-  /* A manual-reset event that's used to block and release waiting threads. */
-  HANDLE event_;
-} kmp_win32_cond_t;
-#endif
-
-#if KMP_OS_UNIX
-
-union KMP_ALIGN_CACHE kmp_cond_union {
-  double c_align;
-  char c_pad[CACHE_LINE];
-  pthread_cond_t c_cond;
-};
-
-typedef union kmp_cond_union kmp_cond_align_t;
-
-union KMP_ALIGN_CACHE kmp_mutex_union {
-  double m_align;
-  char m_pad[CACHE_LINE];
-  pthread_mutex_t m_mutex;
-};
-
-typedef union kmp_mutex_union kmp_mutex_align_t;
-
-#endif /* KMP_OS_UNIX */
-
-typedef struct kmp_desc_base {
-  void *ds_stackbase;
-  size_t ds_stacksize;
-  int ds_stackgrow;
-  kmp_thread_t ds_thread;
-  volatile int ds_tid;
-  int ds_gtid;
-#if KMP_OS_WINDOWS
-  volatile int ds_alive;
-  DWORD ds_thread_id;
-/* ds_thread keeps the thread handle on Windows* OS. It is enough for RTL
-   purposes. However, debugger support (libomp_db) cannot work with handles,
-   because they are not comparable. For example, the debugger requests info
-   about a thread with handle h. h is valid within the debugger process, and
-   meaningless within the debuggee process. Even if h is duplicated by a call
-   to DuplicateHandle(), so that the result h' is valid within the debuggee
-   process, it is a *new* handle which does *not* equal any other handle in
-   the debuggee... The only way to compare handles is to convert them to
-   system-wide ids. The GetThreadId() function is available only on Longhorn
-   and Server 2003. :-( In contrast, GetCurrentThreadId() is available on all
-   Windows* OS flavours (including Windows* 95). Thus, we have to get the
-   thread id by a call to GetCurrentThreadId() from within the thread and
-   save it to let libomp_db identify threads. */
-#endif /* KMP_OS_WINDOWS */
-} kmp_desc_base_t;
-
-typedef union KMP_ALIGN_CACHE kmp_desc {
-  double ds_align; /* use worst case alignment */
-  char ds_pad[KMP_PAD(kmp_desc_base_t, CACHE_LINE)];
-  kmp_desc_base_t ds;
-} kmp_desc_t;
-
-typedef struct kmp_local {
-  volatile int this_construct; /* count of single's encountered by thread */
-  void *reduce_data;
-#if KMP_USE_BGET
-  void *bget_data;
-  void *bget_list;
-#if !USE_CMP_XCHG_FOR_BGET
-#ifdef USE_QUEUING_LOCK_FOR_BGET
-  kmp_lock_t bget_lock; /* Lock for accessing bget free list */
-#else
-  kmp_bootstrap_lock_t bget_lock; // Lock for accessing bget free list. Must be
-                                  // a bootstrap lock so we can use it at
-                                  // library shutdown.
-#endif /* USE_QUEUING_LOCK_FOR_BGET */
-#endif /* !
USE_CMP_XCHG_FOR_BGET */ -#endif /* KMP_USE_BGET */ - - PACKED_REDUCTION_METHOD_T - packed_reduction_method; /* stored by __kmpc_reduce*(), used by - __kmpc_end_reduce*() */ - -} kmp_local_t; - -#define KMP_CHECK_UPDATE(a, b) \ - if ((a) != (b)) \ - (a) = (b) -#define KMP_CHECK_UPDATE_SYNC(a, b) \ - if ((a) != (b)) \ - TCW_SYNC_PTR((a), (b)) - -#define get__blocktime(xteam, xtid) \ - ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime) -#define get__bt_set(xteam, xtid) \ - ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set) -#if KMP_USE_MONITOR -#define get__bt_intervals(xteam, xtid) \ - ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals) -#endif - -#define get__nested_2(xteam, xtid) \ - ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nested) -#define get__dynamic_2(xteam, xtid) \ - ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic) -#define get__nproc_2(xteam, xtid) \ - ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nproc) -#define get__sched_2(xteam, xtid) \ - ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.sched) - -#define set__blocktime_team(xteam, xtid, xval) \ - (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime) = \ - (xval)) - -#if KMP_USE_MONITOR -#define set__bt_intervals_team(xteam, xtid, xval) \ - (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals) = \ - (xval)) -#endif - -#define set__bt_set_team(xteam, xtid, xval) \ - (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set) = (xval)) - -#define set__nested(xthread, xval) \ - (((xthread)->th.th_current_task->td_icvs.nested) = (xval)) -#define get__nested(xthread) \ - (((xthread)->th.th_current_task->td_icvs.nested) ? (FTN_TRUE) : (FTN_FALSE)) - -#define set__dynamic(xthread, xval) \ - (((xthread)->th.th_current_task->td_icvs.dynamic) = (xval)) -#define get__dynamic(xthread) \ - (((xthread)->th.th_current_task->td_icvs.dynamic) ? 
(FTN_TRUE) : (FTN_FALSE))
-
-#define set__nproc(xthread, xval) \
-  (((xthread)->th.th_current_task->td_icvs.nproc) = (xval))
-
-#define set__max_active_levels(xthread, xval) \
-  (((xthread)->th.th_current_task->td_icvs.max_active_levels) = (xval))
-
-#define set__sched(xthread, xval) \
-  (((xthread)->th.th_current_task->td_icvs.sched) = (xval))
-
-#if OMP_40_ENABLED
-
-#define set__proc_bind(xthread, xval) \
-  (((xthread)->th.th_current_task->td_icvs.proc_bind) = (xval))
-#define get__proc_bind(xthread) \
-  ((xthread)->th.th_current_task->td_icvs.proc_bind)
-
-#endif /* OMP_40_ENABLED */
-
-// OpenMP tasking data structures
-
-typedef enum kmp_tasking_mode {
-  tskm_immediate_exec = 0,
-  tskm_extra_barrier = 1,
-  tskm_task_teams = 2,
-  tskm_max = 2
-} kmp_tasking_mode_t;
-
-extern kmp_tasking_mode_t
-    __kmp_tasking_mode; /* determines how/when to execute tasks */
-extern int __kmp_task_stealing_constraint;
-#if OMP_40_ENABLED
-extern kmp_int32 __kmp_default_device; // Set via OMP_DEFAULT_DEVICE if
-                                       // specified, defaults to 0 otherwise
-#endif
-#if OMP_45_ENABLED
-// Set via OMP_MAX_TASK_PRIORITY if specified, defaults to 0 otherwise
-extern kmp_int32 __kmp_max_task_priority;
-// Set via KMP_TASKLOOP_MIN_TASKS if specified, defaults to 0 otherwise
-extern kmp_uint64 __kmp_taskloop_min_tasks;
-#endif
-
-/* NOTE: kmp_taskdata_t and kmp_task_t structures are allocated in a single
-   block with taskdata first */
-#define KMP_TASK_TO_TASKDATA(task) (((kmp_taskdata_t *)task) - 1)
-#define KMP_TASKDATA_TO_TASK(taskdata) (kmp_task_t *)(taskdata + 1)
-
-// The tt_found_tasks flag is a signal to all threads in the team that tasks
-// were spawned and queued since the previous barrier release.
-#define KMP_TASKING_ENABLED(task_team) \
-  (TCR_SYNC_4((task_team)->tt.tt_found_tasks) == TRUE)
-/*!
-@ingroup BASIC_TYPES
-@{
-*/
-
-/*!
- */
-typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, void *);
-
-#if OMP_40_ENABLED || OMP_45_ENABLED
-typedef union kmp_cmplrdata {
-#if OMP_45_ENABLED
-  kmp_int32 priority; /**< priority specified by user for the task */
-#endif // OMP_45_ENABLED
-#if OMP_40_ENABLED
-  kmp_routine_entry_t
-      destructors; /* pointer to function to invoke destructors of
-                      firstprivate C++ objects */
-#endif // OMP_40_ENABLED
-  /* future data */
-} kmp_cmplrdata_t;
-#endif
-
-/* sizeof_kmp_task_t passed as arg to kmpc_omp_task call */
-/*!
- */
-typedef struct kmp_task { /* GEH: Shouldn't this be aligned somehow? */
-  void *shareds; /**< pointer to block of pointers to shared vars */
-  kmp_routine_entry_t
-      routine; /**< pointer to routine to call for executing task */
-  kmp_int32 part_id; /**< part id for the task */
-#if OMP_40_ENABLED || OMP_45_ENABLED
-  kmp_cmplrdata_t
-      data1; /* Two known optional additions: destructors and priority */
-  kmp_cmplrdata_t data2; /* Process destructors first, priority second */
-  /* future data */
-#endif
-  /* private vars */
-} kmp_task_t;
-
-/*!
-@}
-*/
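Note on the single-block layout behind KMP_TASK_TO_TASKDATA / KMP_TASKDATA_TO_TASK: because the taskdata header is allocated immediately before the kmp_task_t in one block, converting between the two is plain pointer arithmetic. A sketch of the layout and the conversion:

// One allocation holds the taskdata header immediately followed by the
// kmp_task_t (then private/shared data):
//
//   [ kmp_taskdata_t | kmp_task_t | private/shared data ... ]
//                      ^ pointer handed to the compiler-generated code
//
// so recovering the header is just stepping back one kmp_taskdata_t:
//   kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task); // ((kmp_taskdata_t *)task) - 1
//   kmp_task_t *t = KMP_TASKDATA_TO_TASK(td);        // (kmp_task_t *)(td + 1)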
-
-#if OMP_40_ENABLED
-typedef struct kmp_taskgroup {
-  std::atomic<kmp_int32> count; // number of allocated and incomplete tasks
-  std::atomic<kmp_int32>
-      cancel_request; // request for cancellation of this taskgroup
-  struct kmp_taskgroup *parent; // parent taskgroup
-#if OMP_50_ENABLED
-  // Block of data to perform task reduction
-  void *reduce_data; // reduction related info
-  kmp_int32 reduce_num_data; // number of data items to reduce
-#endif
-} kmp_taskgroup_t;
-
-// forward declarations
-typedef union kmp_depnode kmp_depnode_t;
-typedef struct kmp_depnode_list kmp_depnode_list_t;
-typedef struct kmp_dephash_entry kmp_dephash_entry_t;
-
-// The compiler sends us this info:
-typedef struct kmp_depend_info {
-  kmp_intptr_t base_addr;
-  size_t len;
-  struct {
-    bool in : 1;
-    bool out : 1;
-    bool mtx : 1;
-  } flags;
-} kmp_depend_info_t;
-
-// Internal structures to work with task dependencies:
-struct kmp_depnode_list {
-  kmp_depnode_t *node;
-  kmp_depnode_list_t *next;
-};
-
-// Max number of mutexinoutset dependencies per node
-#define MAX_MTX_DEPS 4
-
-typedef struct kmp_base_depnode {
-  kmp_depnode_list_t *successors; /* used under lock */
-  kmp_task_t *task; /* non-NULL if depnode is active, used under lock */
-  kmp_lock_t *mtx_locks[MAX_MTX_DEPS]; /* lock mutexinoutset dependent tasks */
-  kmp_int32 mtx_num_locks; /* number of locks in mtx_locks array */
-  kmp_lock_t lock; /* guards shared fields: task, successors */
-#if KMP_SUPPORT_GRAPH_OUTPUT
-  kmp_uint32 id;
-#endif
-  std::atomic<kmp_int32> npredecessors;
-  std::atomic<kmp_int32> nrefs;
-} kmp_base_depnode_t;
-
-union KMP_ALIGN_CACHE kmp_depnode {
-  double dn_align; /* use worst case alignment */
-  char dn_pad[KMP_PAD(kmp_base_depnode_t, CACHE_LINE)];
-  kmp_base_depnode_t dn;
-};
-
-struct kmp_dephash_entry {
-  kmp_intptr_t addr;
-  kmp_depnode_t *last_out;
-  kmp_depnode_list_t *last_ins;
-  kmp_depnode_list_t *last_mtxs;
-  kmp_int32 last_flag;
-  kmp_lock_t *mtx_lock; /* is referenced by depnodes w/mutexinoutset dep */
-  kmp_dephash_entry_t *next_in_bucket;
-};
-
-typedef struct kmp_dephash {
-  kmp_dephash_entry_t **buckets;
-  size_t size;
-#ifdef KMP_DEBUG
-  kmp_uint32 nelements;
-  kmp_uint32 nconflicts;
-#endif
-} kmp_dephash_t;
-
-#if OMP_50_ENABLED
-typedef struct kmp_task_affinity_info {
-  kmp_intptr_t base_addr;
-  size_t len;
-  struct {
-    bool flag1 : 1;
-    bool flag2 : 1;
-    kmp_int32 reserved : 30;
-  } flags;
-} kmp_task_affinity_info_t;
-#endif
-
-#endif
-
-#ifdef BUILD_TIED_TASK_STACK
-
-/* Tied Task stack definitions */
-typedef struct kmp_stack_block {
-  kmp_taskdata_t *sb_block[TASK_STACK_BLOCK_SIZE];
-  struct kmp_stack_block *sb_next;
-  struct kmp_stack_block *sb_prev;
-} kmp_stack_block_t;
-
-typedef struct kmp_task_stack {
-  kmp_stack_block_t ts_first_block; // first block of stack entries
-  kmp_taskdata_t **ts_top; // pointer to the top of stack
-  kmp_int32 ts_entries; // number of entries on the stack
-} kmp_task_stack_t;
-
-#endif // BUILD_TIED_TASK_STACK
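The in/out/mtx bits of kmp_depend_info_t describe one dependence record per list item in a depend clause. A sketch of how a compiler might fill one record for depend(inout: x); the flag combination shown follows my reading of the struct (inout sets both reader and writer bits), not a verified dump of any particular compiler's lowering:

// Sketch: one dependence record for 'depend(inout: x)'.
int x;
kmp_depend_info_t dep;
dep.base_addr = (kmp_intptr_t)&x; // the address identifies the dependence
dep.len = sizeof(x);              // length of the storage
dep.flags.in = true;              // inout: both reader and writer
dep.flags.out = true;
dep.flags.mtx = false;            // not mutexinoutset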
-
-typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */
-  /* Compiler flags */ /* Total compiler flags must be 16 bits */
-  unsigned tiedness : 1; /* task is either tied (1) or untied (0) */
-  unsigned final : 1; /* task is final(1) so execute immediately */
-  unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0
-                              code path */
-#if OMP_40_ENABLED
-  unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to
-                                     invoke destructors from the runtime */
-#if OMP_45_ENABLED
-  unsigned proxy : 1; /* task is a proxy task (it will be executed outside
-                         the context of the RTL) */
-  unsigned priority_specified : 1; /* set if the compiler provides priority
-                                      setting for the task */
-  unsigned reserved : 10; /* reserved for compiler use */
-#else
-  unsigned reserved : 12; /* reserved for compiler use */
-#endif
-#else // OMP_40_ENABLED
-  unsigned reserved : 13; /* reserved for compiler use */
-#endif // OMP_40_ENABLED
-
-  /* Library flags */ /* Total library flags must be 16 bits */
-  unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */
-  unsigned task_serial : 1; // task is executed immediately (1) or deferred (0)
-  unsigned tasking_ser : 1; // all tasks in team are either executed
-                            // immediately (1) or may be deferred (0)
-  unsigned team_serial : 1; // entire team is serial (1) [1 thread] or parallel
-                            // (0) [>= 2 threads]
-  /* If either team_serial or tasking_ser is set, task team may be NULL */
-  /* Task State Flags: */
-  unsigned started : 1; /* 1==started, 0==not started */
-  unsigned executing : 1; /* 1==executing, 0==not executing */
-  unsigned complete : 1; /* 1==complete, 0==not complete */
-  unsigned freed : 1; /* 1==freed, 0==allocated */
-  unsigned native : 1; /* 1==gcc-compiled task, 0==intel */
-  unsigned reserved31 : 7; /* reserved for library use */
-
-} kmp_tasking_flags_t;
-
-struct kmp_taskdata { /* aligned during dynamic allocation */
-  kmp_int32 td_task_id; /* id, assigned by debugger */
-  kmp_tasking_flags_t td_flags; /* task flags */
-  kmp_team_t *td_team; /* team for this task */
-  kmp_info_p *td_alloc_thread; /* thread that allocated data structures */
-  /* Currently not used except for perhaps IDB */
-  kmp_taskdata_t *td_parent; /* parent task */
-  kmp_int32 td_level; /* task nesting level */
-  std::atomic<kmp_int32> td_untied_count; // untied task active parts counter
-  ident_t *td_ident; /* task identifier */
-  // Taskwait data.
-  ident_t *td_taskwait_ident;
-  kmp_uint32 td_taskwait_counter;
-  kmp_int32 td_taskwait_thread; /* gtid + 1 of thread encountered taskwait */
-  KMP_ALIGN_CACHE kmp_internal_control_t
-      td_icvs; /* Internal control variables for the task */
-  KMP_ALIGN_CACHE std::atomic<kmp_int32>
-      td_allocated_child_tasks; /* Child tasks (+ current task) not yet
-                                   deallocated */
-  std::atomic<kmp_int32>
-      td_incomplete_child_tasks; /* Child tasks not yet complete */
-#if OMP_40_ENABLED
-  kmp_taskgroup_t
-      *td_taskgroup; // Each task keeps pointer to its current taskgroup
-  kmp_dephash_t
-      *td_dephash; // Dependencies for children tasks are tracked from here
-  kmp_depnode_t
-      *td_depnode; // Pointer to graph node if this task has dependencies
-#endif // OMP_40_ENABLED
-#if OMP_45_ENABLED
-  kmp_task_team_t *td_task_team;
-  kmp_int32 td_size_alloc; // The size of task structure, including shareds etc.
-#if defined(KMP_GOMP_COMPAT)
-  // 4 or 8 byte integers for the loop bounds in GOMP_taskloop
-  kmp_int32 td_size_loop_bounds;
-#endif
-#endif // OMP_45_ENABLED
-  kmp_taskdata_t *td_last_tied; // keep tied task for task scheduling constraint
-#if defined(KMP_GOMP_COMPAT) && OMP_45_ENABLED
-  // GOMP sends in a copy function for copy constructors
-  void (*td_copy_func)(void *, void *);
-#endif
-#if OMPT_SUPPORT
-  ompt_task_info_t ompt_task_info;
-#endif
-}; // struct kmp_taskdata
-
-// Make sure padding above worked
-KMP_BUILD_ASSERT(sizeof(kmp_taskdata_t) % sizeof(void *) == 0);
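The comments in kmp_tasking_flags_t insist the compiler half and the library half are 16 bits each: 1+1+1+1+1+1+10 = 16 compiler bits, and 4+5+7 = 16 library bits in the OMP_45 configuration. The header does not assert this itself; a compile-time check in the style of its own KMP_BUILD_ASSERT on kmp_sched_flags_t would be:

// Sketch: verify the 32-bit budget the comments describe.
KMP_BUILD_ASSERT(sizeof(kmp_tasking_flags_t) == 4);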
-
-// Data for task team but per thread
-typedef struct kmp_base_thread_data {
-  kmp_info_p *td_thr; // Pointer back to thread info
-  // Used only in __kmp_execute_tasks_template, maybe not avail until task is
-  // queued?
-  kmp_bootstrap_lock_t td_deque_lock; // Lock for accessing deque
-  kmp_taskdata_t *
-      *td_deque; // Deque of tasks encountered by td_thr, dynamically allocated
-  kmp_int32 td_deque_size; // Size of deque
-  kmp_uint32 td_deque_head; // Head of deque (will wrap)
-  kmp_uint32 td_deque_tail; // Tail of deque (will wrap)
-  kmp_int32 td_deque_ntasks; // Number of tasks in deque
-  // GEH: shouldn't this be volatile since used in while-spin?
-  kmp_int32 td_deque_last_stolen; // Thread number of last successful steal
-#ifdef BUILD_TIED_TASK_STACK
-  kmp_task_stack_t td_susp_tied_tasks; // Stack of suspended tied tasks for
-                                       // task scheduling constraint
-#endif // BUILD_TIED_TASK_STACK
-} kmp_base_thread_data_t;
-
-#define TASK_DEQUE_BITS 8 // Used solely to define INITIAL_TASK_DEQUE_SIZE
-#define INITIAL_TASK_DEQUE_SIZE (1 << TASK_DEQUE_BITS)
-
-#define TASK_DEQUE_SIZE(td) ((td).td_deque_size)
-#define TASK_DEQUE_MASK(td) ((td).td_deque_size - 1)
-
-typedef union KMP_ALIGN_CACHE kmp_thread_data {
-  kmp_base_thread_data_t td;
-  double td_align; /* use worst case alignment */
-  char td_pad[KMP_PAD(kmp_base_thread_data_t, CACHE_LINE)];
-} kmp_thread_data_t;
-
-// Data for task teams, which are used when tasking is enabled for the team
-typedef struct kmp_base_task_team {
-  kmp_bootstrap_lock_t
-      tt_threads_lock; /* Lock used to allocate per-thread part of task team;
-                          must be a bootstrap lock since it is used at library
-                          shutdown */
-  kmp_task_team_t *tt_next; /* For linking the task team free list */
-  kmp_thread_data_t
-      *tt_threads_data; /* Array of per-thread structures for task team */
-  /* Data survives task team deallocation */
-  kmp_int32 tt_found_tasks; /* Have we found tasks and queued them while
-                               executing this team? */
-  /* TRUE means tt_threads_data is set up and initialized */
-  kmp_int32 tt_nproc; /* #threads in team */
-  kmp_int32
-      tt_max_threads; /* number of entries allocated for threads_data array */
-#if OMP_45_ENABLED
-  kmp_int32
-      tt_found_proxy_tasks; /* Have we found proxy tasks since last barrier */
-#endif
-  kmp_int32 tt_untied_task_encountered;
-
-  KMP_ALIGN_CACHE
-  std::atomic<kmp_int32> tt_unfinished_threads; /* #threads still active */
-
-  KMP_ALIGN_CACHE
-  volatile kmp_uint32
-      tt_active; /* is the team still actively executing tasks */
-} kmp_base_task_team_t;
-
-union KMP_ALIGN_CACHE kmp_task_team {
-  kmp_base_task_team_t tt;
-  double tt_align; /* use worst case alignment */
-  char tt_pad[KMP_PAD(kmp_base_task_team_t, CACHE_LINE)];
-};
-
-#if (USE_FAST_MEMORY == 3) || (USE_FAST_MEMORY == 5)
-// Free lists keep same-size free memory slots for fast memory allocation
-// routines
-typedef struct kmp_free_list {
-  void *th_free_list_self; // Self-allocated tasks free list
-  void *th_free_list_sync; // Self-allocated tasks stolen/returned by other
-                           // threads
-  void *th_free_list_other; // Non-self free list (to be returned to owner's
-                            // sync list)
-} kmp_free_list_t;
-#endif
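Because td_deque_size is always a power of two, TASK_DEQUE_MASK lets head and tail wrap with a single AND instead of a modulo. A minimal sketch of that indexing; the real runtime paths additionally take td_deque_lock and recheck the count, which this illustration omits:

// Sketch only: power-of-two deque indexing as implied by TASK_DEQUE_MASK.
static kmp_taskdata_t *pop_front_sketch(kmp_base_thread_data_t &td) {
  if (td.td_deque_ntasks == 0)
    return NULL; // real code rechecks this under td_deque_lock
  kmp_taskdata_t *t = td.td_deque[td.td_deque_head];
  td.td_deque_head = (td.td_deque_head + 1) & TASK_DEQUE_MASK(td); // wrap
  td.td_deque_ntasks--;
  return t;
}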
-#if KMP_NESTED_HOT_TEAMS
-// The hot teams array keeps hot teams and their sizes for a given thread. Hot
-// teams are not put in the teams pool, and they don't put threads in the
-// threads pool.
-typedef struct kmp_hot_team_ptr {
-  kmp_team_p *hot_team; // pointer to hot_team of given nesting level
-  kmp_int32 hot_team_nth; // number of threads allocated for the hot_team
-} kmp_hot_team_ptr_t;
-#endif
-#if OMP_40_ENABLED
-typedef struct kmp_teams_size {
-  kmp_int32 nteams; // number of teams in a league
-  kmp_int32 nth; // number of threads in each team of the league
-} kmp_teams_size_t;
-#endif
-
-// OpenMP thread data structures
-
-typedef struct KMP_ALIGN_CACHE kmp_base_info {
-  /* Start with the read-only data, which is cache aligned and padded. This is
-     written before the thread starts working by the master. Uber masters may
-     update themselves later. Usage does not consider serialized regions. */
-  kmp_desc_t th_info;
-  kmp_team_p *th_team; /* team we belong to */
-  kmp_root_p *th_root; /* pointer to root of task hierarchy */
-  kmp_info_p *th_next_pool; /* next available thread in the pool */
-  kmp_disp_t *th_dispatch; /* thread's dispatch data */
-  int th_in_pool; /* in thread pool (32 bits for TCR/TCW) */
-
-  /* The following are cached from the team info structure */
-  /* TODO use these in more places as determined to be needed via profiling */
-  int th_team_nproc; /* number of threads in a team */
-  kmp_info_p *th_team_master; /* the team's master thread */
-  int th_team_serialized; /* team is serialized */
-#if OMP_40_ENABLED
-  microtask_t th_teams_microtask; /* save entry address for teams construct */
-  int th_teams_level; /* save initial level of teams construct */
-                      /* it is 0 on device but may be any on host */
-#endif
-
-  /* The blocktime info is copied from the team struct to the thread struct */
-  /* at the start of a barrier, and the values stored in the team are used  */
-  /* at points in the code where the team struct is no longer guaranteed   */
-  /* to exist (from the POV of worker threads).                            */
-#if KMP_USE_MONITOR
-  int th_team_bt_intervals;
-  int th_team_bt_set;
-#else
-  kmp_uint64 th_team_bt_intervals;
-#endif
-
-#if KMP_AFFINITY_SUPPORTED
-  kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */
-#endif
-#if OMP_50_ENABLED
-  void *const *th_def_allocator; /* per implicit task default allocator */
-#endif
-  /* The data set by the master at reinit, then R/W by the worker */
-  KMP_ALIGN_CACHE int
-      th_set_nproc; /* if > 0, then only use this request for the next fork */
-#if KMP_NESTED_HOT_TEAMS
-  kmp_hot_team_ptr_t *th_hot_teams; /* array of hot teams */
-#endif
-#if OMP_40_ENABLED
-  kmp_proc_bind_t
-      th_set_proc_bind; /* if != proc_bind_default, use request for next fork */
-  kmp_teams_size_t
-      th_teams_size; /* number of teams/threads in teams construct */
-#if KMP_AFFINITY_SUPPORTED
-  int th_current_place; /* place currently bound to */
-  int th_new_place; /* place to bind to in par reg */
-  int th_first_place; /* first place in partition */
-  int th_last_place; /* last place in partition */
-#endif
-#endif
-#if OMP_50_ENABLED
-  int th_prev_level; /* previous level for affinity format */
-  int th_prev_num_threads; /* previous num_threads for affinity format */
-#endif
-#if USE_ITT_BUILD
-  kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */
-  kmp_uint64 th_bar_min_time; /* minimum arrival time at the barrier */
-  kmp_uint64 th_frame_time; /* frame timestamp */
-#endif /* USE_ITT_BUILD */
-  kmp_local_t th_local;
-  struct private_common *th_pri_head;
-
-  /* Now the data only used by the worker (after initial allocation) */
-  /* TODO: the first serial team should actually be stored in the info_t
-     structure;
this will help reduce initial allocation overhead */
-  KMP_ALIGN_CACHE kmp_team_p
-      *th_serial_team; /* serialized team held in reserve */
-
-#if OMPT_SUPPORT
-  ompt_thread_info_t ompt_thread_info;
-#endif
-
-  /* The following are also read by the master during reinit */
-  struct common_table *th_pri_common;
-
-  volatile kmp_uint32 th_spin_here; /* thread-local location for spinning */
-  /* while awaiting queuing lock acquire */
-
-  volatile void *th_sleep_loc; // this points at a kmp_flag
-
-  ident_t *th_ident;
-  unsigned th_x; // Random number generator data
-  unsigned th_a; // Random number generator data
-
-  /* Tasking-related data for the thread */
-  kmp_task_team_t *th_task_team; // Task team struct
-  kmp_taskdata_t *th_current_task; // Innermost Task being executed
-  kmp_uint8 th_task_state; // alternating 0/1 for task team identification
-  kmp_uint8 *th_task_state_memo_stack; // Stack holding memos of th_task_state
-                                       // at nested levels
-  kmp_uint32 th_task_state_top; // Top element of th_task_state_memo_stack
-  kmp_uint32 th_task_state_stack_sz; // Size of th_task_state_memo_stack
-  kmp_uint32 th_reap_state; // Non-zero indicates thread is not
-                            // tasking, thus safe to reap
-
-  /* More stuff for keeping track of active/sleeping threads (this part is
-     written by the worker thread) */
-  kmp_uint8 th_active_in_pool; // included in count of #active threads in pool
-  int th_active; // ! sleeping; 32 bits for TCR/TCW
-  struct cons_header *th_cons; // used for consistency check
-#if KMP_USE_HIER_SCHED
-  // used for hierarchical scheduling
-  kmp_hier_private_bdata_t *th_hier_bar_data;
-#endif
-
-  /* Add the synchronizing data, which is cache aligned and padded. */
-  KMP_ALIGN_CACHE kmp_balign_t th_bar[bs_last_barrier];
-
-  KMP_ALIGN_CACHE volatile kmp_int32
-      th_next_waiting; /* gtid+1 of next thread on lock wait queue, 0 if none */
-
-#if (USE_FAST_MEMORY == 3) || (USE_FAST_MEMORY == 5)
-#define NUM_LISTS 4
-  kmp_free_list_t th_free_lists[NUM_LISTS]; // Free lists for fast memory
-                                            // allocation routines
-#endif
-
-#if KMP_OS_WINDOWS
-  kmp_win32_cond_t th_suspend_cv;
-  kmp_win32_mutex_t th_suspend_mx;
-  int th_suspend_init;
-#endif
-#if KMP_OS_UNIX
-  kmp_cond_align_t th_suspend_cv;
-  kmp_mutex_align_t th_suspend_mx;
-  int th_suspend_init_count;
-#endif
-
-#if USE_ITT_BUILD
-  kmp_itt_mark_t th_itt_mark_single;
-  // alignment ???
-#endif /* USE_ITT_BUILD */
-#if KMP_STATS_ENABLED
-  kmp_stats_list *th_stats;
-#endif
-#if KMP_OS_UNIX
-  std::atomic<bool> th_blocking;
-#endif
-} kmp_base_info_t;
-
-typedef union KMP_ALIGN_CACHE kmp_info {
-  double th_align; /* use worst case alignment */
-  char th_pad[KMP_PAD(kmp_base_info_t, CACHE_LINE)];
-  kmp_base_info_t th;
-} kmp_info_t;
-
-// OpenMP thread team data structures
-
-typedef struct kmp_base_data { volatile kmp_uint32 t_value; } kmp_base_data_t;
-
-typedef union KMP_ALIGN_CACHE kmp_sleep_team {
-  double dt_align; /* use worst case alignment */
-  char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
-  kmp_base_data_t dt;
-} kmp_sleep_team_t;
-
-typedef union KMP_ALIGN_CACHE kmp_ordered_team {
-  double dt_align; /* use worst case alignment */
-  char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
-  kmp_base_data_t dt;
-} kmp_ordered_team_t;
-
-typedef int (*launch_t)(int gtid);
-
-/* Minimum number of ARGV entries to malloc if necessary */
-#define KMP_MIN_MALLOC_ARGV_ENTRIES 100
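The unions above (kmp_info, kmp_sleep_team, kmp_ordered_team, and their siblings earlier in this header) all use the same idiom: a union of the payload, a double for worst-case scalar alignment, and a char pad sized with KMP_PAD so the union's size rounds up to a cache-line multiple. KMP_PAD itself is defined elsewhere in this header; the sketch below uses its own PAD_SKETCH macro under the assumption that KMP_PAD(type, sz) yields sizeof(type) rounded up to a multiple of sz:

// Sketch of the padding idiom, not the runtime's exact macro.
#define PAD_SKETCH(type, sz) (((sizeof(type) + (sz)-1) / (sz)) * (sz))

struct payload_t { int a; char b; };

union aligned_payload_u {
  double align;                        // worst-case scalar alignment
  char pad[PAD_SKETCH(payload_t, 64)]; // size: multiple of 64 bytes
  payload_t p;                         // the actual data
};
static_assert(sizeof(aligned_payload_u) % 64 == 0, "cache-line multiple");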
-
-// Set up how many argv pointers will fit in the cache lines containing
-// t_inline_argv. Historically, we have supported at least 96 bytes. Using a
-// larger value for more space between the master write/worker read section
-// and the read/write by all section seems to buy more performance on EPCC
-// PARALLEL.
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-#define KMP_INLINE_ARGV_BYTES \
-  (4 * CACHE_LINE - \
-   ((3 * KMP_PTR_SKIP + 2 * sizeof(int) + 2 * sizeof(kmp_int8) + \
-     sizeof(kmp_int16) + sizeof(kmp_uint32)) % \
-    CACHE_LINE))
-#else
-#define KMP_INLINE_ARGV_BYTES \
-  (2 * CACHE_LINE - ((3 * KMP_PTR_SKIP + 2 * sizeof(int)) % CACHE_LINE))
-#endif
-#define KMP_INLINE_ARGV_ENTRIES (int)(KMP_INLINE_ARGV_BYTES / KMP_PTR_SKIP)
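A worked instance of the x86 formula, assuming CACHE_LINE == 64 and KMP_PTR_SKIP == 8 (typical x86_64 values; both are configuration macros defined elsewhere in the runtime):

// The members that precede t_inline_argv in the "master write, workers
// read" section occupy 3*8 + 2*4 + 2*1 + 2 + 4 = 40 bytes, so:
//   KMP_INLINE_ARGV_BYTES   = 4*64 - (40 % 64) = 256 - 40 = 216
//   KMP_INLINE_ARGV_ENTRIES = 216 / 8          = 27 pointers
// i.e. those members plus t_inline_argv together span exactly four cache
// lines, ending on a cache-line boundary.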
-
-typedef struct KMP_ALIGN_CACHE kmp_base_team {
-  // Synchronization Data
-  // ---------------------------------------------------------------------------
-  KMP_ALIGN_CACHE kmp_ordered_team_t t_ordered;
-  kmp_balign_team_t t_bar[bs_last_barrier];
-  std::atomic<int> t_construct; // count of single directive encountered by team
-  char pad[sizeof(kmp_lock_t)]; // padding to maintain performance on big iron
-
-  // Master only
-  // ---------------------------------------------------------------------------
-  KMP_ALIGN_CACHE int t_master_tid; // tid of master in parent team
-  int t_master_this_cons; // "this_construct" single counter of master in
-                          // parent team
-  ident_t *t_ident; // if volatile, have to change too much other crud to
-                    // volatile too
-  kmp_team_p *t_parent; // parent team
-  kmp_team_p *t_next_pool; // next free team in the team pool
-  kmp_disp_t *t_dispatch; // thread's dispatch data
-  kmp_task_team_t *t_task_team[2]; // Task team struct; switch between 2
-#if OMP_40_ENABLED
-  kmp_proc_bind_t t_proc_bind; // bind type for par region
-#endif // OMP_40_ENABLED
-#if USE_ITT_BUILD
-  kmp_uint64 t_region_time; // region begin timestamp
-#endif /* USE_ITT_BUILD */
-
-  // Master write, workers read
-  // --------------------------------------------------------------------------
-  KMP_ALIGN_CACHE void **t_argv;
-  int t_argc;
-  int t_nproc; // number of threads in team
-  microtask_t t_pkfn;
-  launch_t t_invoke; // procedure to launch the microtask
-
-#if OMPT_SUPPORT
-  ompt_team_info_t ompt_team_info;
-  ompt_lw_taskteam_t *ompt_serialized_team_info;
-#endif
-
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-  kmp_int8 t_fp_control_saved;
-  kmp_int8 t_pad2b;
-  kmp_int16 t_x87_fpu_control_word; // FP control regs
-  kmp_uint32 t_mxcsr;
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
-  void *t_inline_argv[KMP_INLINE_ARGV_ENTRIES];
-
-  KMP_ALIGN_CACHE kmp_info_t **t_threads;
-  kmp_taskdata_t
-      *t_implicit_task_taskdata; // Taskdata for the thread's implicit task
-  int t_level; // nested parallel level
-
-  KMP_ALIGN_CACHE int t_max_argc;
-  int t_max_nproc; // max threads this team can handle (dynamically expandable)
-  int t_serialized; // levels deep of serialized teams
-  dispatch_shared_info_t *t_disp_buffer; // buffers for dispatch system
-  int t_id; // team's id, assigned by debugger.
-  int t_active_level; // nested active parallel level
-  kmp_r_sched_t t_sched; // run-time schedule for the team
-#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
-  int t_first_place; // first & last place in parent thread's partition.
-  int t_last_place; // Restore these values to master after par region.
-#endif // OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
-#if OMP_50_ENABLED
-  int t_display_affinity;
-#endif
-  int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via
-                      // omp_set_num_threads() call
-#if OMP_50_ENABLED
-  void *const *t_def_allocator; /* per implicit task default allocator */
-#endif
-
-  // Read/write by workers as well
-#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
-  // Using CACHE_LINE=64 reduces memory footprint, but causes a big perf
-  // regression of epcc 'parallel' and 'barrier' on fxe256lin01. This extra
-  // padding serves to fix the performance of epcc 'parallel' and 'barrier'
-  // when CACHE_LINE=64. TODO: investigate more and get rid of this padding.
-  char dummy_padding[1024];
-#endif
-  // Internal control stack for additional nested teams.
-  KMP_ALIGN_CACHE kmp_internal_control_t *t_control_stack_top;
-  // for SERIALIZED teams nested 2 or more levels deep
-#if OMP_40_ENABLED
-  // typed flag to store request state of cancellation
-  std::atomic<kmp_int32> t_cancel_request;
-#endif
-  int t_master_active; // save on fork, restore on join
-  kmp_taskq_t t_taskq; // this team's task queue
-  void *t_copypriv_data; // team specific pointer to copyprivate data array
-#if KMP_OS_WINDOWS
-  std::atomic<int> t_copyin_counter;
-#endif
-#if USE_ITT_BUILD
-  void *t_stack_id; // team specific stack stitching id (for ittnotify)
-#endif /* USE_ITT_BUILD */
-} kmp_base_team_t;
-
-union KMP_ALIGN_CACHE kmp_team {
-  kmp_base_team_t t;
-  double t_align; /* use worst case alignment */
-  char t_pad[KMP_PAD(kmp_base_team_t, CACHE_LINE)];
-};
-
-typedef union KMP_ALIGN_CACHE kmp_time_global {
-  double dt_align; /* use worst case alignment */
-  char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
-  kmp_base_data_t dt;
-} kmp_time_global_t;
-
-typedef struct kmp_base_global {
-  /* cache-aligned */
-  kmp_time_global_t g_time;
-
-  /* non cache-aligned */
-  volatile int g_abort;
-  volatile int g_done;
-
-  int g_dynamic;
-  enum dynamic_mode g_dynamic_mode;
-} kmp_base_global_t;
-
-typedef union KMP_ALIGN_CACHE kmp_global {
-  kmp_base_global_t g;
-  double g_align; /* use worst case alignment */
-  char g_pad[KMP_PAD(kmp_base_global_t, CACHE_LINE)];
-} kmp_global_t;
-
-typedef struct kmp_base_root {
-  // TODO: GEH - combine r_active with r_in_parallel then r_active ==
-  // (r_in_parallel >= 0)
-  // TODO: GEH - then replace r_active with t_active_levels if we can to
-  // reduce the synch overhead of keeping r_active
-  volatile int r_active; /* TRUE if some region in a nest has > 1 thread */
-  // GEH: This is misnamed, should be r_in_parallel
-  volatile int r_nested; // TODO: GEH - This is unused, just remove it entirely.
-  // keeps a count of active parallel regions per root
-  std::atomic<int> r_in_parallel;
-  // GEH: This is misnamed, should be r_active_levels
-  kmp_team_t *r_root_team;
-  kmp_team_t *r_hot_team;
-  kmp_info_t *r_uber_thread;
-  kmp_lock_t r_begin_lock;
-  volatile int r_begin;
-  int r_blocktime; /* blocktime for this root and descendants */
-  int r_cg_nthreads; // count of active threads in a contention group
-} kmp_base_root_t;
-
-typedef union KMP_ALIGN_CACHE kmp_root {
-  kmp_base_root_t r;
-  double r_align; /* use worst case alignment */
-  char r_pad[KMP_PAD(kmp_base_root_t, CACHE_LINE)];
-} kmp_root_t;
-
-struct fortran_inx_info {
-  kmp_int32 data;
-};
-
-/* ------------------------------------------------------------------------ */
-
-extern int __kmp_settings;
-extern int __kmp_duplicate_library_ok;
-#if USE_ITT_BUILD
-extern int __kmp_forkjoin_frames;
-extern int __kmp_forkjoin_frames_mode;
-#endif
-extern PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method;
-extern int __kmp_determ_red;
-
-#ifdef KMP_DEBUG
-extern int kmp_a_debug;
-extern int kmp_b_debug;
-extern int kmp_c_debug;
-extern int kmp_d_debug;
-extern int kmp_e_debug;
-extern int kmp_f_debug;
-#endif /* KMP_DEBUG */
-
-/* For debug information logging using a rotating buffer */
-#define KMP_DEBUG_BUF_LINES_INIT 512
-#define KMP_DEBUG_BUF_LINES_MIN 1
-
-#define KMP_DEBUG_BUF_CHARS_INIT 128
-#define KMP_DEBUG_BUF_CHARS_MIN 2
-
-extern int
-    __kmp_debug_buf; /* TRUE means use buffer, FALSE means print to stderr */
-extern int __kmp_debug_buf_lines; /* How many lines of debug stored in buffer */
-extern int
-    __kmp_debug_buf_chars; /* How many characters allowed per line in buffer */
-extern int __kmp_debug_buf_atomic; /* TRUE means use atomic update of buffer
-                                      entry pointer */
-
-extern char *__kmp_debug_buffer; /* Debug buffer itself */
-extern std::atomic<int> __kmp_debug_count; /* Counter for number of lines
-                                              printed in buffer so far */
-extern int __kmp_debug_buf_warn_chars; /* Keep track of char increase
-                                          recommended in warnings */
-/* end rotating debug buffer */
-
-#ifdef KMP_DEBUG
-extern int __kmp_par_range; /* +1 => only go par for constructs in range */
-
-#define KMP_PAR_RANGE_ROUTINE_LEN 1024
-extern char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN];
-#define KMP_PAR_RANGE_FILENAME_LEN 1024
-extern char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN];
-extern int __kmp_par_range_lb;
-extern int __kmp_par_range_ub;
-#endif
-
-/* For printing out dynamic storage map for threads and teams */
-extern int
-    __kmp_storage_map; /* True means print storage map for threads and teams */
-extern int __kmp_storage_map_verbose; /* True means storage map includes
-                                         placement info */
-extern int __kmp_storage_map_verbose_specified;
-
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-extern kmp_cpuinfo_t __kmp_cpuinfo;
-#endif
-
-extern volatile int __kmp_init_serial;
-extern volatile int __kmp_init_gtid;
-extern volatile int __kmp_init_common;
-extern volatile int __kmp_init_middle;
-extern volatile int __kmp_init_parallel;
-#if KMP_USE_MONITOR
-extern volatile int __kmp_init_monitor;
-#endif
-extern volatile int __kmp_init_user_locks;
-extern int __kmp_init_counter;
-extern int __kmp_root_counter;
-extern int __kmp_version;
-
-/* list of address of allocated caches for commons */
-extern kmp_cached_addr_t *__kmp_threadpriv_cache_list;
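The rotating debug buffer declared above is a fixed grid of __kmp_debug_buf_lines slots of __kmp_debug_buf_chars characters each, indexed by the atomic line counter. A sketch of the wrap-around indexing; the helper name and exact formatting are illustrative, not the runtime's internal routine:

// Sketch: each writer atomically claims line n = count++ and writes into
// slot (n % lines) of the buffer, truncated to the slot width.
#include <atomic>
#include <cstdio>

static void debug_log_sketch(char *buffer, std::atomic<int> &count,
                             int lines, int chars, const char *msg) {
  int n = count.fetch_add(1);             // claim a slot
  char *entry = buffer + (n % lines) * chars;
  std::snprintf(entry, chars, "%s", msg); // truncate to the slot width
}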
-
-/* Barrier algorithm types and options */
-extern kmp_uint32 __kmp_barrier_gather_bb_dflt;
-extern kmp_uint32 __kmp_barrier_release_bb_dflt;
-extern kmp_bar_pat_e __kmp_barrier_gather_pat_dflt;
-extern kmp_bar_pat_e __kmp_barrier_release_pat_dflt;
-extern kmp_uint32 __kmp_barrier_gather_branch_bits[bs_last_barrier];
-extern kmp_uint32 __kmp_barrier_release_branch_bits[bs_last_barrier];
-extern kmp_bar_pat_e __kmp_barrier_gather_pattern[bs_last_barrier];
-extern kmp_bar_pat_e __kmp_barrier_release_pattern[bs_last_barrier];
-extern char const *__kmp_barrier_branch_bit_env_name[bs_last_barrier];
-extern char const *__kmp_barrier_pattern_env_name[bs_last_barrier];
-extern char const *__kmp_barrier_type_name[bs_last_barrier];
-extern char const *__kmp_barrier_pattern_name[bp_last_bar];
-
-/* Global Locks */
-extern kmp_bootstrap_lock_t __kmp_initz_lock; /* control initialization */
-extern kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */
-extern kmp_bootstrap_lock_t __kmp_task_team_lock;
-extern kmp_bootstrap_lock_t
-    __kmp_exit_lock; /* exit() is not always thread-safe */
-#if KMP_USE_MONITOR
-extern kmp_bootstrap_lock_t
-    __kmp_monitor_lock; /* control monitor thread creation */
-#endif
-extern kmp_bootstrap_lock_t
-    __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache
-                             and __kmp_threads expansion to co-exist */
-
-extern kmp_lock_t __kmp_global_lock; /* control OS/global access */
-extern kmp_queuing_lock_t __kmp_dispatch_lock; /* control dispatch access */
-extern kmp_lock_t __kmp_debug_lock; /* control I/O access for KMP_DEBUG */
-
-/* used for yielding spin-waits */
-extern unsigned int __kmp_init_wait; /* initial number of spin-tests */
-extern unsigned int __kmp_next_wait; /* subsequent number of spin-tests */
-
-extern enum library_type __kmp_library;
-
-extern enum sched_type __kmp_sched; /* default runtime scheduling */
-extern enum sched_type __kmp_static; /* default static scheduling method */
-extern enum sched_type __kmp_guided; /* default guided scheduling method */
-extern enum sched_type __kmp_auto; /* default auto scheduling method */
-extern int __kmp_chunk; /* default runtime chunk size */
-
-extern size_t __kmp_stksize; /* stack size per thread */
-#if KMP_USE_MONITOR
-extern size_t __kmp_monitor_stksize; /* stack size for monitor thread */
-#endif
-extern size_t __kmp_stkoffset; /* stack offset per thread */
-extern int __kmp_stkpadding; /* Should we pad root thread(s) stack */
-
-extern size_t
-    __kmp_malloc_pool_incr; /* incremental size of pool for kmp_malloc() */
-extern int __kmp_env_stksize; /* was KMP_STACKSIZE specified? */
-extern int __kmp_env_blocktime; /* was KMP_BLOCKTIME specified? */
-extern int __kmp_env_checks; /* was KMP_CHECKS specified? */
-extern int __kmp_env_consistency_check; // was KMP_CONSISTENCY_CHECK specified?
-extern int __kmp_generate_warnings; /* should we issue warnings? */
-extern int __kmp_reserve_warn; /* have we issued reserve_threads warning?
*/ - -#ifdef DEBUG_SUSPEND -extern int __kmp_suspend_count; /* count inside __kmp_suspend_template() */ -#endif - -extern kmp_uint32 __kmp_yield_init; -extern kmp_uint32 __kmp_yield_next; - -#if KMP_USE_MONITOR -extern kmp_uint32 __kmp_yielding_on; -#endif -extern kmp_uint32 __kmp_yield_cycle; -extern kmp_int32 __kmp_yield_on_count; -extern kmp_int32 __kmp_yield_off_count; - -/* ------------------------------------------------------------------------- */ -extern int __kmp_allThreadsSpecified; - -extern size_t __kmp_align_alloc; -/* following data protected by initialization routines */ -extern int __kmp_xproc; /* number of processors in the system */ -extern int __kmp_avail_proc; /* number of processors available to the process */ -extern size_t __kmp_sys_min_stksize; /* system-defined minimum stack size */ -extern int __kmp_sys_max_nth; /* system-imposed maximum number of threads */ -// maximum total number of concurrently-existing threads on device -extern int __kmp_max_nth; -// maximum total number of concurrently-existing threads in a contention group -extern int __kmp_cg_max_nth; -extern int __kmp_teams_max_nth; // max threads used in a teams construct -extern int __kmp_threads_capacity; /* capacity of the arrays __kmp_threads and - __kmp_root */ -extern int __kmp_dflt_team_nth; /* default number of threads in a parallel - region a la OMP_NUM_THREADS */ -extern int __kmp_dflt_team_nth_ub; /* upper bound on "" determined at serial - initialization */ -extern int __kmp_tp_capacity; /* capacity of __kmp_threads if threadprivate is - used (fixed) */ -extern int __kmp_tp_cached; /* whether threadprivate cache has been created - (__kmpc_threadprivate_cached()) */ -extern int __kmp_dflt_nested; /* nested parallelism enabled by default a la - OMP_NESTED */ -extern int __kmp_dflt_blocktime; /* number of milliseconds to wait before - blocking (env setting) */ -#if KMP_USE_MONITOR -extern int - __kmp_monitor_wakeups; /* number of times monitor wakes up per second */ -extern int __kmp_bt_intervals; /* number of monitor timestamp intervals before - blocking */ -#endif -#ifdef KMP_ADJUST_BLOCKTIME -extern int __kmp_zero_bt; /* whether blocktime has been forced to zero */ -#endif /* KMP_ADJUST_BLOCKTIME */ -#ifdef KMP_DFLT_NTH_CORES -extern int __kmp_ncores; /* Total number of cores for threads placement */ -#endif -/* Number of millisecs to delay on abort for Intel(R) VTune(TM) tools */ -extern int __kmp_abort_delay; - -extern int __kmp_need_register_atfork_specified; -extern int - __kmp_need_register_atfork; /* At initialization, call pthread_atfork to - install fork handler */ -extern int __kmp_gtid_mode; /* Method of getting gtid, values: - 0 - not set, will be set at runtime - 1 - using stack search - 2 - dynamic TLS (pthread_getspecific(Linux* OS/OS - X*) or TlsGetValue(Windows* OS)) - 3 - static TLS (__declspec(thread) __kmp_gtid), - Linux* OS .so only. 
*/
-extern int
-    __kmp_adjust_gtid_mode; /* If true, adjust method based on #threads */
-#ifdef KMP_TDATA_GTID
-extern KMP_THREAD_LOCAL int __kmp_gtid;
-#endif
-extern int __kmp_tls_gtid_min; /* #threads below which use sp search for gtid */
-extern int __kmp_foreign_tp; // If true, separate TP var for each foreign thread
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-extern int __kmp_inherit_fp_control; // copy fp creg(s) parent->workers at fork
-extern kmp_int16 __kmp_init_x87_fpu_control_word; // init thread's FP ctrl reg
-extern kmp_uint32 __kmp_init_mxcsr; /* init thread's mxcsr */
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
-extern int __kmp_dflt_max_active_levels; /* max_active_levels for nested
-                                            parallelism enabled by default via
-                                            OMP_MAX_ACTIVE_LEVELS */
-extern int __kmp_dispatch_num_buffers; /* max possible dynamic loops in
-                                          concurrent execution per team */
-#if KMP_NESTED_HOT_TEAMS
-extern int __kmp_hot_teams_mode;
-extern int __kmp_hot_teams_max_level;
-#endif
-
-#if KMP_OS_LINUX
-extern enum clock_function_type __kmp_clock_function;
-extern int __kmp_clock_function_param;
-#endif /* KMP_OS_LINUX */
-
-#if KMP_MIC_SUPPORTED
-extern enum mic_type __kmp_mic_type;
-#endif
-
-#ifdef USE_LOAD_BALANCE
-extern double __kmp_load_balance_interval; // load balance algorithm interval
-#endif /* USE_LOAD_BALANCE */
-
-// OpenMP 3.1 - Nested num threads array
-typedef struct kmp_nested_nthreads_t {
-  int *nth;
-  int size;
-  int used;
-} kmp_nested_nthreads_t;
-
-extern kmp_nested_nthreads_t __kmp_nested_nth;
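__kmp_nested_nth holds the per-nesting-level thread counts parsed from a comma-separated OMP_NUM_THREADS list; for example OMP_NUM_THREADS=4,2 would leave {4, 2} in nth with used == 2. A sketch of a per-level lookup; clamping deeper levels to the last specified entry is this sketch's assumption, not a statement about the runtime's exact policy:

// Sketch: look up the requested thread count for a nesting level.
static int nth_for_level_sketch(const kmp_nested_nthreads_t &n, int level) {
  if (n.used == 0)
    return 0; // nothing was specified
  return n.nth[level < n.used ? level : n.used - 1];
}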
-
-#if KMP_USE_ADAPTIVE_LOCKS
-
-// Parameters for the speculative lock backoff system.
-struct kmp_adaptive_backoff_params_t {
-  // Number of soft retries before it counts as a hard retry.
-  kmp_uint32 max_soft_retries;
-  // Badness is a bit mask: 0,1,3,7,15,... on each hard failure we move one to
-  // the right
-  kmp_uint32 max_badness;
-};
-
-extern kmp_adaptive_backoff_params_t __kmp_adaptive_backoff_params;
-
-#if KMP_DEBUG_ADAPTIVE_LOCKS
-extern const char *__kmp_speculative_statsfile;
-#endif
-
-#endif // KMP_USE_ADAPTIVE_LOCKS
-
-#if OMP_40_ENABLED
-extern int __kmp_display_env; /* TRUE or FALSE */
-extern int __kmp_display_env_verbose; /* TRUE if OMP_DISPLAY_ENV=VERBOSE */
-extern int __kmp_omp_cancellation; /* TRUE or FALSE */
-#endif
-
-/* ------------------------------------------------------------------------- */
-
-/* the following are protected by the fork/join lock */
-/* write: lock  read: anytime */
-extern kmp_info_t **__kmp_threads; /* Descriptors for the threads */
-/* read/write: lock */
-extern volatile kmp_team_t *__kmp_team_pool;
-extern volatile kmp_info_t *__kmp_thread_pool;
-extern kmp_info_t *__kmp_thread_pool_insert_pt;
-
-// total num threads reachable from some root thread including all root threads
-extern volatile int __kmp_nth;
-/* total number of threads reachable from some root thread including all root
-   threads, and those in the thread pool */
-extern volatile int __kmp_all_nth;
-extern int __kmp_thread_pool_nth;
-extern std::atomic<int> __kmp_thread_pool_active_nth;
-
-extern kmp_root_t **__kmp_root; /* root of thread hierarchy */
-/* end data protected by fork/join lock */
-/* ------------------------------------------------------------------------- */
-
-#define __kmp_get_gtid() __kmp_get_global_thread_id()
-#define __kmp_entry_gtid() __kmp_get_global_thread_id_reg()
-#define __kmp_get_tid() (__kmp_tid_from_gtid(__kmp_get_gtid()))
-#define __kmp_get_team() (__kmp_threads[(__kmp_get_gtid())]->th.th_team)
-#define __kmp_get_thread() (__kmp_thread_from_gtid(__kmp_get_gtid()))
-
-// AT: Which way is correct?
-// AT: 1. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc;
-// AT: 2. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team_nproc;
-#define __kmp_get_team_num_threads(gtid) \
-  (__kmp_threads[(gtid)]->th.th_team->t.t_nproc)
-
-static inline bool KMP_UBER_GTID(int gtid) {
-  KMP_DEBUG_ASSERT(gtid >= KMP_GTID_MIN);
-  KMP_DEBUG_ASSERT(gtid < __kmp_threads_capacity);
-  return (gtid >= 0 && __kmp_root[gtid] && __kmp_threads[gtid] &&
-          __kmp_threads[gtid] == __kmp_root[gtid]->r.r_uber_thread);
-}
-
-static inline int __kmp_tid_from_gtid(int gtid) {
-  KMP_DEBUG_ASSERT(gtid >= 0);
-  return __kmp_threads[gtid]->th.th_info.ds.ds_tid;
-}
-
-static inline int __kmp_gtid_from_tid(int tid, const kmp_team_t *team) {
-  KMP_DEBUG_ASSERT(tid >= 0 && team);
-  return team->t.t_threads[tid]->th.th_info.ds.ds_gtid;
-}
-
-static inline int __kmp_gtid_from_thread(const kmp_info_t *thr) {
-  KMP_DEBUG_ASSERT(thr);
-  return thr->th.th_info.ds.ds_gtid;
-}
-
-static inline kmp_info_t *__kmp_thread_from_gtid(int gtid) {
-  KMP_DEBUG_ASSERT(gtid >= 0);
-  return __kmp_threads[gtid];
-}
-
-static inline kmp_team_t *__kmp_team_from_gtid(int gtid) {
-  KMP_DEBUG_ASSERT(gtid >= 0);
-  return __kmp_threads[gtid]->th.th_team;
-}
-
-/* ------------------------------------------------------------------------- */
-
-extern kmp_global_t __kmp_global; /* global status */
-
-extern kmp_info_t __kmp_monitor;
-// For Debugging Support Library
-extern std::atomic<kmp_int32> __kmp_team_counter;
-// For Debugging Support Library
-extern std::atomic<kmp_int32> __kmp_task_counter;
-
-#if USE_DEBUGGER
-#define _KMP_GEN_ID(counter) \
-  (__kmp_debugging ?
KMP_ATOMIC_INC(&counter) + 1 : ~0) -#else -#define _KMP_GEN_ID(counter) (~0) -#endif /* USE_DEBUGGER */ - -#define KMP_GEN_TASK_ID() _KMP_GEN_ID(__kmp_task_counter) -#define KMP_GEN_TEAM_ID() _KMP_GEN_ID(__kmp_team_counter) - -/* ------------------------------------------------------------------------ */ - -extern void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, - size_t size, char const *format, ...); - -extern void __kmp_serial_initialize(void); -extern void __kmp_middle_initialize(void); -extern void __kmp_parallel_initialize(void); - -extern void __kmp_internal_begin(void); -extern void __kmp_internal_end_library(int gtid); -extern void __kmp_internal_end_thread(int gtid); -extern void __kmp_internal_end_atexit(void); -extern void __kmp_internal_end_fini(void); -extern void __kmp_internal_end_dtor(void); -extern void __kmp_internal_end_dest(void *); - -extern int __kmp_register_root(int initial_thread); -extern void __kmp_unregister_root(int gtid); - -extern int __kmp_ignore_mppbeg(void); -extern int __kmp_ignore_mppend(void); - -extern int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws); -extern void __kmp_exit_single(int gtid); - -extern void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref); -extern void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref); - -#ifdef USE_LOAD_BALANCE -extern int __kmp_get_load_balance(int); -#endif - -extern int __kmp_get_global_thread_id(void); -extern int __kmp_get_global_thread_id_reg(void); -extern void __kmp_exit_thread(int exit_status); -extern void __kmp_abort(char const *format, ...); -extern void __kmp_abort_thread(void); -KMP_NORETURN extern void __kmp_abort_process(void); -extern void __kmp_warn(char const *format, ...); - -extern void __kmp_set_num_threads(int new_nth, int gtid); - -// Returns current thread (pointer to kmp_info_t). Current thread *must* be -// registered. 
-static inline kmp_info_t *__kmp_entry_thread() { - int gtid = __kmp_entry_gtid(); - - return __kmp_threads[gtid]; -} - -extern void __kmp_set_max_active_levels(int gtid, int new_max_active_levels); -extern int __kmp_get_max_active_levels(int gtid); -extern int __kmp_get_ancestor_thread_num(int gtid, int level); -extern int __kmp_get_team_size(int gtid, int level); -extern void __kmp_set_schedule(int gtid, kmp_sched_t new_sched, int chunk); -extern void __kmp_get_schedule(int gtid, kmp_sched_t *sched, int *chunk); - -extern unsigned short __kmp_get_random(kmp_info_t *thread); -extern void __kmp_init_random(kmp_info_t *thread); - -extern kmp_r_sched_t __kmp_get_schedule_global(void); -extern void __kmp_adjust_num_threads(int new_nproc); - -extern void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL); -extern void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL); -extern void ___kmp_free(void *ptr KMP_SRC_LOC_DECL); -#define __kmp_allocate(size) ___kmp_allocate((size)KMP_SRC_LOC_CURR) -#define __kmp_page_allocate(size) ___kmp_page_allocate((size)KMP_SRC_LOC_CURR) -#define __kmp_free(ptr) ___kmp_free((ptr)KMP_SRC_LOC_CURR) - -#if USE_FAST_MEMORY -extern void *___kmp_fast_allocate(kmp_info_t *this_thr, - size_t size KMP_SRC_LOC_DECL); -extern void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL); -extern void __kmp_free_fast_memory(kmp_info_t *this_thr); -extern void __kmp_initialize_fast_memory(kmp_info_t *this_thr); -#define __kmp_fast_allocate(this_thr, size) \ - ___kmp_fast_allocate((this_thr), (size)KMP_SRC_LOC_CURR) -#define __kmp_fast_free(this_thr, ptr) \ - ___kmp_fast_free((this_thr), (ptr)KMP_SRC_LOC_CURR) -#endif - -extern void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL); -extern void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem, - size_t elsize KMP_SRC_LOC_DECL); -extern void *___kmp_thread_realloc(kmp_info_t *th, void *ptr, - size_t size KMP_SRC_LOC_DECL); -extern void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL); -#define __kmp_thread_malloc(th, size) \ - ___kmp_thread_malloc((th), (size)KMP_SRC_LOC_CURR) -#define __kmp_thread_calloc(th, nelem, elsize) \ - ___kmp_thread_calloc((th), (nelem), (elsize)KMP_SRC_LOC_CURR) -#define __kmp_thread_realloc(th, ptr, size) \ - ___kmp_thread_realloc((th), (ptr), (size)KMP_SRC_LOC_CURR) -#define __kmp_thread_free(th, ptr) \ - ___kmp_thread_free((th), (ptr)KMP_SRC_LOC_CURR) - -#define KMP_INTERNAL_MALLOC(sz) malloc(sz) -#define KMP_INTERNAL_FREE(p) free(p) -#define KMP_INTERNAL_REALLOC(p, sz) realloc((p), (sz)) -#define KMP_INTERNAL_CALLOC(n, sz) calloc((n), (sz)) - -extern void __kmp_push_num_threads(ident_t *loc, int gtid, int num_threads); - -#if OMP_40_ENABLED -extern void __kmp_push_proc_bind(ident_t *loc, int gtid, - kmp_proc_bind_t proc_bind); -extern void __kmp_push_num_teams(ident_t *loc, int gtid, int num_teams, - int num_threads); -#endif - -extern void __kmp_yield(int cond); - -extern void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid, - enum sched_type schedule, kmp_int32 lb, - kmp_int32 ub, kmp_int32 st, kmp_int32 chunk); -extern void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid, - enum sched_type schedule, kmp_uint32 lb, - kmp_uint32 ub, kmp_int32 st, - kmp_int32 chunk); -extern void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid, - enum sched_type schedule, kmp_int64 lb, - kmp_int64 ub, kmp_int64 st, kmp_int64 chunk); -extern void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid, - enum sched_type schedule, kmp_uint64 lb, - kmp_uint64 ub, 
kmp_int64 st, - kmp_int64 chunk); - -extern int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid, - kmp_int32 *p_last, kmp_int32 *p_lb, - kmp_int32 *p_ub, kmp_int32 *p_st); -extern int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, - kmp_int32 *p_last, kmp_uint32 *p_lb, - kmp_uint32 *p_ub, kmp_int32 *p_st); -extern int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid, - kmp_int32 *p_last, kmp_int64 *p_lb, - kmp_int64 *p_ub, kmp_int64 *p_st); -extern int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, - kmp_int32 *p_last, kmp_uint64 *p_lb, - kmp_uint64 *p_ub, kmp_int64 *p_st); - -extern void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid); -extern void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid); -extern void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid); -extern void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid); - -#ifdef KMP_GOMP_COMPAT - -extern void __kmp_aux_dispatch_init_4(ident_t *loc, kmp_int32 gtid, - enum sched_type schedule, kmp_int32 lb, - kmp_int32 ub, kmp_int32 st, - kmp_int32 chunk, int push_ws); -extern void __kmp_aux_dispatch_init_4u(ident_t *loc, kmp_int32 gtid, - enum sched_type schedule, kmp_uint32 lb, - kmp_uint32 ub, kmp_int32 st, - kmp_int32 chunk, int push_ws); -extern void __kmp_aux_dispatch_init_8(ident_t *loc, kmp_int32 gtid, - enum sched_type schedule, kmp_int64 lb, - kmp_int64 ub, kmp_int64 st, - kmp_int64 chunk, int push_ws); -extern void __kmp_aux_dispatch_init_8u(ident_t *loc, kmp_int32 gtid, - enum sched_type schedule, kmp_uint64 lb, - kmp_uint64 ub, kmp_int64 st, - kmp_int64 chunk, int push_ws); -extern void __kmp_aux_dispatch_fini_chunk_4(ident_t *loc, kmp_int32 gtid); -extern void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid); -extern void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid); -extern void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid); - -#endif /* KMP_GOMP_COMPAT */ - -extern kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker); -extern kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker); -extern kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker); -extern kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker); -extern kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker); -extern kmp_uint32 __kmp_wait_yield_4(kmp_uint32 volatile *spinner, - kmp_uint32 checker, - kmp_uint32 (*pred)(kmp_uint32, kmp_uint32), - void *obj); -extern void __kmp_wait_yield_4_ptr(void *spinner, kmp_uint32 checker, - kmp_uint32 (*pred)(void *, kmp_uint32), - void *obj); - -class kmp_flag_32; -class kmp_flag_64; -class kmp_flag_oncore; -extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag, - int final_spin -#if USE_ITT_BUILD - , - void *itt_sync_obj -#endif - ); -extern void __kmp_release_64(kmp_flag_64 *flag); - -extern void __kmp_infinite_loop(void); - -extern void __kmp_cleanup(void); - -#if KMP_HANDLE_SIGNALS -extern int __kmp_handle_signals; -extern void __kmp_install_signals(int parallel_init); -extern void __kmp_remove_signals(void); -#endif - -extern void __kmp_clear_system_time(void); -extern void __kmp_read_system_time(double *delta); - -extern void __kmp_check_stack_overlap(kmp_info_t *thr); - -extern void __kmp_expand_host_name(char *buffer, size_t size); -extern void __kmp_expand_file_name(char *result, size_t rlen, char *pattern); - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 -extern void -__kmp_initialize_system_tick(void); /* Initialize timer tick value */ -#endif - -extern void -__kmp_runtime_initialize(void); /* 
machine specific initialization */ -extern void __kmp_runtime_destroy(void); - -#if KMP_AFFINITY_SUPPORTED -extern char *__kmp_affinity_print_mask(char *buf, int buf_len, - kmp_affin_mask_t *mask); -extern kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf, - kmp_affin_mask_t *mask); -extern void __kmp_affinity_initialize(void); -extern void __kmp_affinity_uninitialize(void); -extern void __kmp_affinity_set_init_mask( - int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */ -#if OMP_40_ENABLED -extern void __kmp_affinity_set_place(int gtid); -#endif -extern void __kmp_affinity_determine_capable(const char *env_var); -extern int __kmp_aux_set_affinity(void **mask); -extern int __kmp_aux_get_affinity(void **mask); -extern int __kmp_aux_get_affinity_max_proc(); -extern int __kmp_aux_set_affinity_mask_proc(int proc, void **mask); -extern int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask); -extern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask); -extern void __kmp_balanced_affinity(kmp_info_t *th, int team_size); -#if KMP_OS_LINUX -extern int kmp_set_thread_affinity_mask_initial(void); -#endif -#endif /* KMP_AFFINITY_SUPPORTED */ -#if OMP_50_ENABLED -// No need for KMP_AFFINITY_SUPPORTED guard as only one field in the -// format string is for affinity, so platforms that do not support -// affinity can still use the other fields, e.g., %n for num_threads -extern size_t __kmp_aux_capture_affinity(int gtid, const char *format, - kmp_str_buf_t *buffer); -extern void __kmp_aux_display_affinity(int gtid, const char *format); -#endif - -extern void __kmp_cleanup_hierarchy(); -extern void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar); - -#if KMP_USE_FUTEX - -extern int __kmp_futex_determine_capable(void); - -#endif // KMP_USE_FUTEX - -extern void __kmp_gtid_set_specific(int gtid); -extern int __kmp_gtid_get_specific(void); - -extern double __kmp_read_cpu_time(void); - -extern int __kmp_read_system_info(struct kmp_sys_info *info); - -#if KMP_USE_MONITOR -extern void __kmp_create_monitor(kmp_info_t *th); -#endif - -extern void *__kmp_launch_thread(kmp_info_t *thr); - -extern void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size); - -#if KMP_OS_WINDOWS -extern int __kmp_still_running(kmp_info_t *th); -extern int __kmp_is_thread_alive(kmp_info_t *th, DWORD *exit_val); -extern void __kmp_free_handle(kmp_thread_t tHandle); -#endif - -#if KMP_USE_MONITOR -extern void __kmp_reap_monitor(kmp_info_t *th); -#endif -extern void __kmp_reap_worker(kmp_info_t *th); -extern void __kmp_terminate_thread(int gtid); - -extern void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag); -extern void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag); -extern void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag); -extern void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag); -extern void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag); -extern void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag); - -extern void __kmp_elapsed(double *); -extern void __kmp_elapsed_tick(double *); - -extern void __kmp_enable(int old_state); -extern void __kmp_disable(int *old_state); - -extern void __kmp_thread_sleep(int millis); - -extern void __kmp_common_initialize(void); -extern void __kmp_common_destroy(void); -extern void __kmp_common_destroy_gtid(int gtid); - -#if KMP_OS_UNIX -extern void __kmp_register_atfork(void); -#endif -extern void __kmp_suspend_initialize(void); -extern void __kmp_suspend_uninitialize_thread(kmp_info_t *th); - 
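/* An illustrative aside on the __kmpc_dispatch_* entry points declared above:
   a minimal sketch (not taken from the vendored file) of how compiler-generated
   code typically drives a dynamically scheduled loop. The helper name
   run_dynamic_loop and the body callback are hypothetical; the entry-point
   signatures and the kmp_sch_dynamic_chunked schedule constant follow the
   declarations in this header. */
static void run_dynamic_loop(ident_t *loc, kmp_int32 gtid, kmp_int32 lb,
                             kmp_int32 ub, kmp_int32 st, kmp_int32 chunk,
                             void (*body)(kmp_int32)) {
  kmp_int32 last, lo, hi, stride;
  // Register the loop with the runtime under a dynamic schedule.
  __kmpc_dispatch_init_4(loc, gtid, kmp_sch_dynamic_chunked, lb, ub, st, chunk);
  // Each successful dispatch_next call hands back one chunk [lo, hi];
  // it returns 0 once no chunks remain.
  while (__kmpc_dispatch_next_4(loc, gtid, &last, &lo, &hi, &stride)) {
    for (kmp_int32 i = lo; i <= hi; i += stride)
      body(i);
  }
}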
-extern kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team, - int tid); -#if OMP_40_ENABLED -extern kmp_team_t * -__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, -#if OMPT_SUPPORT - ompt_data_t ompt_parallel_data, -#endif - kmp_proc_bind_t proc_bind, kmp_internal_control_t *new_icvs, - int argc USE_NESTED_HOT_ARG(kmp_info_t *thr)); -#else -extern kmp_team_t * -__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, -#if OMPT_SUPPORT - ompt_id_t ompt_parallel_id, -#endif - kmp_internal_control_t *new_icvs, - int argc USE_NESTED_HOT_ARG(kmp_info_t *thr)); -#endif // OMP_40_ENABLED -extern void __kmp_free_thread(kmp_info_t *); -extern void __kmp_free_team(kmp_root_t *, - kmp_team_t *USE_NESTED_HOT_ARG(kmp_info_t *)); -extern kmp_team_t *__kmp_reap_team(kmp_team_t *); - -/* ------------------------------------------------------------------------ */ - -extern void __kmp_initialize_bget(kmp_info_t *th); -extern void __kmp_finalize_bget(kmp_info_t *th); - -KMP_EXPORT void *kmpc_malloc(size_t size); -KMP_EXPORT void *kmpc_aligned_malloc(size_t size, size_t alignment); -KMP_EXPORT void *kmpc_calloc(size_t nelem, size_t elsize); -KMP_EXPORT void *kmpc_realloc(void *ptr, size_t size); -KMP_EXPORT void kmpc_free(void *ptr); - -/* declarations for internal use */ - -extern int __kmp_barrier(enum barrier_type bt, int gtid, int is_split, - size_t reduce_size, void *reduce_data, - void (*reduce)(void *, void *)); -extern void __kmp_end_split_barrier(enum barrier_type bt, int gtid); - -/*! - * Tell the fork call which compiler generated the fork call, and therefore how - * to deal with the call. - */ -enum fork_context_e { - fork_context_gnu, /**< Called from GNU generated code, so must not invoke the - microtask internally. */ - fork_context_intel, /**< Called from Intel generated code. 
*/ - fork_context_last -}; -extern int __kmp_fork_call(ident_t *loc, int gtid, - enum fork_context_e fork_context, kmp_int32 argc, - microtask_t microtask, launch_t invoker, -/* TODO: revert workaround for Intel(R) 64 tracker #96 */ -#if (KMP_ARCH_ARM || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64) && KMP_OS_LINUX - va_list *ap -#else - va_list ap -#endif - ); - -extern void __kmp_join_call(ident_t *loc, int gtid -#if OMPT_SUPPORT - , - enum fork_context_e fork_context -#endif -#if OMP_40_ENABLED - , - int exit_teams = 0 -#endif - ); - -extern void __kmp_serialized_parallel(ident_t *id, kmp_int32 gtid); -extern void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team); -extern void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team); -extern int __kmp_invoke_task_func(int gtid); -extern void __kmp_run_before_invoked_task(int gtid, int tid, - kmp_info_t *this_thr, - kmp_team_t *team); -extern void __kmp_run_after_invoked_task(int gtid, int tid, - kmp_info_t *this_thr, - kmp_team_t *team); - -// should never have been exported -KMP_EXPORT int __kmpc_invoke_task_func(int gtid); -#if OMP_40_ENABLED -extern int __kmp_invoke_teams_master(int gtid); -extern void __kmp_teams_master(int gtid); -extern int __kmp_aux_get_team_num(); -extern int __kmp_aux_get_num_teams(); -#endif -extern void __kmp_save_internal_controls(kmp_info_t *thread); -extern void __kmp_user_set_library(enum library_type arg); -extern void __kmp_aux_set_library(enum library_type arg); -extern void __kmp_aux_set_stacksize(size_t arg); -extern void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid); -extern void __kmp_aux_set_defaults(char const *str, int len); - -/* Functions called from __kmp_aux_env_initialize() in kmp_settings.cpp */ -void kmpc_set_blocktime(int arg); -void ompc_set_nested(int flag); -void ompc_set_dynamic(int flag); -void ompc_set_num_threads(int arg); - -extern void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, - kmp_team_t *team, int tid); -extern void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr); -extern kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid, - kmp_tasking_flags_t *flags, - size_t sizeof_kmp_task_t, - size_t sizeof_shareds, - kmp_routine_entry_t task_entry); -extern void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr, - kmp_team_t *team, int tid, - int set_curr_task); -extern void __kmp_finish_implicit_task(kmp_info_t *this_thr); -extern void __kmp_free_implicit_task(kmp_info_t *this_thr); -int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, - kmp_flag_32 *flag, int final_spin, - int *thread_finished, -#if USE_ITT_BUILD - void *itt_sync_obj, -#endif /* USE_ITT_BUILD */ - kmp_int32 is_constrained); -int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, - kmp_flag_64 *flag, int final_spin, - int *thread_finished, -#if USE_ITT_BUILD - void *itt_sync_obj, -#endif /* USE_ITT_BUILD */ - kmp_int32 is_constrained); -int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, - kmp_flag_oncore *flag, int final_spin, - int *thread_finished, -#if USE_ITT_BUILD - void *itt_sync_obj, -#endif /* USE_ITT_BUILD */ - kmp_int32 is_constrained); - -extern void __kmp_free_task_team(kmp_info_t *thread, - kmp_task_team_t *task_team); -extern void __kmp_reap_task_teams(void); -extern void __kmp_wait_to_unref_task_teams(void); -extern void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team, - int always); -extern void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team); -extern void 
__kmp_task_team_wait(kmp_info_t *this_thr, kmp_team_t *team -#if USE_ITT_BUILD - , - void *itt_sync_obj -#endif /* USE_ITT_BUILD */ - , - int wait = 1); -extern void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, - int gtid); - -extern int __kmp_is_address_mapped(void *addr); -extern kmp_uint64 __kmp_hardware_timestamp(void); - -#if KMP_OS_UNIX -extern int __kmp_read_from_file(char const *path, char const *format, ...); -#endif - -/* ------------------------------------------------------------------------ */ -// -// Assembly routines that have no compiler intrinsic replacement -// - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - -extern void __kmp_query_cpuid(kmp_cpuinfo_t *p); - -#define __kmp_load_mxcsr(p) _mm_setcsr(*(p)) -static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); } - -extern void __kmp_load_x87_fpu_control_word(kmp_int16 *p); -extern void __kmp_store_x87_fpu_control_word(kmp_int16 *p); -extern void __kmp_clear_x87_fpu_status_word(); -#define KMP_X86_MXCSR_MASK 0xffffffc0 /* ignore status flags (6 lsb) */ - -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -extern int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int npr, int argc, - void *argv[] -#if OMPT_SUPPORT - , - void **exit_frame_ptr -#endif - ); - -/* ------------------------------------------------------------------------ */ - -KMP_EXPORT void __kmpc_begin(ident_t *, kmp_int32 flags); -KMP_EXPORT void __kmpc_end(ident_t *); - -KMP_EXPORT void __kmpc_threadprivate_register_vec(ident_t *, void *data, - kmpc_ctor_vec ctor, - kmpc_cctor_vec cctor, - kmpc_dtor_vec dtor, - size_t vector_length); -KMP_EXPORT void __kmpc_threadprivate_register(ident_t *, void *data, - kmpc_ctor ctor, kmpc_cctor cctor, - kmpc_dtor dtor); -KMP_EXPORT void *__kmpc_threadprivate(ident_t *, kmp_int32 global_tid, - void *data, size_t size); - -KMP_EXPORT kmp_int32 __kmpc_global_thread_num(ident_t *); -KMP_EXPORT kmp_int32 __kmpc_global_num_threads(ident_t *); -KMP_EXPORT kmp_int32 __kmpc_bound_thread_num(ident_t *); -KMP_EXPORT kmp_int32 __kmpc_bound_num_threads(ident_t *); - -KMP_EXPORT kmp_int32 __kmpc_ok_to_fork(ident_t *); -KMP_EXPORT void __kmpc_fork_call(ident_t *, kmp_int32 nargs, - kmpc_micro microtask, ...); - -KMP_EXPORT void __kmpc_serialized_parallel(ident_t *, kmp_int32 global_tid); -KMP_EXPORT void __kmpc_end_serialized_parallel(ident_t *, kmp_int32 global_tid); - -KMP_EXPORT void __kmpc_flush(ident_t *); -KMP_EXPORT void __kmpc_barrier(ident_t *, kmp_int32 global_tid); -KMP_EXPORT kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid); -KMP_EXPORT void __kmpc_end_master(ident_t *, kmp_int32 global_tid); -KMP_EXPORT void __kmpc_ordered(ident_t *, kmp_int32 global_tid); -KMP_EXPORT void __kmpc_end_ordered(ident_t *, kmp_int32 global_tid); -KMP_EXPORT void __kmpc_critical(ident_t *, kmp_int32 global_tid, - kmp_critical_name *); -KMP_EXPORT void __kmpc_end_critical(ident_t *, kmp_int32 global_tid, - kmp_critical_name *); - -#if OMP_45_ENABLED -KMP_EXPORT void __kmpc_critical_with_hint(ident_t *, kmp_int32 global_tid, - kmp_critical_name *, uint32_t hint); -#endif - -KMP_EXPORT kmp_int32 __kmpc_barrier_master(ident_t *, kmp_int32 global_tid); -KMP_EXPORT void __kmpc_end_barrier_master(ident_t *, kmp_int32 global_tid); - -KMP_EXPORT kmp_int32 __kmpc_barrier_master_nowait(ident_t *, - kmp_int32 global_tid); - -KMP_EXPORT kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid); -KMP_EXPORT void __kmpc_end_single(ident_t *, kmp_int32 global_tid); - -KMP_EXPORT void KMPC_FOR_STATIC_INIT(ident_t *loc, kmp_int32 
global_tid, - kmp_int32 schedtype, kmp_int32 *plastiter, - kmp_int *plower, kmp_int *pupper, - kmp_int *pstride, kmp_int incr, - kmp_int chunk); - -KMP_EXPORT void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); - -KMP_EXPORT void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, - size_t cpy_size, void *cpy_data, - void (*cpy_func)(void *, void *), - kmp_int32 didit); - -extern void KMPC_SET_NUM_THREADS(int arg); -extern void KMPC_SET_DYNAMIC(int flag); -extern void KMPC_SET_NESTED(int flag); - -/* Taskq interface routines */ -KMP_EXPORT kmpc_thunk_t *__kmpc_taskq(ident_t *loc, kmp_int32 global_tid, - kmpc_task_t taskq_task, - size_t sizeof_thunk, - size_t sizeof_shareds, kmp_int32 flags, - kmpc_shared_vars_t **shareds); -KMP_EXPORT void __kmpc_end_taskq(ident_t *loc, kmp_int32 global_tid, - kmpc_thunk_t *thunk); -KMP_EXPORT kmp_int32 __kmpc_task(ident_t *loc, kmp_int32 global_tid, - kmpc_thunk_t *thunk); -KMP_EXPORT void __kmpc_taskq_task(ident_t *loc, kmp_int32 global_tid, - kmpc_thunk_t *thunk, kmp_int32 status); -KMP_EXPORT void __kmpc_end_taskq_task(ident_t *loc, kmp_int32 global_tid, - kmpc_thunk_t *thunk); -KMP_EXPORT kmpc_thunk_t *__kmpc_task_buffer(ident_t *loc, kmp_int32 global_tid, - kmpc_thunk_t *taskq_thunk, - kmpc_task_t task); - -/* OMP 3.0 tasking interface routines */ -KMP_EXPORT kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid, - kmp_task_t *new_task); -KMP_EXPORT kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid, - kmp_int32 flags, - size_t sizeof_kmp_task_t, - size_t sizeof_shareds, - kmp_routine_entry_t task_entry); -KMP_EXPORT void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid, - kmp_task_t *task); -KMP_EXPORT void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid, - kmp_task_t *task); -KMP_EXPORT kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid, - kmp_task_t *new_task); -KMP_EXPORT kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid); - -KMP_EXPORT kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, - int end_part); - -#if TASK_UNUSED -void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task); -void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid, - kmp_task_t *task); -#endif // TASK_UNUSED - -/* ------------------------------------------------------------------------ */ - -#if OMP_40_ENABLED - -KMP_EXPORT void __kmpc_taskgroup(ident_t *loc, int gtid); -KMP_EXPORT void __kmpc_end_taskgroup(ident_t *loc, int gtid); - -KMP_EXPORT kmp_int32 __kmpc_omp_task_with_deps( - ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps, - kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, - kmp_depend_info_t *noalias_dep_list); -KMP_EXPORT void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, - kmp_int32 ndeps, - kmp_depend_info_t *dep_list, - kmp_int32 ndeps_noalias, - kmp_depend_info_t *noalias_dep_list); - -extern kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task, - bool serialize_immediate); - -KMP_EXPORT kmp_int32 __kmpc_cancel(ident_t *loc_ref, kmp_int32 gtid, - kmp_int32 cncl_kind); -KMP_EXPORT kmp_int32 __kmpc_cancellationpoint(ident_t *loc_ref, kmp_int32 gtid, - kmp_int32 cncl_kind); -KMP_EXPORT kmp_int32 __kmpc_cancel_barrier(ident_t *loc_ref, kmp_int32 gtid); -KMP_EXPORT int __kmp_get_cancellation_status(int cancel_kind); - -#if OMP_45_ENABLED - -KMP_EXPORT void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask); -KMP_EXPORT void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask); 
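/* An illustrative aside on the OMP 3.0 tasking entry points declared above:
   a minimal sketch (not taken from the vendored file) of the canonical
   allocate-then-submit sequence a compiler emits for "#pragma omp task".
   The names my_shareds_t, my_task_routine, and spawn_one_task are
   hypothetical; flags = 1 assumes the tied-task bit of kmp_tasking_flags_t,
   and the shareds pointer is the first field of kmp_task_t. */
typedef struct {
  int value; // data shared between the spawning and the spawned task
} my_shareds_t;

// Task entry point: receives the global thread id and the kmp_task_t.
static kmp_int32 my_task_routine(kmp_int32 gtid, void *task) {
  kmp_task_t *t = (kmp_task_t *)task;
  my_shareds_t *sh = (my_shareds_t *)t->shareds;
  // ... task body works on sh->value ...
  return 0;
}

static void spawn_one_task(ident_t *loc, kmp_int32 gtid) {
  kmp_task_t *t = __kmpc_omp_task_alloc(loc, gtid, /*flags=*/1,
                                        sizeof(kmp_task_t),
                                        sizeof(my_shareds_t), my_task_routine);
  ((my_shareds_t *)t->shareds)->value = 42; // fill in the shareds block
  // Defer the task if possible; the runtime may instead execute it
  // immediately (e.g., in a serialized region).
  __kmpc_omp_task(loc, gtid, t);
}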
-KMP_EXPORT void __kmpc_taskloop(ident_t *loc, kmp_int32 gtid, kmp_task_t *task, - kmp_int32 if_val, kmp_uint64 *lb, - kmp_uint64 *ub, kmp_int64 st, kmp_int32 nogroup, - kmp_int32 sched, kmp_uint64 grainsize, - void *task_dup); -#endif -#if OMP_50_ENABLED -KMP_EXPORT void *__kmpc_task_reduction_init(int gtid, int num_data, void *data); -KMP_EXPORT void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *d); -KMP_EXPORT kmp_int32 __kmpc_omp_reg_task_with_affinity( - ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 naffins, - kmp_task_affinity_info_t *affin_list); -#endif - -#endif - -/* Lock interface routines (fast versions with gtid passed in) */ -KMP_EXPORT void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, - void **user_lock); -KMP_EXPORT void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, - void **user_lock); -KMP_EXPORT void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, - void **user_lock); -KMP_EXPORT void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, - void **user_lock); -KMP_EXPORT void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock); -KMP_EXPORT void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, - void **user_lock); -KMP_EXPORT void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, - void **user_lock); -KMP_EXPORT void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, - void **user_lock); -KMP_EXPORT int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock); -KMP_EXPORT int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, - void **user_lock); - -#if OMP_45_ENABLED -KMP_EXPORT void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, - void **user_lock, uintptr_t hint); -KMP_EXPORT void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, - void **user_lock, - uintptr_t hint); -#endif - -/* Interface to fast scalable reduce methods routines */ - -KMP_EXPORT kmp_int32 __kmpc_reduce_nowait( - ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, - void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), - kmp_critical_name *lck); -KMP_EXPORT void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, - kmp_critical_name *lck); -KMP_EXPORT kmp_int32 __kmpc_reduce( - ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, - void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), - kmp_critical_name *lck); -KMP_EXPORT void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, - kmp_critical_name *lck); - -/* Internal fast reduction routines */ - -extern PACKED_REDUCTION_METHOD_T __kmp_determine_reduction_method( - ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, - void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), - kmp_critical_name *lck); - -// this function is for testing set/get/determine reduce method -KMP_EXPORT kmp_int32 __kmp_get_reduce_method(void); - -KMP_EXPORT kmp_uint64 __kmpc_get_taskid(); -KMP_EXPORT kmp_uint64 __kmpc_get_parent_taskid(); - -// C++ port -// missing 'extern "C"' declarations - -KMP_EXPORT kmp_int32 __kmpc_in_parallel(ident_t *loc); -KMP_EXPORT void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid); -KMP_EXPORT void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, - kmp_int32 num_threads); - -#if OMP_40_ENABLED -KMP_EXPORT void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, - int proc_bind); -KMP_EXPORT void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, - kmp_int32 num_teams, - kmp_int32 
num_threads); -KMP_EXPORT void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, - kmpc_micro microtask, ...); -#endif -#if OMP_45_ENABLED -struct kmp_dim { // loop bounds info casted to kmp_int64 - kmp_int64 lo; // lower - kmp_int64 up; // upper - kmp_int64 st; // stride -}; -KMP_EXPORT void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, - kmp_int32 num_dims, - const struct kmp_dim *dims); -KMP_EXPORT void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, - const kmp_int64 *vec); -KMP_EXPORT void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, - const kmp_int64 *vec); -KMP_EXPORT void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); -#endif - -KMP_EXPORT void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 global_tid, - void *data, size_t size, - void ***cache); - -// Symbols for MS mutual detection. -extern int _You_must_link_with_exactly_one_OpenMP_library; -extern int _You_must_link_with_Intel_OpenMP_library; -#if KMP_OS_WINDOWS && (KMP_VERSION_MAJOR > 4) -extern int _You_must_link_with_Microsoft_OpenMP_library; -#endif - -// The routines below are not exported. -// Consider making them 'static' in corresponding source files. -void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr, - void *data_addr, size_t pc_size); -struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr, - void *data_addr, - size_t pc_size); -void __kmp_threadprivate_resize_cache(int newCapacity); -void __kmp_cleanup_threadprivate_caches(); - -// ompc_, kmpc_ entries moved from omp.h. -#if KMP_OS_WINDOWS -#define KMPC_CONVENTION __cdecl -#else -#define KMPC_CONVENTION -#endif - -#ifndef __OMP_H -typedef enum omp_sched_t { - omp_sched_static = 1, - omp_sched_dynamic = 2, - omp_sched_guided = 3, - omp_sched_auto = 4 -} omp_sched_t; -typedef void *kmp_affinity_mask_t; -#endif - -KMP_EXPORT void KMPC_CONVENTION ompc_set_max_active_levels(int); -KMP_EXPORT void KMPC_CONVENTION ompc_set_schedule(omp_sched_t, int); -KMP_EXPORT int KMPC_CONVENTION ompc_get_ancestor_thread_num(int); -KMP_EXPORT int KMPC_CONVENTION ompc_get_team_size(int); -KMP_EXPORT int KMPC_CONVENTION -kmpc_set_affinity_mask_proc(int, kmp_affinity_mask_t *); -KMP_EXPORT int KMPC_CONVENTION -kmpc_unset_affinity_mask_proc(int, kmp_affinity_mask_t *); -KMP_EXPORT int KMPC_CONVENTION -kmpc_get_affinity_mask_proc(int, kmp_affinity_mask_t *); - -KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize(int); -KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize_s(size_t); -KMP_EXPORT void KMPC_CONVENTION kmpc_set_library(int); -KMP_EXPORT void KMPC_CONVENTION kmpc_set_defaults(char const *); -KMP_EXPORT void KMPC_CONVENTION kmpc_set_disp_num_buffers(int); - -#if OMP_50_ENABLED -enum kmp_target_offload_kind { - tgt_disabled = 0, - tgt_default = 1, - tgt_mandatory = 2 -}; -typedef enum kmp_target_offload_kind kmp_target_offload_kind_t; -// Set via OMP_TARGET_OFFLOAD if specified, defaults to tgt_default otherwise -extern kmp_target_offload_kind_t __kmp_target_offload; -extern int __kmpc_get_target_offload(); -#endif - -#if OMP_40_ENABLED -// Constants used in libomptarget -#define KMP_DEVICE_DEFAULT -1 // This is libomptarget's default device. -#define KMP_HOST_DEVICE -10 // This is what it is in libomptarget, go figure. -#define KMP_DEVICE_ALL -11 // This is libomptarget's "all devices". 
-#endif // OMP_40_ENABLED - -#ifdef __cplusplus -} -#endif - -#endif /* KMP_H */ Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_import.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_import.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_import.cpp (nonexistent) @@ -1,34 +0,0 @@ -/* - * kmp_import.cpp - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -/* Object generated from this source file is linked to Windows* OS DLL import - library (libompmd.lib) only! It is not a part of regular static or dynamic - OpenMP RTL. Any code that just needs to go in the libompmd.lib (but not in - libompmt.lib and libompmd.dll) should be placed in this file. */ - -#ifdef __cplusplus -extern "C" { -#endif - -/*These symbols are required for mutual exclusion with Microsoft OpenMP RTL - (and compatibility with MS Compiler). */ - -int _You_must_link_with_exactly_one_OpenMP_library = 1; -int _You_must_link_with_Intel_OpenMP_library = 1; -int _You_must_link_with_Microsoft_OpenMP_library = 1; - -#ifdef __cplusplus -} -#endif - -// end of file // Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_import.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_itt.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_itt.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_itt.h (nonexistent) @@ -1,333 +0,0 @@ -#if USE_ITT_BUILD -/* - * kmp_itt.h -- ITT Notify interface. - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#ifndef KMP_ITT_H -#define KMP_ITT_H - -#include "kmp_lock.h" - -#define INTEL_ITTNOTIFY_API_PRIVATE -#include "ittnotify.h" -#include "legacy/ittnotify.h" - -#if KMP_DEBUG -#define __kmp_inline // Turn off inlining in debug mode. 
-#else -#define __kmp_inline static inline -#endif - -#if USE_ITT_NOTIFY -extern kmp_int32 __kmp_itt_prepare_delay; -#ifdef __cplusplus -extern "C" void __kmp_itt_fini_ittlib(void); -#else -extern void __kmp_itt_fini_ittlib(void); -#endif -#endif - -// Simplify the handling of an argument that is only required when USE_ITT_BUILD -// is enabled. -#define USE_ITT_BUILD_ARG(x) , x - -void __kmp_itt_initialize(); -void __kmp_itt_destroy(); -void __kmp_itt_reset(); - -// ----------------------------------------------------------------------------- -// New stuff for reporting high-level constructs. - -// Note the naming convention: -// __kmp_itt_xxxing() function should be called before action, while -// __kmp_itt_xxxed() function should be called after action. - -// --- Parallel region reporting --- -__kmp_inline void -__kmp_itt_region_forking(int gtid, int team_size, - int barriers); // Master only, before forking threads. -__kmp_inline void -__kmp_itt_region_joined(int gtid); // Master only, after joining threads. -// (*) Note: A thread may execute tasks after this point, though. - -// --- Frame reporting --- -// region=0: no regions, region=1: parallel, region=2: serialized parallel -__kmp_inline void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin, - __itt_timestamp end, int imbalance, - ident_t *loc, int team_size, - int region = 0); - -// --- Metadata reporting --- -// begin/end - begin/end timestamps of a barrier frame, imbalance - aggregated -// wait time value, reduction -if this is a reduction barrier -__kmp_inline void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin, - kmp_uint64 end, - kmp_uint64 imbalance, - kmp_uint64 reduction); -// sched_type: 0 - static, 1 - dynamic, 2 - guided, 3 - custom (all others); -// iterations - loop trip count, chunk - chunk size -__kmp_inline void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type, - kmp_uint64 iterations, - kmp_uint64 chunk); -__kmp_inline void __kmp_itt_metadata_single(ident_t *loc); - -// --- Barrier reporting --- -__kmp_inline void *__kmp_itt_barrier_object(int gtid, int bt, int set_name = 0, - int delta = 0); -__kmp_inline void __kmp_itt_barrier_starting(int gtid, void *object); -__kmp_inline void __kmp_itt_barrier_middle(int gtid, void *object); -__kmp_inline void __kmp_itt_barrier_finished(int gtid, void *object); - -// --- Taskwait reporting --- -__kmp_inline void *__kmp_itt_taskwait_object(int gtid); -__kmp_inline void __kmp_itt_taskwait_starting(int gtid, void *object); -__kmp_inline void __kmp_itt_taskwait_finished(int gtid, void *object); - -// --- Task reporting --- -__kmp_inline void __kmp_itt_task_starting(void *object); -__kmp_inline void __kmp_itt_task_finished(void *object); - -// --- Lock reporting --- -#if KMP_USE_DYNAMIC_LOCK -__kmp_inline void __kmp_itt_lock_creating(kmp_user_lock_p lock, - const ident_t *); -#else -__kmp_inline void __kmp_itt_lock_creating(kmp_user_lock_p lock); -#endif -__kmp_inline void __kmp_itt_lock_acquiring(kmp_user_lock_p lock); -__kmp_inline void __kmp_itt_lock_acquired(kmp_user_lock_p lock); -__kmp_inline void __kmp_itt_lock_releasing(kmp_user_lock_p lock); -__kmp_inline void __kmp_itt_lock_cancelled(kmp_user_lock_p lock); -__kmp_inline void __kmp_itt_lock_destroyed(kmp_user_lock_p lock); - -// --- Critical reporting --- -#if KMP_USE_DYNAMIC_LOCK -__kmp_inline void __kmp_itt_critical_creating(kmp_user_lock_p lock, - const ident_t *); -#else -__kmp_inline void __kmp_itt_critical_creating(kmp_user_lock_p lock); -#endif -__kmp_inline void 
__kmp_itt_critical_acquiring(kmp_user_lock_p lock);
-__kmp_inline void __kmp_itt_critical_acquired(kmp_user_lock_p lock);
-__kmp_inline void __kmp_itt_critical_releasing(kmp_user_lock_p lock);
-__kmp_inline void __kmp_itt_critical_destroyed(kmp_user_lock_p lock);
-
-// --- Single reporting ---
-__kmp_inline void __kmp_itt_single_start(int gtid);
-__kmp_inline void __kmp_itt_single_end(int gtid);
-
-// --- Ordered reporting ---
-__kmp_inline void __kmp_itt_ordered_init(int gtid);
-__kmp_inline void __kmp_itt_ordered_prep(int gtid);
-__kmp_inline void __kmp_itt_ordered_start(int gtid);
-__kmp_inline void __kmp_itt_ordered_end(int gtid);
-
-// --- Threads reporting ---
-__kmp_inline void __kmp_itt_thread_ignore();
-__kmp_inline void __kmp_itt_thread_name(int gtid);
-
-// --- System objects ---
-__kmp_inline void __kmp_itt_system_object_created(void *object,
-                                                  char const *name);
-
-// --- Stack stitching ---
-__kmp_inline __itt_caller __kmp_itt_stack_caller_create(void);
-__kmp_inline void __kmp_itt_stack_caller_destroy(__itt_caller);
-__kmp_inline void __kmp_itt_stack_callee_enter(__itt_caller);
-__kmp_inline void __kmp_itt_stack_callee_leave(__itt_caller);
-
-// -----------------------------------------------------------------------------
-// Old stuff for reporting low-level internal synchronization.
-
-#if USE_ITT_NOTIFY
-
-/* Support for SSC marks, which are used by SDE
-   http://software.intel.com/en-us/articles/intel-software-development-emulator
-   to mark points in instruction traces that represent spin-loops and are
-   therefore uninteresting when collecting traces for architecture simulation.
- */
-#ifndef INCLUDE_SSC_MARKS
-#define INCLUDE_SSC_MARKS (KMP_OS_LINUX && KMP_ARCH_X86_64)
-#endif
-
-/* Linux 64 only for now */
-#if (INCLUDE_SSC_MARKS && KMP_OS_LINUX && KMP_ARCH_X86_64)
-// Portable (at least for gcc and icc) code to insert the necessary instructions
-// to set %ebx and execute the unlikely no-op.
-#if defined(__INTEL_COMPILER)
-#define INSERT_SSC_MARK(tag) __SSC_MARK(tag)
-#else
-#define INSERT_SSC_MARK(tag)                                                   \
-  __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(tag)    \
-                       : "%ebx")
-#endif
-#else
-#define INSERT_SSC_MARK(tag) ((void)0)
-#endif
-
-/* Markers for the start and end of regions that represent polling and are
-   therefore uninteresting to architectural simulations. 0x4376 and 0x4377 are
-   arbitrary numbers that should be unique in the space of SSC tags, but there
-   is no central issuing authority; rather, randomness is expected to work. */
-#define SSC_MARK_SPIN_START() INSERT_SSC_MARK(0x4376)
-#define SSC_MARK_SPIN_END() INSERT_SSC_MARK(0x4377)
-
-// Markers for architecture simulation.
-// FORKING      : Before the master thread forks.
-// JOINING      : At the start of the join.
-// INVOKING     : Before the threads invoke microtasks.
-// DISPATCH_INIT: At the start of a dynamically scheduled loop.
-// DISPATCH_NEXT: After claiming the next iteration of a dynamically scheduled
-//                loop.
-#define SSC_MARK_FORKING() INSERT_SSC_MARK(0xd693)
-#define SSC_MARK_JOINING() INSERT_SSC_MARK(0xd694)
-#define SSC_MARK_INVOKING() INSERT_SSC_MARK(0xd695)
-#define SSC_MARK_DISPATCH_INIT() INSERT_SSC_MARK(0xd696)
-#define SSC_MARK_DISPATCH_NEXT() INSERT_SSC_MARK(0xd697)
-
-// The object is an address that associates a specific set of the prepare,
-// acquire, release, and cancel operations.
-
-/* Sync prepare indicates a thread is going to start waiting for another thread
-   to send a release event. 
This operation should be done just before the
-   thread begins checking for the existence of the release event */
-
-/* Sync cancel indicates a thread is cancelling a wait on another thread and
-   continuing execution without waiting for the other thread to release it */
-
-/* Sync acquired indicates a thread has received a release event from another
-   thread and has stopped waiting. This operation must occur only after the
-   release event is received. */
-
-/* Sync release indicates a thread is going to send a release event to another
-   thread so it will stop waiting and continue execution. This operation must
-   happen just before the release event. */
-
-#define KMP_FSYNC_PREPARE(obj) __itt_fsync_prepare((void *)(obj))
-#define KMP_FSYNC_CANCEL(obj) __itt_fsync_cancel((void *)(obj))
-#define KMP_FSYNC_ACQUIRED(obj) __itt_fsync_acquired((void *)(obj))
-#define KMP_FSYNC_RELEASING(obj) __itt_fsync_releasing((void *)(obj))
-
-/* In case of waiting in a spin loop, ITT wants KMP_FSYNC_PREPARE() to be called
-   with a delay (and not called at all if waiting time is small). So, in spin
-   loops, do not use KMP_FSYNC_PREPARE(), but use KMP_FSYNC_SPIN_INIT() (before
-   the spin loop), KMP_FSYNC_SPIN_PREPARE() (within the spin loop), and
-   KMP_FSYNC_SPIN_ACQUIRED(). See KMP_WAIT_YIELD() for an example. */
-
-#undef KMP_FSYNC_SPIN_INIT
-#define KMP_FSYNC_SPIN_INIT(obj, spin)                                         \
-  int sync_iters = 0;                                                          \
-  if (__itt_fsync_prepare_ptr) {                                               \
-    if (obj == NULL) {                                                         \
-      obj = spin;                                                              \
-    } /* if */                                                                 \
-  } /* if */                                                                   \
-  SSC_MARK_SPIN_START()
-
-#undef KMP_FSYNC_SPIN_PREPARE
-#define KMP_FSYNC_SPIN_PREPARE(obj)                                            \
-  do {                                                                         \
-    if (__itt_fsync_prepare_ptr && sync_iters < __kmp_itt_prepare_delay) {     \
-      ++sync_iters;                                                            \
-      if (sync_iters >= __kmp_itt_prepare_delay) {                             \
-        KMP_FSYNC_PREPARE((void *)obj);                                        \
-      } /* if */                                                               \
-    } /* if */                                                                 \
-  } while (0)
-#undef KMP_FSYNC_SPIN_ACQUIRED
-#define KMP_FSYNC_SPIN_ACQUIRED(obj)                                           \
-  do {                                                                         \
-    SSC_MARK_SPIN_END();                                                       \
-    if (sync_iters >= __kmp_itt_prepare_delay) {                               \
-      KMP_FSYNC_ACQUIRED((void *)obj);                                         \
-    } /* if */                                                                 \
-  } while (0)
-
-/* ITT will not report objects created within KMP_ITT_IGNORE(), e.g.:
-   KMP_ITT_IGNORE(
-       ptr = malloc( size );
-   );
-*/
-#define KMP_ITT_IGNORE(statement)                                              \
-  do {                                                                         \
-    __itt_state_t __itt_state_;                                                \
-    if (__itt_state_get_ptr) {                                                 \
-      __itt_state_ = __itt_state_get();                                        \
-      __itt_obj_mode_set(__itt_obj_prop_ignore, __itt_obj_state_set);          \
-    } /* if */                                                                 \
-    { statement }                                                              \
-    if (__itt_state_get_ptr) {                                                 \
-      __itt_state_set(__itt_state_);                                           \
-    } /* if */                                                                 \
-  } while (0)
-
-const int KMP_MAX_FRAME_DOMAINS =
-    512; // Maximum number of frame domains to use (maps to
-// different OpenMP regions in the user source code).
-extern kmp_int32 __kmp_barrier_domain_count;
-extern kmp_int32 __kmp_region_domain_count;
-extern __itt_domain *__kmp_itt_barrier_domains[KMP_MAX_FRAME_DOMAINS];
-extern __itt_domain *__kmp_itt_region_domains[KMP_MAX_FRAME_DOMAINS];
-extern __itt_domain *__kmp_itt_imbalance_domains[KMP_MAX_FRAME_DOMAINS];
-extern kmp_int32 __kmp_itt_region_team_size[KMP_MAX_FRAME_DOMAINS];
-extern __itt_domain *metadata_domain;
-extern __itt_string_handle *string_handle_imbl;
-extern __itt_string_handle *string_handle_loop;
-extern __itt_string_handle *string_handle_sngl;
-
-#else
-
-// Null definitions of the synchronization tracing functions.
-#define KMP_FSYNC_PREPARE(obj) ((void)0)
-#define KMP_FSYNC_CANCEL(obj) ((void)0)
-#define KMP_FSYNC_ACQUIRED(obj) ((void)0)
-#define KMP_FSYNC_RELEASING(obj) ((void)0)
-
-#define KMP_FSYNC_SPIN_INIT(obj, spin) ((void)0)
-#define KMP_FSYNC_SPIN_PREPARE(obj) ((void)0)
-#define KMP_FSYNC_SPIN_ACQUIRED(obj) ((void)0)
-
-#define KMP_ITT_IGNORE(stmt)                                                   \
-  do {                                                                         \
-    stmt                                                                       \
-  } while (0)
-
-#endif // USE_ITT_NOTIFY
-
-#if !KMP_DEBUG
-// In release mode include definitions of inline functions.
-#include "kmp_itt.inl"
-#endif
-
-#endif // KMP_ITT_H
-
-#else /* USE_ITT_BUILD */
-
-// Null definitions of the synchronization tracing functions.
-// If USE_ITT_BUILD is not enabled, USE_ITT_NOTIFY cannot be either.
-// By defining these we avoid unpleasant ifdef tests in many places.
-#define KMP_FSYNC_PREPARE(obj) ((void)0)
-#define KMP_FSYNC_CANCEL(obj) ((void)0)
-#define KMP_FSYNC_ACQUIRED(obj) ((void)0)
-#define KMP_FSYNC_RELEASING(obj) ((void)0)
-
-#define KMP_FSYNC_SPIN_INIT(obj, spin) ((void)0)
-#define KMP_FSYNC_SPIN_PREPARE(obj) ((void)0)
-#define KMP_FSYNC_SPIN_ACQUIRED(obj) ((void)0)
-
-#define KMP_ITT_IGNORE(stmt)                                                   \
-  do {                                                                         \
-    stmt                                                                       \
-  } while (0)
-
-#define USE_ITT_BUILD_ARG(x)
-
-#endif /* USE_ITT_BUILD */
Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_itt.h
___________________________________________________________________
Deleted: svn:eol-style
## -1 +0,0 ##
-native
\ No newline at end of property
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Deleted: svn:mime-type
## -1 +0,0 ##
-text/plain
\ No newline at end of property
Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_tasking.cpp
===================================================================
--- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_tasking.cpp (revision 348960)
+++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_tasking.cpp (nonexistent)
@@ -1,4293 +0,0 @@
-/*
- * kmp_tasking.cpp -- OpenMP 3.0 tasking support.
- */
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "kmp.h"
-#include "kmp_i18n.h"
-#include "kmp_itt.h"
-#include "kmp_stats.h"
-#include "kmp_wait_release.h"
-#include "kmp_taskdeps.h"
-
-#if OMPT_SUPPORT
-#include "ompt-specific.h"
-#endif
-
-#include "tsan_annotations.h"
-
-/* forward declaration */
-static void __kmp_enable_tasking(kmp_task_team_t *task_team,
-                                 kmp_info_t *this_thr);
-static void __kmp_alloc_task_deque(kmp_info_t *thread,
-                                   kmp_thread_data_t *thread_data);
-static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
-                                           kmp_task_team_t *task_team);
-
-#if OMP_45_ENABLED
-static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask);
-#endif
-
-#ifdef BUILD_TIED_TASK_STACK
-
-// __kmp_trace_task_stack: print the tied tasks from the task stack in order
-// from top to bottom
-//
-// gtid: global thread identifier for thread containing stack
-// thread_data: thread data for task team thread containing stack
-// threshold: value above which the trace statement triggers
-// location: string identifying call site of this function (for trace)
-static void __kmp_trace_task_stack(kmp_int32 gtid,
-                                   kmp_thread_data_t *thread_data,
-                                   int threshold, char *location) {
-  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
-  kmp_taskdata_t **stack_top = task_stack->ts_top;
-  kmp_int32 entries = task_stack->ts_entries;
-  kmp_taskdata_t *tied_task;
-
-  KA_TRACE(
-      threshold,
-      ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
-       "first_block = %p, stack_top = %p \n",
-       location, gtid, entries, task_stack->ts_first_block, stack_top));
-
-  KMP_DEBUG_ASSERT(stack_top != NULL);
-  KMP_DEBUG_ASSERT(entries > 0);
-
-  while (entries != 0) {
-    KMP_DEBUG_ASSERT(stack_top != &task_stack->ts_first_block.sb_block[0]);
-    // fix up ts_top if we need to pop from previous block
-    if ((entries & TASK_STACK_INDEX_MASK) == 0) {
-      kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(stack_top);
-
-      stack_block = stack_block->sb_prev;
-      stack_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
-    }
-
-    // finish bookkeeping
-    stack_top--;
-    entries--;
-
-    tied_task = *stack_top;
-
-    KMP_DEBUG_ASSERT(tied_task != NULL);
-    KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
-
-    KA_TRACE(threshold,
-             ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
-              "stack_top=%p, tied_task=%p\n",
-              location, gtid, entries, stack_top, tied_task));
-  }
-  KMP_DEBUG_ASSERT(stack_top == &task_stack->ts_first_block.sb_block[0]);
-
-  KA_TRACE(threshold,
-           ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
-            location, gtid));
-}
-
-// __kmp_init_task_stack: initialize the task stack for the first time
-// after a thread_data structure is created.
-// It should not be necessary to do this again (assuming the stack works).
-//
-// gtid: global thread identifier of calling thread
-// thread_data: thread data for task team thread containing stack
-static void __kmp_init_task_stack(kmp_int32 gtid,
-                                  kmp_thread_data_t *thread_data) {
-  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
-  kmp_stack_block_t *first_block;
-
-  // set up the first block of the stack
-  first_block = &task_stack->ts_first_block;
-  task_stack->ts_top = (kmp_taskdata_t **)first_block;
-  memset((void *)first_block, '\0',
-         TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));
-
-  // initialize the stack to be empty
-  task_stack->ts_entries = TASK_STACK_EMPTY;
-  first_block->sb_next = NULL;
-  first_block->sb_prev = NULL;
-}
-
-// __kmp_free_task_stack: free the task stack when thread_data is destroyed.
-//
-// gtid: global thread identifier for calling thread
-// thread_data: thread info for thread containing stack
-static void __kmp_free_task_stack(kmp_int32 gtid,
-                                  kmp_thread_data_t *thread_data) {
-  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
-  kmp_stack_block_t *stack_block = &task_stack->ts_first_block;
-  // look up the owning thread; it is needed by __kmp_thread_free below
-  kmp_info_t *thread = __kmp_threads[gtid];
-
-  KMP_DEBUG_ASSERT(task_stack->ts_entries == TASK_STACK_EMPTY);
-  // free from the second block of the stack
-  while (stack_block != NULL) {
-    kmp_stack_block_t *next_block = (stack_block) ? stack_block->sb_next : NULL;
-
-    stack_block->sb_next = NULL;
-    stack_block->sb_prev = NULL;
-    if (stack_block != &task_stack->ts_first_block) {
-      __kmp_thread_free(thread,
-                        stack_block); // free the block, if not the first
-    }
-    stack_block = next_block;
-  }
-  // initialize the stack to be empty
-  task_stack->ts_entries = 0;
-  task_stack->ts_top = NULL;
-}
-
-// __kmp_push_task_stack: Push the tied task onto the task stack.
-// Grow the stack if necessary by allocating another block.
-//
-// gtid: global thread identifier for calling thread
-// thread: thread info for thread containing stack
-// tied_task: the task to push on the stack
-static void __kmp_push_task_stack(kmp_int32 gtid, kmp_info_t *thread,
-                                  kmp_taskdata_t *tied_task) {
-  // GEH - need to consider what to do if tt_threads_data not allocated yet
-  kmp_thread_data_t *thread_data =
-      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
-  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
-
-  if (tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser) {
-    return; // Don't push anything on stack if team or team tasks are serialized
-  }
-
-  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
-  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);
-
-  KA_TRACE(20,
-           ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
-            gtid, thread, tied_task));
-  // Store entry
-  *(task_stack->ts_top) = tied_task;
-
-  // Do bookkeeping for next push
-  task_stack->ts_top++;
-  task_stack->ts_entries++;
-
-  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
-    // Find beginning of this task block
-    kmp_stack_block_t *stack_block =
-        (kmp_stack_block_t *)(task_stack->ts_top - TASK_STACK_BLOCK_SIZE);
-
-    // Check if we already have a block
-    if (stack_block->sb_next !=
-        NULL) { // reset ts_top to beginning of next block
-      task_stack->ts_top = &stack_block->sb_next->sb_block[0];
-    } else { // Alloc new block and link it up
-      kmp_stack_block_t *new_block = (kmp_stack_block_t *)__kmp_thread_calloc(
-          thread, 1, sizeof(kmp_stack_block_t));
-
-      task_stack->ts_top = &new_block->sb_block[0];
-      stack_block->sb_next = new_block;
-      new_block->sb_prev = stack_block;
-      new_block->sb_next = NULL;
-
-      KA_TRACE(
-          30,
-          ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
-           gtid, tied_task, new_block));
-    }
-  }
-  KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
-                tied_task));
-}
-
-// __kmp_pop_task_stack: Pop the tied task from the task stack. Don't return
-// the task, just check to make sure it matches the ending task passed in.
-//
-// gtid: global thread identifier for the calling thread
-// thread: thread info structure containing stack
-// ending_task: the task that is ending (should match the task popped off the
-//              stack)
-static void __kmp_pop_task_stack(kmp_int32 gtid, kmp_info_t *thread,
-                                 kmp_taskdata_t *ending_task) {
-  // GEH - need to consider what to do if tt_threads_data not allocated yet
-  kmp_thread_data_t *thread_data =
-      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
-  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
-  kmp_taskdata_t *tied_task;
-
-  if (ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser) {
-    // Don't pop anything from stack if team or team tasks are serialized
-    return;
-  }
-
-  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);
-  KMP_DEBUG_ASSERT(task_stack->ts_entries > 0);
-
-  KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid,
-                thread));
-
-  // fix up ts_top if we need to pop from previous block
-  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
-    kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(task_stack->ts_top);
-
-    stack_block = stack_block->sb_prev;
-    task_stack->ts_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
-  }
-
-  // finish bookkeeping
-  task_stack->ts_top--;
-  task_stack->ts_entries--;
-
-  tied_task = *(task_stack->ts_top);
-
-  KMP_DEBUG_ASSERT(tied_task != NULL);
-  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
-  KMP_DEBUG_ASSERT(tied_task == ending_task); // If we built the stack correctly
-
-  KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
-                tied_task));
-  return;
-}
-#endif /* BUILD_TIED_TASK_STACK */
-
-// returns 1 if new task is allowed to execute, 0 otherwise
-// checks Task Scheduling constraint (if requested) and
-// mutexinoutset dependencies if any
-static bool __kmp_task_is_allowed(int gtid, const kmp_int32 is_constrained,
-                                  const kmp_taskdata_t *tasknew,
-                                  const kmp_taskdata_t *taskcurr) {
-  if (is_constrained && (tasknew->td_flags.tiedness == TASK_TIED)) {
-    // Check if the candidate obeys the Task Scheduling Constraints (TSC)
-    // only descendant of all deferred tied tasks can be scheduled, checking
-    // the last one is enough, as it in turn is the descendant of all others
-    kmp_taskdata_t *current = taskcurr->td_last_tied;
-    KMP_DEBUG_ASSERT(current != NULL);
-    // check if the task is not suspended on barrier
-    if (current->td_flags.tasktype == TASK_EXPLICIT ||
-        current->td_taskwait_thread > 0) { // <= 0 on barrier
-      kmp_int32 level = current->td_level;
-      kmp_taskdata_t *parent = tasknew->td_parent;
-      while (parent != current && parent->td_level > level) {
-        // check generation up to the level of the current task
-        parent = parent->td_parent;
-        KMP_DEBUG_ASSERT(parent != NULL);
-      }
-      if (parent != current)
-        return false;
-    }
-  }
-  // Check mutexinoutset dependencies, acquire locks
-  kmp_depnode_t *node = tasknew->td_depnode;
-  if (node && (node->dn.mtx_num_locks > 0)) {
-    for (int i = 0; i < node->dn.mtx_num_locks; ++i) {
-      KMP_DEBUG_ASSERT(node->dn.mtx_locks[i] != NULL);
-      if (__kmp_test_lock(node->dn.mtx_locks[i], gtid))
-        continue;
-      // could not get the lock, release previous locks
-      for (int j = i - 1; j >= 0; --j)
-        __kmp_release_lock(node->dn.mtx_locks[j], gtid);
-      return false;
-    }
-    // negative num_locks means all locks acquired successfully
-    node->dn.mtx_num_locks = -node->dn.mtx_num_locks;
-  }
-  return true;
-}
-
-// __kmp_realloc_task_deque:
-// Re-allocates a task deque 
for a particular thread, copies the content from -// the old deque and adjusts the necessary data structures relating to the -// deque. This operation must be done with the deque_lock being held -static void __kmp_realloc_task_deque(kmp_info_t *thread, - kmp_thread_data_t *thread_data) { - kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td); - kmp_int32 new_size = 2 * size; - - KE_TRACE(10, ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to " - "%d] for thread_data %p\n", - __kmp_gtid_from_thread(thread), size, new_size, thread_data)); - - kmp_taskdata_t **new_deque = - (kmp_taskdata_t **)__kmp_allocate(new_size * sizeof(kmp_taskdata_t *)); - - int i, j; - for (i = thread_data->td.td_deque_head, j = 0; j < size; - i = (i + 1) & TASK_DEQUE_MASK(thread_data->td), j++) - new_deque[j] = thread_data->td.td_deque[i]; - - __kmp_free(thread_data->td.td_deque); - - thread_data->td.td_deque_head = 0; - thread_data->td.td_deque_tail = size; - thread_data->td.td_deque = new_deque; - thread_data->td.td_deque_size = new_size; -} - -// __kmp_push_task: Add a task to the thread's deque -static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) { - kmp_info_t *thread = __kmp_threads[gtid]; - kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); - kmp_task_team_t *task_team = thread->th.th_task_team; - kmp_int32 tid = __kmp_tid_from_gtid(gtid); - kmp_thread_data_t *thread_data; - - KA_TRACE(20, - ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata)); - - if (taskdata->td_flags.tiedness == TASK_UNTIED) { - // untied task needs to increment counter so that the task structure is not - // freed prematurely - kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count); - KMP_DEBUG_USE_VAR(counter); - KA_TRACE( - 20, - ("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n", - gtid, counter, taskdata)); - } - - // The first check avoids building task_team thread data if serialized - if (taskdata->td_flags.task_serial) { - KA_TRACE(20, ("__kmp_push_task: T#%d team serialized; returning " - "TASK_NOT_PUSHED for task %p\n", - gtid, taskdata)); - return TASK_NOT_PUSHED; - } - - // Now that serialized tasks have returned, we can assume that we are not in - // immediate exec mode - KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec); - if (!KMP_TASKING_ENABLED(task_team)) { - __kmp_enable_tasking(task_team, thread); - } - KMP_DEBUG_ASSERT(TCR_4(task_team->tt.tt_found_tasks) == TRUE); - KMP_DEBUG_ASSERT(TCR_PTR(task_team->tt.tt_threads_data) != NULL); - - // Find tasking deque specific to encountering thread - thread_data = &task_team->tt.tt_threads_data[tid]; - - // No lock needed since only owner can allocate - if (thread_data->td.td_deque == NULL) { - __kmp_alloc_task_deque(thread, thread_data); - } - - int locked = 0; - // Check if deque is full - if (TCR_4(thread_data->td.td_deque_ntasks) >= - TASK_DEQUE_SIZE(thread_data->td)) { - if (__kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata, - thread->th.th_current_task)) { - KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning " - "TASK_NOT_PUSHED for task %p\n", - gtid, taskdata)); - return TASK_NOT_PUSHED; - } else { - __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock); - locked = 1; - // expand deque to push the task which is not allowed to execute - __kmp_realloc_task_deque(thread, thread_data); - } - } - // Lock the deque for the task push operation - if (!locked) { - __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock); -#if OMP_45_ENABLED - // Need to 
recheck as we can get a proxy task from a thread outside of OpenMP
-  if (TCR_4(thread_data->td.td_deque_ntasks) >=
-      TASK_DEQUE_SIZE(thread_data->td)) {
-    if (__kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
-                              thread->th.th_current_task)) {
-      __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
-      KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; "
-                    "returning TASK_NOT_PUSHED for task %p\n",
-                    gtid, taskdata));
-      return TASK_NOT_PUSHED;
-    } else {
-      // expand deque to push the task which is not allowed to execute
-      __kmp_realloc_task_deque(thread, thread_data);
-    }
-  }
-#endif
-  }
-  // Must have room since no thread other than the calling thread can add tasks
-  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
-                   TASK_DEQUE_SIZE(thread_data->td));
-
-  thread_data->td.td_deque[thread_data->td.td_deque_tail] =
-      taskdata; // Push taskdata
-  // Wrap index.
-  thread_data->td.td_deque_tail =
-      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
-  TCW_4(thread_data->td.td_deque_ntasks,
-        TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count
-
-  KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
-                "task=%p ntasks=%d head=%u tail=%u\n",
-                gtid, taskdata, thread_data->td.td_deque_ntasks,
-                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
-
-  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
-
-  return TASK_SUCCESSFULLY_PUSHED;
-}
-
-// __kmp_pop_current_task_from_thread: set up current task from called thread
-// when team ends
-//
-// this_thr: thread structure to set current_task in.
-void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr) {
-  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(enter): T#%d "
-                "this_thread=%p, curtask=%p, "
-                "curtask_parent=%p\n",
-                0, this_thr, this_thr->th.th_current_task,
-                this_thr->th.th_current_task->td_parent));
-
-  this_thr->th.th_current_task = this_thr->th.th_current_task->td_parent;
-
-  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(exit): T#%d "
-                "this_thread=%p, curtask=%p, "
-                "curtask_parent=%p\n",
-                0, this_thr, this_thr->th.th_current_task,
-                this_thr->th.th_current_task->td_parent));
-}
-
-// __kmp_push_current_task_to_thread: set up current task in called thread for
-// a new team
-//
-// this_thr: thread structure to set up
-// team: team for implicit task data
-// tid: thread within team to set up
-void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, kmp_team_t *team,
-                                       int tid) {
-  // the current task of the thread is the parent of the newly created implicit
-  // tasks of the new team
-  KF_TRACE(10, ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p "
-                "curtask=%p "
-                "parent_task=%p\n",
-                tid, this_thr, this_thr->th.th_current_task,
-                team->t.t_implicit_task_taskdata[tid].td_parent));
-
-  KMP_DEBUG_ASSERT(this_thr != NULL);
-
-  if (tid == 0) {
-    if (this_thr->th.th_current_task != &team->t.t_implicit_task_taskdata[0]) {
-      team->t.t_implicit_task_taskdata[0].td_parent =
-          this_thr->th.th_current_task;
-      this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[0];
-    }
-  } else {
-    team->t.t_implicit_task_taskdata[tid].td_parent =
-        team->t.t_implicit_task_taskdata[0].td_parent;
-    this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[tid];
-  }
-
-  KF_TRACE(10, ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p "
-                "curtask=%p "
-                "parent_task=%p\n",
-                tid, this_thr, this_thr->th.th_current_task,
-                team->t.t_implicit_task_taskdata[tid].td_parent));
-}
-
-//
__kmp_task_start: bookkeeping for a task starting execution -// -// GTID: global thread id of calling thread -// task: task starting execution -// current_task: task suspending -static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task, - kmp_taskdata_t *current_task) { - kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); - kmp_info_t *thread = __kmp_threads[gtid]; - - KA_TRACE(10, - ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n", - gtid, taskdata, current_task)); - - KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT); - - // mark currently executing task as suspended - // TODO: GEH - make sure root team implicit task is initialized properly. - // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 ); - current_task->td_flags.executing = 0; - -// Add task to stack if tied -#ifdef BUILD_TIED_TASK_STACK - if (taskdata->td_flags.tiedness == TASK_TIED) { - __kmp_push_task_stack(gtid, thread, taskdata); - } -#endif /* BUILD_TIED_TASK_STACK */ - - // mark starting task as executing and as current task - thread->th.th_current_task = taskdata; - - KMP_DEBUG_ASSERT(taskdata->td_flags.started == 0 || - taskdata->td_flags.tiedness == TASK_UNTIED); - KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0 || - taskdata->td_flags.tiedness == TASK_UNTIED); - taskdata->td_flags.started = 1; - taskdata->td_flags.executing = 1; - KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0); - KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0); - - // GEH TODO: shouldn't we pass some sort of location identifier here? - // APT: yes, we will pass location here. - // need to store current thread state (in a thread or taskdata structure) - // before setting work_state, otherwise wrong state is set after end of task - - KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata)); - - return; -} - -#if OMPT_SUPPORT -//------------------------------------------------------------------------------ -// __ompt_task_init: -// Initialize OMPT fields maintained by a task. This will only be called after -// ompt_start_tool, so we already know whether ompt is enabled or not. - -static inline void __ompt_task_init(kmp_taskdata_t *task, int tid) { - // The calls to __ompt_task_init already have the ompt_enabled condition. 
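  // What follows resets every OMPT field kept in the task: the tool-visible
  // data word, both frame pointers (enter/exit), their flag words (the frames
  // are runtime-owned and addressed by frame pointer), and, under
  // OMP_40_ENABLED, the dependence bookkeeping.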
- task->ompt_task_info.task_data.value = 0; - task->ompt_task_info.frame.exit_frame = ompt_data_none; - task->ompt_task_info.frame.enter_frame = ompt_data_none; - task->ompt_task_info.frame.exit_frame_flags = ompt_frame_runtime | ompt_frame_framepointer; - task->ompt_task_info.frame.enter_frame_flags = ompt_frame_runtime | ompt_frame_framepointer; -#if OMP_40_ENABLED - task->ompt_task_info.ndeps = 0; - task->ompt_task_info.deps = NULL; -#endif /* OMP_40_ENABLED */ -} - -// __ompt_task_start: -// Build and trigger task-begin event -static inline void __ompt_task_start(kmp_task_t *task, - kmp_taskdata_t *current_task, - kmp_int32 gtid) { - kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); - ompt_task_status_t status = ompt_task_switch; - if (__kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded) { - status = ompt_task_yield; - __kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded = 0; - } - /* let OMPT know that we're about to run this task */ - if (ompt_enabled.ompt_callback_task_schedule) { - ompt_callbacks.ompt_callback(ompt_callback_task_schedule)( - &(current_task->ompt_task_info.task_data), status, - &(taskdata->ompt_task_info.task_data)); - } - taskdata->ompt_task_info.scheduling_parent = current_task; -} - -// __ompt_task_finish: -// Build and trigger final task-schedule event -static inline void -__ompt_task_finish(kmp_task_t *task, kmp_taskdata_t *resumed_task, - ompt_task_status_t status = ompt_task_complete) { - kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); - if (__kmp_omp_cancellation && taskdata->td_taskgroup && - taskdata->td_taskgroup->cancel_request == cancel_taskgroup) { - status = ompt_task_cancel; - } - - /* let OMPT know that we're returning to the callee task */ - if (ompt_enabled.ompt_callback_task_schedule) { - ompt_callbacks.ompt_callback(ompt_callback_task_schedule)( - &(taskdata->ompt_task_info.task_data), status, - &((resumed_task ? resumed_task - : (taskdata->ompt_task_info.scheduling_parent - ? taskdata->ompt_task_info.scheduling_parent - : taskdata->td_parent)) - ->ompt_task_info.task_data)); - } -} -#endif - -template -static void __kmpc_omp_task_begin_if0_template(ident_t *loc_ref, kmp_int32 gtid, - kmp_task_t *task, - void *frame_address, - void *return_address) { - kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); - kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task; - - KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p " - "current_task=%p\n", - gtid, loc_ref, taskdata, current_task)); - - if (taskdata->td_flags.tiedness == TASK_UNTIED) { - // untied task needs to increment counter so that the task structure is not - // freed prematurely - kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count); - KMP_DEBUG_USE_VAR(counter); - KA_TRACE(20, ("__kmpc_omp_task_begin_if0: T#%d untied_count (%d) " - "incremented for task %p\n", - gtid, counter, taskdata)); - } - - taskdata->td_flags.task_serial = - 1; // Execute this task immediately, not deferred. 
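  // The serial flag is set before __kmp_task_start so that the bookkeeping
  // that follows (e.g. the resumed-task selection in __kmp_task_finish)
  // already sees this task as immediate/undeferred.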
- __kmp_task_start(gtid, task, current_task); - -#if OMPT_SUPPORT - if (ompt) { - if (current_task->ompt_task_info.frame.enter_frame.ptr == NULL) { - current_task->ompt_task_info.frame.enter_frame.ptr = - taskdata->ompt_task_info.frame.exit_frame.ptr = frame_address; - current_task->ompt_task_info.frame.enter_frame_flags = - taskdata->ompt_task_info.frame.exit_frame_flags = ompt_frame_application | ompt_frame_framepointer; - } - if (ompt_enabled.ompt_callback_task_create) { - ompt_task_info_t *parent_info = &(current_task->ompt_task_info); - ompt_callbacks.ompt_callback(ompt_callback_task_create)( - &(parent_info->task_data), &(parent_info->frame), - &(taskdata->ompt_task_info.task_data), - ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(taskdata), 0, - return_address); - } - __ompt_task_start(task, current_task, gtid); - } -#endif // OMPT_SUPPORT - - KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", gtid, - loc_ref, taskdata)); -} - -#if OMPT_SUPPORT -OMPT_NOINLINE -static void __kmpc_omp_task_begin_if0_ompt(ident_t *loc_ref, kmp_int32 gtid, - kmp_task_t *task, - void *frame_address, - void *return_address) { - __kmpc_omp_task_begin_if0_template(loc_ref, gtid, task, frame_address, - return_address); -} -#endif // OMPT_SUPPORT - -// __kmpc_omp_task_begin_if0: report that a given serialized task has started -// execution -// -// loc_ref: source location information; points to beginning of task block. -// gtid: global thread number. -// task: task thunk for the started task. -void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid, - kmp_task_t *task) { -#if OMPT_SUPPORT - if (UNLIKELY(ompt_enabled.enabled)) { - OMPT_STORE_RETURN_ADDRESS(gtid); - __kmpc_omp_task_begin_if0_ompt(loc_ref, gtid, task, - OMPT_GET_FRAME_ADDRESS(1), - OMPT_LOAD_RETURN_ADDRESS(gtid)); - return; - } -#endif - __kmpc_omp_task_begin_if0_template(loc_ref, gtid, task, NULL, NULL); -} - -#ifdef TASK_UNUSED -// __kmpc_omp_task_begin: report that a given task has started execution -// NEVER GENERATED BY COMPILER, DEPRECATED!!! 
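// (Kept only under #ifdef TASK_UNUSED; the supported entry point for starting
// a serialized task is __kmpc_omp_task_begin_if0 above.)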
-void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task) { - kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task; - - KA_TRACE( - 10, - ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n", - gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task)); - - __kmp_task_start(gtid, task, current_task); - - KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n", gtid, - loc_ref, KMP_TASK_TO_TASKDATA(task))); - return; -} -#endif // TASK_UNUSED - -// __kmp_free_task: free the current task space and the space for shareds -// -// gtid: Global thread ID of calling thread -// taskdata: task to free -// thread: thread data structure of caller -static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata, - kmp_info_t *thread) { - KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n", gtid, - taskdata)); - - // Check to make sure all flags and counters have the correct values - KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT); - KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0); - KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 1); - KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0); - KMP_DEBUG_ASSERT(taskdata->td_allocated_child_tasks == 0 || - taskdata->td_flags.task_serial == 1); - KMP_DEBUG_ASSERT(taskdata->td_incomplete_child_tasks == 0); - - taskdata->td_flags.freed = 1; - ANNOTATE_HAPPENS_BEFORE(taskdata); -// deallocate the taskdata and shared variable blocks associated with this task -#if USE_FAST_MEMORY - __kmp_fast_free(thread, taskdata); -#else /* ! USE_FAST_MEMORY */ - __kmp_thread_free(thread, taskdata); -#endif - - KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n", gtid, taskdata)); -} - -// __kmp_free_task_and_ancestors: free the current task and ancestors without -// children -// -// gtid: Global thread ID of calling thread -// taskdata: task to free -// thread: thread data structure of caller -static void __kmp_free_task_and_ancestors(kmp_int32 gtid, - kmp_taskdata_t *taskdata, - kmp_info_t *thread) { -#if OMP_45_ENABLED - // Proxy tasks must always be allowed to free their parents - // because they can be run in background even in serial mode. - kmp_int32 team_serial = - (taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) && - !taskdata->td_flags.proxy; -#else - kmp_int32 team_serial = - taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser; -#endif - KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT); - - kmp_int32 children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1; - KMP_DEBUG_ASSERT(children >= 0); - - // Now, go up the ancestor tree to see if any ancestors can now be freed. - while (children == 0) { - kmp_taskdata_t *parent_taskdata = taskdata->td_parent; - - KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete " - "and freeing itself\n", - gtid, taskdata)); - - // --- Deallocate my ancestor task --- - __kmp_free_task(gtid, taskdata, thread); - - taskdata = parent_taskdata; - - if (team_serial) - return; - // Stop checking ancestors at implicit task instead of walking up ancestor - // tree to avoid premature deallocation of ancestors. - if (taskdata->td_flags.tasktype == TASK_IMPLICIT) { - if (taskdata->td_dephash) { // do we need to cleanup dephash? 
- int children = KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks); - kmp_tasking_flags_t flags_old = taskdata->td_flags; - if (children == 0 && flags_old.complete == 1) { - kmp_tasking_flags_t flags_new = flags_old; - flags_new.complete = 0; - if (KMP_COMPARE_AND_STORE_ACQ32( - RCAST(kmp_int32 *, &taskdata->td_flags), - *RCAST(kmp_int32 *, &flags_old), - *RCAST(kmp_int32 *, &flags_new))) { - KA_TRACE(100, ("__kmp_free_task_and_ancestors: T#%d cleans " - "dephash of implicit task %p\n", - gtid, taskdata)); - // cleanup dephash of finished implicit task - __kmp_dephash_free_entries(thread, taskdata->td_dephash); - } - } - } - return; - } - // Predecrement simulated by "- 1" calculation - children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1; - KMP_DEBUG_ASSERT(children >= 0); - } - - KA_TRACE( - 20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; " - "not freeing it yet\n", - gtid, taskdata, children)); -} - -// __kmp_task_finish: bookkeeping to do when a task finishes execution -// -// gtid: global thread ID for calling thread -// task: task to be finished -// resumed_task: task to be resumed. (may be NULL if task is serialized) -template -static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task, - kmp_taskdata_t *resumed_task) { - kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); - kmp_info_t *thread = __kmp_threads[gtid]; -#if OMP_45_ENABLED - kmp_task_team_t *task_team = - thread->th.th_task_team; // might be NULL for serial teams... -#endif // OMP_45_ENABLED - kmp_int32 children = 0; - - KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming " - "task %p\n", - gtid, taskdata, resumed_task)); - - KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT); - -// Pop task from stack if tied -#ifdef BUILD_TIED_TASK_STACK - if (taskdata->td_flags.tiedness == TASK_TIED) { - __kmp_pop_task_stack(gtid, thread, taskdata); - } -#endif /* BUILD_TIED_TASK_STACK */ - - if (taskdata->td_flags.tiedness == TASK_UNTIED) { - // untied task needs to check the counter so that the task structure is not - // freed prematurely - kmp_int32 counter = KMP_ATOMIC_DEC(&taskdata->td_untied_count) - 1; - KA_TRACE( - 20, - ("__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n", - gtid, counter, taskdata)); - if (counter > 0) { - // untied task is not done, to be continued possibly by other thread, do - // not free it now - if (resumed_task == NULL) { - KMP_DEBUG_ASSERT(taskdata->td_flags.task_serial); - resumed_task = taskdata->td_parent; // In a serialized task, the resumed - // task is the parent - } - thread->th.th_current_task = resumed_task; // restore current_task - resumed_task->td_flags.executing = 1; // resume previous task - KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, " - "resuming task %p\n", - gtid, taskdata, resumed_task)); - return; - } - } -#if OMPT_SUPPORT - if (ompt) - __ompt_task_finish(task, resumed_task); -#endif - - // Check mutexinoutset dependencies, release locks - kmp_depnode_t *node = taskdata->td_depnode; - if (node && (node->dn.mtx_num_locks < 0)) { - // negative num_locks means all locks were acquired - node->dn.mtx_num_locks = -node->dn.mtx_num_locks; - for (int i = node->dn.mtx_num_locks - 1; i >= 0; --i) { - KMP_DEBUG_ASSERT(node->dn.mtx_locks[i] != NULL); - __kmp_release_lock(node->dn.mtx_locks[i], gtid); - } - } - - KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0); - taskdata->td_flags.complete = 1; // mark the task as completed - 
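  // Ordering note: 'complete' was set above, before the child counts are
  // decremented and dependences released below; 'executing' may only be
  // cleared after __kmp_release_deps, as the comment further down explains.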
  KMP_DEBUG_ASSERT(taskdata->td_flags.started == 1);
-  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
-
-  // Only need to keep track of count if team parallel and tasking not
-  // serialized
-  if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
-    // Predecrement simulated by "- 1" calculation
-    children =
-        KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks) - 1;
-    KMP_DEBUG_ASSERT(children >= 0);
-#if OMP_40_ENABLED
-    if (taskdata->td_taskgroup)
-      KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
-    __kmp_release_deps(gtid, taskdata);
-#if OMP_45_ENABLED
-  } else if (task_team && task_team->tt.tt_found_proxy_tasks) {
-    // if we found proxy tasks there could exist a dependency chain
-    // with the proxy task as origin
-    __kmp_release_deps(gtid, taskdata);
-#endif // OMP_45_ENABLED
-#endif // OMP_40_ENABLED
-  }
-
-  // td_flags.executing must be marked as 0 after __kmp_release_deps has been
-  // called. Otherwise, if a task is executed immediately from the release_deps
-  // code, the flag will be reset to 1 again by this same function
-  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
-  taskdata->td_flags.executing = 0; // suspend the finishing task
-
-  KA_TRACE(
-      20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
-           gtid, taskdata, children));
-
-#if OMP_40_ENABLED
-  /* If the task's destructor thunk flag has been set, we need to invoke the
-     destructor thunk that has been generated by the compiler. The code is
-     placed here, since at this point other tasks might have been released
-     hence overlapping the destructor invocations with some other work in the
-     released tasks. The OpenMP spec is not specific on when the destructors
-     are invoked, so we should be free to choose. */
-  if (taskdata->td_flags.destructors_thunk) {
-    kmp_routine_entry_t destr_thunk = task->data1.destructors;
-    KMP_ASSERT(destr_thunk);
-    destr_thunk(gtid, task);
-  }
-#endif // OMP_40_ENABLED
-
-  // bookkeeping for resuming task:
-  // GEH - note tasking_ser => task_serial
-  KMP_DEBUG_ASSERT(
-      (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
-      taskdata->td_flags.task_serial);
-  if (taskdata->td_flags.task_serial) {
-    if (resumed_task == NULL) {
-      resumed_task = taskdata->td_parent; // In a serialized task, the resumed
-      // task is the parent
-    }
-  } else {
-    KMP_DEBUG_ASSERT(resumed_task !=
-                     NULL); // verify that resumed task is passed as argument
-  }
-
-  // Free this task and then ancestor tasks if they have no children.
-  // Restore th_current_task first as suggested by John:
-  // johnmc: if an asynchronous inquiry peers into the runtime system
-  // it doesn't see the freed task as the current task.
-  thread->th.th_current_task = resumed_task;
-  __kmp_free_task_and_ancestors(gtid, taskdata, thread);
-
-  // TODO: GEH - make sure root team implicit task is initialized properly.
- // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 ); - resumed_task->td_flags.executing = 1; // resume previous task - - KA_TRACE( - 10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n", - gtid, taskdata, resumed_task)); - - return; -} - -template -static void __kmpc_omp_task_complete_if0_template(ident_t *loc_ref, - kmp_int32 gtid, - kmp_task_t *task) { - KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n", - gtid, loc_ref, KMP_TASK_TO_TASKDATA(task))); - // this routine will provide task to resume - __kmp_task_finish(gtid, task, NULL); - - KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n", - gtid, loc_ref, KMP_TASK_TO_TASKDATA(task))); - -#if OMPT_SUPPORT - if (ompt) { - ompt_frame_t *ompt_frame; - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - ompt_frame->enter_frame = ompt_data_none; - ompt_frame->enter_frame_flags = ompt_frame_runtime | ompt_frame_framepointer; - } -#endif - - return; -} - -#if OMPT_SUPPORT -OMPT_NOINLINE -void __kmpc_omp_task_complete_if0_ompt(ident_t *loc_ref, kmp_int32 gtid, - kmp_task_t *task) { - __kmpc_omp_task_complete_if0_template(loc_ref, gtid, task); -} -#endif // OMPT_SUPPORT - -// __kmpc_omp_task_complete_if0: report that a task has completed execution -// -// loc_ref: source location information; points to end of task block. -// gtid: global thread number. -// task: task thunk for the completed task. -void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid, - kmp_task_t *task) { -#if OMPT_SUPPORT - if (UNLIKELY(ompt_enabled.enabled)) { - __kmpc_omp_task_complete_if0_ompt(loc_ref, gtid, task); - return; - } -#endif - __kmpc_omp_task_complete_if0_template(loc_ref, gtid, task); -} - -#ifdef TASK_UNUSED -// __kmpc_omp_task_complete: report that a task has completed execution -// NEVER GENERATED BY COMPILER, DEPRECATED!!! -void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid, - kmp_task_t *task) { - KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", gtid, - loc_ref, KMP_TASK_TO_TASKDATA(task))); - - __kmp_task_finish(gtid, task, - NULL); // Not sure how to find task to resume - - KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", gtid, - loc_ref, KMP_TASK_TO_TASKDATA(task))); - return; -} -#endif // TASK_UNUSED - -// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit -// task for a given thread -// -// loc_ref: reference to source location of parallel region -// this_thr: thread data structure corresponding to implicit task -// team: team for this_thr -// tid: thread id of given thread within team -// set_curr_task: TRUE if need to push current task to thread -// NOTE: Routine does not set up the implicit task ICVS. This is assumed to -// have already been done elsewhere. -// TODO: Get better loc_ref. Value passed in may be NULL -void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr, - kmp_team_t *team, int tid, int set_curr_task) { - kmp_taskdata_t *task = &team->t.t_implicit_task_taskdata[tid]; - - KF_TRACE( - 10, - ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n", - tid, team, task, set_curr_task ? 
"TRUE" : "FALSE")); - - task->td_task_id = KMP_GEN_TASK_ID(); - task->td_team = team; - // task->td_parent = NULL; // fix for CQ230101 (broken parent task info - // in debugger) - task->td_ident = loc_ref; - task->td_taskwait_ident = NULL; - task->td_taskwait_counter = 0; - task->td_taskwait_thread = 0; - - task->td_flags.tiedness = TASK_TIED; - task->td_flags.tasktype = TASK_IMPLICIT; -#if OMP_45_ENABLED - task->td_flags.proxy = TASK_FULL; -#endif - - // All implicit tasks are executed immediately, not deferred - task->td_flags.task_serial = 1; - task->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec); - task->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0; - - task->td_flags.started = 1; - task->td_flags.executing = 1; - task->td_flags.complete = 0; - task->td_flags.freed = 0; - -#if OMP_40_ENABLED - task->td_depnode = NULL; -#endif - task->td_last_tied = task; - - if (set_curr_task) { // only do this init first time thread is created - KMP_ATOMIC_ST_REL(&task->td_incomplete_child_tasks, 0); - // Not used: don't need to deallocate implicit task - KMP_ATOMIC_ST_REL(&task->td_allocated_child_tasks, 0); -#if OMP_40_ENABLED - task->td_taskgroup = NULL; // An implicit task does not have taskgroup - task->td_dephash = NULL; -#endif - __kmp_push_current_task_to_thread(this_thr, team, tid); - } else { - KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0); - KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0); - } - -#if OMPT_SUPPORT - if (UNLIKELY(ompt_enabled.enabled)) - __ompt_task_init(task, tid); -#endif - - KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid, - team, task)); -} - -// __kmp_finish_implicit_task: Release resources associated to implicit tasks -// at the end of parallel regions. Some resources are kept for reuse in the next -// parallel region. -// -// thread: thread data structure corresponding to implicit task -void __kmp_finish_implicit_task(kmp_info_t *thread) { - kmp_taskdata_t *task = thread->th.th_current_task; - if (task->td_dephash) { - int children; - task->td_flags.complete = 1; - children = KMP_ATOMIC_LD_ACQ(&task->td_incomplete_child_tasks); - kmp_tasking_flags_t flags_old = task->td_flags; - if (children == 0 && flags_old.complete == 1) { - kmp_tasking_flags_t flags_new = flags_old; - flags_new.complete = 0; - if (KMP_COMPARE_AND_STORE_ACQ32(RCAST(kmp_int32 *, &task->td_flags), - *RCAST(kmp_int32 *, &flags_old), - *RCAST(kmp_int32 *, &flags_new))) { - KA_TRACE(100, ("__kmp_finish_implicit_task: T#%d cleans " - "dephash of implicit task %p\n", - thread->th.th_info.ds.ds_gtid, task)); - __kmp_dephash_free_entries(thread, task->td_dephash); - } - } - } -} - -// __kmp_free_implicit_task: Release resources associated to implicit tasks -// when these are destroyed regions -// -// thread: thread data structure corresponding to implicit task -void __kmp_free_implicit_task(kmp_info_t *thread) { - kmp_taskdata_t *task = thread->th.th_current_task; - if (task && task->td_dephash) { - __kmp_dephash_free(thread, task->td_dephash); - task->td_dephash = NULL; - } -} - -// Round up a size to a power of two specified by val: Used to insert padding -// between structures co-allocated using a single malloc() call -static size_t __kmp_round_up_to_val(size_t size, size_t val) { - if (size & (val - 1)) { - size &= ~(val - 1); - if (size <= KMP_SIZE_T_MAX - val) { - size += val; // Round up if there is no overflow. 
-    }
-  }
-  return size;
-} // __kmp_round_up_to_val
-
-// __kmp_task_alloc: Allocate the taskdata and task data structures for a task
-//
-// loc_ref: source location information
-// gtid: global thread number.
-// flags: include tiedness & task type (explicit vs. implicit) of the ''new''
-// task encountered. Converted from kmp_int32 to kmp_tasking_flags_t in routine.
-// sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including
-// private vars accessed in task.
-// sizeof_shareds: Size in bytes of array of pointers to shared vars accessed
-// in task.
-// task_entry: Pointer to task code entry point generated by compiler.
-// returns: a pointer to the allocated kmp_task_t structure (task).
-kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
-                             kmp_tasking_flags_t *flags,
-                             size_t sizeof_kmp_task_t, size_t sizeof_shareds,
-                             kmp_routine_entry_t task_entry) {
-  kmp_task_t *task;
-  kmp_taskdata_t *taskdata;
-  kmp_info_t *thread = __kmp_threads[gtid];
-  kmp_team_t *team = thread->th.th_team;
-  kmp_taskdata_t *parent_task = thread->th.th_current_task;
-  size_t shareds_offset;
-
-  if (!TCR_4(__kmp_init_middle))
-    __kmp_middle_initialize();
-
-  KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
-                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
-                gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
-                sizeof_shareds, task_entry));
-
-  if (parent_task->td_flags.final) {
-    if (flags->merged_if0) {
-    }
-    flags->final = 1;
-  }
-  if (flags->tiedness == TASK_UNTIED && !team->t.t_serialized) {
-    // An untied task encountered causes the TSC algorithm to check the entire
-    // deque of the victim thread. If no untied task is encountered, checking
-    // the head of the deque should be enough.
-    KMP_CHECK_UPDATE(thread->th.th_task_team->tt.tt_untied_task_encountered, 1);
-  }
-
-#if OMP_45_ENABLED
-  if (flags->proxy == TASK_PROXY) {
-    flags->tiedness = TASK_UNTIED;
-    flags->merged_if0 = 1;
-
-    /* are we running in a sequential parallel or tskm_immediate_exec... we need
-       tasking support enabled */
-    if ((thread->th.th_task_team) == NULL) {
-      /* This should only happen if the team is serialized
-         setup a task team and propagate it to the thread */
-      KMP_DEBUG_ASSERT(team->t.t_serialized);
-      KA_TRACE(30,
-               ("T#%d creating task team in __kmp_task_alloc for proxy task\n",
-                gtid));
-      __kmp_task_team_setup(
-          thread, team,
-          1); // 1 indicates setup the current team regardless of nthreads
-      thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
-    }
-    kmp_task_team_t *task_team = thread->th.th_task_team;
-
-    /* tasking must be enabled now as the task might not be pushed */
-    if (!KMP_TASKING_ENABLED(task_team)) {
-      KA_TRACE(
-          30,
-          ("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
-      __kmp_enable_tasking(task_team, thread);
-      kmp_int32 tid = thread->th.th_info.ds.ds_tid;
-      kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
-      // No lock needed since only owner can allocate
-      if (thread_data->td.td_deque == NULL) {
-        __kmp_alloc_task_deque(thread, thread_data);
-      }
-    }
-
-    if (task_team->tt.tt_found_proxy_tasks == FALSE)
-      TCW_4(task_team->tt.tt_found_proxy_tasks, TRUE);
-  }
-#endif
-
-  // Calculate shared structure offset including padding after kmp_task_t struct
-  // to align pointers in shared struct
-  shareds_offset = sizeof(kmp_taskdata_t) + sizeof_kmp_task_t;
-  shareds_offset = __kmp_round_up_to_val(shareds_offset, sizeof(void *));
-
-  // Allocate a kmp_taskdata_t block and a kmp_task_t block.
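  // The single block allocated below is laid out as
  //
  //   [ kmp_taskdata_t | kmp_task_t + private vars | pad | shareds ]
  //   ^ taskdata        ^ task                            ^ task->shareds
  //                                                         (shareds_offset)
  //
  // so freeing taskdata releases everything that belongs to the task.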
- KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n", gtid, - shareds_offset)); - KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n", gtid, - sizeof_shareds)); - -// Avoid double allocation here by combining shareds with taskdata -#if USE_FAST_MEMORY - taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, shareds_offset + - sizeof_shareds); -#else /* ! USE_FAST_MEMORY */ - taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, shareds_offset + - sizeof_shareds); -#endif /* USE_FAST_MEMORY */ - ANNOTATE_HAPPENS_AFTER(taskdata); - - task = KMP_TASKDATA_TO_TASK(taskdata); - -// Make sure task & taskdata are aligned appropriately -#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD - KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(double) - 1)) == 0); - KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(double) - 1)) == 0); -#else - KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(_Quad) - 1)) == 0); - KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(_Quad) - 1)) == 0); -#endif - if (sizeof_shareds > 0) { - // Avoid double allocation here by combining shareds with taskdata - task->shareds = &((char *)taskdata)[shareds_offset]; - // Make sure shareds struct is aligned to pointer size - KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) == - 0); - } else { - task->shareds = NULL; - } - task->routine = task_entry; - task->part_id = 0; // AC: Always start with 0 part id - - taskdata->td_task_id = KMP_GEN_TASK_ID(); - taskdata->td_team = team; - taskdata->td_alloc_thread = thread; - taskdata->td_parent = parent_task; - taskdata->td_level = parent_task->td_level + 1; // increment nesting level - KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0); - taskdata->td_ident = loc_ref; - taskdata->td_taskwait_ident = NULL; - taskdata->td_taskwait_counter = 0; - taskdata->td_taskwait_thread = 0; - KMP_DEBUG_ASSERT(taskdata->td_parent != NULL); -#if OMP_45_ENABLED - // avoid copying icvs for proxy tasks - if (flags->proxy == TASK_FULL) -#endif - copy_icvs(&taskdata->td_icvs, &taskdata->td_parent->td_icvs); - - taskdata->td_flags.tiedness = flags->tiedness; - taskdata->td_flags.final = flags->final; - taskdata->td_flags.merged_if0 = flags->merged_if0; -#if OMP_40_ENABLED - taskdata->td_flags.destructors_thunk = flags->destructors_thunk; -#endif // OMP_40_ENABLED -#if OMP_45_ENABLED - taskdata->td_flags.proxy = flags->proxy; - taskdata->td_task_team = thread->th.th_task_team; - taskdata->td_size_alloc = shareds_offset + sizeof_shareds; -#endif - taskdata->td_flags.tasktype = TASK_EXPLICIT; - - // GEH - TODO: fix this to copy parent task's value of tasking_ser flag - taskdata->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec); - - // GEH - TODO: fix this to copy parent task's value of team_serial flag - taskdata->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0; - - // GEH - Note we serialize the task if the team is serialized to make sure - // implicit parallel region tasks are not left until program termination to - // execute. Also, it helps locality to execute immediately. 
- - taskdata->td_flags.task_serial = - (parent_task->td_flags.final || taskdata->td_flags.team_serial || - taskdata->td_flags.tasking_ser); - - taskdata->td_flags.started = 0; - taskdata->td_flags.executing = 0; - taskdata->td_flags.complete = 0; - taskdata->td_flags.freed = 0; - - taskdata->td_flags.native = flags->native; - - KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0); - // start at one because counts current task and children - KMP_ATOMIC_ST_RLX(&taskdata->td_allocated_child_tasks, 1); -#if OMP_40_ENABLED - taskdata->td_taskgroup = - parent_task->td_taskgroup; // task inherits taskgroup from the parent task - taskdata->td_dephash = NULL; - taskdata->td_depnode = NULL; -#endif - if (flags->tiedness == TASK_UNTIED) - taskdata->td_last_tied = NULL; // will be set when the task is scheduled - else - taskdata->td_last_tied = taskdata; - -#if OMPT_SUPPORT - if (UNLIKELY(ompt_enabled.enabled)) - __ompt_task_init(taskdata, gtid); -#endif -// Only need to keep track of child task counts if team parallel and tasking not -// serialized or if it is a proxy task -#if OMP_45_ENABLED - if (flags->proxy == TASK_PROXY || - !(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) -#else - if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) -#endif - { - KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks); -#if OMP_40_ENABLED - if (parent_task->td_taskgroup) - KMP_ATOMIC_INC(&parent_task->td_taskgroup->count); -#endif - // Only need to keep track of allocated child tasks for explicit tasks since - // implicit not deallocated - if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT) { - KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks); - } - } - - KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n", - gtid, taskdata, taskdata->td_parent)); - ANNOTATE_HAPPENS_BEFORE(task); - - return task; -} - -kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid, - kmp_int32 flags, size_t sizeof_kmp_task_t, - size_t sizeof_shareds, - kmp_routine_entry_t task_entry) { - kmp_task_t *retval; - kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags; - - input_flags->native = FALSE; -// __kmp_task_alloc() sets up all other runtime flags - -#if OMP_45_ENABLED - KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) " - "sizeof_task=%ld sizeof_shared=%ld entry=%p\n", - gtid, loc_ref, input_flags->tiedness ? "tied " : "untied", - input_flags->proxy ? "proxy" : "", sizeof_kmp_task_t, - sizeof_shareds, task_entry)); -#else - KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) " - "sizeof_task=%ld sizeof_shared=%ld entry=%p\n", - gtid, loc_ref, input_flags->tiedness ? "tied " : "untied", - sizeof_kmp_task_t, sizeof_shareds, task_entry)); -#endif - - retval = __kmp_task_alloc(loc_ref, gtid, input_flags, sizeof_kmp_task_t, - sizeof_shareds, task_entry); - - KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval)); - - return retval; -} - -#if OMP_50_ENABLED -/*! -@ingroup TASKING -@param loc_ref location of the original task directive -@param gtid Global Thread ID of encountering thread -@param new_task task thunk allocated by __kmpc_omp_task_alloc() for the ''new -task'' -@param naffins Number of affinity items -@param affin_list List of affinity items -@return Returns non-zero if registering affinity information was not successful. 
-Returns 0 if registration was successful
-This entry registers the affinity information attached to a task with the task
-thunk structure kmp_taskdata_t.
-*/
-kmp_int32
-__kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, kmp_int32 gtid,
-                                  kmp_task_t *new_task, kmp_int32 naffins,
-                                  kmp_task_affinity_info_t *affin_list) {
-  return 0;
-}
-#endif
-
-// __kmp_invoke_task: invoke the specified task
-//
-// gtid: global thread ID of caller
-// task: the task to invoke
-// current_task: the task to resume after task invocation
-static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
-                              kmp_taskdata_t *current_task) {
-  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
-  kmp_info_t *thread;
-#if OMP_40_ENABLED
-  int discard = 0 /* false */;
-#endif
-  KA_TRACE(
-      30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
-           gtid, taskdata, current_task));
-  KMP_DEBUG_ASSERT(task);
-#if OMP_45_ENABLED
-  if (taskdata->td_flags.proxy == TASK_PROXY &&
-      taskdata->td_flags.complete == 1) {
-    // This is a proxy task that was already completed but it needs to run
-    // its bottom-half finish
-    KA_TRACE(
-        30,
-        ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
-         gtid, taskdata));
-
-    __kmp_bottom_half_finish_proxy(gtid, task);
-
-    KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for "
-                  "proxy task %p, resuming task %p\n",
-                  gtid, taskdata, current_task));
-
-    return;
-  }
-#endif
-
-#if OMPT_SUPPORT
-  // For untied tasks, the first task executed only calls __kmpc_omp_task and
-  // does not execute code.
-  ompt_thread_info_t oldInfo;
-  if (UNLIKELY(ompt_enabled.enabled)) {
-    // Store the thread's state and restore it after the task
-    thread = __kmp_threads[gtid];
-    oldInfo = thread->th.ompt_thread_info;
-    thread->th.ompt_thread_info.wait_id = 0;
-    thread->th.ompt_thread_info.state = (thread->th.th_team_serialized)
-                                            ? ompt_state_work_serial
-                                            : ompt_state_work_parallel;
-    taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
-  }
-#endif
-
-#if OMP_45_ENABLED
-  // Proxy tasks are not handled by the runtime
-  if (taskdata->td_flags.proxy != TASK_PROXY) {
-#endif
-    ANNOTATE_HAPPENS_AFTER(task);
-    __kmp_task_start(gtid, task, current_task); // OMPT only if not discarded
-#if OMP_45_ENABLED
-  }
-#endif
-
-#if OMP_40_ENABLED
-  // TODO: cancel tasks if the parallel region has also been cancelled
-  // TODO: check if this sequence can be hoisted above __kmp_task_start
-  // if cancellation has been enabled for this run ...
-  if (__kmp_omp_cancellation) {
-    thread = __kmp_threads[gtid];
-    kmp_team_t *this_team = thread->th.th_team;
-    kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
-    if ((taskgroup && taskgroup->cancel_request) ||
-        (this_team->t.t_cancel_request == cancel_parallel)) {
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-      ompt_data_t *task_data;
-      if (UNLIKELY(ompt_enabled.ompt_callback_cancel)) {
-        __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
-        ompt_callbacks.ompt_callback(ompt_callback_cancel)(
-            task_data,
-            ((taskgroup && taskgroup->cancel_request) ? ompt_cancel_taskgroup
-                                                      : ompt_cancel_parallel) |
-                ompt_cancel_discarded_task,
-            NULL);
-      }
-#endif
-      KMP_COUNT_BLOCK(TASK_cancelled);
-      // this task belongs to a task group and we need to cancel it
-      discard = 1 /* true */;
-    }
-  }
-
-  // Invoke the task routine and pass in relevant data.
-  // Thunks generated by gcc take a different argument list.
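  // A rough sketch of the two entry-point shapes dispatched below; the names
  // are illustrative only, not symbols that exist anywhere:
  //
  //   void gomp_style_thunk(void *shareds);                         // native
  //   kmp_int32 kmpc_style_entry(kmp_int32 gtid, kmp_task_t *task); // default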
- if (!discard) { - if (taskdata->td_flags.tiedness == TASK_UNTIED) { - taskdata->td_last_tied = current_task->td_last_tied; - KMP_DEBUG_ASSERT(taskdata->td_last_tied); - } -#if KMP_STATS_ENABLED - KMP_COUNT_BLOCK(TASK_executed); - switch (KMP_GET_THREAD_STATE()) { - case FORK_JOIN_BARRIER: - KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar); - break; - case PLAIN_BARRIER: - KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar); - break; - case TASKYIELD: - KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield); - break; - case TASKWAIT: - KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait); - break; - case TASKGROUP: - KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup); - break; - default: - KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate); - break; - } -#endif // KMP_STATS_ENABLED -#endif // OMP_40_ENABLED - -// OMPT task begin -#if OMPT_SUPPORT - if (UNLIKELY(ompt_enabled.enabled)) - __ompt_task_start(task, current_task, gtid); -#endif - -#if USE_ITT_BUILD && USE_ITT_NOTIFY - kmp_uint64 cur_time; - kmp_int32 kmp_itt_count_task = - __kmp_forkjoin_frames_mode == 3 && !taskdata->td_flags.task_serial && - current_task->td_flags.tasktype == TASK_IMPLICIT; - if (kmp_itt_count_task) { - thread = __kmp_threads[gtid]; - // Time outer level explicit task on barrier for adjusting imbalance time - if (thread->th.th_bar_arrive_time) - cur_time = __itt_get_timestamp(); - else - kmp_itt_count_task = 0; // thread is not on a barrier - skip timing - } -#endif - -#ifdef KMP_GOMP_COMPAT - if (taskdata->td_flags.native) { - ((void (*)(void *))(*(task->routine)))(task->shareds); - } else -#endif /* KMP_GOMP_COMPAT */ - { - (*(task->routine))(gtid, task); - } - KMP_POP_PARTITIONED_TIMER(); - -#if USE_ITT_BUILD && USE_ITT_NOTIFY - if (kmp_itt_count_task) { - // Barrier imbalance - adjust arrive time with the task duration - thread->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time); - } -#endif - -#if OMP_40_ENABLED - } -#endif // OMP_40_ENABLED - - -#if OMP_45_ENABLED - // Proxy tasks are not handled by the runtime - if (taskdata->td_flags.proxy != TASK_PROXY) { -#endif - ANNOTATE_HAPPENS_BEFORE(taskdata->td_parent); -#if OMPT_SUPPORT - if (UNLIKELY(ompt_enabled.enabled)) { - thread->th.ompt_thread_info = oldInfo; - if (taskdata->td_flags.tiedness == TASK_TIED) { - taskdata->ompt_task_info.frame.exit_frame = ompt_data_none; - } - __kmp_task_finish(gtid, task, current_task); - } else -#endif - __kmp_task_finish(gtid, task, current_task); -#if OMP_45_ENABLED - } -#endif - - KA_TRACE( - 30, - ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n", - gtid, taskdata, current_task)); - return; -} - -// __kmpc_omp_task_parts: Schedule a thread-switchable task for execution -// -// loc_ref: location of original task pragma (ignored) -// gtid: Global Thread ID of encountering thread -// new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task'' -// Returns: -// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to -// be resumed later. -// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be -// resumed later. 
-kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid, - kmp_task_t *new_task) { - kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task); - - KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", gtid, - loc_ref, new_taskdata)); - -#if OMPT_SUPPORT - kmp_taskdata_t *parent; - if (UNLIKELY(ompt_enabled.enabled)) { - parent = new_taskdata->td_parent; - if (ompt_enabled.ompt_callback_task_create) { - ompt_data_t task_data = ompt_data_none; - ompt_callbacks.ompt_callback(ompt_callback_task_create)( - parent ? &(parent->ompt_task_info.task_data) : &task_data, - parent ? &(parent->ompt_task_info.frame) : NULL, - &(new_taskdata->ompt_task_info.task_data), ompt_task_explicit, 0, - OMPT_GET_RETURN_ADDRESS(0)); - } - } -#endif - - /* Should we execute the new task or queue it? For now, let's just always try - to queue it. If the queue fills up, then we'll execute it. */ - - if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer - { // Execute this task immediately - kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task; - new_taskdata->td_flags.task_serial = 1; - __kmp_invoke_task(gtid, new_task, current_task); - } - - KA_TRACE( - 10, - ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: " - "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", - gtid, loc_ref, new_taskdata)); - - ANNOTATE_HAPPENS_BEFORE(new_task); -#if OMPT_SUPPORT - if (UNLIKELY(ompt_enabled.enabled)) { - parent->ompt_task_info.frame.enter_frame = ompt_data_none; - } -#endif - return TASK_CURRENT_NOT_QUEUED; -} - -// __kmp_omp_task: Schedule a non-thread-switchable task for execution -// -// gtid: Global Thread ID of encountering thread -// new_task:non-thread-switchable task thunk allocated by __kmp_omp_task_alloc() -// serialize_immediate: if TRUE then if the task is executed immediately its -// execution will be serialized -// Returns: -// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to -// be resumed later. -// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be -// resumed later. -kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task, - bool serialize_immediate) { - kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task); - -/* Should we execute the new task or queue it? For now, let's just always try to - queue it. If the queue fills up, then we'll execute it. */ -#if OMP_45_ENABLED - if (new_taskdata->td_flags.proxy == TASK_PROXY || - __kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer -#else - if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer -#endif - { // Execute this task immediately - kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task; - if (serialize_immediate) - new_taskdata->td_flags.task_serial = 1; - __kmp_invoke_task(gtid, new_task, current_task); - } - - ANNOTATE_HAPPENS_BEFORE(new_task); - return TASK_CURRENT_NOT_QUEUED; -} - -// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a -// non-thread-switchable task from the parent thread only! -// -// loc_ref: location of original task pragma (ignored) -// gtid: Global Thread ID of encountering thread -// new_task: non-thread-switchable task thunk allocated by -// __kmp_omp_task_alloc() -// Returns: -// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to -// be resumed later. -// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be -// resumed later. 
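// Note: as written, __kmp_omp_task (and therefore this wrapper) always returns
// TASK_CURRENT_NOT_QUEUED; the TASK_CURRENT_QUEUED case is documented for
// completeness but never produced on this path.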
-kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid, - kmp_task_t *new_task) { - kmp_int32 res; - KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK); - -#if KMP_DEBUG || OMPT_SUPPORT - kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task); -#endif - KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref, - new_taskdata)); - -#if OMPT_SUPPORT - kmp_taskdata_t *parent = NULL; - if (UNLIKELY(ompt_enabled.enabled)) { - if (!new_taskdata->td_flags.started) { - OMPT_STORE_RETURN_ADDRESS(gtid); - parent = new_taskdata->td_parent; - if (!parent->ompt_task_info.frame.enter_frame.ptr) { - parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } - if (ompt_enabled.ompt_callback_task_create) { - ompt_data_t task_data = ompt_data_none; - ompt_callbacks.ompt_callback(ompt_callback_task_create)( - parent ? &(parent->ompt_task_info.task_data) : &task_data, - parent ? &(parent->ompt_task_info.frame) : NULL, - &(new_taskdata->ompt_task_info.task_data), - ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0, - OMPT_LOAD_RETURN_ADDRESS(gtid)); - } - } else { - // We are scheduling the continuation of an UNTIED task. - // Scheduling back to the parent task. - __ompt_task_finish(new_task, - new_taskdata->ompt_task_info.scheduling_parent, - ompt_task_switch); - new_taskdata->ompt_task_info.frame.exit_frame = ompt_data_none; - } - } -#endif - - res = __kmp_omp_task(gtid, new_task, true); - - KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning " - "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n", - gtid, loc_ref, new_taskdata)); -#if OMPT_SUPPORT - if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) { - parent->ompt_task_info.frame.enter_frame = ompt_data_none; - } -#endif - return res; -} - -// __kmp_omp_taskloop_task: Wrapper around __kmp_omp_task to schedule -// a taskloop task with the correct OMPT return address -// -// loc_ref: location of original task pragma (ignored) -// gtid: Global Thread ID of encountering thread -// new_task: non-thread-switchable task thunk allocated by -// __kmp_omp_task_alloc() -// codeptr_ra: return address for OMPT callback -// Returns: -// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to -// be resumed later. -// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be -// resumed later. -kmp_int32 __kmp_omp_taskloop_task(ident_t *loc_ref, kmp_int32 gtid, - kmp_task_t *new_task, void *codeptr_ra) { - kmp_int32 res; - KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK); - -#if KMP_DEBUG || OMPT_SUPPORT - kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task); -#endif - KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref, - new_taskdata)); - -#if OMPT_SUPPORT - kmp_taskdata_t *parent = NULL; - if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)) { - parent = new_taskdata->td_parent; - if (!parent->ompt_task_info.frame.enter_frame.ptr) - parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - if (ompt_enabled.ompt_callback_task_create) { - ompt_data_t task_data = ompt_data_none; - ompt_callbacks.ompt_callback(ompt_callback_task_create)( - parent ? &(parent->ompt_task_info.task_data) : &task_data, - parent ? 
&(parent->ompt_task_info.frame) : NULL, - &(new_taskdata->ompt_task_info.task_data), - ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0, - codeptr_ra); - } - } -#endif - - res = __kmp_omp_task(gtid, new_task, true); - - KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning " - "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n", - gtid, loc_ref, new_taskdata)); -#if OMPT_SUPPORT - if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) { - parent->ompt_task_info.frame.enter_frame = ompt_data_none; - } -#endif - return res; -} - -template -static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid, - void *frame_address, - void *return_address) { - kmp_taskdata_t *taskdata; - kmp_info_t *thread; - int thread_finished = FALSE; - KMP_SET_THREAD_STATE_BLOCK(TASKWAIT); - - KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref)); - - if (__kmp_tasking_mode != tskm_immediate_exec) { - thread = __kmp_threads[gtid]; - taskdata = thread->th.th_current_task; - -#if OMPT_SUPPORT && OMPT_OPTIONAL - ompt_data_t *my_task_data; - ompt_data_t *my_parallel_data; - - if (ompt) { - my_task_data = &(taskdata->ompt_task_info.task_data); - my_parallel_data = OMPT_CUR_TEAM_DATA(thread); - - taskdata->ompt_task_info.frame.enter_frame.ptr = frame_address; - - if (ompt_enabled.ompt_callback_sync_region) { - ompt_callbacks.ompt_callback(ompt_callback_sync_region)( - ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data, - my_task_data, return_address); - } - - if (ompt_enabled.ompt_callback_sync_region_wait) { - ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( - ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data, - my_task_data, return_address); - } - } -#endif // OMPT_SUPPORT && OMPT_OPTIONAL - -// Debugger: The taskwait is active. Store location and thread encountered the -// taskwait. -#if USE_ITT_BUILD -// Note: These values are used by ITT events as well. -#endif /* USE_ITT_BUILD */ - taskdata->td_taskwait_counter += 1; - taskdata->td_taskwait_ident = loc_ref; - taskdata->td_taskwait_thread = gtid + 1; - -#if USE_ITT_BUILD - void *itt_sync_obj = __kmp_itt_taskwait_object(gtid); - if (itt_sync_obj != NULL) - __kmp_itt_taskwait_starting(gtid, itt_sync_obj); -#endif /* USE_ITT_BUILD */ - - bool must_wait = - !taskdata->td_flags.team_serial && !taskdata->td_flags.final; - -#if OMP_45_ENABLED - must_wait = must_wait || (thread->th.th_task_team != NULL && - thread->th.th_task_team->tt.tt_found_proxy_tasks); -#endif - if (must_wait) { - kmp_flag_32 flag(RCAST(std::atomic *, - &(taskdata->td_incomplete_child_tasks)), - 0U); - while (KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) != 0) { - flag.execute_tasks(thread, gtid, FALSE, - &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), - __kmp_task_stealing_constraint); - } - } -#if USE_ITT_BUILD - if (itt_sync_obj != NULL) - __kmp_itt_taskwait_finished(gtid, itt_sync_obj); -#endif /* USE_ITT_BUILD */ - - // Debugger: The taskwait is completed. Location remains, but thread is - // negated. 
- taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread; - -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt) { - if (ompt_enabled.ompt_callback_sync_region_wait) { - ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( - ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data, - my_task_data, return_address); - } - if (ompt_enabled.ompt_callback_sync_region) { - ompt_callbacks.ompt_callback(ompt_callback_sync_region)( - ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data, - my_task_data, return_address); - } - taskdata->ompt_task_info.frame.enter_frame = ompt_data_none; - } -#endif // OMPT_SUPPORT && OMPT_OPTIONAL - - ANNOTATE_HAPPENS_AFTER(taskdata); - } - - KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, " - "returning TASK_CURRENT_NOT_QUEUED\n", - gtid, taskdata)); - - return TASK_CURRENT_NOT_QUEUED; -} - -#if OMPT_SUPPORT && OMPT_OPTIONAL -OMPT_NOINLINE -static kmp_int32 __kmpc_omp_taskwait_ompt(ident_t *loc_ref, kmp_int32 gtid, - void *frame_address, - void *return_address) { - return __kmpc_omp_taskwait_template(loc_ref, gtid, frame_address, - return_address); -} -#endif // OMPT_SUPPORT && OMPT_OPTIONAL - -// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are -// complete -kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) { -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (UNLIKELY(ompt_enabled.enabled)) { - OMPT_STORE_RETURN_ADDRESS(gtid); - return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(0), - OMPT_LOAD_RETURN_ADDRESS(gtid)); - } -#endif - return __kmpc_omp_taskwait_template(loc_ref, gtid, NULL, NULL); -} - -// __kmpc_omp_taskyield: switch to a different task -kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) { - kmp_taskdata_t *taskdata; - kmp_info_t *thread; - int thread_finished = FALSE; - - KMP_COUNT_BLOCK(OMP_TASKYIELD); - KMP_SET_THREAD_STATE_BLOCK(TASKYIELD); - - KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n", - gtid, loc_ref, end_part)); - - if (__kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel) { - thread = __kmp_threads[gtid]; - taskdata = thread->th.th_current_task; -// Should we model this as a task wait or not? -// Debugger: The taskwait is active. Store location and thread encountered the -// taskwait. -#if USE_ITT_BUILD -// Note: These values are used by ITT events as well. -#endif /* USE_ITT_BUILD */ - taskdata->td_taskwait_counter += 1; - taskdata->td_taskwait_ident = loc_ref; - taskdata->td_taskwait_thread = gtid + 1; - -#if USE_ITT_BUILD - void *itt_sync_obj = __kmp_itt_taskwait_object(gtid); - if (itt_sync_obj != NULL) - __kmp_itt_taskwait_starting(gtid, itt_sync_obj); -#endif /* USE_ITT_BUILD */ - if (!taskdata->td_flags.team_serial) { - kmp_task_team_t *task_team = thread->th.th_task_team; - if (task_team != NULL) { - if (KMP_TASKING_ENABLED(task_team)) { -#if OMPT_SUPPORT - if (UNLIKELY(ompt_enabled.enabled)) - thread->th.ompt_thread_info.ompt_task_yielded = 1; -#endif - __kmp_execute_tasks_32( - thread, gtid, NULL, FALSE, - &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), - __kmp_task_stealing_constraint); -#if OMPT_SUPPORT - if (UNLIKELY(ompt_enabled.enabled)) - thread->th.ompt_thread_info.ompt_task_yielded = 0; -#endif - } - } - } -#if USE_ITT_BUILD - if (itt_sync_obj != NULL) - __kmp_itt_taskwait_finished(gtid, itt_sync_obj); -#endif /* USE_ITT_BUILD */ - - // Debugger: The taskwait is completed. Location remains, but thread is - // negated. 
- taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread; - } - - KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, " - "returning TASK_CURRENT_NOT_QUEUED\n", - gtid, taskdata)); - - return TASK_CURRENT_NOT_QUEUED; -} - -#if OMP_50_ENABLED -// Task Reduction implementation - -typedef struct kmp_task_red_flags { - unsigned lazy_priv : 1; // hint: (1) use lazy allocation (big objects) - unsigned reserved31 : 31; -} kmp_task_red_flags_t; - -// internal structure for reduction data item related info -typedef struct kmp_task_red_data { - void *reduce_shar; // shared reduction item - size_t reduce_size; // size of data item - void *reduce_priv; // thread specific data - void *reduce_pend; // end of private data for comparison op - void *reduce_init; // data initialization routine - void *reduce_fini; // data finalization routine - void *reduce_comb; // data combiner routine - kmp_task_red_flags_t flags; // flags for additional info from compiler -} kmp_task_red_data_t; - -// structure sent us by compiler - one per reduction item -typedef struct kmp_task_red_input { - void *reduce_shar; // shared reduction item - size_t reduce_size; // size of data item - void *reduce_init; // data initialization routine - void *reduce_fini; // data finalization routine - void *reduce_comb; // data combiner routine - kmp_task_red_flags_t flags; // flags for additional info from compiler -} kmp_task_red_input_t; - -/*! -@ingroup TASKING -@param gtid Global thread ID -@param num Number of data items to reduce -@param data Array of data for reduction -@return The taskgroup identifier - -Initialize task reduction for the taskgroup. -*/ -void *__kmpc_task_reduction_init(int gtid, int num, void *data) { - kmp_info_t *thread = __kmp_threads[gtid]; - kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup; - kmp_int32 nth = thread->th.th_team_nproc; - kmp_task_red_input_t *input = (kmp_task_red_input_t *)data; - kmp_task_red_data_t *arr; - - // check input data just in case - KMP_ASSERT(tg != NULL); - KMP_ASSERT(data != NULL); - KMP_ASSERT(num > 0); - if (nth == 1) { - KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n", - gtid, tg)); - return (void *)tg; - } - KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n", - gtid, tg, num)); - arr = (kmp_task_red_data_t *)__kmp_thread_malloc( - thread, num * sizeof(kmp_task_red_data_t)); - for (int i = 0; i < num; ++i) { - void (*f_init)(void *) = (void (*)(void *))(input[i].reduce_init); - size_t size = input[i].reduce_size - 1; - // round the size up to cache line per thread-specific item - size += CACHE_LINE - size % CACHE_LINE; - KMP_ASSERT(input[i].reduce_comb != NULL); // combiner is mandatory - arr[i].reduce_shar = input[i].reduce_shar; - arr[i].reduce_size = size; - arr[i].reduce_init = input[i].reduce_init; - arr[i].reduce_fini = input[i].reduce_fini; - arr[i].reduce_comb = input[i].reduce_comb; - arr[i].flags = input[i].flags; - if (!input[i].flags.lazy_priv) { - // allocate cache-line aligned block and fill it with zeros - arr[i].reduce_priv = __kmp_allocate(nth * size); - arr[i].reduce_pend = (char *)(arr[i].reduce_priv) + nth * size; - if (f_init != NULL) { - // initialize thread-specific items - for (int j = 0; j < nth; ++j) { - f_init((char *)(arr[i].reduce_priv) + j * size); - } - } - } else { - // only allocate space for pointers now, - // objects will be lazily allocated/initialized once requested - arr[i].reduce_priv = __kmp_allocate(nth * sizeof(void *)); - } - } - tg->reduce_data = (void 
*)arr; - tg->reduce_num_data = num; - return (void *)tg; -} - -/*! -@ingroup TASKING -@param gtid Global thread ID -@param tskgrp The taskgroup ID (optional) -@param data Shared location of the item -@return The pointer to per-thread data - -Get thread-specific location of data item -*/ -void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) { - kmp_info_t *thread = __kmp_threads[gtid]; - kmp_int32 nth = thread->th.th_team_nproc; - if (nth == 1) - return data; // nothing to do - - kmp_taskgroup_t *tg = (kmp_taskgroup_t *)tskgrp; - if (tg == NULL) - tg = thread->th.th_current_task->td_taskgroup; - KMP_ASSERT(tg != NULL); - kmp_task_red_data_t *arr = (kmp_task_red_data_t *)(tg->reduce_data); - kmp_int32 num = tg->reduce_num_data; - kmp_int32 tid = thread->th.th_info.ds.ds_tid; - - KMP_ASSERT(data != NULL); - while (tg != NULL) { - for (int i = 0; i < num; ++i) { - if (!arr[i].flags.lazy_priv) { - if (data == arr[i].reduce_shar || - (data >= arr[i].reduce_priv && data < arr[i].reduce_pend)) - return (char *)(arr[i].reduce_priv) + tid * arr[i].reduce_size; - } else { - // check shared location first - void **p_priv = (void **)(arr[i].reduce_priv); - if (data == arr[i].reduce_shar) - goto found; - // check if we get some thread specific location as parameter - for (int j = 0; j < nth; ++j) - if (data == p_priv[j]) - goto found; - continue; // not found, continue search - found: - if (p_priv[tid] == NULL) { - // allocate thread specific object lazily - void (*f_init)(void *) = (void (*)(void *))(arr[i].reduce_init); - p_priv[tid] = __kmp_allocate(arr[i].reduce_size); - if (f_init != NULL) { - f_init(p_priv[tid]); - } - } - return p_priv[tid]; - } - } - tg = tg->parent; - arr = (kmp_task_red_data_t *)(tg->reduce_data); - num = tg->reduce_num_data; - } - KMP_ASSERT2(0, "Unknown task reduction item"); - return NULL; // ERROR, this line never executed -} - -// Finalize task reduction. 
-// Called from __kmpc_end_taskgroup() -static void __kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg) { - kmp_int32 nth = th->th.th_team_nproc; - KMP_DEBUG_ASSERT(nth > 1); // should not be called if nth == 1 - kmp_task_red_data_t *arr = (kmp_task_red_data_t *)tg->reduce_data; - kmp_int32 num = tg->reduce_num_data; - for (int i = 0; i < num; ++i) { - void *sh_data = arr[i].reduce_shar; - void (*f_fini)(void *) = (void (*)(void *))(arr[i].reduce_fini); - void (*f_comb)(void *, void *) = - (void (*)(void *, void *))(arr[i].reduce_comb); - if (!arr[i].flags.lazy_priv) { - void *pr_data = arr[i].reduce_priv; - size_t size = arr[i].reduce_size; - for (int j = 0; j < nth; ++j) { - void *priv_data = (char *)pr_data + j * size; - f_comb(sh_data, priv_data); // combine results - if (f_fini) - f_fini(priv_data); // finalize if needed - } - } else { - void **pr_data = (void **)(arr[i].reduce_priv); - for (int j = 0; j < nth; ++j) { - if (pr_data[j] != NULL) { - f_comb(sh_data, pr_data[j]); // combine results - if (f_fini) - f_fini(pr_data[j]); // finalize if needed - __kmp_free(pr_data[j]); - } - } - } - __kmp_free(arr[i].reduce_priv); - } - __kmp_thread_free(th, arr); - tg->reduce_data = NULL; - tg->reduce_num_data = 0; -} -#endif - -#if OMP_40_ENABLED -// __kmpc_taskgroup: Start a new taskgroup -void __kmpc_taskgroup(ident_t *loc, int gtid) { - kmp_info_t *thread = __kmp_threads[gtid]; - kmp_taskdata_t *taskdata = thread->th.th_current_task; - kmp_taskgroup_t *tg_new = - (kmp_taskgroup_t *)__kmp_thread_malloc(thread, sizeof(kmp_taskgroup_t)); - KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new)); - KMP_ATOMIC_ST_RLX(&tg_new->count, 0); - KMP_ATOMIC_ST_RLX(&tg_new->cancel_request, cancel_noreq); - tg_new->parent = taskdata->td_taskgroup; -#if OMP_50_ENABLED - tg_new->reduce_data = NULL; - tg_new->reduce_num_data = 0; -#endif - taskdata->td_taskgroup = tg_new; - -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) { - void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); - if (!codeptr) - codeptr = OMPT_GET_RETURN_ADDRESS(0); - kmp_team_t *team = thread->th.th_team; - ompt_data_t my_task_data = taskdata->ompt_task_info.task_data; - // FIXME: I think this is wrong for lwt! - ompt_data_t my_parallel_data = team->t.ompt_team_info.parallel_data; - - ompt_callbacks.ompt_callback(ompt_callback_sync_region)( - ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data), - &(my_task_data), codeptr); - } -#endif -} - -// __kmpc_end_taskgroup: Wait until all tasks generated by the current task -// and its descendants are complete -void __kmpc_end_taskgroup(ident_t *loc, int gtid) { - kmp_info_t *thread = __kmp_threads[gtid]; - kmp_taskdata_t *taskdata = thread->th.th_current_task; - kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup; - int thread_finished = FALSE; - -#if OMPT_SUPPORT && OMPT_OPTIONAL - kmp_team_t *team; - ompt_data_t my_task_data; - ompt_data_t my_parallel_data; - void *codeptr; - if (UNLIKELY(ompt_enabled.enabled)) { - team = thread->th.th_team; - my_task_data = taskdata->ompt_task_info.task_data; - // FIXME: I think this is wrong for lwt! 
-    my_parallel_data = team->t.ompt_team_info.parallel_data;
-    codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
-    if (!codeptr)
-      codeptr = OMPT_GET_RETURN_ADDRESS(0);
-  }
-#endif
-
-  KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc));
-  KMP_DEBUG_ASSERT(taskgroup != NULL);
-  KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);
-
-  if (__kmp_tasking_mode != tskm_immediate_exec) {
-    // mark task as waiting not on a barrier
-    taskdata->td_taskwait_counter += 1;
-    taskdata->td_taskwait_ident = loc;
-    taskdata->td_taskwait_thread = gtid + 1;
-#if USE_ITT_BUILD
-    // For ITT the taskgroup wait is similar to taskwait until we need to
-    // distinguish them
-    void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
-    if (itt_sync_obj != NULL)
-      __kmp_itt_taskwait_starting(gtid, itt_sync_obj);
-#endif /* USE_ITT_BUILD */
-
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-    if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
-      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
-          ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
-          &(my_task_data), codeptr);
-    }
-#endif
-
-#if OMP_45_ENABLED
-    if (!taskdata->td_flags.team_serial ||
-        (thread->th.th_task_team != NULL &&
-         thread->th.th_task_team->tt.tt_found_proxy_tasks))
-#else
-    if (!taskdata->td_flags.team_serial)
-#endif
-    {
-      kmp_flag_32 flag(RCAST(std::atomic<kmp_uint32> *, &(taskgroup->count)),
-                       0U);
-      while (KMP_ATOMIC_LD_ACQ(&taskgroup->count) != 0) {
-        flag.execute_tasks(thread, gtid, FALSE,
-                           &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
-                           __kmp_task_stealing_constraint);
-      }
-    }
-    taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread; // end waiting
-
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-    if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
-      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
-          ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
-          &(my_task_data), codeptr);
-    }
-#endif
-
-#if USE_ITT_BUILD
-    if (itt_sync_obj != NULL)
-      __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
-#endif /* USE_ITT_BUILD */
-  }
-  KMP_DEBUG_ASSERT(taskgroup->count == 0);
-
-#if OMP_50_ENABLED
-  if (taskgroup->reduce_data != NULL) // need to reduce?
- __kmp_task_reduction_fini(thread, taskgroup); -#endif - // Restore parent taskgroup for the current task - taskdata->td_taskgroup = taskgroup->parent; - __kmp_thread_free(thread, taskgroup); - - KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", - gtid, taskdata)); - ANNOTATE_HAPPENS_AFTER(taskdata); - -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) { - ompt_callbacks.ompt_callback(ompt_callback_sync_region)( - ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data), - &(my_task_data), codeptr); - } -#endif -} -#endif - -// __kmp_remove_my_task: remove a task from my own deque -static kmp_task_t *__kmp_remove_my_task(kmp_info_t *thread, kmp_int32 gtid, - kmp_task_team_t *task_team, - kmp_int32 is_constrained) { - kmp_task_t *task; - kmp_taskdata_t *taskdata; - kmp_thread_data_t *thread_data; - kmp_uint32 tail; - - KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec); - KMP_DEBUG_ASSERT(task_team->tt.tt_threads_data != - NULL); // Caller should check this condition - - thread_data = &task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)]; - - KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n", - gtid, thread_data->td.td_deque_ntasks, - thread_data->td.td_deque_head, thread_data->td.td_deque_tail)); - - if (TCR_4(thread_data->td.td_deque_ntasks) == 0) { - KA_TRACE(10, - ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: " - "ntasks=%d head=%u tail=%u\n", - gtid, thread_data->td.td_deque_ntasks, - thread_data->td.td_deque_head, thread_data->td.td_deque_tail)); - return NULL; - } - - __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock); - - if (TCR_4(thread_data->td.td_deque_ntasks) == 0) { - __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock); - KA_TRACE(10, - ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: " - "ntasks=%d head=%u tail=%u\n", - gtid, thread_data->td.td_deque_ntasks, - thread_data->td.td_deque_head, thread_data->td.td_deque_tail)); - return NULL; - } - - tail = (thread_data->td.td_deque_tail - 1) & - TASK_DEQUE_MASK(thread_data->td); // Wrap index. - taskdata = thread_data->td.td_deque[tail]; - - if (!__kmp_task_is_allowed(gtid, is_constrained, taskdata, - thread->th.th_current_task)) { - // The TSC does not allow to steal victim task - __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock); - KA_TRACE(10, - ("__kmp_remove_my_task(exit #3): T#%d TSC blocks tail task: " - "ntasks=%d head=%u tail=%u\n", - gtid, thread_data->td.td_deque_ntasks, - thread_data->td.td_deque_head, thread_data->td.td_deque_tail)); - return NULL; - } - - thread_data->td.td_deque_tail = tail; - TCW_4(thread_data->td.td_deque_ntasks, thread_data->td.td_deque_ntasks - 1); - - __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock); - - KA_TRACE(10, ("__kmp_remove_my_task(exit #4): T#%d task %p removed: " - "ntasks=%d head=%u tail=%u\n", - gtid, taskdata, thread_data->td.td_deque_ntasks, - thread_data->td.td_deque_head, thread_data->td.td_deque_tail)); - - task = KMP_TASKDATA_TO_TASK(taskdata); - return task; -} - -// __kmp_steal_task: remove a task from another thread's deque -// Assume that calling thread has already checked existence of -// task_team thread_data before calling this routine. 
-static kmp_task_t *__kmp_steal_task(kmp_info_t *victim_thr, kmp_int32 gtid,
-                                    kmp_task_team_t *task_team,
-                                    std::atomic<kmp_int32> *unfinished_threads,
-                                    int *thread_finished,
-                                    kmp_int32 is_constrained) {
-  kmp_task_t *task;
-  kmp_taskdata_t *taskdata;
-  kmp_taskdata_t *current;
-  kmp_thread_data_t *victim_td, *threads_data;
-  kmp_int32 target;
-  kmp_int32 victim_tid;
-
-  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
-
-  threads_data = task_team->tt.tt_threads_data;
-  KMP_DEBUG_ASSERT(threads_data != NULL); // Caller should check this condition
-
-  victim_tid = victim_thr->th.th_info.ds.ds_tid;
-  victim_td = &threads_data[victim_tid];
-
-  KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: "
-                "task_team=%p ntasks=%d head=%u tail=%u\n",
-                gtid, __kmp_gtid_from_thread(victim_thr), task_team,
-                victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
-                victim_td->td.td_deque_tail));
-
-  if (TCR_4(victim_td->td.td_deque_ntasks) == 0) {
-    KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: "
-                  "task_team=%p ntasks=%d head=%u tail=%u\n",
-                  gtid, __kmp_gtid_from_thread(victim_thr), task_team,
-                  victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
-                  victim_td->td.td_deque_tail));
-    return NULL;
-  }
-
-  __kmp_acquire_bootstrap_lock(&victim_td->td.td_deque_lock);
-
-  int ntasks = TCR_4(victim_td->td.td_deque_ntasks);
-  // Check again after we acquire the lock
-  if (ntasks == 0) {
-    __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
-    KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: "
-                  "task_team=%p ntasks=%d head=%u tail=%u\n",
-                  gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
-                  victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
-    return NULL;
-  }
-
-  KMP_DEBUG_ASSERT(victim_td->td.td_deque != NULL);
-  current = __kmp_threads[gtid]->th.th_current_task;
-  taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head];
-  if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
-    // Bump head pointer and Wrap.
-    victim_td->td.td_deque_head =
-        (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td);
-  } else {
-    if (!task_team->tt.tt_untied_task_encountered) {
-      // The TSC does not allow to steal victim task
-      __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
-      KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d could not steal from "
-                    "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
-                    gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
-                    victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
-      return NULL;
-    }
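    // A worked micro-example of the walk-and-compact steal below (not part of
    // the original source): suppose the victim deque holds A B C D with the
    // head at A and ntasks == 4, and the task scheduling constraint only
    // permits stealing C. The walk finds C at the third slot; the compaction
    // loop then copies D over C's slot, leaving A B D, and the tail index
    // moves back by one. A steal from the middle therefore shifts every task
    // behind the stolen one, which is tolerated because this path is taken
    // only when an untied task has been encountered.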
-    int i;
-    // walk through victim's deque trying to steal any task
-    target = victim_td->td.td_deque_head;
-    taskdata = NULL;
-    for (i = 1; i < ntasks; ++i) {
-      target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
-      taskdata = victim_td->td.td_deque[target];
-      if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
-        break; // found victim task
-      } else {
-        taskdata = NULL;
-      }
-    }
-    if (taskdata == NULL) {
-      // No appropriate candidate to steal found
-      __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
-      KA_TRACE(10, ("__kmp_steal_task(exit #4): T#%d could not steal from "
-                    "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
-                    gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
-                    victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
-      return NULL;
-    }
-    int prev = target;
-    for (i = i + 1; i < ntasks; ++i) {
-      // shift remaining tasks in the deque left by 1
-      target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
-      victim_td->td.td_deque[prev] = victim_td->td.td_deque[target];
-      prev = target;
-    }
-    KMP_DEBUG_ASSERT(
-        victim_td->td.td_deque_tail ==
-        (kmp_uint32)((target + 1) & TASK_DEQUE_MASK(victim_td->td)));
-    victim_td->td.td_deque_tail = target; // tail -= 1 (wrapped)
-  }
-  if (*thread_finished) {
-    // We need to un-mark this victim as a finished victim. This must be done
-    // before releasing the lock, or else other threads (starting with the
-    // master victim) might be prematurely released from the barrier!!!
-    kmp_int32 count;
-
-    count = KMP_ATOMIC_INC(unfinished_threads);
-
-    KA_TRACE(
-        20,
-        ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
-         gtid, count + 1, task_team));
-
-    *thread_finished = FALSE;
-  }
-  TCW_4(victim_td->td.td_deque_ntasks, ntasks - 1);
-
-  __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
-
-  KMP_COUNT_BLOCK(TASK_stolen);
-  KA_TRACE(10,
-           ("__kmp_steal_task(exit #5): T#%d stole task %p from T#%d: "
-            "task_team=%p ntasks=%d head=%u tail=%u\n",
-            gtid, taskdata, __kmp_gtid_from_thread(victim_thr), task_team,
-            ntasks, victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
-
-  task = KMP_TASKDATA_TO_TASK(taskdata);
-  return task;
-}
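Between them, __kmp_remove_my_task and __kmp_steal_task implement the usual
work-stealing split: the owning thread pops from the tail of its deque while
thieves take from the head, and each deque is guarded by a single bootstrap
lock rather than a lock-free protocol. A minimal sketch of that discipline in
plain C++, with illustrative names standing in for the runtime's types and
its TCR/TCW access macros:

    #include <array>
    #include <cstdint>
    #include <mutex>

    // Illustrative only: a fixed power-of-two ring with one lock, the owner
    // popping at the tail and thieves stealing at the head.
    struct toy_deque {
      static constexpr std::uint32_t kCapacity = 256; // power of two
      std::array<void *, kCapacity> slot{};
      std::uint32_t head = 0, tail = 0, ntasks = 0;
      std::mutex lock; // stands in for the per-deque bootstrap lock

      bool pop_tail(void *&out) { // owner side (cf. __kmp_remove_my_task)
        std::lock_guard<std::mutex> g(lock);
        if (ntasks == 0)
          return false;
        tail = (tail - 1) & (kCapacity - 1); // wrap via mask
        out = slot[tail];
        --ntasks;
        return true;
      }
      bool steal_head(void *&out) { // thief side (cf. __kmp_steal_task)
        std::lock_guard<std::mutex> g(lock);
        if (ntasks == 0)
          return false;
        out = slot[head];
        head = (head + 1) & (kCapacity - 1);
        --ntasks;
        return true;
      }
    };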
-
-// __kmp_execute_tasks_template: Choose and execute tasks until either the
-// condition is satisfied (return true) or there are none left (return false).
-//
-// final_spin is TRUE if this is the spin at the release barrier.
-// thread_finished indicates whether the thread is finished executing all
-// the tasks it has on its deque, and is at the release barrier.
-// spinner is the location on which to spin.
-// spinner == NULL means only execute a single task and return.
-// checker is the value to check to terminate the spin.
-template <class C>
-static inline int __kmp_execute_tasks_template(
-    kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
-    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
-    kmp_int32 is_constrained) {
-  kmp_task_team_t *task_team = thread->th.th_task_team;
-  kmp_thread_data_t *threads_data;
-  kmp_task_t *task;
-  kmp_info_t *other_thread;
-  kmp_taskdata_t *current_task = thread->th.th_current_task;
-  std::atomic<kmp_int32> *unfinished_threads;
-  kmp_int32 nthreads, victim_tid = -2, use_own_tasks = 1, new_victim = 0,
-            tid = thread->th.th_info.ds.ds_tid;
-
-  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
-  KMP_DEBUG_ASSERT(thread == __kmp_threads[gtid]);
-
-  if (task_team == NULL || current_task == NULL)
-    return FALSE;
-
-  KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d "
-                "*thread_finished=%d\n",
-                gtid, final_spin, *thread_finished));
-
-  thread->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
-  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
-  KMP_DEBUG_ASSERT(threads_data != NULL);
-
-  nthreads = task_team->tt.tt_nproc;
-  unfinished_threads = &(task_team->tt.tt_unfinished_threads);
-#if OMP_45_ENABLED
-  KMP_DEBUG_ASSERT(nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
-#else
-  KMP_DEBUG_ASSERT(nthreads > 1);
-#endif
-  KMP_DEBUG_ASSERT(*unfinished_threads >= 0);
-
-  while (1) { // Outer loop keeps trying to find tasks in case of single thread
-    // getting tasks from target constructs
-    while (1) { // Inner loop to find a task and execute it
-      task = NULL;
-      if (use_own_tasks) { // check on own queue first
-        task = __kmp_remove_my_task(thread, gtid, task_team, is_constrained);
-      }
-      if ((task == NULL) && (nthreads > 1)) { // Steal a task
-        int asleep = 1;
-        use_own_tasks = 0;
-        // Try to steal from the last place I stole from successfully.
-        if (victim_tid == -2) { // haven't stolen anything yet
-          victim_tid = threads_data[tid].td.td_deque_last_stolen;
-          if (victim_tid !=
-              -1) // if we have a last stolen from victim, get the thread
-            other_thread = threads_data[victim_tid].td.td_thr;
-        }
-        if (victim_tid != -1) { // found last victim
-          asleep = 0;
-        } else if (!new_victim) { // no recent steals and we haven't already
-          // used a new victim; select a random thread
-          do { // Find a different thread to steal work from.
-            // Pick a random thread. Initial plan was to cycle through all the
-            // threads, and only return if we tried to steal from every thread,
-            // and failed. Arch says that's not such a great idea.
-            victim_tid = __kmp_get_random(thread) % (nthreads - 1);
-            if (victim_tid >= tid) {
-              ++victim_tid; // Adjusts random distribution to exclude self
-            }
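            // The adjustment above maps a uniform draw over [0, nthreads - 2]
            // onto all thread ids except this one: e.g., with nthreads == 4
            // and tid == 1, draws 0, 1, 2 become victims 0, 2, 3, so each of
            // the other threads is chosen with equal probability.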
-            // Found a potential victim
-            other_thread = threads_data[victim_tid].td.td_thr;
-            // There is a slight chance that __kmp_enable_tasking() did not
-            // wake up all threads waiting at the barrier. If victim is
-            // sleeping, then wake it up. Since we were going to pay the cache
-            // miss penalty for referencing another thread's kmp_info_t struct
-            // anyway, the check shouldn't cost too much performance at this
-            // point. In extra barrier mode, tasks do not sleep at the
-            // separate tasking barrier, so this isn't a problem.
-            asleep = 0;
-            if ((__kmp_tasking_mode == tskm_task_teams) &&
-                (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
-                (TCR_PTR(CCAST(void *, other_thread->th.th_sleep_loc)) !=
-                 NULL)) {
-              asleep = 1;
-              __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread),
-                                        other_thread->th.th_sleep_loc);
-              // A sleeping thread should not have any tasks on its queue.
-              // There is a slight possibility that it resumes, steals a task
-              // from another thread, which spawns more tasks, all in the time
-              // that it takes this thread to check => don't write an assertion
-              // that the victim's queue is empty. Try stealing from a
-              // different thread.
-            }
-          } while (asleep);
-        }
-
-        if (!asleep) {
-          // We have a victim to try to steal from
-          task = __kmp_steal_task(other_thread, gtid, task_team,
-                                  unfinished_threads, thread_finished,
-                                  is_constrained);
-        }
-        if (task != NULL) { // set last stolen to victim
-          if (threads_data[tid].td.td_deque_last_stolen != victim_tid) {
-            threads_data[tid].td.td_deque_last_stolen = victim_tid;
-            // The pre-refactored code did not try more than 1 successful new
-            // victim, unless the last one generated more local tasks;
-            // new_victim keeps track of this
-            new_victim = 1;
-          }
-        } else { // No tasks found; unset last_stolen
-          KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
-          victim_tid = -2; // no successful victim found
-        }
-      }
-
-      if (task == NULL) // break out of tasking loop
-        break;
-
-// Found a task; execute it
-#if USE_ITT_BUILD && USE_ITT_NOTIFY
-      if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
-        if (itt_sync_obj == NULL) { // we are at fork barrier where we could not
-          // get the object reliably
-          itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
-        }
-        __kmp_itt_task_starting(itt_sync_obj);
-      }
-#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
-      __kmp_invoke_task(gtid, task, current_task);
-#if USE_ITT_BUILD
-      if (itt_sync_obj != NULL)
-        __kmp_itt_task_finished(itt_sync_obj);
-#endif /* USE_ITT_BUILD */
-      // If this thread is only partway through the barrier and the condition
-      // is met, then return now, so that the barrier gather/release pattern
-      // can proceed. If this thread is in the last spin loop in the barrier,
-      // waiting to be released, we know that the termination condition will
-      // not be satisfied, so don't waste any cycles checking it.
-      if (flag == NULL || (!final_spin && flag->done_check())) {
-        KA_TRACE(
-            15,
-            ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
-             gtid));
-        return TRUE;
-      }
-      if (thread->th.th_task_team == NULL) {
-        break;
-      }
-      // Yield before executing next task
-      KMP_YIELD(__kmp_library == library_throughput);
-      // If execution of a stolen task results in more tasks being placed on
-      // our run queue, reset use_own_tasks
-      if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
-        KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned "
-                      "other tasks, restart\n",
-                      gtid));
-        use_own_tasks = 1;
-        new_victim = 0;
-      }
-    }
-
-// The task source has been exhausted. If in final spin loop of barrier, check
-// if termination condition is satisfied.
-#if OMP_45_ENABLED
-    // The work queue may be empty but there might be proxy tasks still
-    // executing
-    if (final_spin &&
-        KMP_ATOMIC_LD_ACQ(&current_task->td_incomplete_child_tasks) == 0)
-#else
-    if (final_spin)
-#endif
-    {
-      // First, decrement the #unfinished threads, if that has not already been
-      // done.
This decrement might be to the spin location, and result in the - // termination condition being satisfied. - if (!*thread_finished) { - kmp_int32 count; - - count = KMP_ATOMIC_DEC(unfinished_threads) - 1; - KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec " - "unfinished_threads to %d task_team=%p\n", - gtid, count, task_team)); - *thread_finished = TRUE; - } - - // It is now unsafe to reference thread->th.th_team !!! - // Decrementing task_team->tt.tt_unfinished_threads can allow the master - // thread to pass through the barrier, where it might reset each thread's - // th.th_team field for the next parallel region. If we can steal more - // work, we know that this has not happened yet. - if (flag != NULL && flag->done_check()) { - KA_TRACE( - 15, - ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n", - gtid)); - return TRUE; - } - } - - // If this thread's task team is NULL, master has recognized that there are - // no more tasks; bail out - if (thread->th.th_task_team == NULL) { - KA_TRACE(15, - ("__kmp_execute_tasks_template: T#%d no more tasks\n", gtid)); - return FALSE; - } - -#if OMP_45_ENABLED - // We could be getting tasks from target constructs; if this is the only - // thread, keep trying to execute tasks from own queue - if (nthreads == 1) - use_own_tasks = 1; - else -#endif - { - KA_TRACE(15, - ("__kmp_execute_tasks_template: T#%d can't find work\n", gtid)); - return FALSE; - } - } -} - -int __kmp_execute_tasks_32( - kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin, - int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), - kmp_int32 is_constrained) { - return __kmp_execute_tasks_template( - thread, gtid, flag, final_spin, - thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); -} - -int __kmp_execute_tasks_64( - kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin, - int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), - kmp_int32 is_constrained) { - return __kmp_execute_tasks_template( - thread, gtid, flag, final_spin, - thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); -} - -int __kmp_execute_tasks_oncore( - kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin, - int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), - kmp_int32 is_constrained) { - return __kmp_execute_tasks_template( - thread, gtid, flag, final_spin, - thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); -} - -// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the -// next barrier so they can assist in executing enqueued tasks. -// First thread in allocates the task team atomically. -static void __kmp_enable_tasking(kmp_task_team_t *task_team, - kmp_info_t *this_thr) { - kmp_thread_data_t *threads_data; - int nthreads, i, is_init_thread; - - KA_TRACE(10, ("__kmp_enable_tasking(enter): T#%d\n", - __kmp_gtid_from_thread(this_thr))); - - KMP_DEBUG_ASSERT(task_team != NULL); - KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL); - - nthreads = task_team->tt.tt_nproc; - KMP_DEBUG_ASSERT(nthreads > 0); - KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc); - - // Allocate or increase the size of threads_data if necessary - is_init_thread = __kmp_realloc_task_threads_data(this_thr, task_team); - - if (!is_init_thread) { - // Some other thread already set up the array. 
-    KA_TRACE(
-        20,
-        ("__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
-         __kmp_gtid_from_thread(this_thr)));
-    return;
-  }
-  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
-  KMP_DEBUG_ASSERT(threads_data != NULL);
-
-  if ((__kmp_tasking_mode == tskm_task_teams) &&
-      (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME)) {
-    // Release any threads sleeping at the barrier, so that they can steal
-    // tasks and execute them. In extra barrier mode, tasks do not sleep
-    // at the separate tasking barrier, so this isn't a problem.
-    for (i = 0; i < nthreads; i++) {
-      volatile void *sleep_loc;
-      kmp_info_t *thread = threads_data[i].td.td_thr;
-
-      if (i == this_thr->th.th_info.ds.ds_tid) {
-        continue;
-      }
-      // Since we haven't locked the thread's suspend mutex lock at this
-      // point, there is a small window where a thread might be putting
-      // itself to sleep, but hasn't set the th_sleep_loc field yet.
-      // To work around this, __kmp_execute_tasks_template() periodically
-      // checks to see if other threads are sleeping (using the same random
-      // mechanism that is used for task stealing) and awakens them if they
-      // are.
-      if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
-          NULL) {
-        KF_TRACE(50, ("__kmp_enable_tasking: T#%d waking up thread T#%d\n",
-                      __kmp_gtid_from_thread(this_thr),
-                      __kmp_gtid_from_thread(thread)));
-        __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
-      } else {
-        KF_TRACE(50, ("__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
-                      __kmp_gtid_from_thread(this_thr),
-                      __kmp_gtid_from_thread(thread)));
-      }
-    }
-  }
-
-  KA_TRACE(10, ("__kmp_enable_tasking(exit): T#%d\n",
-                __kmp_gtid_from_thread(this_thr)));
-}
-
-/* // TODO: Check the comment consistency
- * Utility routines for "task teams". A task team (kmp_task_team_t) is kind of
- * like a shadow of the kmp_team_t data struct, with a different lifetime.
- * After a child thread checks into a barrier and calls __kmp_release() from
- * the particular variant of __kmp_<barrier_type>_barrier_gather(), it can no
- * longer assume that the kmp_team_t structure is intact (at any moment, the
- * master thread may exit the barrier code and free the team data structure,
- * and return the threads to the thread pool).
- *
- * This does not work with the tasking code, as the thread is still
- * expected to participate in the execution of any tasks that may have been
- * spawned by a member of the team, and the thread still needs access to each
- * thread in the team, so that it can steal work from it.
- *
- * Enter the existence of the kmp_task_team_t struct. It employs a reference
- * counting mechanism, and is allocated by the master thread before calling
- * __kmp_<barrier_type>_release, and then is released by the last thread to
- * exit __kmp_<barrier_type>_release at the next barrier. I.e. the lifetimes
- * of the kmp_task_team_t structs for consecutive barriers can overlap
- * (and will, unless the master thread is the last thread to exit the barrier
- * release phase, which is not typical).
- *
- * The existence of such a struct is useful outside the context of tasking,
- * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
- * so that any performance differences show up when comparing the 2.5 vs. 3.0
- * libraries.
- *
- * We currently use the existence of the threads array as an indicator that
- * tasks were spawned since the last barrier. If the structure is to be
- * useful outside the context of tasking, then this will have to change, but
- * not setting the field minimizes the performance impact of tasking on
- * barriers, when no explicit tasks were spawned (pushed, actually).
- */
-
-static kmp_task_team_t *__kmp_free_task_teams =
-    NULL; // Free list for task_team data structures
-// Lock for task team data structures
-kmp_bootstrap_lock_t __kmp_task_team_lock =
-    KMP_BOOTSTRAP_LOCK_INITIALIZER(__kmp_task_team_lock);
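The free list kept behind __kmp_task_team_lock is a plain intrusive singly
linked list: retired task teams are pushed at the head, and the allocator
below pops from it before falling back to a fresh allocation. A small sketch
of that reuse pattern with stand-in names (the real code uses __kmp_allocate,
TCW_PTR and a bootstrap lock rather than new, plain stores and std::mutex):

    #include <mutex>

    struct task_team_like {
      task_team_like *next = nullptr; // plays the role of tt.tt_next
      // ... per-team tasking state would live here ...
    };

    static task_team_like *free_list = nullptr; // like __kmp_free_task_teams
    static std::mutex free_list_lock;           // like __kmp_task_team_lock

    // Take a recycled structure if one is available, else allocate fresh.
    task_team_like *acquire_task_team() {
      {
        std::lock_guard<std::mutex> g(free_list_lock);
        if (free_list) {
          task_team_like *tt = free_list;
          free_list = tt->next;
          tt->next = nullptr;
          return tt; // caller re-initializes the per-use fields
        }
      }
      return new task_team_like();
    }

    // Push a retired structure onto the free list for a later region.
    void release_task_team(task_team_like *tt) {
      std::lock_guard<std::mutex> g(free_list_lock);
      tt->next = free_list;
      free_list = tt;
    }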
-
-// __kmp_alloc_task_deque:
-// Allocates a task deque for a particular thread, and initializes the
-// necessary data structures relating to the deque. This only happens once
-// per thread per task team since task teams are recycled. No lock is needed
-// during allocation since each thread allocates its own deque.
-static void __kmp_alloc_task_deque(kmp_info_t *thread,
-                                   kmp_thread_data_t *thread_data) {
-  __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
-  KMP_DEBUG_ASSERT(thread_data->td.td_deque == NULL);
-
-  // Initialize last stolen task field to "none"
-  thread_data->td.td_deque_last_stolen = -1;
-
-  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == 0);
-  KMP_DEBUG_ASSERT(thread_data->td.td_deque_head == 0);
-  KMP_DEBUG_ASSERT(thread_data->td.td_deque_tail == 0);
-
-  KE_TRACE(
-      10,
-      ("__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
-       __kmp_gtid_from_thread(thread), INITIAL_TASK_DEQUE_SIZE, thread_data));
-  // Allocate space for task deque, and zero the deque
-  // Cannot use __kmp_thread_calloc() because threads not around for
-  // kmp_reap_task_team( ).
-  thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
-      INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
-  thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
-}
-
-// __kmp_free_task_deque:
-// Deallocates a task deque for a particular thread. Happens at library
-// deallocation so don't need to reset all thread data fields.
-static void __kmp_free_task_deque(kmp_thread_data_t *thread_data) {
-  if (thread_data->td.td_deque != NULL) {
-    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
-    TCW_4(thread_data->td.td_deque_ntasks, 0);
-    __kmp_free(thread_data->td.td_deque);
-    thread_data->td.td_deque = NULL;
-    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
-  }
-
-#ifdef BUILD_TIED_TASK_STACK
-  // GEH: Figure out what to do here for td_susp_tied_tasks
-  if (thread_data->td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY) {
-    __kmp_free_task_stack(__kmp_thread_from_gtid(gtid), thread_data);
-  }
-#endif // BUILD_TIED_TASK_STACK
-}
-
-// __kmp_realloc_task_threads_data:
-// Allocates a threads_data array for a task team, either by allocating an
-// initial array or enlarging an existing array. Only the first thread to get
-// the lock allocates or enlarges the array and re-initializes the array
-// elements. That thread returns "TRUE", the rest return "FALSE".
-// Assumes that the new array size is given by task_team -> tt.tt_nproc.
-// The current size is given by task_team -> tt.tt_max_threads.
-static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
-                                           kmp_task_team_t *task_team) {
-  kmp_thread_data_t **threads_data_p;
-  kmp_int32 nthreads, maxthreads;
-  int is_init_thread = FALSE;
-
-  if (TCR_4(task_team->tt.tt_found_tasks)) {
-    // Already reallocated and initialized.
- return FALSE; - } - - threads_data_p = &task_team->tt.tt_threads_data; - nthreads = task_team->tt.tt_nproc; - maxthreads = task_team->tt.tt_max_threads; - - // All threads must lock when they encounter the first task of the implicit - // task region to make sure threads_data fields are (re)initialized before - // used. - __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock); - - if (!TCR_4(task_team->tt.tt_found_tasks)) { - // first thread to enable tasking - kmp_team_t *team = thread->th.th_team; - int i; - - is_init_thread = TRUE; - if (maxthreads < nthreads) { - - if (*threads_data_p != NULL) { - kmp_thread_data_t *old_data = *threads_data_p; - kmp_thread_data_t *new_data = NULL; - - KE_TRACE( - 10, - ("__kmp_realloc_task_threads_data: T#%d reallocating " - "threads data for task_team %p, new_size = %d, old_size = %d\n", - __kmp_gtid_from_thread(thread), task_team, nthreads, maxthreads)); - // Reallocate threads_data to have more elements than current array - // Cannot use __kmp_thread_realloc() because threads not around for - // kmp_reap_task_team( ). Note all new array entries are initialized - // to zero by __kmp_allocate(). - new_data = (kmp_thread_data_t *)__kmp_allocate( - nthreads * sizeof(kmp_thread_data_t)); - // copy old data to new data - KMP_MEMCPY_S((void *)new_data, nthreads * sizeof(kmp_thread_data_t), - (void *)old_data, maxthreads * sizeof(kmp_thread_data_t)); - -#ifdef BUILD_TIED_TASK_STACK - // GEH: Figure out if this is the right thing to do - for (i = maxthreads; i < nthreads; i++) { - kmp_thread_data_t *thread_data = &(*threads_data_p)[i]; - __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data); - } -#endif // BUILD_TIED_TASK_STACK - // Install the new data and free the old data - (*threads_data_p) = new_data; - __kmp_free(old_data); - } else { - KE_TRACE(10, ("__kmp_realloc_task_threads_data: T#%d allocating " - "threads data for task_team %p, size = %d\n", - __kmp_gtid_from_thread(thread), task_team, nthreads)); - // Make the initial allocate for threads_data array, and zero entries - // Cannot use __kmp_thread_calloc() because threads not around for - // kmp_reap_task_team( ). - ANNOTATE_IGNORE_WRITES_BEGIN(); - *threads_data_p = (kmp_thread_data_t *)__kmp_allocate( - nthreads * sizeof(kmp_thread_data_t)); - ANNOTATE_IGNORE_WRITES_END(); -#ifdef BUILD_TIED_TASK_STACK - // GEH: Figure out if this is the right thing to do - for (i = 0; i < nthreads; i++) { - kmp_thread_data_t *thread_data = &(*threads_data_p)[i]; - __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data); - } -#endif // BUILD_TIED_TASK_STACK - } - task_team->tt.tt_max_threads = nthreads; - } else { - // If array has (more than) enough elements, go ahead and use it - KMP_DEBUG_ASSERT(*threads_data_p != NULL); - } - - // initialize threads_data pointers back to thread_info structures - for (i = 0; i < nthreads; i++) { - kmp_thread_data_t *thread_data = &(*threads_data_p)[i]; - thread_data->td.td_thr = team->t.t_threads[i]; - - if (thread_data->td.td_deque_last_stolen >= nthreads) { - // The last stolen field survives across teams / barrier, and the number - // of threads may have changed. It's possible (likely?) that a new - // parallel region will exhibit the same behavior as previous region. 
- thread_data->td.td_deque_last_stolen = -1; - } - } - - KMP_MB(); - TCW_SYNC_4(task_team->tt.tt_found_tasks, TRUE); - } - - __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock); - return is_init_thread; -} - -// __kmp_free_task_threads_data: -// Deallocates a threads_data array for a task team, including any attached -// tasking deques. Only occurs at library shutdown. -static void __kmp_free_task_threads_data(kmp_task_team_t *task_team) { - __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock); - if (task_team->tt.tt_threads_data != NULL) { - int i; - for (i = 0; i < task_team->tt.tt_max_threads; i++) { - __kmp_free_task_deque(&task_team->tt.tt_threads_data[i]); - } - __kmp_free(task_team->tt.tt_threads_data); - task_team->tt.tt_threads_data = NULL; - } - __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock); -} - -// __kmp_allocate_task_team: -// Allocates a task team associated with a specific team, taking it from -// the global task team free list if possible. Also initializes data -// structures. -static kmp_task_team_t *__kmp_allocate_task_team(kmp_info_t *thread, - kmp_team_t *team) { - kmp_task_team_t *task_team = NULL; - int nthreads; - - KA_TRACE(20, ("__kmp_allocate_task_team: T#%d entering; team = %p\n", - (thread ? __kmp_gtid_from_thread(thread) : -1), team)); - - if (TCR_PTR(__kmp_free_task_teams) != NULL) { - // Take a task team from the task team pool - __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock); - if (__kmp_free_task_teams != NULL) { - task_team = __kmp_free_task_teams; - TCW_PTR(__kmp_free_task_teams, task_team->tt.tt_next); - task_team->tt.tt_next = NULL; - } - __kmp_release_bootstrap_lock(&__kmp_task_team_lock); - } - - if (task_team == NULL) { - KE_TRACE(10, ("__kmp_allocate_task_team: T#%d allocating " - "task team for team %p\n", - __kmp_gtid_from_thread(thread), team)); - // Allocate a new task team if one is not available. - // Cannot use __kmp_thread_malloc() because threads not around for - // kmp_reap_task_team( ). - task_team = (kmp_task_team_t *)__kmp_allocate(sizeof(kmp_task_team_t)); - __kmp_init_bootstrap_lock(&task_team->tt.tt_threads_lock); - // AC: __kmp_allocate zeroes returned memory - // task_team -> tt.tt_threads_data = NULL; - // task_team -> tt.tt_max_threads = 0; - // task_team -> tt.tt_next = NULL; - } - - TCW_4(task_team->tt.tt_found_tasks, FALSE); -#if OMP_45_ENABLED - TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE); -#endif - task_team->tt.tt_nproc = nthreads = team->t.t_nproc; - - KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads, nthreads); - TCW_4(task_team->tt.tt_active, TRUE); - - KA_TRACE(20, ("__kmp_allocate_task_team: T#%d exiting; task_team = %p " - "unfinished_threads init'd to %d\n", - (thread ? __kmp_gtid_from_thread(thread) : -1), task_team, - KMP_ATOMIC_LD_RLX(&task_team->tt.tt_unfinished_threads))); - return task_team; -} - -// __kmp_free_task_team: -// Frees the task team associated with a specific thread, and adds it -// to the global task team free list. -void __kmp_free_task_team(kmp_info_t *thread, kmp_task_team_t *task_team) { - KA_TRACE(20, ("__kmp_free_task_team: T#%d task_team = %p\n", - thread ? 
__kmp_gtid_from_thread(thread) : -1, task_team));
-
-  // Put task team back on free list
-  __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
-
-  KMP_DEBUG_ASSERT(task_team->tt.tt_next == NULL);
-  task_team->tt.tt_next = __kmp_free_task_teams;
-  TCW_PTR(__kmp_free_task_teams, task_team);
-
-  __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
-}
-
-// __kmp_reap_task_teams:
-// Free all the task teams on the task team free list.
-// Should only be done during library shutdown.
-// Cannot do anything that needs a thread structure or gtid since they are
-// already gone.
-void __kmp_reap_task_teams(void) {
-  kmp_task_team_t *task_team;
-
-  if (TCR_PTR(__kmp_free_task_teams) != NULL) {
-    // Free all task_teams on the free list
-    __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
-    while ((task_team = __kmp_free_task_teams) != NULL) {
-      __kmp_free_task_teams = task_team->tt.tt_next;
-      task_team->tt.tt_next = NULL;
-
-      // Free threads_data if necessary
-      if (task_team->tt.tt_threads_data != NULL) {
-        __kmp_free_task_threads_data(task_team);
-      }
-      __kmp_free(task_team);
-    }
-    __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
-  }
-}
-
-// __kmp_wait_to_unref_task_teams:
-// Some threads could still be in the fork barrier release code, possibly
-// trying to steal tasks. Wait for each thread to unreference its task team.
-void __kmp_wait_to_unref_task_teams(void) {
-  kmp_info_t *thread;
-  kmp_uint32 spins;
-  int done;
-
-  KMP_INIT_YIELD(spins);
-
-  for (;;) {
-    done = TRUE;
-
-    // TODO: GEH - this may be wrong because some sync would be necessary
-    // in case threads are added to the pool during the traversal. Need to
-    // verify that lock for thread pool is held when calling this routine.
-    for (thread = CCAST(kmp_info_t *, __kmp_thread_pool); thread != NULL;
-         thread = thread->th.th_next_pool) {
-#if KMP_OS_WINDOWS
-      DWORD exit_val;
-#endif
-      if (TCR_PTR(thread->th.th_task_team) == NULL) {
-        KA_TRACE(10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
-                      __kmp_gtid_from_thread(thread)));
-        continue;
-      }
-#if KMP_OS_WINDOWS
-      // TODO: GEH - add this check for Linux* OS / OS X* as well?
-      if (!__kmp_is_thread_alive(thread, &exit_val)) {
-        thread->th.th_task_team = NULL;
-        continue;
-      }
-#endif
-
-      done = FALSE; // Because th_task_team pointer is not NULL for this thread
-
-      KA_TRACE(10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to "
-                    "unreference task_team\n",
-                    __kmp_gtid_from_thread(thread)));
-
-      if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
-        volatile void *sleep_loc;
-        // If the thread is sleeping, awaken it.
-        if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
-            NULL) {
-          KA_TRACE(
-              10,
-              ("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
-               __kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread)));
-          __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
-        }
-      }
-    }
-    if (done) {
-      break;
-    }
-
-    // If we are oversubscribed, or have waited a bit (and library mode is
-    // throughput), yield. Pause is in the following code.
-    KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
-    KMP_YIELD_SPIN(spins); // Yields only if KMP_LIBRARY=throughput
-  }
-}
-
-// __kmp_task_team_setup: Create a task_team for the current team, but use
-// an already created, unused one if it already exists.
-void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team, int always) {
-  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
-
-  // If this task_team hasn't been created yet, allocate it.
It will be used in - // the region after the next. - // If it exists, it is the current task team and shouldn't be touched yet as - // it may still be in use. - if (team->t.t_task_team[this_thr->th.th_task_state] == NULL && - (always || team->t.t_nproc > 1)) { - team->t.t_task_team[this_thr->th.th_task_state] = - __kmp_allocate_task_team(this_thr, team); - KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p " - "for team %d at parity=%d\n", - __kmp_gtid_from_thread(this_thr), - team->t.t_task_team[this_thr->th.th_task_state], - ((team != NULL) ? team->t.t_id : -1), - this_thr->th.th_task_state)); - } - - // After threads exit the release, they will call sync, and then point to this - // other task_team; make sure it is allocated and properly initialized. As - // threads spin in the barrier release phase, they will continue to use the - // previous task_team struct(above), until they receive the signal to stop - // checking for tasks (they can't safely reference the kmp_team_t struct, - // which could be reallocated by the master thread). No task teams are formed - // for serialized teams. - if (team->t.t_nproc > 1) { - int other_team = 1 - this_thr->th.th_task_state; - if (team->t.t_task_team[other_team] == NULL) { // setup other team as well - team->t.t_task_team[other_team] = - __kmp_allocate_task_team(this_thr, team); - KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new " - "task_team %p for team %d at parity=%d\n", - __kmp_gtid_from_thread(this_thr), - team->t.t_task_team[other_team], - ((team != NULL) ? team->t.t_id : -1), other_team)); - } else { // Leave the old task team struct in place for the upcoming region; - // adjust as needed - kmp_task_team_t *task_team = team->t.t_task_team[other_team]; - if (!task_team->tt.tt_active || - team->t.t_nproc != task_team->tt.tt_nproc) { - TCW_4(task_team->tt.tt_nproc, team->t.t_nproc); - TCW_4(task_team->tt.tt_found_tasks, FALSE); -#if OMP_45_ENABLED - TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE); -#endif - KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads, - team->t.t_nproc); - TCW_4(task_team->tt.tt_active, TRUE); - } - // if team size has changed, the first thread to enable tasking will - // realloc threads_data if necessary - KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team " - "%p for team %d at parity=%d\n", - __kmp_gtid_from_thread(this_thr), - team->t.t_task_team[other_team], - ((team != NULL) ? team->t.t_id : -1), other_team)); - } - } -} - -// __kmp_task_team_sync: Propagation of task team data from team to threads -// which happens just after the release phase of a team barrier. This may be -// called by any thread, but only for teams with # threads > 1. -void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team) { - KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec); - - // Toggle the th_task_state field, to switch which task_team this thread - // refers to - this_thr->th.th_task_state = 1 - this_thr->th.th_task_state; - // It is now safe to propagate the task team pointer from the team struct to - // the current thread. - TCW_PTR(this_thr->th.th_task_team, - team->t.t_task_team[this_thr->th.th_task_state]); - KA_TRACE(20, - ("__kmp_task_team_sync: Thread T#%d task team switched to task_team " - "%p from Team #%d (parity=%d)\n", - __kmp_gtid_from_thread(this_thr), this_thr->th.th_task_team, - ((team != NULL) ? 
team->t.t_id : -1), this_thr->th.th_task_state));
-}
-
-// __kmp_task_team_wait: Master thread waits for outstanding tasks after the
-// barrier gather phase. Only called by master thread if #threads in team > 1
-// or if proxy tasks were created.
-//
-// wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off
-// by passing in 0 optionally as the last argument. When wait is zero, master
-// thread does not wait for unfinished_threads to reach 0.
-void __kmp_task_team_wait(
-    kmp_info_t *this_thr,
-    kmp_team_t *team USE_ITT_BUILD_ARG(void *itt_sync_obj), int wait) {
-  kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
-
-  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
-  KMP_DEBUG_ASSERT(task_team == this_thr->th.th_task_team);
-
-  if ((task_team != NULL) && KMP_TASKING_ENABLED(task_team)) {
-    if (wait) {
-      KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks "
-                    "(for unfinished_threads to reach 0) on task_team = %p\n",
-                    __kmp_gtid_from_thread(this_thr), task_team));
-      // Worker threads may have dropped through to release phase, but could
-      // still be executing tasks. Wait here for tasks to complete. To avoid
-      // memory contention, only master thread checks termination condition.
-      kmp_flag_32 flag(RCAST(std::atomic<kmp_uint32> *,
-                             &task_team->tt.tt_unfinished_threads),
-                       0U);
-      flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
-    }
-    // Deactivate the old task team, so that the worker threads will stop
-    // referencing it while spinning.
-    KA_TRACE(
-        20,
-        ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: "
-         "setting active to false, setting local and team's pointer to NULL\n",
-         __kmp_gtid_from_thread(this_thr), task_team));
-#if OMP_45_ENABLED
-    KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1 ||
-                     task_team->tt.tt_found_proxy_tasks == TRUE);
-    TCW_SYNC_4(task_team->tt.tt_found_proxy_tasks, FALSE);
-#else
-    KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1);
-#endif
-    KMP_CHECK_UPDATE(task_team->tt.tt_untied_task_encountered, 0);
-    TCW_SYNC_4(task_team->tt.tt_active, FALSE);
-    KMP_MB();
-
-    TCW_PTR(this_thr->th.th_task_team, NULL);
-  }
-}
-
-// __kmp_tasking_barrier:
-// This routine may only be called when __kmp_tasking_mode ==
-// tskm_extra_barrier. Internal function to execute all tasks prior to a
-// regular barrier or a join barrier. It is a full barrier itself, which
-// unfortunately turns regular barriers into double barriers and join
-// barriers into 1 1/2 barriers.
-void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, int gtid) {
-  std::atomic<kmp_uint32> *spin = RCAST(
-      std::atomic<kmp_uint32> *,
-      &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads);
-  int flag = FALSE;
-  KMP_DEBUG_ASSERT(__kmp_tasking_mode == tskm_extra_barrier);
-
-#if USE_ITT_BUILD
-  KMP_FSYNC_SPIN_INIT(spin, NULL);
-#endif /* USE_ITT_BUILD */
-  kmp_flag_32 spin_flag(spin, 0U);
-  while (!spin_flag.execute_tasks(thread, gtid, TRUE,
-                                  &flag USE_ITT_BUILD_ARG(NULL), 0)) {
-#if USE_ITT_BUILD
-    // TODO: What about itt_sync_obj??
- KMP_FSYNC_SPIN_PREPARE(RCAST(void *, spin)); -#endif /* USE_ITT_BUILD */ - - if (TCR_4(__kmp_global.g.g_done)) { - if (__kmp_global.g.g_abort) - __kmp_abort_thread(); - break; - } - KMP_YIELD(TRUE); // GH: We always yield here - } -#if USE_ITT_BUILD - KMP_FSYNC_SPIN_ACQUIRED(RCAST(void *, spin)); -#endif /* USE_ITT_BUILD */ -} - -#if OMP_45_ENABLED - -// __kmp_give_task puts a task into a given thread queue if: -// - the queue for that thread was created -// - there's space in that queue -// Because of this, __kmp_push_task needs to check if there's space after -// getting the lock -static bool __kmp_give_task(kmp_info_t *thread, kmp_int32 tid, kmp_task_t *task, - kmp_int32 pass) { - kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); - kmp_task_team_t *task_team = taskdata->td_task_team; - - KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n", - taskdata, tid)); - - // If task_team is NULL something went really bad... - KMP_DEBUG_ASSERT(task_team != NULL); - - bool result = false; - kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid]; - - if (thread_data->td.td_deque == NULL) { - // There's no queue in this thread, go find another one - // We're guaranteed that at least one thread has a queue - KA_TRACE(30, - ("__kmp_give_task: thread %d has no queue while giving task %p.\n", - tid, taskdata)); - return result; - } - - if (TCR_4(thread_data->td.td_deque_ntasks) >= - TASK_DEQUE_SIZE(thread_data->td)) { - KA_TRACE( - 30, - ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", - taskdata, tid)); - - // if this deque is bigger than the pass ratio give a chance to another - // thread - if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass) - return result; - - __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock); - __kmp_realloc_task_deque(thread, thread_data); - - } else { - - __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock); - - if (TCR_4(thread_data->td.td_deque_ntasks) >= - TASK_DEQUE_SIZE(thread_data->td)) { - KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to " - "thread %d.\n", - taskdata, tid)); - - // if this deque is bigger than the pass ratio give a chance to another - // thread - if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass) - goto release_and_exit; - - __kmp_realloc_task_deque(thread, thread_data); - } - } - - // lock is held here, and there is space in the deque - - thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata; - // Wrap index. - thread_data->td.td_deque_tail = - (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td); - TCW_4(thread_data->td.td_deque_ntasks, - TCR_4(thread_data->td.td_deque_ntasks) + 1); - - result = true; - KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n", - taskdata, tid)); - -release_and_exit: - __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock); - - return result; -} - -/* The finish of the proxy tasks is divided in two pieces: - - the top half is the one that can be done from a thread outside the team - - the bottom half must be run from a thread within the team - - In order to run the bottom half the task gets queued back into one of the - threads of the team. Once the td_incomplete_child_task counter of the parent - is decremented the threads can leave the barriers. So, the bottom half needs - to be queued before the counter is decremented. 
The top half is therefore
-   divided into two parts:
-   - things that can be run before queuing the bottom half
-   - things that must be run after queuing the bottom half
-
-   This creates a second race as the bottom half can free the task before the
-   second top half is executed. To avoid this we use the
-   td_incomplete_child_task of the proxy task to synchronize the top and bottom
-   half. */
-static void __kmp_first_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
-  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
-  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
-  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
-  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
-
-  taskdata->td_flags.complete = 1; // mark the task as completed
-
-  if (taskdata->td_taskgroup)
-    KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
-
-  // Create an imaginary child for this task so the bottom half cannot
-  // release the task before we have completed the second top half
-  KMP_ATOMIC_INC(&taskdata->td_incomplete_child_tasks);
-}
-
-static void __kmp_second_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
-  kmp_int32 children = 0;
-
-  // Predecrement simulated by "- 1" calculation
-  children =
-      KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks) - 1;
-  KMP_DEBUG_ASSERT(children >= 0);
-
-  // Remove the imaginary child
-  KMP_ATOMIC_DEC(&taskdata->td_incomplete_child_tasks);
-}
-
-static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask) {
-  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
-  kmp_info_t *thread = __kmp_threads[gtid];
-
-  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
-  KMP_DEBUG_ASSERT(taskdata->td_flags.complete ==
-                   1); // top half must run before bottom half
-
-  // We need to wait to make sure the top half is finished
-  // Spinning here should be ok as this should happen quickly
-  while (KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) > 0)
-    ;
-
-  __kmp_release_deps(gtid, taskdata);
-  __kmp_free_task_and_ancestors(gtid, taskdata, thread);
-}
-
-/*!
-@ingroup TASKING
-@param gtid Global Thread ID of encountering thread
-@param ptask Task whose execution is completed
-
-Execute the completion of a proxy task from a thread that is part of the
-team. Run the top and bottom halves directly.
-*/
-void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask) {
-  KMP_DEBUG_ASSERT(ptask != NULL);
-  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
-  KA_TRACE(
-      10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n",
-           gtid, taskdata));
-
-  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
-
-  __kmp_first_top_half_finish_proxy(taskdata);
-  __kmp_second_top_half_finish_proxy(taskdata);
-  __kmp_bottom_half_finish_proxy(gtid, ptask);
-
-  KA_TRACE(10,
-           ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n",
-            gtid, taskdata));
-}
-
-/*!
-@ingroup TASKING
-@param ptask Task whose execution is completed
-
-Execute the completion of a proxy task from a thread that need not belong to
-the team.
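A typical caller is a device runtime's completion callback running on a
thread outside the team; a sketch of that shape, where device_submit and
offload_on_complete are hypothetical names, not part of this interface:

    // Runs on a non-OpenMP thread once the offloaded work has finished.
    static void offload_on_complete(void *data) {
      __kmpc_proxy_task_completed_ooo((kmp_task_t *)data);
    }

    // At submission time the proxy task would be handed to the device
    // runtime together with the callback:
    //   device_submit(work, offload_on_complete, ptask);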
-*/ -void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask) { - KMP_DEBUG_ASSERT(ptask != NULL); - kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask); - - KA_TRACE( - 10, - ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", - taskdata)); - - KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY); - - __kmp_first_top_half_finish_proxy(taskdata); - - // Enqueue task to complete bottom half completion from a thread within the - // corresponding team - kmp_team_t *team = taskdata->td_team; - kmp_int32 nthreads = team->t.t_nproc; - kmp_info_t *thread; - - // This should be similar to start_k = __kmp_get_random( thread ) % nthreads - // but we cannot use __kmp_get_random here - kmp_int32 start_k = 0; - kmp_int32 pass = 1; - kmp_int32 k = start_k; - - do { - // For now we're just linearly trying to find a thread - thread = team->t.t_threads[k]; - k = (k + 1) % nthreads; - - // we did a full pass through all the threads - if (k == start_k) - pass = pass << 1; - - } while (!__kmp_give_task(thread, k, ptask, pass)); - - __kmp_second_top_half_finish_proxy(taskdata); - - KA_TRACE( - 10, - ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", - taskdata)); -} - -// __kmp_task_dup_alloc: Allocate the taskdata and make a copy of source task -// for taskloop -// -// thread: allocating thread -// task_src: pointer to source task to be duplicated -// returns: a pointer to the allocated kmp_task_t structure (task). -kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src) { - kmp_task_t *task; - kmp_taskdata_t *taskdata; - kmp_taskdata_t *taskdata_src; - kmp_taskdata_t *parent_task = thread->th.th_current_task; - size_t shareds_offset; - size_t task_size; - - KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread, - task_src)); - taskdata_src = KMP_TASK_TO_TASKDATA(task_src); - KMP_DEBUG_ASSERT(taskdata_src->td_flags.proxy == - TASK_FULL); // it should not be proxy task - KMP_DEBUG_ASSERT(taskdata_src->td_flags.tasktype == TASK_EXPLICIT); - task_size = taskdata_src->td_size_alloc; - - // Allocate a kmp_taskdata_t block and a kmp_task_t block. 
- KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread, - task_size)); -#if USE_FAST_MEMORY - taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, task_size); -#else - taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, task_size); -#endif /* USE_FAST_MEMORY */ - KMP_MEMCPY(taskdata, taskdata_src, task_size); - - task = KMP_TASKDATA_TO_TASK(taskdata); - - // Initialize new task (only specific fields not affected by memcpy) - taskdata->td_task_id = KMP_GEN_TASK_ID(); - if (task->shareds != NULL) { // need setup shareds pointer - shareds_offset = (char *)task_src->shareds - (char *)taskdata_src; - task->shareds = &((char *)taskdata)[shareds_offset]; - KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) == - 0); - } - taskdata->td_alloc_thread = thread; - taskdata->td_parent = parent_task; - taskdata->td_taskgroup = - parent_task - ->td_taskgroup; // task inherits the taskgroup from the parent task - - // Only need to keep track of child task counts if team parallel and tasking - // not serialized - if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) { - KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks); - if (parent_task->td_taskgroup) - KMP_ATOMIC_INC(&parent_task->td_taskgroup->count); - // Only need to keep track of allocated child tasks for explicit tasks since - // implicit not deallocated - if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT) - KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks); - } - - KA_TRACE(20, - ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n", - thread, taskdata, taskdata->td_parent)); -#if OMPT_SUPPORT - if (UNLIKELY(ompt_enabled.enabled)) - __ompt_task_init(taskdata, thread->th.th_info.ds.ds_gtid); -#endif - return task; -} - -// Routine optionally generated by the compiler for setting the lastprivate flag -// and calling needed constructors for private/firstprivate objects -// (used to form taskloop tasks from pattern task) -// Parameters: dest task, src task, lastprivate flag. -typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32); - -KMP_BUILD_ASSERT(sizeof(long) == 4 || sizeof(long) == 8); - -// class to encapsulate manipulating loop bounds in a taskloop task. -// this abstracts away the Intel vs GOMP taskloop interface for setting/getting -// the loop bound variables. 
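// A standalone sketch of the shareds fix-up performed by __kmp_task_dup_alloc
// above: after memcpy-ing the whole allocation, an interior pointer in the
// copy still points into the source block and must be rebased by re-applying
// its byte offset to the destination block. Types here are invented.
#include <cstdio>
#include <cstring>

struct blob {
  char payload[32];
  char *shareds; // interior pointer into payload of the SAME blob
};

int main() {
  blob src;
  std::strcpy(src.payload, "shared data");
  src.shareds = src.payload;

  blob dst;
  std::memcpy(&dst, &src, sizeof(blob)); // dst.shareds still points into src!

  size_t shareds_offset = (char *)src.shareds - (char *)&src;
  dst.shareds = (char *)&dst + shareds_offset; // rebase, as the runtime does

  std::printf("%s\n", dst.shareds); // now reads dst's own copy
}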
-class kmp_taskloop_bounds_t { - kmp_task_t *task; - const kmp_taskdata_t *taskdata; - size_t lower_offset; - size_t upper_offset; - -public: - kmp_taskloop_bounds_t(kmp_task_t *_task, kmp_uint64 *lb, kmp_uint64 *ub) - : task(_task), taskdata(KMP_TASK_TO_TASKDATA(task)), - lower_offset((char *)lb - (char *)task), - upper_offset((char *)ub - (char *)task) { - KMP_DEBUG_ASSERT((char *)lb > (char *)_task); - KMP_DEBUG_ASSERT((char *)ub > (char *)_task); - } - kmp_taskloop_bounds_t(kmp_task_t *_task, const kmp_taskloop_bounds_t &bounds) - : task(_task), taskdata(KMP_TASK_TO_TASKDATA(_task)), - lower_offset(bounds.lower_offset), upper_offset(bounds.upper_offset) {} - size_t get_lower_offset() const { return lower_offset; } - size_t get_upper_offset() const { return upper_offset; } - kmp_uint64 get_lb() const { - kmp_int64 retval; -#if defined(KMP_GOMP_COMPAT) - // Intel task just returns the lower bound normally - if (!taskdata->td_flags.native) { - retval = *(kmp_int64 *)((char *)task + lower_offset); - } else { - // GOMP task has to take into account the sizeof(long) - if (taskdata->td_size_loop_bounds == 4) { - kmp_int32 *lb = RCAST(kmp_int32 *, task->shareds); - retval = (kmp_int64)*lb; - } else { - kmp_int64 *lb = RCAST(kmp_int64 *, task->shareds); - retval = (kmp_int64)*lb; - } - } -#else - retval = *(kmp_int64 *)((char *)task + lower_offset); -#endif // defined(KMP_GOMP_COMPAT) - return retval; - } - kmp_uint64 get_ub() const { - kmp_int64 retval; -#if defined(KMP_GOMP_COMPAT) - // Intel task just returns the upper bound normally - if (!taskdata->td_flags.native) { - retval = *(kmp_int64 *)((char *)task + upper_offset); - } else { - // GOMP task has to take into account the sizeof(long) - if (taskdata->td_size_loop_bounds == 4) { - kmp_int32 *ub = RCAST(kmp_int32 *, task->shareds) + 1; - retval = (kmp_int64)*ub; - } else { - kmp_int64 *ub = RCAST(kmp_int64 *, task->shareds) + 1; - retval = (kmp_int64)*ub; - } - } -#else - retval = *(kmp_int64 *)((char *)task + upper_offset); -#endif // defined(KMP_GOMP_COMPAT) - return retval; - } - void set_lb(kmp_uint64 lb) { -#if defined(KMP_GOMP_COMPAT) - // Intel task just sets the lower bound normally - if (!taskdata->td_flags.native) { - *(kmp_uint64 *)((char *)task + lower_offset) = lb; - } else { - // GOMP task has to take into account the sizeof(long) - if (taskdata->td_size_loop_bounds == 4) { - kmp_uint32 *lower = RCAST(kmp_uint32 *, task->shareds); - *lower = (kmp_uint32)lb; - } else { - kmp_uint64 *lower = RCAST(kmp_uint64 *, task->shareds); - *lower = (kmp_uint64)lb; - } - } -#else - *(kmp_uint64 *)((char *)task + lower_offset) = lb; -#endif // defined(KMP_GOMP_COMPAT) - } - void set_ub(kmp_uint64 ub) { -#if defined(KMP_GOMP_COMPAT) - // Intel task just sets the upper bound normally - if (!taskdata->td_flags.native) { - *(kmp_uint64 *)((char *)task + upper_offset) = ub; - } else { - // GOMP task has to take into account the sizeof(long) - if (taskdata->td_size_loop_bounds == 4) { - kmp_uint32 *upper = RCAST(kmp_uint32 *, task->shareds) + 1; - *upper = (kmp_uint32)ub; - } else { - kmp_uint64 *upper = RCAST(kmp_uint64 *, task->shareds) + 1; - *upper = (kmp_uint64)ub; - } - } -#else - *(kmp_uint64 *)((char *)task + upper_offset) = ub; -#endif // defined(KMP_GOMP_COMPAT) - } -}; - -// __kmp_taskloop_linear: Start tasks of the taskloop linearly -// -// loc Source location information -// gtid Global thread ID -// task Pattern task, exposes the loop iteration range -// lb Pointer to loop lower bound in task structure -// ub Pointer to loop upper 
bound in task structure
-// st         Loop stride
-// ub_glob    Global upper bound (used for lastprivate check)
-// num_tasks  Number of tasks to execute
-// grainsize  Number of loop iterations per task
-// extras     Number of chunks with grainsize+1 iterations
-// tc         Iterations count
-// task_dup   Task duplication routine
-// codeptr_ra Return address for OMPT events
-void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
-                           kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
-                           kmp_uint64 ub_glob, kmp_uint64 num_tasks,
-                           kmp_uint64 grainsize, kmp_uint64 extras,
-                           kmp_uint64 tc,
-#if OMPT_SUPPORT
-                           void *codeptr_ra,
-#endif
-                           void *task_dup) {
-  KMP_COUNT_BLOCK(OMP_TASKLOOP);
-  KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
-  p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
-  // compiler provides global bounds here
-  kmp_taskloop_bounds_t task_bounds(task, lb, ub);
-  kmp_uint64 lower = task_bounds.get_lb();
-  kmp_uint64 upper = task_bounds.get_ub();
-  kmp_uint64 i;
-  kmp_info_t *thread = __kmp_threads[gtid];
-  kmp_taskdata_t *current_task = thread->th.th_current_task;
-  kmp_task_t *next_task;
-  kmp_int32 lastpriv = 0;
-
-  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
-  KMP_DEBUG_ASSERT(num_tasks > extras);
-  KMP_DEBUG_ASSERT(num_tasks > 0);
-  KA_TRACE(20, ("__kmp_taskloop_linear: T#%d: %lld tasks, grainsize %lld, "
-                "extras %lld, i=%lld,%lld(%d)%lld, dup %p\n",
-                gtid, num_tasks, grainsize, extras, lower, upper, ub_glob, st,
-                task_dup));
-
-  // Launch num_tasks tasks, assign grainsize iterations to each task
-  for (i = 0; i < num_tasks; ++i) {
-    kmp_uint64 chunk_minus_1;
-    if (extras == 0) {
-      chunk_minus_1 = grainsize - 1;
-    } else {
-      chunk_minus_1 = grainsize;
-      --extras; // first extras iterations get bigger chunk (grainsize+1)
-    }
-    upper = lower + st * chunk_minus_1;
-    if (i == num_tasks - 1) {
-      // schedule the last task, set lastprivate flag if needed
-      if (st == 1) { // most common case
-        KMP_DEBUG_ASSERT(upper == *ub);
-        if (upper == ub_glob)
-          lastpriv = 1;
-      } else if (st > 0) { // positive loop stride
-        KMP_DEBUG_ASSERT((kmp_uint64)st > *ub - upper);
-        if ((kmp_uint64)st > ub_glob - upper)
-          lastpriv = 1;
-      } else { // negative loop stride
-        KMP_DEBUG_ASSERT(upper + st < *ub);
-        if (upper - ub_glob < (kmp_uint64)(-st))
-          lastpriv = 1;
-      }
-    }
-    next_task = __kmp_task_dup_alloc(thread, task); // allocate new task
-    kmp_taskdata_t *next_taskdata = KMP_TASK_TO_TASKDATA(next_task);
-    kmp_taskloop_bounds_t next_task_bounds =
-        kmp_taskloop_bounds_t(next_task, task_bounds);
-
-    // adjust task-specific bounds
-    next_task_bounds.set_lb(lower);
-    if (next_taskdata->td_flags.native) {
-      next_task_bounds.set_ub(upper + (st > 0 ? 1 : -1));
-    } else {
-      next_task_bounds.set_ub(upper);
-    }
-    if (ptask_dup != NULL) // set lastprivate flag, construct firstprivates, etc.
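// A standalone worked example of the grainsize/extras split used above: the
// first `extras` chunks get grainsize+1 iterations and the rest get exactly
// grainsize, so together the chunks cover precisely tc iterations. Stride 1
// is assumed to keep the sketch short.
#include <cassert>
#include <cstdio>

int main() {
  unsigned long long tc = 10, num_tasks = 3;
  unsigned long long grainsize = tc / num_tasks; // 3
  unsigned long long extras = tc % num_tasks;    // 1 -> chunks of 4, 3, 3
  assert(tc == num_tasks * grainsize + extras);  // same invariant as above

  unsigned long long lower = 0, covered = 0;
  for (unsigned long long i = 0; i < num_tasks; ++i) {
    unsigned long long chunk = grainsize + (extras ? (--extras, 1) : 0);
    unsigned long long upper = lower + chunk - 1;
    std::printf("task %llu: [%llu, %llu]\n", i, lower, upper);
    covered += chunk;
    lower = upper + 1;
  }
  assert(covered == tc); // nothing lost, nothing duplicated
}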
-      ptask_dup(next_task, task, lastpriv);
-    KA_TRACE(40,
-             ("__kmp_taskloop_linear: T#%d; task #%llu: task %p: lower %lld, "
-              "upper %lld stride %lld, (offsets %p %p)\n",
-              gtid, i, next_task, lower, upper, st,
-              next_task_bounds.get_lower_offset(),
-              next_task_bounds.get_upper_offset()));
-#if OMPT_SUPPORT
-    __kmp_omp_taskloop_task(NULL, gtid, next_task,
-                            codeptr_ra); // schedule new task
-#else
-    __kmp_omp_task(gtid, next_task, true); // schedule new task
-#endif
-    lower = upper + st; // adjust lower bound for the next iteration
-  }
-  // free the pattern task and exit
-  __kmp_task_start(gtid, task, current_task); // make internal bookkeeping
-  // do not execute the pattern task, just do internal bookkeeping
-  __kmp_task_finish(gtid, task, current_task);
-}
-
-// Structure to keep taskloop parameters for auxiliary task
-// kept in the shareds of the task structure.
-typedef struct __taskloop_params {
-  kmp_task_t *task;
-  kmp_uint64 *lb;
-  kmp_uint64 *ub;
-  void *task_dup;
-  kmp_int64 st;
-  kmp_uint64 ub_glob;
-  kmp_uint64 num_tasks;
-  kmp_uint64 grainsize;
-  kmp_uint64 extras;
-  kmp_uint64 tc;
-  kmp_uint64 num_t_min;
-#if OMPT_SUPPORT
-  void *codeptr_ra;
-#endif
-} __taskloop_params_t;
-
-void __kmp_taskloop_recur(ident_t *, int, kmp_task_t *, kmp_uint64 *,
-                          kmp_uint64 *, kmp_int64, kmp_uint64, kmp_uint64,
-                          kmp_uint64, kmp_uint64, kmp_uint64, kmp_uint64,
-#if OMPT_SUPPORT
-                          void *,
-#endif
-                          void *);
-
-// Execute part of the taskloop submitted as a task.
-int __kmp_taskloop_task(int gtid, void *ptask) {
-  __taskloop_params_t *p =
-      (__taskloop_params_t *)((kmp_task_t *)ptask)->shareds;
-  kmp_task_t *task = p->task;
-  kmp_uint64 *lb = p->lb;
-  kmp_uint64 *ub = p->ub;
-  void *task_dup = p->task_dup;
-  // p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
-  kmp_int64 st = p->st;
-  kmp_uint64 ub_glob = p->ub_glob;
-  kmp_uint64 num_tasks = p->num_tasks;
-  kmp_uint64 grainsize = p->grainsize;
-  kmp_uint64 extras = p->extras;
-  kmp_uint64 tc = p->tc;
-  kmp_uint64 num_t_min = p->num_t_min;
-#if OMPT_SUPPORT
-  void *codeptr_ra = p->codeptr_ra;
-#endif
-#if KMP_DEBUG
-  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
-  KMP_DEBUG_ASSERT(task != NULL);
-  KA_TRACE(20, ("__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize"
-                " %lld, extras %lld, i=%lld,%lld(%d), dup %p\n",
-                gtid, taskdata, num_tasks, grainsize, extras, *lb, *ub, st,
-                task_dup));
-#endif
-  KMP_DEBUG_ASSERT(num_tasks * 2 + 1 > num_t_min);
-  if (num_tasks > num_t_min)
-    __kmp_taskloop_recur(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
-                         grainsize, extras, tc, num_t_min,
-#if OMPT_SUPPORT
-                         codeptr_ra,
-#endif
-                         task_dup);
-  else
-    __kmp_taskloop_linear(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
-                          grainsize, extras, tc,
-#if OMPT_SUPPORT
-                          codeptr_ra,
-#endif
-                          task_dup);
-
-  KA_TRACE(40, ("__kmp_taskloop_task(exit): T#%d\n", gtid));
-  return 0;
-}
-
-// Schedule part of the taskloop as a task,
-// execute the rest of the taskloop.
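// The auxiliary task above smuggles its arguments through the task's shareds
// block. A standalone sketch of that pattern, with invented names: pack a
// parameter struct behind an opaque pointer, hand it to a callback with the
// fixed (int, void *) task-entry signature, and unpack on the other side.
#include <cstdio>

struct params {
  unsigned long long num_tasks, grainsize, extras;
};

// fixed-signature task entry, shaped like __kmp_taskloop_task(int, void *)
static int task_entry(int gtid, void *shareds) {
  params *p = (params *)shareds;
  std::printf("T#%d: %llu tasks, grainsize %llu, extras %llu\n", gtid,
              p->num_tasks, p->grainsize, p->extras);
  return 0;
}

int main() {
  params p = {3, 3, 1};
  task_entry(0, &p); // the runtime would defer this call to another thread
}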
-//
-// loc        Source location information
-// gtid       Global thread ID
-// task       Pattern task, exposes the loop iteration range
-// lb         Pointer to loop lower bound in task structure
-// ub         Pointer to loop upper bound in task structure
-// st         Loop stride
-// ub_glob    Global upper bound (used for lastprivate check)
-// num_tasks  Number of tasks to execute
-// grainsize  Number of loop iterations per task
-// extras     Number of chunks with grainsize+1 iterations
-// tc         Iterations count
-// num_t_min  Threshold to launch tasks recursively
-// task_dup   Task duplication routine
-// codeptr_ra Return address for OMPT events
-void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
-                          kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
-                          kmp_uint64 ub_glob, kmp_uint64 num_tasks,
-                          kmp_uint64 grainsize, kmp_uint64 extras,
-                          kmp_uint64 tc, kmp_uint64 num_t_min,
-#if OMPT_SUPPORT
-                          void *codeptr_ra,
-#endif
-                          void *task_dup) {
-#if KMP_DEBUG
-  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
-  KMP_DEBUG_ASSERT(task != NULL);
-  KMP_DEBUG_ASSERT(num_tasks > num_t_min);
-  KA_TRACE(20, ("__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize"
-                " %lld, extras %lld, i=%lld,%lld(%d), dup %p\n",
-                gtid, taskdata, num_tasks, grainsize, extras, *lb, *ub, st,
-                task_dup));
-#endif
-  p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
-  kmp_uint64 lower = *lb;
-  kmp_info_t *thread = __kmp_threads[gtid];
-  // kmp_taskdata_t *current_task = thread->th.th_current_task;
-  kmp_task_t *next_task;
-  size_t lower_offset =
-      (char *)lb - (char *)task; // remember offset of lb in the task structure
-  size_t upper_offset =
-      (char *)ub - (char *)task; // remember offset of ub in the task structure
-
-  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
-  KMP_DEBUG_ASSERT(num_tasks > extras);
-  KMP_DEBUG_ASSERT(num_tasks > 0);
-
-  // split the loop in two halves
-  kmp_uint64 lb1, ub0, tc0, tc1, ext0, ext1;
-  kmp_uint64 gr_size0 = grainsize;
-  kmp_uint64 n_tsk0 = num_tasks >> 1; // num_tasks/2 to execute
-  kmp_uint64 n_tsk1 = num_tasks - n_tsk0; // to schedule as a task
-  if (n_tsk0 <= extras) {
-    gr_size0++; // integrate extras into grainsize
-    ext0 = 0; // no extra iters in 1st half
-    ext1 = extras - n_tsk0; // remaining extras
-    tc0 = gr_size0 * n_tsk0;
-    tc1 = tc - tc0;
-  } else { // n_tsk0 > extras
-    ext1 = 0; // no extra iters in 2nd half
-    ext0 = extras;
-    tc1 = grainsize * n_tsk1;
-    tc0 = tc - tc1;
-  }
-  ub0 = lower + st * (tc0 - 1);
-  lb1 = ub0 + st;
-
-  // create pattern task for 2nd half of the loop
-  next_task = __kmp_task_dup_alloc(thread, task); // duplicate the task
-  // adjust lower bound (upper bound is not changed) for the 2nd half
-  *(kmp_uint64 *)((char *)next_task + lower_offset) = lb1;
-  if (ptask_dup != NULL) // construct firstprivates, etc.
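// A standalone check of the half-splitting arithmetic above: whichever
// branch is taken, the two halves must together cover exactly tc iterations
// and consume exactly `extras` oversized chunks between them.
#include <cassert>
#include <cstdio>

int main() {
  unsigned long long num_tasks = 7, grainsize = 4, extras = 3;
  unsigned long long tc = num_tasks * grainsize + extras; // 31

  unsigned long long gr_size0 = grainsize;
  unsigned long long n_tsk0 = num_tasks >> 1;     // 3 tasks executed now
  unsigned long long n_tsk1 = num_tasks - n_tsk0; // 4 tasks deferred
  unsigned long long tc0, tc1, ext0, ext1;
  if (n_tsk0 <= extras) { // here: 3 <= 3, 1st half absorbs the extras
    gr_size0++;
    ext0 = 0;
    ext1 = extras - n_tsk0;
    tc0 = gr_size0 * n_tsk0;
    tc1 = tc - tc0;
  } else {
    ext1 = 0;
    ext0 = extras;
    tc1 = grainsize * n_tsk1;
    tc0 = tc - tc1;
  }
  assert(tc0 + tc1 == tc);
  assert(tc0 == n_tsk0 * gr_size0 + ext0 && tc1 == n_tsk1 * grainsize + ext1);
  std::printf("1st half: %llu iters, 2nd half: %llu iters\n", tc0, tc1);
}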
- ptask_dup(next_task, task, 0); - *ub = ub0; // adjust upper bound for the 1st half - - // create auxiliary task for 2nd half of the loop - kmp_task_t *new_task = - __kmpc_omp_task_alloc(loc, gtid, 1, 3 * sizeof(void *), - sizeof(__taskloop_params_t), &__kmp_taskloop_task); - __taskloop_params_t *p = (__taskloop_params_t *)new_task->shareds; - p->task = next_task; - p->lb = (kmp_uint64 *)((char *)next_task + lower_offset); - p->ub = (kmp_uint64 *)((char *)next_task + upper_offset); - p->task_dup = task_dup; - p->st = st; - p->ub_glob = ub_glob; - p->num_tasks = n_tsk1; - p->grainsize = grainsize; - p->extras = ext1; - p->tc = tc1; - p->num_t_min = num_t_min; -#if OMPT_SUPPORT - p->codeptr_ra = codeptr_ra; -#endif - -#if OMPT_SUPPORT - // schedule new task with correct return address for OMPT events - __kmp_omp_taskloop_task(NULL, gtid, new_task, codeptr_ra); -#else - __kmp_omp_task(gtid, new_task, true); // schedule new task -#endif - - // execute the 1st half of current subrange - if (n_tsk0 > num_t_min) - __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, gr_size0, - ext0, tc0, num_t_min, -#if OMPT_SUPPORT - codeptr_ra, -#endif - task_dup); - else - __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, - gr_size0, ext0, tc0, -#if OMPT_SUPPORT - codeptr_ra, -#endif - task_dup); - - KA_TRACE(40, ("__kmpc_taskloop_recur(exit): T#%d\n", gtid)); -} - -/*! -@ingroup TASKING -@param loc Source location information -@param gtid Global thread ID -@param task Task structure -@param if_val Value of the if clause -@param lb Pointer to loop lower bound in task structure -@param ub Pointer to loop upper bound in task structure -@param st Loop stride -@param nogroup Flag, 1 if no taskgroup needs to be added, 0 otherwise -@param sched Schedule specified 0/1/2 for none/grainsize/num_tasks -@param grainsize Schedule value if specified -@param task_dup Tasks duplication routine - -Execute the taskloop construct. 
-*/ -void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, - kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, - int sched, kmp_uint64 grainsize, void *task_dup) { - kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); - KMP_DEBUG_ASSERT(task != NULL); - - if (nogroup == 0) { -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - __kmpc_taskgroup(loc, gtid); - } - - // ========================================================================= - // calculate loop parameters - kmp_taskloop_bounds_t task_bounds(task, lb, ub); - kmp_uint64 tc; - // compiler provides global bounds here - kmp_uint64 lower = task_bounds.get_lb(); - kmp_uint64 upper = task_bounds.get_ub(); - kmp_uint64 ub_glob = upper; // global upper used to calc lastprivate flag - kmp_uint64 num_tasks = 0, extras = 0; - kmp_uint64 num_tasks_min = __kmp_taskloop_min_tasks; - kmp_info_t *thread = __kmp_threads[gtid]; - kmp_taskdata_t *current_task = thread->th.th_current_task; - - KA_TRACE(20, ("__kmpc_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, " - "grain %llu(%d), dup %p\n", - gtid, taskdata, lower, upper, st, grainsize, sched, task_dup)); - - // compute trip count - if (st == 1) { // most common case - tc = upper - lower + 1; - } else if (st < 0) { - tc = (lower - upper) / (-st) + 1; - } else { // st > 0 - tc = (upper - lower) / st + 1; - } - if (tc == 0) { - KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d zero-trip loop\n", gtid)); - // free the pattern task and exit - __kmp_task_start(gtid, task, current_task); - // do not execute anything for zero-trip loop - __kmp_task_finish(gtid, task, current_task); - return; - } - -#if OMPT_SUPPORT && OMPT_OPTIONAL - ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); - ompt_task_info_t *task_info = __ompt_get_task_info_object(0); - if (ompt_enabled.ompt_callback_work) { - ompt_callbacks.ompt_callback(ompt_callback_work)( - ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data), - &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0)); - } -#endif - - if (num_tasks_min == 0) - // TODO: can we choose better default heuristic? 
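// The trip-count computation above, extracted into a standalone helper with
// a few spot checks for each stride case.
#include <cassert>

static unsigned long long trip_count(unsigned long long lower,
                                     unsigned long long upper, long long st) {
  if (st == 1) // most common case
    return upper - lower + 1;
  if (st < 0)
    return (lower - upper) / (unsigned long long)(-st) + 1;
  return (upper - lower) / (unsigned long long)st + 1;
}

int main() {
  assert(trip_count(0, 9, 1) == 10); // 0..9 step 1
  assert(trip_count(0, 9, 3) == 4);  // 0, 3, 6, 9
  assert(trip_count(9, 0, -2) == 5); // 9, 7, 5, 3, 1
}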
- num_tasks_min = - KMP_MIN(thread->th.th_team_nproc * 10, INITIAL_TASK_DEQUE_SIZE); - - // compute num_tasks/grainsize based on the input provided - switch (sched) { - case 0: // no schedule clause specified, we can choose the default - // let's try to schedule (team_size*10) tasks - grainsize = thread->th.th_team_nproc * 10; - case 2: // num_tasks provided - if (grainsize > tc) { - num_tasks = tc; // too big num_tasks requested, adjust values - grainsize = 1; - extras = 0; - } else { - num_tasks = grainsize; - grainsize = tc / num_tasks; - extras = tc % num_tasks; - } - break; - case 1: // grainsize provided - if (grainsize > tc) { - num_tasks = 1; // too big grainsize requested, adjust values - grainsize = tc; - extras = 0; - } else { - num_tasks = tc / grainsize; - // adjust grainsize for balanced distribution of iterations - grainsize = tc / num_tasks; - extras = tc % num_tasks; - } - break; - default: - KMP_ASSERT2(0, "unknown scheduling of taskloop"); - } - KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras); - KMP_DEBUG_ASSERT(num_tasks > extras); - KMP_DEBUG_ASSERT(num_tasks > 0); - // ========================================================================= - - // check if clause value first - // Also require GOMP_taskloop to reduce to linear (taskdata->td_flags.native) - if (if_val == 0) { // if(0) specified, mark task as serial - taskdata->td_flags.task_serial = 1; - taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied - // always start serial tasks linearly - __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks, - grainsize, extras, tc, -#if OMPT_SUPPORT - OMPT_GET_RETURN_ADDRESS(0), -#endif - task_dup); - // !taskdata->td_flags.native => currently force linear spawning of tasks - // for GOMP_taskloop - } else if (num_tasks > num_tasks_min && !taskdata->td_flags.native) { - KA_TRACE(20, ("__kmpc_taskloop: T#%d, go recursive: tc %llu, #tasks %llu" - "(%lld), grain %llu, extras %llu\n", - gtid, tc, num_tasks, num_tasks_min, grainsize, extras)); - __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks, - grainsize, extras, tc, num_tasks_min, -#if OMPT_SUPPORT - OMPT_GET_RETURN_ADDRESS(0), -#endif - task_dup); - } else { - KA_TRACE(20, ("__kmpc_taskloop: T#%d, go linear: tc %llu, #tasks %llu" - "(%lld), grain %llu, extras %llu\n", - gtid, tc, num_tasks, num_tasks_min, grainsize, extras)); - __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks, - grainsize, extras, tc, -#if OMPT_SUPPORT - OMPT_GET_RETURN_ADDRESS(0), -#endif - task_dup); - } - -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.ompt_callback_work) { - ompt_callbacks.ompt_callback(ompt_callback_work)( - ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data), - &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0)); - } -#endif - - if (nogroup == 0) { -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - __kmpc_end_taskgroup(loc, gtid); - } - KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid)); -} - -#endif Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_tasking.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/dllexports 
=================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/dllexports (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/dllexports (nonexistent) @@ -1,1215 +0,0 @@ -# -#//===----------------------------------------------------------------------===// -#// -#// The LLVM Compiler Infrastructure -#// -#// This file is dual licensed under the MIT and the University of Illinois Open -#// Source Licenses. See LICENSE.txt for details. -#// -#//===----------------------------------------------------------------------===// -# - -# Deprecated entry points (numbers are reserved): -- __kmpc_barrier_reduce_master 109 -- __kmpc_end_barrier_reduce_master 122 -- __kmpc_for_init_4 131 -- __kmpc_for_init_8 132 -- __kmpc_for_next_4 133 -- __kmpc_for_next_8 134 -- __kmpc_fork_call_bound 139 -- __kmpc_reduce_master_nowait 149 -- __kmpc_omp_task_begin 194 -- __kmpc_omp_task_complete 195 -- kmpc_sharable_calloc 218 -- kmpc_sharable_free 219 -- kmpc_sharable_malloc 220 -- kmpc_sharable_realloc 221 -- kmpc_aligned_sharable_malloc 223 -- mpai4a 500 -- mpai8a 501 -- mpar4a 502 -- mpar8a 503 -- mpax4x 504 -- mpax8x 505 -- mpobar 506 -- mpoebr 507 -- mpofork 508 -- mpofrk 509 -- mpojoin 510 -- mpoxbr 511 -- mppadj 512 -- mppaff 513 -- mppbar 514 -- mppbeg 515 -- mppdeo 516 -- mppdnx 517 -- mppdnxd 518 -- mppdon 519 -- mppdxo 520 -- mppebr 521 -- mppecs 522 -- mppems 523 -- mppenc 524 -- mppend 525 -- mppepa 526 -- mppesp 527 -- mppfkd 528 -- mppfkt 529 -- mppfork 530 -- mppfrk 531 -- mppioa 532 -- mppiws 533 -- mppjoin 534 -- mppnth 535 -- mpppqa 536 -- mpppqc 537 -- mpppqs 538 -- mpptid 539 -- mpptpa 540 -- mpptpc 541 -- mpptpz 542 -- mppvsy 543 -- mppxbr 544 -- mppxcs 545 -- mppxms 546 -- mppxnc 547 -- mppxpa 548 -- mppxpr 549 -- mppxsp 550 -- mppxth 551 -- mpsbar 552 -- mpscpr 597 -- mpsebr 553 -- mpserd 554 -- mpsfd4 555 -- mpsfd8 556 -- mpsid4 557 -- mpsid8 558 -- mpsnd4 559 -- mpsnd8 560 -- mpsont 561 -- mpsred 562 -- mpsunt 563 -- mpsxbr 564 -- mpsxrd 565 -- mptadj 566 -- mptaff 567 -- mptbar 568 -- mptdeo 569 -- mptdin 570 -- mptdind 571 -- mptdnx 572 -- mptdnxd 573 -- mptdon 574 -- mptdxo 575 -- mptebr 576 -- mptecs 577 -- mptems 578 -- mptenc 579 -- mptepa 580 -- mptesp 581 -- mptfkd 582 -- mptppa 583 -- mptppc 584 -- mptpps 585 -- mpttpa 586 -- mpttpc 587 -- mpttpz 588 -- mptvsy 589 -- mptxbr 590 -- mptxcs 591 -- mptxms 592 -- mptxnc 593 -- mptxpa 594 -- mptxsp 595 -- mppcpr 596 -- ftn_set_library_gang 736 -- kmp_set_library_gang -- kmp_sharable_calloc 760 -- kmp_sharable_free 761 -- kmp_sharable_malloc 762 -- kmp_sharable_realloc 763 -- kmp_aligned_sharable_malloc 764 -- kmp_deferred_atomic_add_i4 765 -- kmp_deferred_atomic_add_i8 766 -- kmp_deferred_atomic_add_r4 767 -- kmp_deferred_atomic_add_r8 768 -- kmp_lock_cond_wait 770 -- kmp_lock_cond_signal 771 -- kmp_lock_cond_broadcast 772 -- kmp_nest_lock_cond_wait 773 -- kmp_nest_lock_cond_signal 774 -- kmp_nest_lock_cond_broadcast 775 -- kmp_get_process_num 781 -- kmp_get_num_processes 782 -- kmp_get_process_thread_num 783 -- kmp_private_mmap 784 # not implemented? -- kmp_sharable_mmap 785 # not implemented? -- kmp_private_munmap 786 # not implemented? -- kmp_sharable_munmap 787 # not implemented? -- kmp_is_sharable 788 # not implemented? 
- -%ifndef stub - - - # - # The following entry points are added so that the backtraces from - # the tools contain meaningful names for all the functions that might - # appear in a backtrace of a thread which is blocked in the RTL. - # - - # Regular entry points - __kmp_wait_yield_4 - __kmp_fork_call - __kmp_invoke_microtask - %ifdef KMP_USE_MONITOR - __kmp_launch_monitor - __kmp_reap_monitor - %endif - __kmp_launch_worker - __kmp_reap_worker - __kmp_acquire_tas_lock - __kmp_acquire_nested_tas_lock - __kmp_acquire_ticket_lock - __kmp_acquire_nested_ticket_lock - __kmp_acquire_queuing_lock - __kmp_acquire_nested_queuing_lock - __kmp_acquire_drdpa_lock - __kmp_acquire_nested_drdpa_lock - - %ifdef KMP_DEBUG - # allows console output capability for applications those don't have it - __kmp_printf - %endif - - - %ifdef USE_DEBUGGER - __kmp_debugging DATA - __kmp_omp_debug_struct_info DATA - %endif - - # Symbols for MS mutual detection: - _You_must_link_with_exactly_one_OpenMP_library DATA - _You_must_link_with_Intel_OpenMP_library DATA - %ifdef msvc_compat - _You_must_link_with_Microsoft_OpenMP_library DATA - %endif - - __kmp_wait_64 - __kmp_release_64 - - -# VT_getthid 1 -# vtgthid 2 - - __kmpc_atomic_4 100 - __kmpc_atomic_8 101 - __kmpc_atomic_fixed4_add 102 - __kmpc_atomic_fixed8_add 103 - __kmpc_atomic_float4_add 104 - __kmpc_atomic_float8_add 105 - __kmpc_barrier 106 - __kmpc_barrier_master 107 - __kmpc_barrier_master_nowait 108 - __kmpc_begin 110 - __kmpc_bound_num_threads 111 - __kmpc_bound_thread_num 112 - __kmpc_critical 113 - __kmpc_dispatch_fini_4 114 - __kmpc_dispatch_fini_8 115 - __kmpc_dispatch_init_4 116 - __kmpc_dispatch_init_8 117 - __kmpc_dispatch_next_4 118 - __kmpc_dispatch_next_8 119 - __kmpc_end 120 - __kmpc_end_barrier_master 121 - __kmpc_end_critical 123 - __kmpc_end_master 124 - __kmpc_end_ordered 125 - __kmpc_end_serialized_parallel 126 - __kmpc_end_single 127 - __kmpc_end_taskq 128 - __kmpc_end_taskq_task 129 - __kmpc_flush 130 - __kmpc_for_static_fini 135 - __kmpc_for_static_init_4 136 - __kmpc_for_static_init_8 137 - __kmpc_fork_call 138 - __kmpc_global_num_threads 140 - __kmpc_global_thread_num 141 - __kmpc_in_parallel 142 - __kmpc_invoke_task_func 143 - __kmpc_master 144 - __kmpc_ok_to_fork 145 - __kmpc_ordered 146 - __kmpc_pop_num_threads 147 - __kmpc_push_num_threads 148 - __kmpc_serialized_parallel 150 - __kmpc_single 151 - __kmpc_task 152 - __kmpc_task_buffer 153 - __kmpc_taskq 154 - __kmpc_taskq_task 155 - __kmpc_threadprivate 156 - __kmpc_threadprivate_cached 157 - __kmpc_threadprivate_register 158 - __kmpc_threadprivate_register_vec 159 -# __kmpc_ssp_begin 160 -# __kmpc_ssp_fork 161 -# __kmpc_ssp_end 162 -# __kmpc_ssp_post_4 163 -# __kmpc_ssp_post_8 164 -# __kmpc_ssp_wait_4 165 -# __kmpc_ssp_wait_8 166 -# __kmpc_ssp_distance_4 167 -# __kmpc_ssp_distance_8 168 -# __kmpc_in_ssp 169 -# __kmpc_ssp_thread_num 170 -# __kmpc_ssp_num_threads 171 - __kmpc_copyprivate 172 -# __kmpc_ssp_get_max_threads 173 -# __kmpc_ssp_set_max_threads 174 - __kmpc_init_lock 175 - __kmpc_destroy_lock 176 - __kmpc_set_lock 177 - __kmpc_unset_lock 178 - __kmpc_test_lock 179 - __kmpc_init_nest_lock 180 - __kmpc_destroy_nest_lock 181 - __kmpc_set_nest_lock 182 - __kmpc_unset_nest_lock 183 - __kmpc_test_nest_lock 184 -# __kmpc_ssp_init_thread 185 -# __kmpc_ssp_set_event 186 - __kmpc_reduce_nowait 187 - __kmpc_end_reduce_nowait 188 - __kmpc_reduce 189 - __kmpc_end_reduce 190 - -# OpenMP 3.0 - -%ifdef OMP_30 - __kmpc_omp_task_alloc 191 - __kmpc_omp_task 192 - __kmpc_omp_taskwait 193 - 
__kmpc_omp_task_begin_if0 196 - __kmpc_omp_task_complete_if0 197 - __kmpc_omp_task_parts 198 -%endif # OMP_30 - -# __omp_collector_api 199 - - # These functions are for testing purposes. There is no need in stable ordinal number: - __kmp_get_reduce_method - -%endif # not defined stub - -kmpc_calloc 200 -kmpc_free 201 -%ifndef stub - # These functions are exported from libguide, but declared neither in omp.h not in omp_lib.h. -# kmpc_get_banner 202 -# kmpc_get_poolmode 203 -# kmpc_get_poolsize 204 -# kmpc_get_poolstat 205 -# kmpc_poolprint 207 -# kmpc_print_banner 208 -# kmpc_set_poolmode 214 -# kmpc_set_poolsize 215 -%endif -kmpc_malloc 206 -kmpc_realloc 209 -kmpc_set_blocktime 211 -kmpc_set_library 212 -# kmpc_set_parallel_name 213 -kmpc_set_stacksize 216 -kmpc_set_stacksize_s 222 -# kmpc_set_stats 217 -kmpc_set_defaults 224 - -# OMP 3.0 entry points for unsigned loop iteration variables -%ifndef stub - %ifdef OMP_30 - __kmpc_for_static_init_8u 225 - __kmpc_dispatch_init_8u 226 - __kmpc_dispatch_next_8u 227 - __kmpc_dispatch_fini_8u 228 - __kmpc_for_static_init_4u 229 - __kmpc_dispatch_init_4u 230 - __kmpc_dispatch_next_4u 231 - __kmpc_dispatch_fini_4u 232 - %endif # OMP_30 -%endif - -%ifndef stub - __kmpc_get_taskid 233 - __kmpc_get_parent_taskid 234 -%endif - -# OpenMP 3.1 entry points -%ifndef stub - %ifdef OMP_30 - __kmpc_omp_taskyield 235 - %endif # OMP_30 -# __kmpc_place_threads 236 -%endif - -# OpenMP 4.0 entry points -%ifndef stub - %ifdef OMP_40 - __kmpc_push_proc_bind 237 - __kmpc_taskgroup 238 - __kmpc_end_taskgroup 239 - __kmpc_push_num_teams 240 - __kmpc_fork_teams 241 - __kmpc_omp_task_with_deps 242 - __kmpc_omp_wait_deps 243 - __kmpc_cancel 244 - __kmpc_cancellationpoint 245 - __kmpc_cancel_barrier 246 - __kmpc_dist_for_static_init_4 247 - __kmpc_dist_for_static_init_4u 248 - __kmpc_dist_for_static_init_8 249 - __kmpc_dist_for_static_init_8u 250 - __kmpc_dist_dispatch_init_4 251 - __kmpc_dist_dispatch_init_4u 252 - __kmpc_dist_dispatch_init_8 253 - __kmpc_dist_dispatch_init_8u 254 - __kmpc_team_static_init_4 255 - __kmpc_team_static_init_4u 256 - __kmpc_team_static_init_8 257 - __kmpc_team_static_init_8u 258 - %endif # OMP_40 -%endif - -# OpenMP 4.5 entry points -%ifndef stub - %ifdef OMP_45 - __kmpc_proxy_task_completed 259 - __kmpc_proxy_task_completed_ooo 260 - __kmpc_doacross_init 261 - __kmpc_doacross_wait 262 - __kmpc_doacross_post 263 - __kmpc_doacross_fini 264 - __kmpc_taskloop 266 - __kmpc_critical_with_hint 270 - %endif -%endif -kmpc_aligned_malloc 265 -kmpc_set_disp_num_buffers 267 - -# OpenMP 5.0 entry points -%ifndef stub - %ifdef OMP_50 - __kmpc_task_reduction_init 268 - __kmpc_task_reduction_get_th_data 269 -# USED FOR 4.5 __kmpc_critical_with_hint 270 - __kmpc_get_target_offload 271 - __kmpc_omp_reg_task_with_affinity 272 - %endif -%endif - -# User API entry points that have both lower- and upper- case versions for Fortran. -# Number for lowercase version is indicated. Number for uppercase is obtained by adding 1000. -# User API entry points are entry points that start with 'kmp_' or 'omp_'. 
- -omp_destroy_lock 700 -omp_destroy_nest_lock 701 -omp_get_dynamic 702 -omp_get_max_threads 703 -omp_get_nested 704 -omp_get_num_procs 705 -omp_get_num_threads 706 -omp_get_thread_num 707 -omp_get_wtick 708 -omp_get_wtime 709 -omp_in_parallel 710 -omp_init_lock 711 -omp_init_nest_lock 712 -omp_set_dynamic 713 -omp_set_lock 714 -omp_set_nest_lock 715 -omp_set_nested 716 -omp_set_num_threads 717 -omp_test_lock 718 -omp_test_nest_lock 719 -omp_unset_lock 720 -omp_unset_nest_lock 721 - -ompc_set_dynamic 722 -ompc_set_nested 723 -ompc_set_num_threads 724 - -kmp_calloc 725 -kmp_free 726 -kmp_get_blocktime 727 -kmp_get_library 728 -kmp_get_stacksize 729 -kmp_malloc 730 -#kmp_print_banner 731 -kmp_realloc 732 -kmp_set_blocktime 734 -kmp_set_library 735 -kmp_set_library_serial 737 -kmp_set_library_throughput 738 -kmp_set_library_turnaround 739 -# kmp_set_parallel_name 740 -kmp_set_stacksize 741 -# kmp_set_stats 742 -kmp_get_num_known_threads 743 -kmp_set_stacksize_s 744 -kmp_get_stacksize_s 745 -kmp_set_defaults 746 -kmp_aligned_malloc 747 -kmp_set_warnings_on 779 -kmp_set_warnings_off 780 - -%ifdef OMP_30 - omp_get_active_level 789 - omp_get_level 790 - omp_get_ancestor_thread_num 791 - omp_get_team_size 792 - omp_get_thread_limit 793 - omp_get_max_active_levels 794 - omp_set_max_active_levels 795 - omp_get_schedule 796 - omp_set_schedule 797 - ompc_set_max_active_levels 798 - ompc_set_schedule 799 - ompc_get_ancestor_thread_num 800 - ompc_get_team_size 801 - kmp_set_affinity 850 - kmp_get_affinity 851 - kmp_get_affinity_max_proc 852 - kmp_create_affinity_mask 853 - kmp_destroy_affinity_mask 854 - kmp_set_affinity_mask_proc 855 - kmpc_set_affinity_mask_proc 856 - kmp_unset_affinity_mask_proc 857 - kmpc_unset_affinity_mask_proc 858 - kmp_get_affinity_mask_proc 859 - kmpc_get_affinity_mask_proc 860 -%endif # OMP_30 - -# OpenMP 3.1 - -%ifdef OMP_30 - omp_in_final 861 -%endif # OMP_30 - -# OpenMP 40 - -%ifdef OMP_40 - omp_get_proc_bind 862 - #omp_set_proc_bind 863 - #omp_curr_proc_bind 864 - omp_get_num_teams 865 - omp_get_team_num 866 - omp_get_cancellation 867 - kmp_get_cancellation_status 868 - omp_is_initial_device 869 - omp_set_default_device 879 - omp_get_default_device 880 - omp_get_num_devices 881 -%endif # OMP_40 - -# OpenMP 45 - -%ifdef OMP_45 - omp_init_lock_with_hint 870 - omp_init_nest_lock_with_hint 871 - omp_get_max_task_priority 872 - omp_get_num_places 873 - omp_get_place_num_procs 874 - omp_get_place_proc_ids 875 - omp_get_place_num 876 - omp_get_partition_num_places 877 - omp_get_partition_place_nums 878 - omp_get_initial_device 882 - %ifdef stub - omp_target_alloc 883 - omp_target_free 884 - omp_target_is_present 885 - omp_target_memcpy 886 - omp_target_memcpy_rect 887 - omp_target_associate_ptr 888 - omp_target_disassociate_ptr 889 - %endif -%endif # OMP_45 - -kmp_set_disp_num_buffers 890 - -%ifdef OMP_50 - omp_control_tool 891 - omp_set_default_allocator 892 - omp_get_default_allocator 893 - omp_alloc 894 - omp_free 895 - omp_get_device_num 896 - omp_set_affinity_format 748 - omp_get_affinity_format 749 - omp_display_affinity 750 - omp_capture_affinity 751 - ompc_set_affinity_format 752 - ompc_get_affinity_format 753 - ompc_display_affinity 754 - ompc_capture_affinity 755 - - OMP_NULL_ALLOCATOR DATA - omp_default_mem_alloc DATA - omp_large_cap_mem_alloc DATA - omp_const_mem_alloc DATA - omp_high_bw_mem_alloc DATA - omp_low_lat_mem_alloc DATA - omp_cgroup_mem_alloc DATA - omp_pteam_mem_alloc DATA - omp_thread_mem_alloc DATA -%endif # OMP_50 - -%ifndef stub - # Ordinals between 
900 and 999 are reserved - - # Ordinals between 1000 and 1999 are reserved - # for user-callable uppercase Fortran entries. - - - # ATOMIC entries - - %ifdef HAVE_QUAD - __kmpc_atomic_cmplx16_div 2000 - %endif - - __kmpc_atomic_fixed1_add 2001 - __kmpc_atomic_fixed1_andb 2002 - __kmpc_atomic_fixed1_div 2003 - __kmpc_atomic_fixed1u_div 2004 - __kmpc_atomic_fixed1_mul 2005 - __kmpc_atomic_fixed1_orb 2006 - __kmpc_atomic_fixed1_shl 2007 - __kmpc_atomic_fixed1_shr 2008 - __kmpc_atomic_fixed1u_shr 2009 - __kmpc_atomic_fixed1_sub 2010 - __kmpc_atomic_fixed1_xor 2011 - - __kmpc_atomic_fixed2_add 2012 - __kmpc_atomic_fixed2_andb 2013 - __kmpc_atomic_fixed2_div 2014 - __kmpc_atomic_fixed2u_div 2015 - __kmpc_atomic_fixed2_mul 2016 - __kmpc_atomic_fixed2_orb 2017 - __kmpc_atomic_fixed2_shl 2018 - __kmpc_atomic_fixed2_shr 2019 - __kmpc_atomic_fixed2u_shr 2020 - __kmpc_atomic_fixed2_sub 2021 - __kmpc_atomic_fixed2_xor 2022 - - #__kmpc_atomic_fixed4_add # declared above #102 - __kmpc_atomic_fixed4_sub 2024 - #__kmpc_atomic_float4_add # declared above #104 - __kmpc_atomic_float4_sub 2026 - #__kmpc_atomic_fixed8_add # declared above #103 - __kmpc_atomic_fixed8_sub 2028 - #__kmpc_atomic_float8_add # declared above #105 - __kmpc_atomic_float8_sub 2030 - - __kmpc_atomic_fixed4_andb 2031 - __kmpc_atomic_fixed4_div 2032 - __kmpc_atomic_fixed4u_div 2033 - __kmpc_atomic_fixed4_mul 2034 - __kmpc_atomic_fixed4_orb 2035 - __kmpc_atomic_fixed4_shl 2036 - __kmpc_atomic_fixed4_shr 2037 - __kmpc_atomic_fixed4u_shr 2038 - __kmpc_atomic_fixed4_xor 2039 - __kmpc_atomic_fixed8_andb 2040 - __kmpc_atomic_fixed8_div 2041 - __kmpc_atomic_fixed8u_div 2042 - __kmpc_atomic_fixed8_mul 2043 - __kmpc_atomic_fixed8_orb 2044 - __kmpc_atomic_fixed8_shl 2045 - __kmpc_atomic_fixed8_shr 2046 - __kmpc_atomic_fixed8u_shr 2047 - __kmpc_atomic_fixed8_xor 2048 - __kmpc_atomic_float4_div 2049 - __kmpc_atomic_float4_mul 2050 - __kmpc_atomic_float8_div 2051 - __kmpc_atomic_float8_mul 2052 - - __kmpc_atomic_fixed1_andl 2053 - __kmpc_atomic_fixed1_orl 2054 - __kmpc_atomic_fixed2_andl 2055 - __kmpc_atomic_fixed2_orl 2056 - __kmpc_atomic_fixed4_andl 2057 - __kmpc_atomic_fixed4_orl 2058 - __kmpc_atomic_fixed8_andl 2059 - __kmpc_atomic_fixed8_orl 2060 - - __kmpc_atomic_fixed1_max 2061 - __kmpc_atomic_fixed1_min 2062 - __kmpc_atomic_fixed2_max 2063 - __kmpc_atomic_fixed2_min 2064 - __kmpc_atomic_fixed4_max 2065 - __kmpc_atomic_fixed4_min 2066 - __kmpc_atomic_fixed8_max 2067 - __kmpc_atomic_fixed8_min 2068 - __kmpc_atomic_float4_max 2069 - __kmpc_atomic_float4_min 2070 - __kmpc_atomic_float8_max 2071 - __kmpc_atomic_float8_min 2072 - - __kmpc_atomic_fixed1_neqv 2073 - __kmpc_atomic_fixed2_neqv 2074 - __kmpc_atomic_fixed4_neqv 2075 - __kmpc_atomic_fixed8_neqv 2076 - __kmpc_atomic_fixed1_eqv 2077 - __kmpc_atomic_fixed2_eqv 2078 - __kmpc_atomic_fixed4_eqv 2079 - __kmpc_atomic_fixed8_eqv 2080 - - __kmpc_atomic_float10_add 2081 - __kmpc_atomic_float10_sub 2082 - __kmpc_atomic_float10_mul 2083 - __kmpc_atomic_float10_div 2084 - - __kmpc_atomic_cmplx4_add 2085 - __kmpc_atomic_cmplx4_sub 2086 - __kmpc_atomic_cmplx4_mul 2087 - __kmpc_atomic_cmplx4_div 2088 - __kmpc_atomic_cmplx8_add 2089 - __kmpc_atomic_cmplx8_sub 2090 - __kmpc_atomic_cmplx8_mul 2091 - __kmpc_atomic_cmplx8_div 2092 - __kmpc_atomic_cmplx10_add 2093 - __kmpc_atomic_cmplx10_sub 2094 - __kmpc_atomic_cmplx10_mul 2095 - __kmpc_atomic_cmplx10_div 2096 - %ifdef HAVE_QUAD - __kmpc_atomic_cmplx16_add 2097 - __kmpc_atomic_cmplx16_sub 2098 - __kmpc_atomic_cmplx16_mul 2099 - #__kmpc_atomic_cmplx16_div 2000 # 
moved up because of mistake in number (supposed to be 2100) - - __kmpc_atomic_float16_add 2101 - __kmpc_atomic_float16_sub 2102 - __kmpc_atomic_float16_mul 2103 - __kmpc_atomic_float16_div 2104 - __kmpc_atomic_float16_max 2105 - __kmpc_atomic_float16_min 2106 - - __kmpc_atomic_fixed1_add_fp 2107 - __kmpc_atomic_fixed1_sub_fp 2108 - __kmpc_atomic_fixed1_mul_fp 2109 - __kmpc_atomic_fixed1_div_fp 2110 - __kmpc_atomic_fixed1u_div_fp 2111 - - __kmpc_atomic_fixed2_add_fp 2112 - __kmpc_atomic_fixed2_sub_fp 2113 - __kmpc_atomic_fixed2_mul_fp 2114 - __kmpc_atomic_fixed2_div_fp 2115 - __kmpc_atomic_fixed2u_div_fp 2116 - - __kmpc_atomic_fixed4_add_fp 2117 - __kmpc_atomic_fixed4_sub_fp 2118 - __kmpc_atomic_fixed4_mul_fp 2119 - __kmpc_atomic_fixed4_div_fp 2120 - __kmpc_atomic_fixed4u_div_fp 2121 - - __kmpc_atomic_fixed8_add_fp 2122 - __kmpc_atomic_fixed8_sub_fp 2123 - __kmpc_atomic_fixed8_mul_fp 2124 - __kmpc_atomic_fixed8_div_fp 2125 - __kmpc_atomic_fixed8u_div_fp 2126 - - __kmpc_atomic_float4_add_fp 2127 - __kmpc_atomic_float4_sub_fp 2128 - __kmpc_atomic_float4_mul_fp 2129 - __kmpc_atomic_float4_div_fp 2130 - - __kmpc_atomic_float8_add_fp 2131 - __kmpc_atomic_float8_sub_fp 2132 - __kmpc_atomic_float8_mul_fp 2133 - __kmpc_atomic_float8_div_fp 2134 - - __kmpc_atomic_float10_add_fp 2135 - __kmpc_atomic_float10_sub_fp 2136 - __kmpc_atomic_float10_mul_fp 2137 - __kmpc_atomic_float10_div_fp 2138 - %endif - - __kmpc_atomic_fixed1_mul_float8 2169 - __kmpc_atomic_fixed1_div_float8 2170 - - __kmpc_atomic_fixed2_mul_float8 2174 - __kmpc_atomic_fixed2_div_float8 2175 - - __kmpc_atomic_fixed4_mul_float8 2179 - __kmpc_atomic_fixed4_div_float8 2180 - - __kmpc_atomic_fixed8_mul_float8 2184 - __kmpc_atomic_fixed8_div_float8 2185 - - __kmpc_atomic_float4_add_float8 2187 - __kmpc_atomic_float4_sub_float8 2188 - __kmpc_atomic_float4_mul_float8 2189 - __kmpc_atomic_float4_div_float8 2190 - - __kmpc_atomic_cmplx4_add_cmplx8 2231 - __kmpc_atomic_cmplx4_sub_cmplx8 2232 - __kmpc_atomic_cmplx4_mul_cmplx8 2233 - __kmpc_atomic_cmplx4_div_cmplx8 2234 - - __kmpc_atomic_1 2247 - __kmpc_atomic_2 2248 - #__kmpc_atomic_4 # declared above #100 - #__kmpc_atomic_8 # declared above #101 - __kmpc_atomic_10 2251 - __kmpc_atomic_16 2252 - __kmpc_atomic_20 2253 - __kmpc_atomic_32 2254 - - %ifdef arch_32 - - %ifdef HAVE_QUAD - __kmpc_atomic_float16_add_a16 2255 - __kmpc_atomic_float16_sub_a16 2256 - __kmpc_atomic_float16_mul_a16 2257 - __kmpc_atomic_float16_div_a16 2258 - __kmpc_atomic_float16_max_a16 2259 - __kmpc_atomic_float16_min_a16 2260 - - __kmpc_atomic_cmplx16_add_a16 2261 - __kmpc_atomic_cmplx16_sub_a16 2262 - __kmpc_atomic_cmplx16_mul_a16 2263 - __kmpc_atomic_cmplx16_div_a16 2264 - %endif - - %endif - - %ifndef arch_64 - - # ATOMIC extensions for OpenMP 3.1 spec (x86 and x64 only) - - __kmpc_atomic_fixed1_rd 2265 - __kmpc_atomic_fixed2_rd 2266 - __kmpc_atomic_fixed4_rd 2267 - __kmpc_atomic_fixed8_rd 2268 - __kmpc_atomic_float4_rd 2269 - __kmpc_atomic_float8_rd 2270 - __kmpc_atomic_float10_rd 2271 - %ifdef HAVE_QUAD - __kmpc_atomic_float16_rd 2272 - %endif - __kmpc_atomic_cmplx4_rd 2273 - __kmpc_atomic_cmplx8_rd 2274 - __kmpc_atomic_cmplx10_rd 2275 - %ifdef HAVE_QUAD - __kmpc_atomic_cmplx16_rd 2276 - %ifdef arch_32 - __kmpc_atomic_float16_a16_rd 2277 - __kmpc_atomic_cmplx16_a16_rd 2278 - %endif - %endif - __kmpc_atomic_fixed1_wr 2279 - __kmpc_atomic_fixed2_wr 2280 - __kmpc_atomic_fixed4_wr 2281 - __kmpc_atomic_fixed8_wr 2282 - __kmpc_atomic_float4_wr 2283 - __kmpc_atomic_float8_wr 2284 - __kmpc_atomic_float10_wr 2285 - %ifdef 
HAVE_QUAD - __kmpc_atomic_float16_wr 2286 - %endif - __kmpc_atomic_cmplx4_wr 2287 - __kmpc_atomic_cmplx8_wr 2288 - __kmpc_atomic_cmplx10_wr 2289 - %ifdef HAVE_QUAD - __kmpc_atomic_cmplx16_wr 2290 - %ifdef arch_32 - __kmpc_atomic_float16_a16_wr 2291 - __kmpc_atomic_cmplx16_a16_wr 2292 - %endif - %endif - __kmpc_atomic_fixed1_add_cpt 2293 - __kmpc_atomic_fixed1_andb_cpt 2294 - __kmpc_atomic_fixed1_div_cpt 2295 - __kmpc_atomic_fixed1u_div_cpt 2296 - __kmpc_atomic_fixed1_mul_cpt 2297 - __kmpc_atomic_fixed1_orb_cpt 2298 - __kmpc_atomic_fixed1_shl_cpt 2299 - __kmpc_atomic_fixed1_shr_cpt 2300 - __kmpc_atomic_fixed1u_shr_cpt 2301 - __kmpc_atomic_fixed1_sub_cpt 2302 - __kmpc_atomic_fixed1_xor_cpt 2303 - __kmpc_atomic_fixed2_add_cpt 2304 - __kmpc_atomic_fixed2_andb_cpt 2305 - __kmpc_atomic_fixed2_div_cpt 2306 - __kmpc_atomic_fixed2u_div_cpt 2307 - __kmpc_atomic_fixed2_mul_cpt 2308 - __kmpc_atomic_fixed2_orb_cpt 2309 - __kmpc_atomic_fixed2_shl_cpt 2310 - __kmpc_atomic_fixed2_shr_cpt 2311 - __kmpc_atomic_fixed2u_shr_cpt 2312 - __kmpc_atomic_fixed2_sub_cpt 2313 - __kmpc_atomic_fixed2_xor_cpt 2314 - __kmpc_atomic_fixed4_add_cpt 2315 - __kmpc_atomic_fixed4_sub_cpt 2316 - __kmpc_atomic_float4_add_cpt 2317 - __kmpc_atomic_float4_sub_cpt 2318 - __kmpc_atomic_fixed8_add_cpt 2319 - __kmpc_atomic_fixed8_sub_cpt 2320 - __kmpc_atomic_float8_add_cpt 2321 - __kmpc_atomic_float8_sub_cpt 2322 - __kmpc_atomic_fixed4_andb_cpt 2323 - __kmpc_atomic_fixed4_div_cpt 2324 - __kmpc_atomic_fixed4u_div_cpt 2325 - __kmpc_atomic_fixed4_mul_cpt 2326 - __kmpc_atomic_fixed4_orb_cpt 2327 - __kmpc_atomic_fixed4_shl_cpt 2328 - __kmpc_atomic_fixed4_shr_cpt 2329 - __kmpc_atomic_fixed4u_shr_cpt 2330 - __kmpc_atomic_fixed4_xor_cpt 2331 - __kmpc_atomic_fixed8_andb_cpt 2332 - __kmpc_atomic_fixed8_div_cpt 2333 - __kmpc_atomic_fixed8u_div_cpt 2334 - __kmpc_atomic_fixed8_mul_cpt 2335 - __kmpc_atomic_fixed8_orb_cpt 2336 - __kmpc_atomic_fixed8_shl_cpt 2337 - __kmpc_atomic_fixed8_shr_cpt 2338 - __kmpc_atomic_fixed8u_shr_cpt 2339 - __kmpc_atomic_fixed8_xor_cpt 2340 - __kmpc_atomic_float4_div_cpt 2341 - __kmpc_atomic_float4_mul_cpt 2342 - __kmpc_atomic_float8_div_cpt 2343 - __kmpc_atomic_float8_mul_cpt 2344 - __kmpc_atomic_fixed1_andl_cpt 2345 - __kmpc_atomic_fixed1_orl_cpt 2346 - __kmpc_atomic_fixed2_andl_cpt 2347 - __kmpc_atomic_fixed2_orl_cpt 2348 - __kmpc_atomic_fixed4_andl_cpt 2349 - __kmpc_atomic_fixed4_orl_cpt 2350 - __kmpc_atomic_fixed8_andl_cpt 2351 - __kmpc_atomic_fixed8_orl_cpt 2352 - __kmpc_atomic_fixed1_max_cpt 2353 - __kmpc_atomic_fixed1_min_cpt 2354 - __kmpc_atomic_fixed2_max_cpt 2355 - __kmpc_atomic_fixed2_min_cpt 2356 - __kmpc_atomic_fixed4_max_cpt 2357 - __kmpc_atomic_fixed4_min_cpt 2358 - __kmpc_atomic_fixed8_max_cpt 2359 - __kmpc_atomic_fixed8_min_cpt 2360 - __kmpc_atomic_float4_max_cpt 2361 - __kmpc_atomic_float4_min_cpt 2362 - __kmpc_atomic_float8_max_cpt 2363 - __kmpc_atomic_float8_min_cpt 2364 - %ifdef HAVE_QUAD - __kmpc_atomic_float16_max_cpt 2365 - __kmpc_atomic_float16_min_cpt 2366 - %endif - __kmpc_atomic_fixed1_neqv_cpt 2367 - __kmpc_atomic_fixed2_neqv_cpt 2368 - __kmpc_atomic_fixed4_neqv_cpt 2369 - __kmpc_atomic_fixed8_neqv_cpt 2370 - __kmpc_atomic_fixed1_eqv_cpt 2371 - __kmpc_atomic_fixed2_eqv_cpt 2372 - __kmpc_atomic_fixed4_eqv_cpt 2373 - __kmpc_atomic_fixed8_eqv_cpt 2374 - __kmpc_atomic_float10_add_cpt 2375 - __kmpc_atomic_float10_sub_cpt 2376 - __kmpc_atomic_float10_mul_cpt 2377 - __kmpc_atomic_float10_div_cpt 2378 - %ifdef HAVE_QUAD - __kmpc_atomic_float16_add_cpt 2379 - __kmpc_atomic_float16_sub_cpt 2380 - 
__kmpc_atomic_float16_mul_cpt 2381 - __kmpc_atomic_float16_div_cpt 2382 - %endif - __kmpc_atomic_cmplx4_add_cpt 2383 - __kmpc_atomic_cmplx4_sub_cpt 2384 - __kmpc_atomic_cmplx4_mul_cpt 2385 - __kmpc_atomic_cmplx4_div_cpt 2386 - __kmpc_atomic_cmplx8_add_cpt 2387 - __kmpc_atomic_cmplx8_sub_cpt 2388 - __kmpc_atomic_cmplx8_mul_cpt 2389 - __kmpc_atomic_cmplx8_div_cpt 2390 - __kmpc_atomic_cmplx10_add_cpt 2391 - __kmpc_atomic_cmplx10_sub_cpt 2392 - __kmpc_atomic_cmplx10_mul_cpt 2393 - __kmpc_atomic_cmplx10_div_cpt 2394 - %ifdef HAVE_QUAD - __kmpc_atomic_cmplx16_add_cpt 2395 - __kmpc_atomic_cmplx16_sub_cpt 2396 - __kmpc_atomic_cmplx16_mul_cpt 2397 - __kmpc_atomic_cmplx16_div_cpt 2398 - %endif - #__kmpc_atomic_cmplx4_add_cpt_tmp 2409 - - %ifdef arch_32 - %ifdef HAVE_QUAD - __kmpc_atomic_float16_add_a16_cpt 2399 - __kmpc_atomic_float16_sub_a16_cpt 2400 - __kmpc_atomic_float16_mul_a16_cpt 2401 - __kmpc_atomic_float16_div_a16_cpt 2402 - __kmpc_atomic_float16_max_a16_cpt 2403 - __kmpc_atomic_float16_min_a16_cpt 2404 - __kmpc_atomic_cmplx16_add_a16_cpt 2405 - __kmpc_atomic_cmplx16_sub_a16_cpt 2406 - __kmpc_atomic_cmplx16_mul_a16_cpt 2407 - __kmpc_atomic_cmplx16_div_a16_cpt 2408 - %endif - %endif - - __kmpc_atomic_start 2410 - __kmpc_atomic_end 2411 - - %ifdef HAVE_QUAD - __kmpc_atomic_fixed1_add_cpt_fp - __kmpc_atomic_fixed1_sub_cpt_fp - __kmpc_atomic_fixed1_mul_cpt_fp - __kmpc_atomic_fixed1_div_cpt_fp - __kmpc_atomic_fixed1u_add_cpt_fp - __kmpc_atomic_fixed1u_sub_cpt_fp - __kmpc_atomic_fixed1u_mul_cpt_fp - __kmpc_atomic_fixed1u_div_cpt_fp - - __kmpc_atomic_fixed2_add_cpt_fp - __kmpc_atomic_fixed2_sub_cpt_fp - __kmpc_atomic_fixed2_mul_cpt_fp - __kmpc_atomic_fixed2_div_cpt_fp - __kmpc_atomic_fixed2u_add_cpt_fp - __kmpc_atomic_fixed2u_sub_cpt_fp - __kmpc_atomic_fixed2u_mul_cpt_fp - __kmpc_atomic_fixed2u_div_cpt_fp - - __kmpc_atomic_fixed4_add_cpt_fp - __kmpc_atomic_fixed4_sub_cpt_fp - __kmpc_atomic_fixed4_mul_cpt_fp - __kmpc_atomic_fixed4_div_cpt_fp - __kmpc_atomic_fixed4u_add_cpt_fp - __kmpc_atomic_fixed4u_sub_cpt_fp - __kmpc_atomic_fixed4u_mul_cpt_fp - __kmpc_atomic_fixed4u_div_cpt_fp - - __kmpc_atomic_fixed8_add_cpt_fp - __kmpc_atomic_fixed8_sub_cpt_fp - __kmpc_atomic_fixed8_mul_cpt_fp - __kmpc_atomic_fixed8_div_cpt_fp - __kmpc_atomic_fixed8u_add_cpt_fp - __kmpc_atomic_fixed8u_sub_cpt_fp - __kmpc_atomic_fixed8u_mul_cpt_fp - __kmpc_atomic_fixed8u_div_cpt_fp - - __kmpc_atomic_float4_add_cpt_fp - __kmpc_atomic_float4_sub_cpt_fp - __kmpc_atomic_float4_mul_cpt_fp - __kmpc_atomic_float4_div_cpt_fp - - __kmpc_atomic_float8_add_cpt_fp - __kmpc_atomic_float8_sub_cpt_fp - __kmpc_atomic_float8_mul_cpt_fp - __kmpc_atomic_float8_div_cpt_fp - - __kmpc_atomic_float10_add_cpt_fp - __kmpc_atomic_float10_sub_cpt_fp - __kmpc_atomic_float10_mul_cpt_fp - __kmpc_atomic_float10_div_cpt_fp - %endif - - %ifdef OMP_40 - - # ATOMIC extensions for OpenMP 4.0 spec (x86 and x64 only) - - __kmpc_atomic_fixed1_swp 2412 - __kmpc_atomic_fixed2_swp 2413 - __kmpc_atomic_fixed4_swp 2414 - __kmpc_atomic_fixed8_swp 2415 - __kmpc_atomic_float4_swp 2416 - __kmpc_atomic_float8_swp 2417 - __kmpc_atomic_float10_swp 2418 - %ifdef HAVE_QUAD - __kmpc_atomic_float16_swp 2419 - %endif - __kmpc_atomic_cmplx4_swp 2420 - __kmpc_atomic_cmplx8_swp 2421 - __kmpc_atomic_cmplx10_swp 2422 - %ifdef HAVE_QUAD - __kmpc_atomic_cmplx16_swp 2423 - - %ifdef arch_32 - __kmpc_atomic_float16_a16_swp 2424 - __kmpc_atomic_cmplx16_a16_swp 2425 - %endif - %endif - - __kmpc_atomic_fixed1_sub_cpt_rev 2426 - __kmpc_atomic_fixed1_div_cpt_rev 2427 - 
__kmpc_atomic_fixed1u_div_cpt_rev 2428 - __kmpc_atomic_fixed1_shl_cpt_rev 2429 - __kmpc_atomic_fixed1_shr_cpt_rev 2430 - __kmpc_atomic_fixed1u_shr_cpt_rev 2431 - __kmpc_atomic_fixed2_sub_cpt_rev 2432 - __kmpc_atomic_fixed2_div_cpt_rev 2433 - __kmpc_atomic_fixed2u_div_cpt_rev 2434 - __kmpc_atomic_fixed2_shl_cpt_rev 2435 - __kmpc_atomic_fixed2_shr_cpt_rev 2436 - __kmpc_atomic_fixed2u_shr_cpt_rev 2437 - __kmpc_atomic_fixed4_sub_cpt_rev 2438 - __kmpc_atomic_fixed4_div_cpt_rev 2439 - __kmpc_atomic_fixed4u_div_cpt_rev 2440 - __kmpc_atomic_fixed4_shl_cpt_rev 2441 - __kmpc_atomic_fixed4_shr_cpt_rev 2442 - __kmpc_atomic_fixed4u_shr_cpt_rev 2443 - __kmpc_atomic_fixed8_sub_cpt_rev 2444 - __kmpc_atomic_fixed8_div_cpt_rev 2445 - __kmpc_atomic_fixed8u_div_cpt_rev 2446 - __kmpc_atomic_fixed8_shl_cpt_rev 2447 - __kmpc_atomic_fixed8_shr_cpt_rev 2448 - __kmpc_atomic_fixed8u_shr_cpt_rev 2449 - __kmpc_atomic_float4_sub_cpt_rev 2450 - __kmpc_atomic_float4_div_cpt_rev 2451 - __kmpc_atomic_float8_sub_cpt_rev 2452 - __kmpc_atomic_float8_div_cpt_rev 2453 - __kmpc_atomic_float10_sub_cpt_rev 2454 - __kmpc_atomic_float10_div_cpt_rev 2455 - %ifdef HAVE_QUAD - __kmpc_atomic_float16_sub_cpt_rev 2456 - __kmpc_atomic_float16_div_cpt_rev 2457 - %endif - __kmpc_atomic_cmplx4_sub_cpt_rev 2458 - __kmpc_atomic_cmplx4_div_cpt_rev 2459 - __kmpc_atomic_cmplx8_sub_cpt_rev 2460 - __kmpc_atomic_cmplx8_div_cpt_rev 2461 - __kmpc_atomic_cmplx10_sub_cpt_rev 2462 - __kmpc_atomic_cmplx10_div_cpt_rev 2463 - %ifdef HAVE_QUAD - __kmpc_atomic_cmplx16_sub_cpt_rev 2464 - __kmpc_atomic_cmplx16_div_cpt_rev 2465 - - %ifdef arch_32 - __kmpc_atomic_float16_sub_a16_cpt_rev 2466 - __kmpc_atomic_float16_div_a16_cpt_rev 2467 - __kmpc_atomic_cmplx16_sub_a16_cpt_rev 2468 - __kmpc_atomic_cmplx16_div_a16_cpt_rev 2469 - %endif - %endif - - __kmpc_atomic_fixed1_sub_rev 2470 - __kmpc_atomic_fixed1_div_rev 2471 - __kmpc_atomic_fixed1u_div_rev 2472 - __kmpc_atomic_fixed1_shl_rev 2473 - __kmpc_atomic_fixed1_shr_rev 2474 - __kmpc_atomic_fixed1u_shr_rev 2475 - __kmpc_atomic_fixed2_sub_rev 2476 - __kmpc_atomic_fixed2_div_rev 2477 - __kmpc_atomic_fixed2u_div_rev 2478 - __kmpc_atomic_fixed2_shl_rev 2479 - __kmpc_atomic_fixed2_shr_rev 2480 - __kmpc_atomic_fixed2u_shr_rev 2481 - __kmpc_atomic_fixed4_sub_rev 2482 - __kmpc_atomic_fixed4_div_rev 2483 - __kmpc_atomic_fixed4u_div_rev 2484 - __kmpc_atomic_fixed4_shl_rev 2485 - __kmpc_atomic_fixed4_shr_rev 2486 - __kmpc_atomic_fixed4u_shr_rev 2487 - __kmpc_atomic_fixed8_sub_rev 2488 - __kmpc_atomic_fixed8_div_rev 2489 - __kmpc_atomic_fixed8u_div_rev 2490 - __kmpc_atomic_fixed8_shl_rev 2491 - __kmpc_atomic_fixed8_shr_rev 2492 - __kmpc_atomic_fixed8u_shr_rev 2493 - __kmpc_atomic_float4_sub_rev 2494 - __kmpc_atomic_float4_div_rev 2495 - __kmpc_atomic_float8_sub_rev 2496 - __kmpc_atomic_float8_div_rev 2497 - __kmpc_atomic_float10_sub_rev 2498 - __kmpc_atomic_float10_div_rev 2499 - %ifdef HAVE_QUAD - __kmpc_atomic_float16_sub_rev 2500 - __kmpc_atomic_float16_div_rev 2501 - %endif - __kmpc_atomic_cmplx4_sub_rev 2502 - __kmpc_atomic_cmplx4_div_rev 2503 - __kmpc_atomic_cmplx8_sub_rev 2504 - __kmpc_atomic_cmplx8_div_rev 2505 - __kmpc_atomic_cmplx10_sub_rev 2506 - __kmpc_atomic_cmplx10_div_rev 2507 - %ifdef HAVE_QUAD - __kmpc_atomic_cmplx16_sub_rev 2508 - __kmpc_atomic_cmplx16_div_rev 2509 - %ifdef arch_32 - __kmpc_atomic_float16_sub_a16_rev 2510 - __kmpc_atomic_float16_div_a16_rev 2511 - __kmpc_atomic_cmplx16_sub_a16_rev 2512 - __kmpc_atomic_cmplx16_div_a16_rev 2513 - %endif - %endif - - %ifdef HAVE_QUAD - 
__kmpc_atomic_fixed1_sub_rev_fp - __kmpc_atomic_fixed1u_sub_rev_fp - __kmpc_atomic_fixed1_div_rev_fp - __kmpc_atomic_fixed1u_div_rev_fp - __kmpc_atomic_fixed2_sub_rev_fp - __kmpc_atomic_fixed2u_sub_rev_fp - __kmpc_atomic_fixed2_div_rev_fp - __kmpc_atomic_fixed2u_div_rev_fp - __kmpc_atomic_fixed4_sub_rev_fp - __kmpc_atomic_fixed4u_sub_rev_fp - __kmpc_atomic_fixed4_div_rev_fp - __kmpc_atomic_fixed4u_div_rev_fp - __kmpc_atomic_fixed8_sub_rev_fp - __kmpc_atomic_fixed8u_sub_rev_fp - __kmpc_atomic_fixed8_div_rev_fp - __kmpc_atomic_fixed8u_div_rev_fp - __kmpc_atomic_float4_sub_rev_fp - __kmpc_atomic_float4_div_rev_fp - __kmpc_atomic_float8_sub_rev_fp - __kmpc_atomic_float8_div_rev_fp - __kmpc_atomic_float10_sub_rev_fp - __kmpc_atomic_float10_div_rev_fp - - __kmpc_atomic_fixed1_sub_cpt_rev_fp - __kmpc_atomic_fixed1u_sub_cpt_rev_fp - __kmpc_atomic_fixed1_div_cpt_rev_fp - __kmpc_atomic_fixed1u_div_cpt_rev_fp - __kmpc_atomic_fixed2_sub_cpt_rev_fp - __kmpc_atomic_fixed2u_sub_cpt_rev_fp - __kmpc_atomic_fixed2_div_cpt_rev_fp - __kmpc_atomic_fixed2u_div_cpt_rev_fp - __kmpc_atomic_fixed4_sub_cpt_rev_fp - __kmpc_atomic_fixed4u_sub_cpt_rev_fp - __kmpc_atomic_fixed4_div_cpt_rev_fp - __kmpc_atomic_fixed4u_div_cpt_rev_fp - __kmpc_atomic_fixed8_sub_cpt_rev_fp - __kmpc_atomic_fixed8u_sub_cpt_rev_fp - __kmpc_atomic_fixed8_div_cpt_rev_fp - __kmpc_atomic_fixed8u_div_cpt_rev_fp - __kmpc_atomic_float4_sub_cpt_rev_fp - __kmpc_atomic_float4_div_cpt_rev_fp - __kmpc_atomic_float8_sub_cpt_rev_fp - __kmpc_atomic_float8_div_cpt_rev_fp - __kmpc_atomic_float10_sub_cpt_rev_fp - __kmpc_atomic_float10_div_cpt_rev_fp - %endif - %endif # OMP_40 - - - %endif # arch_64 - - %ifdef HAVE_QUAD - __kmpc_atomic_fixed1u_add_fp - __kmpc_atomic_fixed1u_sub_fp - __kmpc_atomic_fixed1u_mul_fp - __kmpc_atomic_fixed2u_add_fp - __kmpc_atomic_fixed2u_sub_fp - __kmpc_atomic_fixed2u_mul_fp - __kmpc_atomic_fixed4u_add_fp - __kmpc_atomic_fixed4u_sub_fp - __kmpc_atomic_fixed4u_mul_fp - __kmpc_atomic_fixed8u_add_fp - __kmpc_atomic_fixed8u_sub_fp - __kmpc_atomic_fixed8u_mul_fp - %endif - -%endif - -# end of file # Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_str.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_str.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_str.cpp (nonexistent) @@ -1,752 +0,0 @@ -/* - * kmp_str.cpp -- String manipulation routines. - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#include "kmp_str.h" - -#include // va_* -#include // vsnprintf() -#include // malloc(), realloc() - -#include "kmp.h" -#include "kmp_i18n.h" - -/* String buffer. - - Usage: - - // Declare buffer and initialize it. - kmp_str_buf_t buffer; - __kmp_str_buf_init( & buffer ); - - // Print to buffer. - __kmp_str_buf_print(& buffer, "Error in file \"%s\" line %d\n", "foo.c", 12); - __kmp_str_buf_print(& buffer, " <%s>\n", line); - - // Use buffer contents. buffer.str is a pointer to data, buffer.used is a - // number of printed characters (not including terminating zero). - write( fd, buffer.str, buffer.used ); - - // Free buffer. 
- __kmp_str_buf_free( & buffer ); - - // Alternatively, you can detach allocated memory from buffer: - __kmp_str_buf_detach( & buffer ); - return buffer.str; // That memory should be freed eventually. - - Notes: - - * Buffer users may use buffer.str and buffer.used. Users should not change - any fields of buffer directly. - * buffer.str is never NULL. If buffer is empty, buffer.str points to empty - string (""). - * For performance reasons, buffer uses stack memory (buffer.bulk) first. If - stack memory is exhausted, buffer allocates memory on heap by malloc(), and - reallocates it by realloc() as amount of used memory grows. - * Buffer doubles amount of allocated memory each time it is exhausted. -*/ - -// TODO: __kmp_str_buf_print() can use thread local memory allocator. - -#define KMP_STR_BUF_INVARIANT(b) \ - { \ - KMP_DEBUG_ASSERT((b)->str != NULL); \ - KMP_DEBUG_ASSERT((b)->size >= sizeof((b)->bulk)); \ - KMP_DEBUG_ASSERT((b)->size % sizeof((b)->bulk) == 0); \ - KMP_DEBUG_ASSERT((unsigned)(b)->used < (b)->size); \ - KMP_DEBUG_ASSERT( \ - (b)->size == sizeof((b)->bulk) ? (b)->str == &(b)->bulk[0] : 1); \ - KMP_DEBUG_ASSERT((b)->size > sizeof((b)->bulk) ? (b)->str != &(b)->bulk[0] \ - : 1); \ - } - -void __kmp_str_buf_clear(kmp_str_buf_t *buffer) { - KMP_STR_BUF_INVARIANT(buffer); - if (buffer->used > 0) { - buffer->used = 0; - buffer->str[0] = 0; - } - KMP_STR_BUF_INVARIANT(buffer); -} // __kmp_str_buf_clear - -void __kmp_str_buf_reserve(kmp_str_buf_t *buffer, int size) { - KMP_STR_BUF_INVARIANT(buffer); - KMP_DEBUG_ASSERT(size >= 0); - - if (buffer->size < (unsigned int)size) { - // Calculate buffer size. - do { - buffer->size *= 2; - } while (buffer->size < (unsigned int)size); - - // Enlarge buffer. - if (buffer->str == &buffer->bulk[0]) { - buffer->str = (char *)KMP_INTERNAL_MALLOC(buffer->size); - if (buffer->str == NULL) { - KMP_FATAL(MemoryAllocFailed); - } - KMP_MEMCPY_S(buffer->str, buffer->size, buffer->bulk, buffer->used + 1); - } else { - buffer->str = (char *)KMP_INTERNAL_REALLOC(buffer->str, buffer->size); - if (buffer->str == NULL) { - KMP_FATAL(MemoryAllocFailed); - } - } - } - - KMP_DEBUG_ASSERT(buffer->size > 0); - KMP_DEBUG_ASSERT(buffer->size >= (unsigned)size); - KMP_STR_BUF_INVARIANT(buffer); -} // __kmp_str_buf_reserve - -void __kmp_str_buf_detach(kmp_str_buf_t *buffer) { - KMP_STR_BUF_INVARIANT(buffer); - - // If internal bulk is used, allocate memory and copy it. 
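// The growth policy described above, reduced to a standalone toy: start in a
// stack-resident "bulk" array and, once that is exhausted, spill to the heap,
// doubling capacity each time so appends stay amortized constant. Allocation
// error checks are elided; all names are invented for the sketch.
#include <cstdio>
#include <cstdlib>
#include <cstring>

struct toy_buf {
  char bulk[8];
  char *str = bulk; // points at bulk until the first spill
  size_t size = sizeof(bulk), used = 0;
};

static void toy_reserve(toy_buf *b, size_t need) {
  if (b->size >= need)
    return;
  while (b->size < need)
    b->size *= 2; // double until large enough
  if (b->str == b->bulk) { // first spill: stack -> heap
    b->str = (char *)std::malloc(b->size);
    std::memcpy(b->str, b->bulk, b->used + 1); // keep data + NUL
  } else {
    b->str = (char *)std::realloc(b->str, b->size);
  }
}

int main() {
  toy_buf b;
  b.str[0] = 0;
  const char *word = "abcdefg";
  for (int i = 0; i < 4; ++i) {
    toy_reserve(&b, b.used + std::strlen(word) + 1);
    std::memcpy(b.str + b.used, word, std::strlen(word) + 1);
    b.used += std::strlen(word);
  }
  std::printf("%zu bytes used, capacity %zu\n", b.used, b.size);
  if (b.str != b.bulk)
    std::free(b.str);
}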
- if (buffer->size <= sizeof(buffer->bulk)) { - buffer->str = (char *)KMP_INTERNAL_MALLOC(buffer->size); - if (buffer->str == NULL) { - KMP_FATAL(MemoryAllocFailed); - } - KMP_MEMCPY_S(buffer->str, buffer->size, buffer->bulk, buffer->used + 1); - } -} // __kmp_str_buf_detach - -void __kmp_str_buf_free(kmp_str_buf_t *buffer) { - KMP_STR_BUF_INVARIANT(buffer); - if (buffer->size > sizeof(buffer->bulk)) { - KMP_INTERNAL_FREE(buffer->str); - } - buffer->str = buffer->bulk; - buffer->size = sizeof(buffer->bulk); - buffer->used = 0; - KMP_STR_BUF_INVARIANT(buffer); -} // __kmp_str_buf_free - -void __kmp_str_buf_cat(kmp_str_buf_t *buffer, char const *str, int len) { - KMP_STR_BUF_INVARIANT(buffer); - KMP_DEBUG_ASSERT(str != NULL); - KMP_DEBUG_ASSERT(len >= 0); - __kmp_str_buf_reserve(buffer, buffer->used + len + 1); - KMP_MEMCPY(buffer->str + buffer->used, str, len); - buffer->str[buffer->used + len] = 0; - buffer->used += len; - KMP_STR_BUF_INVARIANT(buffer); -} // __kmp_str_buf_cat - -void __kmp_str_buf_catbuf(kmp_str_buf_t *dest, const kmp_str_buf_t *src) { - KMP_DEBUG_ASSERT(dest); - KMP_DEBUG_ASSERT(src); - KMP_STR_BUF_INVARIANT(dest); - KMP_STR_BUF_INVARIANT(src); - if (!src->str || !src->used) - return; - __kmp_str_buf_reserve(dest, dest->used + src->used + 1); - KMP_MEMCPY(dest->str + dest->used, src->str, src->used); - dest->str[dest->used + src->used] = 0; - dest->used += src->used; - KMP_STR_BUF_INVARIANT(dest); -} // __kmp_str_buf_catbuf - -// Return the number of characters written -int __kmp_str_buf_vprint(kmp_str_buf_t *buffer, char const *format, - va_list args) { - int rc; - KMP_STR_BUF_INVARIANT(buffer); - - for (;;) { - int const free = buffer->size - buffer->used; - int size; - - // Try to format string. - { -/* On Linux* OS Intel(R) 64, vsnprintf() modifies args argument, so vsnprintf() - crashes if it is called for the second time with the same args. To prevent - the crash, we have to pass a fresh intact copy of args to vsnprintf() on each - iteration. - - Unfortunately, standard va_copy() macro is not available on Windows* OS. - However, it seems vsnprintf() does not modify args argument on Windows* OS. -*/ - -#if !KMP_OS_WINDOWS - va_list _args; - va_copy(_args, args); // Make copy of args. -#define args _args // Substitute args with its copy, _args. -#endif // KMP_OS_WINDOWS - rc = KMP_VSNPRINTF(buffer->str + buffer->used, free, format, args); -#if !KMP_OS_WINDOWS -#undef args // Remove substitution. - va_end(_args); -#endif // KMP_OS_WINDOWS - } - - // No errors, string has been formatted. - if (rc >= 0 && rc < free) { - buffer->used += rc; - break; - } - - // Error occurred, buffer is too small. - if (rc >= 0) { - // C99-conforming implementation of vsnprintf returns required buffer size - size = buffer->used + rc + 1; - } else { - // Older implementations just return -1. Double buffer size. - size = buffer->size * 2; - } - - // Enlarge buffer. - __kmp_str_buf_reserve(buffer, size); - - // And try again. - } - - KMP_DEBUG_ASSERT(buffer->size > 0); - KMP_STR_BUF_INVARIANT(buffer); - return rc; -} // __kmp_str_buf_vprint - -// Return the number of characters written -int __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format, ...) { - int rc; - va_list args; - va_start(args, format); - rc = __kmp_str_buf_vprint(buffer, format, args); - va_end(args); - return rc; -} // __kmp_str_buf_print - -/* The function prints specified size to buffer. Size is expressed using biggest - possible unit, for example 1024 is printed as "1k". 
*/ -void __kmp_str_buf_print_size(kmp_str_buf_t *buf, size_t size) { - char const *names[] = {"", "k", "M", "G", "T", "P", "E", "Z", "Y"}; - int const units = sizeof(names) / sizeof(char const *); - int u = 0; - if (size > 0) { - while ((size % 1024 == 0) && (u + 1 < units)) { - size = size / 1024; - ++u; - } - } - - __kmp_str_buf_print(buf, "%" KMP_SIZE_T_SPEC "%s", size, names[u]); -} // __kmp_str_buf_print_size - -void __kmp_str_fname_init(kmp_str_fname_t *fname, char const *path) { - fname->path = NULL; - fname->dir = NULL; - fname->base = NULL; - - if (path != NULL) { - char *slash = NULL; // Pointer to the last character of dir. - char *base = NULL; // Pointer to the beginning of basename. - fname->path = __kmp_str_format("%s", path); - // Original code used the strdup() function to copy the string, but on - // Windows* OS Intel(R) 64 it causes an assertion in the debug heap, so - // strdup() was replaced with __kmp_str_format(). - if (KMP_OS_WINDOWS) { - __kmp_str_replace(fname->path, '\\', '/'); - } - fname->dir = __kmp_str_format("%s", fname->path); - slash = strrchr(fname->dir, '/'); - if (KMP_OS_WINDOWS && - slash == NULL) { // On Windows* OS, if slash not found, - char first = TOLOWER(fname->dir[0]); // look for drive. - if ('a' <= first && first <= 'z' && fname->dir[1] == ':') { - slash = &fname->dir[1]; - } - } - base = (slash == NULL ? fname->dir : slash + 1); - fname->base = __kmp_str_format("%s", base); // Copy basename - *base = 0; // and truncate dir. - } - -} // __kmp_str_fname_init - -void __kmp_str_fname_free(kmp_str_fname_t *fname) { - __kmp_str_free(&fname->path); - __kmp_str_free(&fname->dir); - __kmp_str_free(&fname->base); -} // __kmp_str_fname_free - -int __kmp_str_fname_match(kmp_str_fname_t const *fname, char const *pattern) { - int dir_match = 1; - int base_match = 1; - - if (pattern != NULL) { - kmp_str_fname_t ptrn; - __kmp_str_fname_init(&ptrn, pattern); - dir_match = strcmp(ptrn.dir, "*/") == 0 || - (fname->dir != NULL && __kmp_str_eqf(fname->dir, ptrn.dir)); - base_match = strcmp(ptrn.base, "*") == 0 || - (fname->base != NULL && __kmp_str_eqf(fname->base, ptrn.base)); - __kmp_str_fname_free(&ptrn); - } - - return dir_match && base_match; -} // __kmp_str_fname_match - -kmp_str_loc_t __kmp_str_loc_init(char const *psource, int init_fname) { - kmp_str_loc_t loc; - - loc._bulk = NULL; - loc.file = NULL; - loc.func = NULL; - loc.line = 0; - loc.col = 0; - - if (psource != NULL) { - char *str = NULL; - char *dummy = NULL; - char *line = NULL; - char *col = NULL; - - // Copy psource to keep it intact. - loc._bulk = __kmp_str_format("%s", psource); - - // Parse psource string: ";file;func;line;col;;" - str = loc._bulk; - __kmp_str_split(str, ';', &dummy, &str); - __kmp_str_split(str, ';', &loc.file, &str); - __kmp_str_split(str, ';', &loc.func, &str); - __kmp_str_split(str, ';', &line, &str); - __kmp_str_split(str, ';', &col, &str); - - // Convert line and col into numeric values. - if (line != NULL) { - loc.line = atoi(line); - if (loc.line < 0) { - loc.line = 0; - } - } - if (col != NULL) { - loc.col = atoi(col); - if (loc.col < 0) { - loc.col = 0; - } - } - } - - __kmp_str_fname_init(&loc.fname, init_fname ? loc.file : NULL); - - return loc; -} // __kmp_str_loc_init - -void __kmp_str_loc_free(kmp_str_loc_t *loc) { - __kmp_str_fname_free(&loc->fname); - __kmp_str_free(&(loc->_bulk)); - loc->file = NULL; - loc->func = NULL; -} // __kmp_str_loc_free - -/* This function is intended to compare file names.
On Windows* OS file names - are case-insensitive, so the function performs a case-insensitive comparison. - On Linux* OS it performs a case-sensitive comparison. Note: The function - returns *true* if strings are *equal*. */ -int __kmp_str_eqf( // True, if strings are equal, false otherwise. - char const *lhs, // First string. - char const *rhs // Second string. - ) { - int result; -#if KMP_OS_WINDOWS - result = (_stricmp(lhs, rhs) == 0); -#else - result = (strcmp(lhs, rhs) == 0); -#endif - return result; -} // __kmp_str_eqf - -/* This function is like sprintf, but it *allocates* a new buffer, which must be - freed eventually by __kmp_str_free(). The function is very convenient for - constructing strings: it replaces strdup() and strcat(), frees the - programmer from buffer allocations, and helps to avoid buffer overflows. - Examples: - - str = __kmp_str_format("%s", orig); //strdup() doesn't care about buffer size - __kmp_str_free( & str ); - str = __kmp_str_format( "%s%s", orig1, orig2 ); // strcat(), doesn't care - // about buffer size. - __kmp_str_free( & str ); - str = __kmp_str_format( "%s/%s.txt", path, file ); // constructing string. - __kmp_str_free( & str ); - - Performance note: - This function allocates memory with malloc() calls, so do not call it from - performance-critical code. In performance-critical code consider using - kmp_str_buf_t instead, since it uses a stack-allocated buffer for short - strings. - - Why does this function use malloc()? - 1. __kmp_allocate() returns cache-aligned memory allocated with malloc(). - There is no reason to use __kmp_allocate() for strings: it adds extra - overhead, and cache-aligned memory is not necessary here. - 2. __kmp_thread_malloc() cannot be used because it requires a pointer to a - thread structure. We need to perform string operations during library - startup (for example, in __kmp_register_library_startup()) when no thread - structures are allocated yet. - So standard malloc() is the only available option. -*/ - -char *__kmp_str_format( // Allocated string. - char const *format, // Format string. - ... // Other parameters. - ) { - va_list args; - int size = 512; - char *buffer = NULL; - int rc; - - // Allocate buffer. - buffer = (char *)KMP_INTERNAL_MALLOC(size); - if (buffer == NULL) { - KMP_FATAL(MemoryAllocFailed); - } - - for (;;) { - // Try to format string. - va_start(args, format); - rc = KMP_VSNPRINTF(buffer, size, format, args); - va_end(args); - - // No errors, string has been formatted. - if (rc >= 0 && rc < size) { - break; - } - - // Error occurred, buffer is too small. - if (rc >= 0) { - // C99-conforming implementation of vsnprintf returns required buffer - // size. - size = rc + 1; - } else { - // Older implementations just return -1. - size = size * 2; - } - - // Enlarge buffer and try again. - buffer = (char *)KMP_INTERNAL_REALLOC(buffer, size); - if (buffer == NULL) { - KMP_FATAL(MemoryAllocFailed); - } - } - - return buffer; -} // func __kmp_str_format - -void __kmp_str_free(char **str) { - KMP_DEBUG_ASSERT(str != NULL); - KMP_INTERNAL_FREE(*str); - *str = NULL; -} // func __kmp_str_free - -/* If len is zero, returns true iff target and data have exact case-insensitive - match. If len is negative, returns true iff target is a case-insensitive - substring of data. If len is positive, returns true iff target is a - case-insensitive substring of data or vice versa, and neither is shorter than - len.
*/ -int __kmp_str_match(char const *target, int len, char const *data) { - int i; - if (target == NULL || data == NULL) { - return FALSE; - } - for (i = 0; target[i] && data[i]; ++i) { - if (TOLOWER(target[i]) != TOLOWER(data[i])) { - return FALSE; - } - } - return ((len > 0) ? i >= len : (!target[i] && (len || !data[i]))); -} // __kmp_str_match - -int __kmp_str_match_false(char const *data) { - int result = - __kmp_str_match("false", 1, data) || __kmp_str_match("off", 2, data) || - __kmp_str_match("0", 1, data) || __kmp_str_match(".false.", 2, data) || - __kmp_str_match(".f.", 2, data) || __kmp_str_match("no", 1, data) || - __kmp_str_match("disabled", 0, data); - return result; -} // __kmp_str_match_false - -int __kmp_str_match_true(char const *data) { - int result = - __kmp_str_match("true", 1, data) || __kmp_str_match("on", 2, data) || - __kmp_str_match("1", 1, data) || __kmp_str_match(".true.", 2, data) || - __kmp_str_match(".t.", 2, data) || __kmp_str_match("yes", 1, data) || - __kmp_str_match("enabled", 0, data); - return result; -} // __kmp_str_match_true - -void __kmp_str_replace(char *str, char search_for, char replace_with) { - char *found = NULL; - - found = strchr(str, search_for); - while (found) { - *found = replace_with; - found = strchr(found + 1, search_for); - } -} // __kmp_str_replace - -void __kmp_str_split(char *str, // I: String to split. - char delim, // I: Character to split on. - char **head, // O: Pointer to head (may be NULL). - char **tail // O: Pointer to tail (may be NULL). - ) { - char *h = str; - char *t = NULL; - if (str != NULL) { - char *ptr = strchr(str, delim); - if (ptr != NULL) { - *ptr = 0; - t = ptr + 1; - } - } - if (head != NULL) { - *head = h; - } - if (tail != NULL) { - *tail = t; - } -} // __kmp_str_split - -/* strtok_r() is not available on Windows* OS. This function reimplements - strtok_r(). */ -char *__kmp_str_token( - char *str, // String to split into tokens. Note: String *is* modified! - char const *delim, // Delimiters. - char **buf // Internal buffer. - ) { - char *token = NULL; -#if KMP_OS_WINDOWS - // On Windows* OS there is no strtok_r() function. Let us implement it. - if (str != NULL) { - *buf = str; // First call, initialize buf. - } - *buf += strspn(*buf, delim); // Skip leading delimiters. - if (**buf != 0) { // Rest of the string is not yet empty. - token = *buf; // Use it as result. - *buf += strcspn(*buf, delim); // Skip non-delimiters. - if (**buf != 0) { // Rest of the string is not yet empty. - **buf = 0; // Terminate token here. - *buf += 1; // Advance buf to start with the next token next time. - } - } -#else - // On Linux* OS and OS X*, strtok_r() is available. Let us use it. - token = strtok_r(str, delim, buf); -#endif - return token; -} // __kmp_str_token - -int __kmp_str_to_int(char const *str, char sentinel) { - int result, factor; - char const *t; - - result = 0; - - for (t = str; *t != '\0'; ++t) { - if (*t < '0' || *t > '9') - break; - result = (result * 10) + (*t - '0'); - } - - switch (*t) { - case '\0': /* the current default for no suffix is bytes */ - factor = 1; - break; - case 'b': - case 'B': /* bytes */ - ++t; - factor = 1; - break; - case 'k': - case 'K': /* kilo-bytes */ - ++t; - factor = 1024; - break; - case 'm': - case 'M': /* mega-bytes */ - ++t; - factor = (1024 * 1024); - break; - default: - if (*t != sentinel) - return (-1); - t = ""; - factor = 1; - } - - if (result > (INT_MAX / factor)) - result = INT_MAX; - else - result *= factor; - - return (*t != 0 ? 
0 : result); -} // __kmp_str_to_int - -/* The routine parses the input string. It is expected to be an unsigned integer - with an optional unit. Units are: "b" for bytes, "kb" or just "k" for - kilobytes, "mb" or "m" for megabytes, ..., "yb" or "y" for yottabytes. :-) - Unit names are case-insensitive. On success the routine sets *error to NULL - and *out to the parsed value; on failure it sets *error to an error message. - In case of overflow *out is set to KMP_SIZE_T_MAX; in case of a bad unit *out - is left unchanged. */ -void __kmp_str_to_size( // Errors are reported via the error parameter. - char const *str, // I: String of characters, unsigned number and unit ("b", - // "kb", etc). - size_t *out, // O: Parsed number. - size_t dfactor, // I: The factor if none of the letters specified. - char const **error // O: Null if everything is ok, error message otherwise. - ) { - - size_t value = 0; - size_t factor = 0; - int overflow = 0; - int i = 0; - int digit; - - KMP_DEBUG_ASSERT(str != NULL); - - // Skip spaces. - while (str[i] == ' ' || str[i] == '\t') { - ++i; - } - - // Parse number. - if (str[i] < '0' || str[i] > '9') { - *error = KMP_I18N_STR(NotANumber); - return; - } - do { - digit = str[i] - '0'; - overflow = overflow || (value > (KMP_SIZE_T_MAX - digit) / 10); - value = (value * 10) + digit; - ++i; - } while (str[i] >= '0' && str[i] <= '9'); - - // Skip spaces. - while (str[i] == ' ' || str[i] == '\t') { - ++i; - } - -// Parse unit. -#define _case(ch, exp) \ - case ch: \ - case ch - ('a' - 'A'): { \ - size_t shift = (exp)*10; \ - ++i; \ - if (shift < sizeof(size_t) * 8) { \ - factor = (size_t)(1) << shift; \ - } else { \ - overflow = 1; \ - } \ - } break; - switch (str[i]) { - _case('k', 1); // Kilo - _case('m', 2); // Mega - _case('g', 3); // Giga - _case('t', 4); // Tera - _case('p', 5); // Peta - _case('e', 6); // Exa - _case('z', 7); // Zetta - _case('y', 8); // Yotta - // Oops. No more units... - } -#undef _case - if (str[i] == 'b' || str[i] == 'B') { // Skip optional "b". - if (factor == 0) { - factor = 1; - } - ++i; - } - if (!(str[i] == ' ' || str[i] == '\t' || str[i] == 0)) { // Bad unit - *error = KMP_I18N_STR(BadUnit); - return; - } - - if (factor == 0) { - factor = dfactor; - } - - // Apply factor. - overflow = overflow || (value > (KMP_SIZE_T_MAX / factor)); - value *= factor; - - // Skip spaces. - while (str[i] == ' ' || str[i] == '\t') { - ++i; - } - - if (str[i] != 0) { - *error = KMP_I18N_STR(IllegalCharacters); - return; - } - - if (overflow) { - *error = KMP_I18N_STR(ValueTooLarge); - *out = KMP_SIZE_T_MAX; - return; - } - - *error = NULL; - *out = value; -} // __kmp_str_to_size - -void __kmp_str_to_uint( // Errors are reported via the error parameter. - char const *str, // I: String of characters, unsigned number. - kmp_uint64 *out, // O: Parsed number. - char const **error // O: Null if everything is ok, error message otherwise. - ) { - size_t value = 0; - int overflow = 0; - int i = 0; - int digit; - - KMP_DEBUG_ASSERT(str != NULL); - - // Skip spaces. - while (str[i] == ' ' || str[i] == '\t') { - ++i; - } - - // Parse number. - if (str[i] < '0' || str[i] > '9') { - *error = KMP_I18N_STR(NotANumber); - return; - } - do { - digit = str[i] - '0'; - overflow = overflow || (value > (KMP_SIZE_T_MAX - digit) / 10); - value = (value * 10) + digit; - ++i; - } while (str[i] >= '0' && str[i] <= '9'); - - // Skip spaces.
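As an aside, the unit-suffix convention that __kmp_str_to_size implements above can be summarized in a small standalone sketch. toy_parse_size is illustrative only: it handles just the first three unit letters and skips the overflow bookkeeping the runtime performs.

#include <cctype>
#include <cstddef>

// Sketch of the suffix rules: an optional SI letter selects a power-of-1024
// factor, and a trailing 'b'/'B' is skipped, so "512k" == "512kb".
static bool toy_parse_size(const char *s, std::size_t *out) {
  std::size_t value = 0;
  while (*s >= '0' && *s <= '9')
    value = value * 10 + (std::size_t)(*s++ - '0'); // overflow check elided
  std::size_t factor = 1;
  switch (std::tolower((unsigned char)*s)) {
  case 'k': factor = 1024ULL; ++s; break;
  case 'm': factor = 1024ULL * 1024; ++s; break;
  case 'g': factor = 1024ULL * 1024 * 1024; ++s; break;
  default: break; // no unit letter: factor stays 1
  }
  if (*s == 'b' || *s == 'B') // optional "b", as in "512kb"
    ++s;
  if (*s != '\0')
    return false; // trailing garbage, like the IllegalCharacters case above
  *out = value * factor;
  return true;
}
// toy_parse_size("512k", &n) yields n == 524288;
// toy_parse_size("1mb", &n) yields n == 1048576.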
- while (str[i] == ' ' || str[i] == '\t') { - ++i; - } - - if (str[i] != 0) { - *error = KMP_I18N_STR(IllegalCharacters); - return; - } - - if (overflow) { - *error = KMP_I18N_STR(ValueTooLarge); - *out = (kmp_uint64)-1; - return; - } - - *error = NULL; - *out = value; -} // __kmp_str_to_uint - -// end of file // Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_str.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_omp.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_omp.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_omp.h (nonexistent) @@ -1,242 +0,0 @@ -#if USE_DEBUGGER -/* - * kmp_omp.h -- OpenMP definition for kmp_omp_struct_info_t. - * This is for information about runtime library structures. - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -/* THIS FILE SHOULD NOT BE MODIFIED IN IDB INTERFACE LIBRARY CODE - It should instead be modified in the OpenMP runtime and copied to the - interface library code. This way we can minimize the problems that this is - sure to cause having two copies of the same file. - - Files live in libomp and libomp_db/src/include */ - -/* CHANGE THIS WHEN STRUCTURES BELOW CHANGE - Before we release this to a customer, please don't change this value. After - it is released and stable, any new updates to the structures or data - structure traversal algorithms need to change this value. */ -#define KMP_OMP_VERSION 9 - -typedef struct { - kmp_int32 offset; - kmp_int32 size; -} offset_and_size_t; - -typedef struct { - kmp_uint64 addr; - kmp_int32 size; - kmp_int32 padding; -} addr_and_size_t; - -typedef struct { - kmp_uint64 flags; // Flags for future extensions. - kmp_uint64 - file; // Pointer to name of source file where the parallel region is. - kmp_uint64 func; // Pointer to name of routine where the parallel region is. - kmp_int32 begin; // Beginning of source line range. - kmp_int32 end; // End of source line range. - kmp_int32 num_threads; // Specified number of threads. -} kmp_omp_nthr_item_t; - -typedef struct { - kmp_int32 num; // Number of items in the array. - kmp_uint64 array; // Address of array of kmp_omp_nthr_item_t. -} kmp_omp_nthr_info_t; - -/* This structure is known to the idb interface library */ -typedef struct { - - /* Change this only if you make a fundamental data structure change here */ - kmp_int32 lib_version; - - /* Sanity check. It should only be checked if the versions are identical. - * It is also used for backward compatibility, to get the runtime - * structure size if the runtime is older than the interface. */ - kmp_int32 sizeof_this_structure; - - /* OpenMP RTL version info. */ - addr_and_size_t major; - addr_and_size_t minor; - addr_and_size_t build; - addr_and_size_t openmp_version; - addr_and_size_t banner; - - /* Various globals.
*/ - addr_and_size_t threads; // Pointer to __kmp_threads. - addr_and_size_t roots; // Pointer to __kmp_root. - addr_and_size_t capacity; // Pointer to __kmp_threads_capacity. -#if KMP_USE_MONITOR - addr_and_size_t monitor; // Pointer to __kmp_monitor. -#endif -#if !KMP_USE_DYNAMIC_LOCK - addr_and_size_t lock_table; // Pointer to __kmp_lock_table. -#endif - addr_and_size_t func_microtask; - addr_and_size_t func_fork; - addr_and_size_t func_fork_teams; - addr_and_size_t team_counter; - addr_and_size_t task_counter; - addr_and_size_t nthr_info; - kmp_int32 address_width; - kmp_int32 indexed_locks; - kmp_int32 last_barrier; // The end in enum barrier_type - kmp_int32 deque_size; // TASK_DEQUE_SIZE - - /* thread structure information. */ - kmp_int32 th_sizeof_struct; - offset_and_size_t th_info; // descriptor for thread - offset_and_size_t th_team; // team for this thread - offset_and_size_t th_root; // root for this thread - offset_and_size_t th_serial_team; // serial team under this thread - offset_and_size_t th_ident; // location for this thread (if available) - offset_and_size_t th_spin_here; // is thread waiting for lock (if available) - offset_and_size_t - th_next_waiting; // next thread waiting for lock (if available) - offset_and_size_t th_task_team; // task team struct - offset_and_size_t th_current_task; // innermost task being executed - offset_and_size_t - th_task_state; // alternating 0/1 for task team identification - offset_and_size_t th_bar; - offset_and_size_t th_b_worker_arrived; // the worker increases it by 1 when it -// arrives to the barrier - -#if OMP_40_ENABLED - /* teams information */ - offset_and_size_t th_teams_microtask; // entry address for teams construct - offset_and_size_t th_teams_level; // initial level of teams construct - offset_and_size_t th_teams_nteams; // number of teams in a league - offset_and_size_t - th_teams_nth; // number of threads in each team of the league -#endif - - /* kmp_desc structure (for info field above) */ - kmp_int32 ds_sizeof_struct; - offset_and_size_t ds_tid; // team thread id - offset_and_size_t ds_gtid; // global thread id - offset_and_size_t ds_thread; // native thread id - - /* team structure information */ - kmp_int32 t_sizeof_struct; - offset_and_size_t t_master_tid; // tid of master in parent team - offset_and_size_t t_ident; // location of parallel region - offset_and_size_t t_parent; // parent team - offset_and_size_t t_nproc; // # team threads - offset_and_size_t t_threads; // array of threads - offset_and_size_t t_serialized; // # levels of serialized teams - offset_and_size_t t_id; // unique team id - offset_and_size_t t_pkfn; - offset_and_size_t t_task_team; // task team structure - offset_and_size_t t_implicit_task; // taskdata for the thread's implicit task -#if OMP_40_ENABLED - offset_and_size_t t_cancel_request; -#endif - offset_and_size_t t_bar; - offset_and_size_t - t_b_master_arrived; // increased by 1 when master arrives to a barrier - offset_and_size_t - t_b_team_arrived; // increased by one when all the threads arrived - - /* root structure information */ - kmp_int32 r_sizeof_struct; - offset_and_size_t r_root_team; // team at root - offset_and_size_t r_hot_team; // hot team for this root - offset_and_size_t r_uber_thread; // root thread - offset_and_size_t r_root_id; // unique root id (if available) - - /* ident structure information */ - kmp_int32 id_sizeof_struct; - offset_and_size_t - id_psource; /* address of string ";file;func;line1;line2;;". 
*/ - offset_and_size_t id_flags; - - /* lock structure information */ - kmp_int32 lk_sizeof_struct; - offset_and_size_t lk_initialized; - offset_and_size_t lk_location; - offset_and_size_t lk_tail_id; - offset_and_size_t lk_head_id; - offset_and_size_t lk_next_ticket; - offset_and_size_t lk_now_serving; - offset_and_size_t lk_owner_id; - offset_and_size_t lk_depth_locked; - offset_and_size_t lk_lock_flags; - -#if !KMP_USE_DYNAMIC_LOCK - /* lock_table_t */ - kmp_int32 lt_size_of_struct; /* Size and layout of kmp_lock_table_t. */ - offset_and_size_t lt_used; - offset_and_size_t lt_allocated; - offset_and_size_t lt_table; -#endif - - /* task_team_t */ - kmp_int32 tt_sizeof_struct; - offset_and_size_t tt_threads_data; - offset_and_size_t tt_found_tasks; - offset_and_size_t tt_nproc; - offset_and_size_t tt_unfinished_threads; - offset_and_size_t tt_active; - - /* kmp_taskdata_t */ - kmp_int32 td_sizeof_struct; - offset_and_size_t td_task_id; // task id - offset_and_size_t td_flags; // task flags - offset_and_size_t td_team; // team for this task - offset_and_size_t td_parent; // parent task - offset_and_size_t td_level; // task nesting level - offset_and_size_t td_ident; // task identifier - offset_and_size_t td_allocated_child_tasks; // child tasks (+ current task) - // not yet deallocated - offset_and_size_t td_incomplete_child_tasks; // child tasks not yet complete - - /* Taskwait */ - offset_and_size_t td_taskwait_ident; - offset_and_size_t td_taskwait_counter; - offset_and_size_t - td_taskwait_thread; // gtid + 1 of the thread that encountered taskwait - -#if OMP_40_ENABLED - /* Taskgroup */ - offset_and_size_t td_taskgroup; // pointer to the current taskgroup - offset_and_size_t - td_task_count; // number of allocated and not yet complete tasks - offset_and_size_t td_cancel; // request for cancellation of this taskgroup - - /* Task dependency */ - offset_and_size_t - td_depnode; // pointer to graph node if the task has dependencies - offset_and_size_t dn_node; - offset_and_size_t dn_next; - offset_and_size_t dn_successors; - offset_and_size_t dn_task; - offset_and_size_t dn_npredecessors; - offset_and_size_t dn_nrefs; -#endif - offset_and_size_t dn_routine; - - /* kmp_thread_data_t */ - kmp_int32 hd_sizeof_struct; - offset_and_size_t hd_deque; - offset_and_size_t hd_deque_size; - offset_and_size_t hd_deque_head; - offset_and_size_t hd_deque_tail; - offset_and_size_t hd_deque_ntasks; - offset_and_size_t hd_deque_last_stolen; - - // The last field of stable version.
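The offset_and_size_t entries above all follow one pattern: the runtime records where a field lives and how wide it is, so a debugger that cannot compile against the runtime's headers can still pull fields out of raw memory it has copied from the target process. A hypothetical C++ illustration of how such a descriptor is produced and consumed (all names here are toy stand-ins):

#include <cstddef>
#include <cstring>

// The (offset, size) pair the runtime publishes for each field.
struct toy_offset_and_size { int offset; int size; };

struct toy_taskdata { int td_task_id; int td_level; };

// Filled once by the runtime, read by the out-of-process debugger.
static const toy_offset_and_size toy_td_level_desc = {
    (int)offsetof(toy_taskdata, td_level), // where the field lives
    (int)sizeof(int)                       // how wide it is
};

// Given raw bytes of a toy_taskdata, extract the field without ever
// knowing the full struct layout at compile time.
static int toy_read_td_level(const unsigned char *raw) {
  int v;
  std::memcpy(&v, raw + toy_td_level_desc.offset, toy_td_level_desc.size);
  return v;
}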
- kmp_uint64 last_field; - -} kmp_omp_struct_info_t; - -#endif /* USE_DEBUGGER */ - -/* end of file */ Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_omp.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_affinity.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_affinity.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_affinity.h (nonexistent) @@ -1,828 +0,0 @@ -/* - * kmp_affinity.h -- header for affinity management - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#ifndef KMP_AFFINITY_H -#define KMP_AFFINITY_H - -#include "kmp.h" -#include "kmp_os.h" - -#if KMP_AFFINITY_SUPPORTED -#if KMP_USE_HWLOC -class KMPHwlocAffinity : public KMPAffinity { -public: - class Mask : public KMPAffinity::Mask { - hwloc_cpuset_t mask; - - public: - Mask() { - mask = hwloc_bitmap_alloc(); - this->zero(); - } - ~Mask() { hwloc_bitmap_free(mask); } - void set(int i) override { hwloc_bitmap_set(mask, i); } - bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); } - void clear(int i) override { hwloc_bitmap_clr(mask, i); } - void zero() override { hwloc_bitmap_zero(mask); } - void copy(const KMPAffinity::Mask *src) override { - const Mask *convert = static_cast<const Mask *>(src); - hwloc_bitmap_copy(mask, convert->mask); - } - void bitwise_and(const KMPAffinity::Mask *rhs) override { - const Mask *convert = static_cast<const Mask *>(rhs); - hwloc_bitmap_and(mask, mask, convert->mask); - } - void bitwise_or(const KMPAffinity::Mask *rhs) override { - const Mask *convert = static_cast<const Mask *>(rhs); - hwloc_bitmap_or(mask, mask, convert->mask); - } - void bitwise_not() override { hwloc_bitmap_not(mask, mask); } - int begin() const override { return hwloc_bitmap_first(mask); } - int end() const override { return -1; } - int next(int previous) const override { - return hwloc_bitmap_next(mask, previous); - } - int get_system_affinity(bool abort_on_error) override { - KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), - "Illegal get affinity operation when not capable"); - int retval = - hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD); - if (retval >= 0) { - return 0; - } - int error = errno; - if (abort_on_error) { - __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null); - } - return error; - } - int set_system_affinity(bool abort_on_error) const override { - KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), - "Illegal set affinity operation when not capable"); - int retval = - hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD); - if (retval >= 0) { - return 0; - } - int error = errno; - if (abort_on_error) { - __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null); - } - return error; - } - int get_proc_group() const override { - int group = -1; -#if KMP_OS_WINDOWS - if (__kmp_num_proc_groups == 1) { - return 1; - } - for (int i =
0; i < __kmp_num_proc_groups; i++) { - // On Windows, the long type is always 32 bits - unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2); - unsigned long second_32_bits = - hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1); - if (first_32_bits == 0 && second_32_bits == 0) { - continue; - } - if (group >= 0) { - return -1; - } - group = i; - } -#endif /* KMP_OS_WINDOWS */ - return group; - } - }; - void determine_capable(const char *var) override { - const hwloc_topology_support *topology_support; - if (__kmp_hwloc_topology == NULL) { - if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) { - __kmp_hwloc_error = TRUE; - if (__kmp_affinity_verbose) - KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()"); - } - if (hwloc_topology_load(__kmp_hwloc_topology) < 0) { - __kmp_hwloc_error = TRUE; - if (__kmp_affinity_verbose) - KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()"); - } - } - topology_support = hwloc_topology_get_support(__kmp_hwloc_topology); - // Is the system capable of setting/getting this thread's affinity? - // Also, is topology discovery possible? (pu indicates ability to discover - // processing units). And finally, were there no errors when calling any - // hwloc_* API functions? - if (topology_support && topology_support->cpubind->set_thisthread_cpubind && - topology_support->cpubind->get_thisthread_cpubind && - topology_support->discovery->pu && !__kmp_hwloc_error) { - // enables affinity according to KMP_AFFINITY_CAPABLE() macro - KMP_AFFINITY_ENABLE(TRUE); - } else { - // indicate that hwloc didn't work and disable affinity - __kmp_hwloc_error = TRUE; - KMP_AFFINITY_DISABLE(); - } - } - void bind_thread(int which) override { - KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), - "Illegal set affinity operation when not capable"); - KMPAffinity::Mask *mask; - KMP_CPU_ALLOC_ON_STACK(mask); - KMP_CPU_ZERO(mask); - KMP_CPU_SET(which, mask); - __kmp_set_system_affinity(mask, TRUE); - KMP_CPU_FREE_FROM_STACK(mask); - } - KMPAffinity::Mask *allocate_mask() override { return new Mask(); } - void deallocate_mask(KMPAffinity::Mask *m) override { delete m; } - KMPAffinity::Mask *allocate_mask_array(int num) override { - return new Mask[num]; - } - void deallocate_mask_array(KMPAffinity::Mask *array) override { - Mask *hwloc_array = static_cast<Mask *>(array); - delete[] hwloc_array; - } - KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array, - int index) override { - Mask *hwloc_array = static_cast<Mask *>(array); - return &(hwloc_array[index]); - } - api_type get_api_type() const override { return HWLOC; } -}; -#endif /* KMP_USE_HWLOC */ - -#if KMP_OS_LINUX -/* On some of the older OS's that we build on, these constants aren't present - in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on - all systems of the same arch where they are defined, and they cannot change; - they are set in stone forever. */ -#include <sys/syscall.h> -#if KMP_ARCH_X86 || KMP_ARCH_ARM -#ifndef __NR_sched_setaffinity -#define __NR_sched_setaffinity 241 -#elif __NR_sched_setaffinity != 241 -#error Wrong code for setaffinity system call. -#endif /* __NR_sched_setaffinity */ -#ifndef __NR_sched_getaffinity -#define __NR_sched_getaffinity 242 -#elif __NR_sched_getaffinity != 242 -#error Wrong code for getaffinity system call. -#endif /* __NR_sched_getaffinity */ -#elif KMP_ARCH_AARCH64 -#ifndef __NR_sched_setaffinity -#define __NR_sched_setaffinity 122 -#elif __NR_sched_setaffinity != 122 -#error Wrong code for setaffinity system call.
-#endif /* __NR_sched_setaffinity */ -#ifndef __NR_sched_getaffinity -#define __NR_sched_getaffinity 123 -#elif __NR_sched_getaffinity != 123 -#error Wrong code for getaffinity system call. -#endif /* __NR_sched_getaffinity */ -#elif KMP_ARCH_X86_64 -#ifndef __NR_sched_setaffinity -#define __NR_sched_setaffinity 203 -#elif __NR_sched_setaffinity != 203 -#error Wrong code for setaffinity system call. -#endif /* __NR_sched_setaffinity */ -#ifndef __NR_sched_getaffinity -#define __NR_sched_getaffinity 204 -#elif __NR_sched_getaffinity != 204 -#error Wrong code for getaffinity system call. -#endif /* __NR_sched_getaffinity */ -#elif KMP_ARCH_PPC64 -#ifndef __NR_sched_setaffinity -#define __NR_sched_setaffinity 222 -#elif __NR_sched_setaffinity != 222 -#error Wrong code for setaffinity system call. -#endif /* __NR_sched_setaffinity */ -#ifndef __NR_sched_getaffinity -#define __NR_sched_getaffinity 223 -#elif __NR_sched_getaffinity != 223 -#error Wrong code for getaffinity system call. -#endif /* __NR_sched_getaffinity */ -#elif KMP_ARCH_MIPS -#ifndef __NR_sched_setaffinity -#define __NR_sched_setaffinity 4239 -#elif __NR_sched_setaffinity != 4239 -#error Wrong code for setaffinity system call. -#endif /* __NR_sched_setaffinity */ -#ifndef __NR_sched_getaffinity -#define __NR_sched_getaffinity 4240 -#elif __NR_sched_getaffinity != 4240 -#error Wrong code for getaffinity system call. -#endif /* __NR_sched_getaffinity */ -#elif KMP_ARCH_MIPS64 -#ifndef __NR_sched_setaffinity -#define __NR_sched_setaffinity 5195 -#elif __NR_sched_setaffinity != 5195 -#error Wrong code for setaffinity system call. -#endif /* __NR_sched_setaffinity */ -#ifndef __NR_sched_getaffinity -#define __NR_sched_getaffinity 5196 -#elif __NR_sched_getaffinity != 5196 -#error Wrong code for getaffinity system call. 
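The syscall numbers pinned down above exist so that the affinity mask can be passed to the kernel directly, bypassing glibc's fixed-size cpu_set_t. A minimal standalone illustration of that raw call follows; the 128-byte mask is an arbitrary choice for the example, whereas the runtime probes a working size into __kmp_affin_mask_size at startup.

#include <sys/syscall.h>
#include <unistd.h>
#include <cstdio>

int main() {
  // Plain byte array instead of cpu_set_t: the kernel just wants a
  // buffer of at least its internal cpumask size.
  unsigned char mask[128] = {0};
  long rc = syscall(__NR_sched_getaffinity, 0 /* current thread */,
                    sizeof(mask), mask);
  if (rc < 0) {
    perror("sched_getaffinity");
    return 1;
  }
  // Bit i of the buffer corresponds to logical CPU i.
  for (unsigned i = 0; i < sizeof(mask) * 8; ++i)
    if (mask[i / 8] & (1u << (i % 8)))
      std::printf("cpu %u is in the affinity mask\n", i);
  return 0;
}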
-#endif /* __NR_sched_getaffinity */ -#else -#error Unknown or unsupported architecture -#endif /* KMP_ARCH_* */ -class KMPNativeAffinity : public KMPAffinity { - class Mask : public KMPAffinity::Mask { - typedef unsigned char mask_t; - static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT; - - public: - mask_t *mask; - Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); } - ~Mask() { - if (mask) - __kmp_free(mask); - } - void set(int i) override { - mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); - } - bool is_set(int i) const override { - return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); - } - void clear(int i) override { - mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); - } - void zero() override { - for (size_t i = 0; i < __kmp_affin_mask_size; ++i) - mask[i] = 0; - } - void copy(const KMPAffinity::Mask *src) override { - const Mask *convert = static_cast<const Mask *>(src); - for (size_t i = 0; i < __kmp_affin_mask_size; ++i) - mask[i] = convert->mask[i]; - } - void bitwise_and(const KMPAffinity::Mask *rhs) override { - const Mask *convert = static_cast<const Mask *>(rhs); - for (size_t i = 0; i < __kmp_affin_mask_size; ++i) - mask[i] &= convert->mask[i]; - } - void bitwise_or(const KMPAffinity::Mask *rhs) override { - const Mask *convert = static_cast<const Mask *>(rhs); - for (size_t i = 0; i < __kmp_affin_mask_size; ++i) - mask[i] |= convert->mask[i]; - } - void bitwise_not() override { - for (size_t i = 0; i < __kmp_affin_mask_size; ++i) - mask[i] = ~(mask[i]); - } - int begin() const override { - int retval = 0; - while (retval < end() && !is_set(retval)) - ++retval; - return retval; - } - int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; } - int next(int previous) const override { - int retval = previous + 1; - while (retval < end() && !is_set(retval)) - ++retval; - return retval; - } - int get_system_affinity(bool abort_on_error) override { - KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), - "Illegal get affinity operation when not capable"); - int retval = - syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask); - if (retval >= 0) { - return 0; - } - int error = errno; - if (abort_on_error) { - __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null); - } - return error; - } - int set_system_affinity(bool abort_on_error) const override { - KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), - "Illegal set affinity operation when not capable"); - int retval = - syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask); - if (retval >= 0) { - return 0; - } - int error = errno; - if (abort_on_error) { - __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null); - } - return error; - } - }; - void determine_capable(const char *env_var) override { - __kmp_affinity_determine_capable(env_var); - } - void bind_thread(int which) override { __kmp_affinity_bind_thread(which); } - KMPAffinity::Mask *allocate_mask() override { - KMPNativeAffinity::Mask *retval = new Mask(); - return retval; - } - void deallocate_mask(KMPAffinity::Mask *m) override { - KMPNativeAffinity::Mask *native_mask = - static_cast<KMPNativeAffinity::Mask *>(m); - delete native_mask; - } - KMPAffinity::Mask *allocate_mask_array(int num) override { - return new Mask[num]; - } - void deallocate_mask_array(KMPAffinity::Mask *array) override { - Mask *linux_array = static_cast<Mask *>(array); - delete[] linux_array; - } - KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array, - int index) override { - Mask *linux_array = static_cast<Mask *>(array); - return &(linux_array[index]); - } -
api_type get_api_type() const override { return NATIVE_OS; } -}; -#endif /* KMP_OS_LINUX */ - -#if KMP_OS_WINDOWS -class KMPNativeAffinity : public KMPAffinity { - class Mask : public KMPAffinity::Mask { - typedef ULONG_PTR mask_t; - static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT; - mask_t *mask; - - public: - Mask() { - mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups); - } - ~Mask() { - if (mask) - __kmp_free(mask); - } - void set(int i) override { - mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); - } - bool is_set(int i) const override { - return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); - } - void clear(int i) override { - mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); - } - void zero() override { - for (int i = 0; i < __kmp_num_proc_groups; ++i) - mask[i] = 0; - } - void copy(const KMPAffinity::Mask *src) override { - const Mask *convert = static_cast<const Mask *>(src); - for (int i = 0; i < __kmp_num_proc_groups; ++i) - mask[i] = convert->mask[i]; - } - void bitwise_and(const KMPAffinity::Mask *rhs) override { - const Mask *convert = static_cast<const Mask *>(rhs); - for (int i = 0; i < __kmp_num_proc_groups; ++i) - mask[i] &= convert->mask[i]; - } - void bitwise_or(const KMPAffinity::Mask *rhs) override { - const Mask *convert = static_cast<const Mask *>(rhs); - for (int i = 0; i < __kmp_num_proc_groups; ++i) - mask[i] |= convert->mask[i]; - } - void bitwise_not() override { - for (int i = 0; i < __kmp_num_proc_groups; ++i) - mask[i] = ~(mask[i]); - } - int begin() const override { - int retval = 0; - while (retval < end() && !is_set(retval)) - ++retval; - return retval; - } - int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; } - int next(int previous) const override { - int retval = previous + 1; - while (retval < end() && !is_set(retval)) - ++retval; - return retval; - } - int set_system_affinity(bool abort_on_error) const override { - if (__kmp_num_proc_groups > 1) { - // Check for a valid mask. - GROUP_AFFINITY ga; - int group = get_proc_group(); - if (group < 0) { - if (abort_on_error) { - KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity"); - } - return -1; - } - // Transform the bit vector into a GROUP_AFFINITY struct - // and make the system call to set affinity.
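Before the GROUP_AFFINITY transformation below, it helps to see the index arithmetic in isolation: with 64-bit ULONG_PTR masks, a flat CPU index splits into a processor-group number and a bit within that group's mask. A toy sketch, where BITS_PER_GROUP mirrors BITS_PER_MASK_T and is an assumption of the illustration:

#include <utility>

constexpr int BITS_PER_GROUP = 64; // bits in one 64-bit ULONG_PTR mask

// Map a flat CPU index onto Windows processor-group coordinates.
static std::pair<int, int> toy_group_of(int flat_cpu_index) {
  return { flat_cpu_index / BITS_PER_GROUP,   // GROUP_AFFINITY::Group
           flat_cpu_index % BITS_PER_GROUP }; // bit within ::Mask
}
// toy_group_of(70) == {1, 6}: CPU 70 is bit 6 of group 1's mask.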
- ga.Group = group; - ga.Mask = mask[group]; - ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0; - - KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL); - if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) { - DWORD error = GetLastError(); - if (abort_on_error) { - __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error), - __kmp_msg_null); - } - return error; - } - } else { - if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) { - DWORD error = GetLastError(); - if (abort_on_error) { - __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error), - __kmp_msg_null); - } - return error; - } - } - return 0; - } - int get_system_affinity(bool abort_on_error) override { - if (__kmp_num_proc_groups > 1) { - this->zero(); - GROUP_AFFINITY ga; - KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL); - if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) { - DWORD error = GetLastError(); - if (abort_on_error) { - __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"), - KMP_ERR(error), __kmp_msg_null); - } - return error; - } - if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) || - (ga.Mask == 0)) { - return -1; - } - mask[ga.Group] = ga.Mask; - } else { - mask_t newMask, sysMask, retval; - if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) { - DWORD error = GetLastError(); - if (abort_on_error) { - __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"), - KMP_ERR(error), __kmp_msg_null); - } - return error; - } - retval = SetThreadAffinityMask(GetCurrentThread(), newMask); - if (!retval) { - DWORD error = GetLastError(); - if (abort_on_error) { - __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"), - KMP_ERR(error), __kmp_msg_null); - } - return error; - } - newMask = SetThreadAffinityMask(GetCurrentThread(), retval); - if (!newMask) { - DWORD error = GetLastError(); - if (abort_on_error) { - __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"), - KMP_ERR(error), __kmp_msg_null); - } - } - *mask = retval; - } - return 0; - } - int get_proc_group() const override { - int group = -1; - if (__kmp_num_proc_groups == 1) { - return 1; - } - for (int i = 0; i < __kmp_num_proc_groups; i++) { - if (mask[i] == 0) - continue; - if (group >= 0) - return -1; - group = i; - } - return group; - } - }; - void determine_capable(const char *env_var) override { - __kmp_affinity_determine_capable(env_var); - } - void bind_thread(int which) override { __kmp_affinity_bind_thread(which); } - KMPAffinity::Mask *allocate_mask() override { return new Mask(); } - void deallocate_mask(KMPAffinity::Mask *m) override { delete m; } - KMPAffinity::Mask *allocate_mask_array(int num) override { - return new Mask[num]; - } - void deallocate_mask_array(KMPAffinity::Mask *array) override { - Mask *windows_array = static_cast<Mask *>(array); - delete[] windows_array; - } - KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array, - int index) override { - Mask *windows_array = static_cast<Mask *>(array); - return &(windows_array[index]); - } - api_type get_api_type() const override { return NATIVE_OS; } -}; -#endif /* KMP_OS_WINDOWS */ -#endif /* KMP_AFFINITY_SUPPORTED */ - -class Address { -public: - static const unsigned maxDepth = 32; - unsigned labels[maxDepth]; - unsigned childNums[maxDepth]; - unsigned depth; - unsigned leader; - Address(unsigned _depth) : depth(_depth), leader(FALSE) {} - Address &operator=(const Address &b) { - depth = b.depth; - for (unsigned i = 0; i < depth; i++) { - labels[i] = b.labels[i]; - childNums[i] =
b.childNums[i]; - } - leader = FALSE; - return *this; - } - bool operator==(const Address &b) const { - if (depth != b.depth) - return false; - for (unsigned i = 0; i < depth; i++) - if (labels[i] != b.labels[i]) - return false; - return true; - } - bool isClose(const Address &b, int level) const { - if (depth != b.depth) - return false; - if ((unsigned)level >= depth) - return true; - for (unsigned i = 0; i < (depth - level); i++) - if (labels[i] != b.labels[i]) - return false; - return true; - } - bool operator!=(const Address &b) const { return !operator==(b); } - void print() const { - unsigned i; - printf("Depth: %u --- ", depth); - for (i = 0; i < depth; i++) { - printf("%u ", labels[i]); - } - } -}; - -class AddrUnsPair { -public: - Address first; - unsigned second; - AddrUnsPair(Address _first, unsigned _second) - : first(_first), second(_second) {} - AddrUnsPair &operator=(const AddrUnsPair &b) { - first = b.first; - second = b.second; - return *this; - } - void print() const { - printf("first = "); - first.print(); - printf(" --- second = %u", second); - } - bool operator==(const AddrUnsPair &b) const { - if (first != b.first) - return false; - if (second != b.second) - return false; - return true; - } - bool operator!=(const AddrUnsPair &b) const { return !operator==(b); } -}; - -static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) { - const Address *aa = &(((const AddrUnsPair *)a)->first); - const Address *bb = &(((const AddrUnsPair *)b)->first); - unsigned depth = aa->depth; - unsigned i; - KMP_DEBUG_ASSERT(depth == bb->depth); - for (i = 0; i < depth; i++) { - if (aa->labels[i] < bb->labels[i]) - return -1; - if (aa->labels[i] > bb->labels[i]) - return 1; - } - return 0; -} - -/* A structure for holding machine-specific hierarchy info to be computed once - at init. This structure represents a mapping of threads to the actual machine - hierarchy, or to our best guess at what the hierarchy might be, for the - purpose of performing an efficient barrier. In the worst case, when there is - no machine hierarchy information, it produces a tree suitable for a barrier, - similar to the tree used in the hyper barrier. */ -class hierarchy_info { -public: - /* Good default values for number of leaves and branching factor, given no - affinity information. Behaves a bit like hyper barrier. */ - static const kmp_uint32 maxLeaves = 4; - static const kmp_uint32 minBranch = 4; - /** Number of levels in the hierarchy. Typical levels are threads/core, - cores/package or socket, packages/node, nodes/machine, etc. We don't want - to get specific with nomenclature. When the machine is oversubscribed we - add levels to duplicate the hierarchy, doubling the thread capacity of the - hierarchy each time we add a level. */ - kmp_uint32 maxLevels; - - /** This is specifically the depth of the machine configuration hierarchy, in - terms of the number of levels along the longest path from root to any - leaf. It corresponds to the number of entries in numPerLevel if we exclude - all but one trailing 1. */ - kmp_uint32 depth; - kmp_uint32 base_num_threads; - enum init_status { initialized = 0, not_initialized = 1, initializing = 2 }; - volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, - // 2=initialization in progress - volatile kmp_int8 resizing; // 0=not resizing, 1=resizing - - /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children - the parent of a node at level i has. 
For example, if we have a machine - with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel = - {2, 4, 4, 1, 1}. All empty levels are set to 1. */ - kmp_uint32 *numPerLevel; - kmp_uint32 *skipPerLevel; - - void deriveLevels(AddrUnsPair *adr2os, int num_addrs) { - int hier_depth = adr2os[0].first.depth; - int level = 0; - for (int i = hier_depth - 1; i >= 0; --i) { - int max = -1; - for (int j = 0; j < num_addrs; ++j) { - int next = adr2os[j].first.childNums[i]; - if (next > max) - max = next; - } - numPerLevel[level] = max + 1; - ++level; - } - } - - hierarchy_info() - : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {} - - void fini() { - if (!uninitialized && numPerLevel) { - __kmp_free(numPerLevel); - numPerLevel = NULL; - uninitialized = not_initialized; - } - } - - void init(AddrUnsPair *adr2os, int num_addrs) { - kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8( - &uninitialized, not_initialized, initializing); - if (bool_result == 0) { // Wait for initialization - while (TCR_1(uninitialized) != initialized) - KMP_CPU_PAUSE(); - return; - } - KMP_DEBUG_ASSERT(bool_result == 1); - - /* Added explicit initialization of the data fields here to prevent usage of - dirty value observed when static library is re-initialized multiple times - (e.g. when non-OpenMP thread repeatedly launches/joins thread that uses - OpenMP). */ - depth = 1; - resizing = 0; - maxLevels = 7; - numPerLevel = - (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32)); - skipPerLevel = &(numPerLevel[maxLevels]); - for (kmp_uint32 i = 0; i < maxLevels; - ++i) { // init numPerLevel[*] to 1 item per level - numPerLevel[i] = 1; - skipPerLevel[i] = 1; - } - - // Sort table by physical ID - if (adr2os) { - qsort(adr2os, num_addrs, sizeof(*adr2os), - __kmp_affinity_cmp_Address_labels); - deriveLevels(adr2os, num_addrs); - } else { - numPerLevel[0] = maxLeaves; - numPerLevel[1] = num_addrs / maxLeaves; - if (num_addrs % maxLeaves) - numPerLevel[1]++; - } - - base_num_threads = num_addrs; - for (int i = maxLevels - 1; i >= 0; - --i) // count non-empty levels to get depth - if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1' - depth++; - - kmp_uint32 branch = minBranch; - if (numPerLevel[0] == 1) - branch = num_addrs / maxLeaves; - if (branch < minBranch) - branch = minBranch; - for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width - while (numPerLevel[d] > branch || - (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0! 
- if (numPerLevel[d] & 1) - numPerLevel[d]++; - numPerLevel[d] = numPerLevel[d] >> 1; - if (numPerLevel[d + 1] == 1) - depth++; - numPerLevel[d + 1] = numPerLevel[d + 1] << 1; - } - if (numPerLevel[0] == 1) { - branch = branch >> 1; - if (branch < 4) - branch = minBranch; - } - } - - for (kmp_uint32 i = 1; i < depth; ++i) - skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1]; - // Fill in hierarchy in the case of oversubscription - for (kmp_uint32 i = depth; i < maxLevels; ++i) - skipPerLevel[i] = 2 * skipPerLevel[i - 1]; - - uninitialized = initialized; // One writer - } - - // Resize the hierarchy if nproc changes to something larger than before - void resize(kmp_uint32 nproc) { - kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1); - while (bool_result == 0) { // someone else is trying to resize - KMP_CPU_PAUSE(); - if (nproc <= base_num_threads) // happy with other thread's resize - return; - else // try to resize - bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1); - } - KMP_DEBUG_ASSERT(bool_result != 0); - if (nproc <= base_num_threads) - return; // happy with other thread's resize - - // Calculate new maxLevels - kmp_uint32 old_sz = skipPerLevel[depth - 1]; - kmp_uint32 incs = 0, old_maxLevels = maxLevels; - // First see if old maxLevels is enough to contain new size - for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) { - skipPerLevel[i] = 2 * skipPerLevel[i - 1]; - numPerLevel[i - 1] *= 2; - old_sz *= 2; - depth++; - } - if (nproc > old_sz) { // Not enough space, need to expand hierarchy - while (nproc > old_sz) { - old_sz *= 2; - incs++; - depth++; - } - maxLevels += incs; - - // Resize arrays - kmp_uint32 *old_numPerLevel = numPerLevel; - kmp_uint32 *old_skipPerLevel = skipPerLevel; - numPerLevel = skipPerLevel = NULL; - numPerLevel = - (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32)); - skipPerLevel = &(numPerLevel[maxLevels]); - - // Copy old elements from old arrays - for (kmp_uint32 i = 0; i < old_maxLevels; - ++i) { // init numPerLevel[*] to 1 item per level - numPerLevel[i] = old_numPerLevel[i]; - skipPerLevel[i] = old_skipPerLevel[i]; - } - - // Init new elements in arrays to 1 - for (kmp_uint32 i = old_maxLevels; i < maxLevels; - ++i) { // init numPerLevel[*] to 1 item per level - numPerLevel[i] = 1; - skipPerLevel[i] = 1; - } - - // Free old arrays - __kmp_free(old_numPerLevel); - } - - // Fill in oversubscription levels of hierarchy - for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) - skipPerLevel[i] = 2 * skipPerLevel[i - 1]; - - base_num_threads = nproc; - resizing = 0; // One writer - } -}; -#endif // KMP_AFFINITY_H Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_affinity.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_stats_timing.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_stats_timing.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_stats_timing.cpp (nonexistent) @@ -1,131 +0,0 @@ -/** @file kmp_stats_timing.cpp - * Timing functions - */ - 
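To make the hierarchy arrays built by hierarchy_info::init above concrete, here is a worked example for the machine documented earlier (2 threads per core, 4 cores per package, 4 packages). The names mirror the runtime's fields, but the program itself is only an illustration:

#include <cstdio>

int main() {
  // numPerLevel[i] = children per node at level i (leaves at level 0).
  const unsigned numPerLevel[] = {2, 4, 4};
  // skipPerLevel is the running product: how many gtids one subtree
  // at level i spans, exactly as init() computes it.
  unsigned skipPerLevel[3] = {1, 0, 0};
  for (int i = 1; i < 3; ++i)
    skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
  // Prints 1 2 8: a core spans 2 threads, a package spans 8.
  for (int i = 0; i < 3; ++i)
    std::printf("%u ", skipPerLevel[i]);
  std::printf("\n");
  return 0;
}

Oversubscription then simply keeps doubling past the deepest real level, which is why init() and resize() fill the remaining entries with skipPerLevel[i] = 2 * skipPerLevel[i - 1].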
-//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#include <stdlib.h> -#include <unistd.h> - -#include <iostream> -#include <iomanip> -#include <sstream> - -#include "kmp.h" -#include "kmp_stats_timing.h" - -using namespace std; - -#if KMP_HAVE_TICK_TIME -#if KMP_MIC -double tsc_tick_count::tick_time() { - // pretty bad assumption of 1GHz clock for MIC - return 1 / ((double)1000 * 1.e6); -} -#elif KMP_ARCH_X86 || KMP_ARCH_X86_64 -#include <string.h> -// Extract the value from the CPUID information -double tsc_tick_count::tick_time() { - static double result = 0.0; - - if (result == 0.0) { - kmp_cpuid_t cpuinfo; - char brand[256]; - - __kmp_x86_cpuid(0x80000000, 0, &cpuinfo); - memset(brand, 0, sizeof(brand)); - int ids = cpuinfo.eax; - - for (unsigned int i = 2; i < (ids ^ 0x80000000) + 2; i++) - __kmp_x86_cpuid(i | 0x80000000, 0, - (kmp_cpuid_t *)(brand + (i - 2) * sizeof(kmp_cpuid_t))); - - char *start = &brand[0]; - for (; *start == ' '; start++) - ; - - char *end = brand + KMP_STRLEN(brand) - 3; - uint64_t multiplier; - - if (*end == 'M') - multiplier = 1000LL * 1000LL; - else if (*end == 'G') - multiplier = 1000LL * 1000LL * 1000LL; - else if (*end == 'T') - multiplier = 1000LL * 1000LL * 1000LL * 1000LL; - else { - cout << "Error determining multiplier '" << *end << "'\n"; - exit(-1); - } - *end = 0; - while (*end != ' ') - end--; - end++; - - double freq = strtod(end, &start); - if (freq == 0.0) { - cout << "Error calculating frequency " << end << "\n"; - exit(-1); - } - - result = ((double)1.0) / (freq * multiplier); - } - return result; -} -#endif -#endif - -static bool useSI = true; - -// Return a formatted string after normalising the value into -// engineering style and using a suitable unit prefix (e.g. ms, us, ns). -std::string formatSI(double interval, int width, char unit) { - std::stringstream os; - - if (useSI) { - // Preserve accuracy for small numbers, since we only multiply and the - // positive powers of ten are precisely representable. - static struct { - double scale; - char prefix; - } ranges[] = {{1.e21, 'y'}, {1.e18, 'z'}, {1.e15, 'a'}, {1.e12, 'f'}, - {1.e9, 'p'}, {1.e6, 'n'}, {1.e3, 'u'}, {1.0, 'm'}, - {1.e-3, ' '}, {1.e-6, 'k'}, {1.e-9, 'M'}, {1.e-12, 'G'}, - {1.e-15, 'T'}, {1.e-18, 'P'}, {1.e-21, 'E'}, {1.e-24, 'Z'}, - {1.e-27, 'Y'}}; - - if (interval == 0.0) { - os << std::setw(width - 3) << std::right << "0.00" << std::setw(3) - << unit; - return os.str(); - } - - bool negative = false; - if (interval < 0.0) { - negative = true; - interval = -interval; - } - - for (int i = 0; i < (int)(sizeof(ranges) / sizeof(ranges[0])); i++) { - if (interval * ranges[i].scale < 1.e0) { - interval = interval * 1000.e0 * ranges[i].scale; - os << std::fixed << std::setprecision(2) << std::setw(width - 3) - << std::right << (negative ?
- << ranges[i].prefix << std::setw(1) << unit; - - return os.str(); - } - } - } - os << std::setprecision(2) << std::fixed << std::right << std::setw(width - 3) - << interval << std::setw(3) << unit; - - return os.str(); -} Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_stats_timing.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_i18n.h =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_i18n.h (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_i18n.h (nonexistent) @@ -1,179 +0,0 @@ -/* - * kmp_i18n.h - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#ifndef KMP_I18N_H -#define KMP_I18N_H - -#include "kmp_str.h" - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -/* kmp_i18n_id.inc defines kmp_i18n_id_t type. It is an enumeration with - identifiers of all the messages in the catalog. There is one special - identifier: kmp_i18n_null, which denotes absence of message. */ -#include "kmp_i18n_id.inc" // Generated file. Do not edit it manually. - -/* Low-level functions handling message catalog. __kmp_i18n_catopen() opens the - message catalog, __kmp_i18n_catclose() closes it. Explicit opening is not - required: if the message catalog is not yet open, __kmp_i18n_catgets() will - open it implicitly. However, the catalog should be explicitly closed; - otherwise resources (memory, handles) may leak. - - __kmp_i18n_catgets() returns a read-only string. It should not be freed. - - The KMP_I18N_STR macro simplifies access to strings in the message catalog a - bit. The following two lines are equivalent: - - __kmp_i18n_catgets( kmp_i18n_str_Warning ) - KMP_I18N_STR( Warning ) -*/ - -void __kmp_i18n_catopen(); -void __kmp_i18n_catclose(); -char const *__kmp_i18n_catgets(kmp_i18n_id_t id); - -#define KMP_I18N_STR(id) __kmp_i18n_catgets(kmp_i18n_str_##id) - -/* High-level interface for printing strings targeted to the user. - - All the strings are divided into 3 types: - * messages, - * hints, - * system errors. - - There are 3 kinds of message severity: - * informational messages, - * warnings (non-fatal errors), - * fatal errors. - - For example: - OMP: Warning #2: Cannot open message catalog "libguide.cat": (1) - OMP: System error #2: No such file or directory (2) - OMP: Hint: Please check NLSPATH environment variable. (3) - OMP: Info #3: Default messages will be used. (4) - - where - (1) is a message of warning severity, - (2) is a system error that caused the previous warning, - (3) is a hint for the user on how to fix the problem, - (4) is a message of informational severity. - - Usage in complex cases (a message accompanied by hints and system errors): - - int error = errno; // We need to save errno immediately, because it may - // be changed.
- __kmp_msg( - kmp_ms_warning, // Severity - KMP_MSG( CantOpenMessageCatalog, name ), // Primary message - KMP_ERR( error ), // System error - KMP_HNT( CheckNLSPATH ), // Hint - __kmp_msg_null // Variadic argument list finisher - ); - - Usage in simple cases (just a message, no system errors or hints): - KMP_INFORM( WillUseDefaultMessages ); - KMP_WARNING( CantOpenMessageCatalog, name ); - KMP_FATAL( StackOverlap ); - KMP_SYSFAIL( "pthread_create", status ); - KMP_CHECK_SYSFAIL( "pthread_create", status ); - KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status ); -*/ - -enum kmp_msg_type { - kmp_mt_dummy = 0, // Special type for internal purposes. - kmp_mt_mesg = - 4, // Primary OpenMP message, could be information, warning, or fatal. - kmp_mt_hint = 5, // Hint to the user. - kmp_mt_syserr = -1 // System error message. -}; // enum kmp_msg_type -typedef enum kmp_msg_type kmp_msg_type_t; - -struct kmp_msg { - kmp_msg_type_t type; - int num; - char *str; - int len; -}; // struct kmp_message -typedef struct kmp_msg kmp_msg_t; - -// Special message to denote the end of variadic list of arguments. -extern kmp_msg_t __kmp_msg_null; - -// Helper functions. Creates messages either from message catalog or from -// system. Note: these functions allocate memory. You should pass created -// messages to __kmp_msg() function, it will print messages and destroy them. -kmp_msg_t __kmp_msg_format(unsigned id_arg, ...); -kmp_msg_t __kmp_msg_error_code(int code); -kmp_msg_t __kmp_msg_error_mesg(char const *mesg); - -// Helper macros to make calls shorter. -#define KMP_MSG(...) __kmp_msg_format(kmp_i18n_msg_##__VA_ARGS__) -#define KMP_HNT(...) __kmp_msg_format(kmp_i18n_hnt_##__VA_ARGS__) -#define KMP_SYSERRCODE(code) __kmp_msg_error_code(code) -#define KMP_SYSERRMESG(mesg) __kmp_msg_error_mesg(mesg) -#define KMP_ERR KMP_SYSERRCODE - -// Message severity. -enum kmp_msg_severity { - kmp_ms_inform, // Just information for the user. - kmp_ms_warning, // Non-fatal error, execution continues. - kmp_ms_fatal // Fatal error, program aborts. -}; // enum kmp_msg_severity -typedef enum kmp_msg_severity kmp_msg_severity_t; - -// Primary function for printing messages for the user. The first message is -// mandatory. Any number of system errors and hints may be specified. Argument -// list must be finished with __kmp_msg_null. -void __kmp_msg(kmp_msg_severity_t severity, kmp_msg_t message, ...); -KMP_NORETURN void __kmp_fatal(kmp_msg_t message, ...); - -// Helper macros to make calls shorter in simple cases. -#define KMP_INFORM(...) \ - __kmp_msg(kmp_ms_inform, KMP_MSG(__VA_ARGS__), __kmp_msg_null) -#define KMP_WARNING(...) \ - __kmp_msg(kmp_ms_warning, KMP_MSG(__VA_ARGS__), __kmp_msg_null) -#define KMP_FATAL(...) __kmp_fatal(KMP_MSG(__VA_ARGS__), __kmp_msg_null) -#define KMP_SYSFAIL(func, error) \ - __kmp_fatal(KMP_MSG(FunctionError, func), KMP_SYSERRCODE(error), \ - __kmp_msg_null) - -// Check error, if not zero, generate fatal error message. -#define KMP_CHECK_SYSFAIL(func, error) \ - { \ - if (error) { \ - KMP_SYSFAIL(func, error); \ - } \ - } - -// Check status, if not zero, generate fatal error message using errno. 
-#define KMP_CHECK_SYSFAIL_ERRNO(func, status) \ - { \ - if (status != 0) { \ - int error = errno; \ - KMP_SYSFAIL(func, error); \ - } \ - } - -#ifdef KMP_DEBUG -void __kmp_i18n_dump_catalog(kmp_str_buf_t *buffer); -#endif // KMP_DEBUG - -#ifdef __cplusplus -}; // extern "C" -#endif // __cplusplus - -#endif // KMP_I18N_H - -// end of file // Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_i18n.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_alloc.cpp =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_alloc.cpp (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_alloc.cpp (nonexistent) @@ -1,1809 +0,0 @@ -/* - * kmp_alloc.cpp -- private/shared dynamic memory allocation and management - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#include "kmp.h" -#include "kmp_io.h" -#include "kmp_wrapper_malloc.h" - -// Disable bget when it is not used -#if KMP_USE_BGET - -/* Thread private buffer management code */ - -typedef int (*bget_compact_t)(size_t, int); -typedef void *(*bget_acquire_t)(size_t); -typedef void (*bget_release_t)(void *); - -/* NOTE: bufsize must be a signed datatype */ - -#if KMP_OS_WINDOWS -#if KMP_ARCH_X86 || KMP_ARCH_ARM -typedef kmp_int32 bufsize; -#else -typedef kmp_int64 bufsize; -#endif -#else -typedef ssize_t bufsize; -#endif - -/* The three modes of operation are fifo search, lifo search, and best-fit */ - -typedef enum bget_mode { - bget_mode_fifo = 0, - bget_mode_lifo = 1, - bget_mode_best = 2 -} bget_mode_t; - -static void bpool(kmp_info_t *th, void *buffer, bufsize len); -static void *bget(kmp_info_t *th, bufsize size); -static void *bgetz(kmp_info_t *th, bufsize size); -static void *bgetr(kmp_info_t *th, void *buffer, bufsize newsize); -static void brel(kmp_info_t *th, void *buf); -static void bectl(kmp_info_t *th, bget_compact_t compact, - bget_acquire_t acquire, bget_release_t release, - bufsize pool_incr); - -/* BGET CONFIGURATION */ -/* Buffer allocation size quantum: all buffers allocated are a - multiple of this size. This MUST be a power of two. */ - -/* On IA-32 architecture with Linux* OS, malloc() does not - ensure 16 byte alignment */ - -#if KMP_ARCH_X86 || !KMP_HAVE_QUAD - -#define SizeQuant 8 -#define AlignType double - -#else - -#define SizeQuant 16 -#define AlignType _Quad - -#endif - -// Define this symbol to enable the bstats() function which calculates the -// total free space in the buffer pool, the largest available buffer, and the -// total space currently allocated. -#define BufStats 1 - -#ifdef KMP_DEBUG - -// Define this symbol to enable the bpoold() function which dumps the buffers -// in a buffer pool. -#define BufDump 1 - -// Define this symbol to enable the bpoolv() function for validating a buffer -// pool.
-#define BufValid 1 - -// Define this symbol to enable the bufdump() function which allows dumping the -// contents of an allocated or free buffer. -#define DumpData 1 - -#ifdef NOT_USED_NOW - -// Wipe free buffers to a guaranteed pattern of garbage to trip up miscreants -// who attempt to use pointers into released buffers. -#define FreeWipe 1 - -// Use a best fit algorithm when searching for space for an allocation request. -// This uses memory more efficiently, but allocation will be much slower. -#define BestFit 1 - -#endif /* NOT_USED_NOW */ -#endif /* KMP_DEBUG */ - -static bufsize bget_bin_size[] = { - 0, - // 1 << 6, /* .5 Cache line */ - 1 << 7, /* 1 Cache line, new */ - 1 << 8, /* 2 Cache lines */ - 1 << 9, /* 4 Cache lines, new */ - 1 << 10, /* 8 Cache lines */ - 1 << 11, /* 16 Cache lines, new */ - 1 << 12, 1 << 13, /* new */ - 1 << 14, 1 << 15, /* new */ - 1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20, /* 1MB */ - 1 << 21, /* 2MB */ - 1 << 22, /* 4MB */ - 1 << 23, /* 8MB */ - 1 << 24, /* 16MB */ - 1 << 25, /* 32MB */ -}; - -#define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize)) - -struct bfhead; - -// Declare the interface, including the requested buffer size type, bufsize. - -/* Queue links */ -typedef struct qlinks { - struct bfhead *flink; /* Forward link */ - struct bfhead *blink; /* Backward link */ -} qlinks_t; - -/* Header in allocated and free buffers */ -typedef struct bhead2 { - kmp_info_t *bthr; /* The thread which owns the buffer pool */ - bufsize prevfree; /* Relative link back to previous free buffer in memory or - 0 if previous buffer is allocated. */ - bufsize bsize; /* Buffer size: positive if free, negative if allocated. */ -} bhead2_t; - -/* Make sure the bhead structure is a multiple of SizeQuant in size. */ -typedef union bhead { - KMP_ALIGN(SizeQuant) - AlignType b_align; - char b_pad[sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant))]; - bhead2_t bb; -} bhead_t; -#define BH(p) ((bhead_t *)(p)) - -/* Header in directly allocated buffers (by acqfcn) */ -typedef struct bdhead { - bufsize tsize; /* Total size, including overhead */ - bhead_t bh; /* Common header */ -} bdhead_t; -#define BDH(p) ((bdhead_t *)(p)) - -/* Header in free buffers */ -typedef struct bfhead { - bhead_t bh; /* Common allocated/free header */ - qlinks_t ql; /* Links on free list */ -} bfhead_t; -#define BFH(p) ((bfhead_t *)(p)) - -typedef struct thr_data { - bfhead_t freelist[MAX_BGET_BINS]; -#if BufStats - size_t totalloc; /* Total space currently allocated */ - long numget, numrel; /* Number of bget() and brel() calls */ - long numpblk; /* Number of pool blocks */ - long numpget, numprel; /* Number of block gets and rels */ - long numdget, numdrel; /* Number of direct gets and rels */ -#endif /* BufStats */ - - /* Automatic expansion block management functions */ - bget_compact_t compfcn; - bget_acquire_t acqfcn; - bget_release_t relfcn; - - bget_mode_t mode; /* what allocation mode to use? */ - - bufsize exp_incr; /* Expansion block size */ - bufsize pool_len; /* 0: no bpool calls have been made - -1: not all pool blocks are the same size - >0: (common) block size for all bpool calls made so far - */ - bfhead_t *last_pool; /* Last pool owned by this thread (delay dealocation) */ -} thr_data_t; - -/* Minimum allocation quantum: */ -#define QLSize (sizeof(qlinks_t)) -#define SizeQ ((SizeQuant > QLSize) ? 
SizeQuant : QLSize) -#define MaxSize \ - (bufsize)( \ - ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1))) -// Maximun for the requested size. - -/* End sentinel: value placed in bsize field of dummy block delimiting - end of pool block. The most negative number which will fit in a - bufsize, defined in a way that the compiler will accept. */ - -#define ESent \ - ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2)) - -/* Thread Data management routines */ -static int bget_get_bin(bufsize size) { - // binary chop bins - int lo = 0, hi = MAX_BGET_BINS - 1; - - KMP_DEBUG_ASSERT(size > 0); - - while ((hi - lo) > 1) { - int mid = (lo + hi) >> 1; - if (size < bget_bin_size[mid]) - hi = mid - 1; - else - lo = mid; - } - - KMP_DEBUG_ASSERT((lo >= 0) && (lo < MAX_BGET_BINS)); - - return lo; -} - -static void set_thr_data(kmp_info_t *th) { - int i; - thr_data_t *data; - - data = (thr_data_t *)((!th->th.th_local.bget_data) - ? __kmp_allocate(sizeof(*data)) - : th->th.th_local.bget_data); - - memset(data, '\0', sizeof(*data)); - - for (i = 0; i < MAX_BGET_BINS; ++i) { - data->freelist[i].ql.flink = &data->freelist[i]; - data->freelist[i].ql.blink = &data->freelist[i]; - } - - th->th.th_local.bget_data = data; - th->th.th_local.bget_list = 0; -#if !USE_CMP_XCHG_FOR_BGET -#ifdef USE_QUEUING_LOCK_FOR_BGET - __kmp_init_lock(&th->th.th_local.bget_lock); -#else - __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock); -#endif /* USE_LOCK_FOR_BGET */ -#endif /* ! USE_CMP_XCHG_FOR_BGET */ -} - -static thr_data_t *get_thr_data(kmp_info_t *th) { - thr_data_t *data; - - data = (thr_data_t *)th->th.th_local.bget_data; - - KMP_DEBUG_ASSERT(data != 0); - - return data; -} - -/* Walk the free list and release the enqueued buffers */ -static void __kmp_bget_dequeue(kmp_info_t *th) { - void *p = TCR_SYNC_PTR(th->th.th_local.bget_list); - - if (p != 0) { -#if USE_CMP_XCHG_FOR_BGET - { - volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list); - while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list, - CCAST(void *, old_value), nullptr)) { - KMP_CPU_PAUSE(); - old_value = TCR_SYNC_PTR(th->th.th_local.bget_list); - } - p = CCAST(void *, old_value); - } -#else /* ! 
USE_CMP_XCHG_FOR_BGET */ -#ifdef USE_QUEUING_LOCK_FOR_BGET - __kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th)); -#else - __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock); -#endif /* USE_QUEUING_LOCK_FOR_BGET */ - - p = (void *)th->th.th_local.bget_list; - th->th.th_local.bget_list = 0; - -#ifdef USE_QUEUING_LOCK_FOR_BGET - __kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th)); -#else - __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock); -#endif -#endif /* USE_CMP_XCHG_FOR_BGET */ - - /* Check again to make sure the list is not empty */ - while (p != 0) { - void *buf = p; - bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t)); - - KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0); - KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) == - (kmp_uintptr_t)th); // clear possible mark - KMP_DEBUG_ASSERT(b->ql.blink == 0); - - p = (void *)b->ql.flink; - - brel(th, buf); - } - } -} - -/* Chain together the free buffers by using the thread owner field */ -static void __kmp_bget_enqueue(kmp_info_t *th, void *buf -#ifdef USE_QUEUING_LOCK_FOR_BGET - , - kmp_int32 rel_gtid -#endif - ) { - bfhead_t *b = BFH(((char *)buf) - sizeof(bhead_t)); - - KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0); - KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) == - (kmp_uintptr_t)th); // clear possible mark - - b->ql.blink = 0; - - KC_TRACE(10, ("__kmp_bget_enqueue: moving buffer to T#%d list\n", - __kmp_gtid_from_thread(th))); - -#if USE_CMP_XCHG_FOR_BGET - { - volatile void *old_value = TCR_PTR(th->th.th_local.bget_list); - /* the next pointer must be set before setting bget_list to buf to avoid - exposing a broken list to other threads, even for an instant. */ - b->ql.flink = BFH(CCAST(void *, old_value)); - - while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list, - CCAST(void *, old_value), buf)) { - KMP_CPU_PAUSE(); - old_value = TCR_PTR(th->th.th_local.bget_list); - /* the next pointer must be set before setting bget_list to buf to avoid - exposing a broken list to other threads, even for an instant. */ - b->ql.flink = BFH(CCAST(void *, old_value)); - } - } -#else /* ! 
USE_CMP_XCHG_FOR_BGET */ -#ifdef USE_QUEUING_LOCK_FOR_BGET - __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid); -#else - __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock); -#endif - - b->ql.flink = BFH(th->th.th_local.bget_list); - th->th.th_local.bget_list = (void *)buf; - -#ifdef USE_QUEUING_LOCK_FOR_BGET - __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid); -#else - __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock); -#endif -#endif /* USE_CMP_XCHG_FOR_BGET */ -} - -/* insert buffer back onto a new freelist */ -static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) { - int bin; - - KMP_DEBUG_ASSERT(((size_t)b) % SizeQuant == 0); - KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0); - - bin = bget_get_bin(b->bh.bb.bsize); - - KMP_DEBUG_ASSERT(thr->freelist[bin].ql.blink->ql.flink == - &thr->freelist[bin]); - KMP_DEBUG_ASSERT(thr->freelist[bin].ql.flink->ql.blink == - &thr->freelist[bin]); - - b->ql.flink = &thr->freelist[bin]; - b->ql.blink = thr->freelist[bin].ql.blink; - - thr->freelist[bin].ql.blink = b; - b->ql.blink->ql.flink = b; -} - -/* unlink the buffer from the old freelist */ -static void __kmp_bget_remove_from_freelist(bfhead_t *b) { - KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b); - KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b); - - b->ql.blink->ql.flink = b->ql.flink; - b->ql.flink->ql.blink = b->ql.blink; -} - -/* GET STATS -- check info on free list */ -static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) { - thr_data_t *thr = get_thr_data(th); - int bin; - - *total_free = *max_free = 0; - - for (bin = 0; bin < MAX_BGET_BINS; ++bin) { - bfhead_t *b, *best; - - best = &thr->freelist[bin]; - b = best->ql.flink; - - while (b != &thr->freelist[bin]) { - *total_free += (b->bh.bb.bsize - sizeof(bhead_t)); - if ((best == &thr->freelist[bin]) || (b->bh.bb.bsize < best->bh.bb.bsize)) - best = b; - - /* Link to next buffer */ - b = b->ql.flink; - } - - if (*max_free < best->bh.bb.bsize) - *max_free = best->bh.bb.bsize; - } - - if (*max_free > (bufsize)sizeof(bhead_t)) - *max_free -= sizeof(bhead_t); -} - -/* BGET -- Allocate a buffer. */ -static void *bget(kmp_info_t *th, bufsize requested_size) { - thr_data_t *thr = get_thr_data(th); - bufsize size = requested_size; - bfhead_t *b; - void *buf; - int compactseq = 0; - int use_blink = 0; - /* For BestFit */ - bfhead_t *best; - - if (size < 0 || size + sizeof(bhead_t) > MaxSize) { - return NULL; - } - - __kmp_bget_dequeue(th); /* Release any queued buffers */ - - if (size < (bufsize)SizeQ) { // Need at least room for the queue links. - size = SizeQ; - } -#if defined(SizeQuant) && (SizeQuant > 1) - size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1)); -#endif - - size += sizeof(bhead_t); // Add overhead in allocated buffer to size required. - KMP_DEBUG_ASSERT(size >= 0); - KMP_DEBUG_ASSERT(size % SizeQuant == 0); - - use_blink = (thr->mode == bget_mode_lifo); - - /* If a compact function was provided in the call to bectl(), wrap - a loop around the allocation process to allow compaction to - intervene in case we don't find a suitable buffer in the chain. */ - - for (;;) { - int bin; - - for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) { - /* Link to next buffer */ - b = (use_blink ? thr->freelist[bin].ql.blink - : thr->freelist[bin].ql.flink); - - if (thr->mode == bget_mode_best) { - best = &thr->freelist[bin]; - - /* Scan the free list searching for the first buffer big enough - to hold the requested size buffer. 
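(Best fit, implemented just below: among the free buffers that are big enough, the scan remembers the smallest one and allocates from it, so larger buffers are not split needlessly.)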
*/ - while (b != &thr->freelist[bin]) { - if (b->bh.bb.bsize >= (bufsize)size) { - if ((best == &thr->freelist[bin]) || - (b->bh.bb.bsize < best->bh.bb.bsize)) { - best = b; - } - } - - /* Link to next buffer */ - b = (use_blink ? b->ql.blink : b->ql.flink); - } - b = best; - } - - while (b != &thr->freelist[bin]) { - if ((bufsize)b->bh.bb.bsize >= (bufsize)size) { - - // Buffer is big enough to satisfy the request. Allocate it to the - // caller. We must decide whether the buffer is large enough to split - // into the part given to the caller and a free buffer that remains - // on the free list, or whether the entire buffer should be removed - // from the free list and given to the caller in its entirety. We - // only split the buffer if enough room remains for a header plus the - // minimum quantum of allocation. - if ((b->bh.bb.bsize - (bufsize)size) > - (bufsize)(SizeQ + (sizeof(bhead_t)))) { - bhead_t *ba, *bn; - - ba = BH(((char *)b) + (b->bh.bb.bsize - (bufsize)size)); - bn = BH(((char *)ba) + size); - - KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize); - - /* Subtract size from length of free block. */ - b->bh.bb.bsize -= (bufsize)size; - - /* Link allocated buffer to the previous free buffer. */ - ba->bb.prevfree = b->bh.bb.bsize; - - /* Plug negative size into user buffer. */ - ba->bb.bsize = -size; - - /* Mark this buffer as owned by this thread. */ - TCW_PTR(ba->bb.bthr, - th); // not an allocated address (do not mark it) - /* Mark buffer after this one not preceded by free block. */ - bn->bb.prevfree = 0; - - // unlink buffer from old freelist, and reinsert into new freelist - __kmp_bget_remove_from_freelist(b); - __kmp_bget_insert_into_freelist(thr, b); -#if BufStats - thr->totalloc += (size_t)size; - thr->numget++; /* Increment number of bget() calls */ -#endif - buf = (void *)((((char *)ba) + sizeof(bhead_t))); - KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0); - return buf; - } else { - bhead_t *ba; - - ba = BH(((char *)b) + b->bh.bb.bsize); - - KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize); - - /* The buffer isn't big enough to split. Give the whole - shebang to the caller and remove it from the free list. */ - - __kmp_bget_remove_from_freelist(b); -#if BufStats - thr->totalloc += (size_t)b->bh.bb.bsize; - thr->numget++; /* Increment number of bget() calls */ -#endif - /* Negate size to mark buffer allocated. */ - b->bh.bb.bsize = -(b->bh.bb.bsize); - - /* Mark this buffer as owned by this thread. */ - TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark) - /* Zero the back pointer in the next buffer in memory - to indicate that this buffer is allocated. */ - ba->bb.prevfree = 0; - - /* Give user buffer starting at queue links. */ - buf = (void *)&(b->ql); - KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0); - return buf; - } - } - - /* Link to next buffer */ - b = (use_blink ? b->ql.blink : b->ql.flink); - } - } - - /* We failed to find a buffer. If there's a compact function defined, - notify it of the size requested. If it returns TRUE, try the allocation - again. */ - - if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) { - break; - } - } - - /* No buffer available with requested size free. */ - - /* Don't give up yet -- look in the reserve supply. */ - if (thr->acqfcn != 0) { - if (size > (bufsize)(thr->exp_incr - sizeof(bhead_t))) { - /* Request is too large to fit in a single expansion block. - Try to satisy it by a direct buffer acquisition. 
*/ - bdhead_t *bdh; - - size += sizeof(bdhead_t) - sizeof(bhead_t); - - KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", (int)size)); - - /* richryan */ - bdh = BDH((*thr->acqfcn)((bufsize)size)); - if (bdh != NULL) { - - // Mark the buffer special by setting size field of its header to zero. - bdh->bh.bb.bsize = 0; - - /* Mark this buffer as owned by this thread. */ - TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated, - // because direct buffer never goes to free list - bdh->bh.bb.prevfree = 0; - bdh->tsize = size; -#if BufStats - thr->totalloc += (size_t)size; - thr->numget++; /* Increment number of bget() calls */ - thr->numdget++; /* Direct bget() call count */ -#endif - buf = (void *)(bdh + 1); - KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0); - return buf; - } - - } else { - - /* Try to obtain a new expansion block */ - void *newpool; - - KE_TRACE(10, ("%%%%%% MALLOCB( %d )\n", (int)thr->exp_incr)); - - /* richryan */ - newpool = (*thr->acqfcn)((bufsize)thr->exp_incr); - KMP_DEBUG_ASSERT(((size_t)newpool) % SizeQuant == 0); - if (newpool != NULL) { - bpool(th, newpool, thr->exp_incr); - buf = bget( - th, requested_size); /* This can't, I say, can't get into a loop. */ - return buf; - } - } - } - - /* Still no buffer available */ - - return NULL; -} - -/* BGETZ -- Allocate a buffer and clear its contents to zero. We clear - the entire contents of the buffer to zero, not just the - region requested by the caller. */ - -static void *bgetz(kmp_info_t *th, bufsize size) { - char *buf = (char *)bget(th, size); - - if (buf != NULL) { - bhead_t *b; - bufsize rsize; - - b = BH(buf - sizeof(bhead_t)); - rsize = -(b->bb.bsize); - if (rsize == 0) { - bdhead_t *bd; - - bd = BDH(buf - sizeof(bdhead_t)); - rsize = bd->tsize - (bufsize)sizeof(bdhead_t); - } else { - rsize -= sizeof(bhead_t); - } - - KMP_DEBUG_ASSERT(rsize >= size); - - (void)memset(buf, 0, (bufsize)rsize); - } - return ((void *)buf); -} - -/* BGETR -- Reallocate a buffer. This is a minimal implementation, - simply in terms of brel() and bget(). It could be - enhanced to allow the buffer to grow into adjacent free - blocks and to avoid moving data unnecessarily. */ - -static void *bgetr(kmp_info_t *th, void *buf, bufsize size) { - void *nbuf; - bufsize osize; /* Old size of buffer */ - bhead_t *b; - - nbuf = bget(th, size); - if (nbuf == NULL) { /* Acquire new buffer */ - return NULL; - } - if (buf == NULL) { - return nbuf; - } - b = BH(((char *)buf) - sizeof(bhead_t)); - osize = -b->bb.bsize; - if (osize == 0) { - /* Buffer acquired directly through acqfcn. */ - bdhead_t *bd; - - bd = BDH(((char *)buf) - sizeof(bdhead_t)); - osize = bd->tsize - (bufsize)sizeof(bdhead_t); - } else { - osize -= sizeof(bhead_t); - } - - KMP_DEBUG_ASSERT(osize > 0); - - (void)KMP_MEMCPY((char *)nbuf, (char *)buf, /* Copy the data */ - (size_t)((size < osize) ? size : osize)); - brel(th, buf); - - return nbuf; -} - -/* BREL -- Release a buffer. */ -static void brel(kmp_info_t *th, void *buf) { - thr_data_t *thr = get_thr_data(th); - bfhead_t *b, *bn; - kmp_info_t *bth; - - KMP_DEBUG_ASSERT(buf != NULL); - KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0); - - b = BFH(((char *)buf) - sizeof(bhead_t)); - - if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? 
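(A zero bsize marks a buffer acquired directly from acqfcn: it is prefixed by a bdhead_t recording the total size, and it is handed back to relfcn below rather than returned to the pool.)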
*/ - bdhead_t *bdh; - - bdh = BDH(((char *)buf) - sizeof(bdhead_t)); - KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0); -#if BufStats - thr->totalloc -= (size_t)bdh->tsize; - thr->numdrel++; /* Number of direct releases */ - thr->numrel++; /* Increment number of brel() calls */ -#endif /* BufStats */ -#ifdef FreeWipe - (void)memset((char *)buf, 0x55, (size_t)(bdh->tsize - sizeof(bdhead_t))); -#endif /* FreeWipe */ - - KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)bdh)); - - KMP_DEBUG_ASSERT(thr->relfcn != 0); - (*thr->relfcn)((void *)bdh); /* Release it directly. */ - return; - } - - bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & - ~1); // clear possible mark before comparison - if (bth != th) { - /* Add this buffer to be released by the owning thread later */ - __kmp_bget_enqueue(bth, buf -#ifdef USE_QUEUING_LOCK_FOR_BGET - , - __kmp_gtid_from_thread(th) -#endif - ); - return; - } - - /* Buffer size must be negative, indicating that the buffer is allocated. */ - if (b->bh.bb.bsize >= 0) { - bn = NULL; - } - KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0); - - /* Back pointer in next buffer must be zero, indicating the same thing: */ - - KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.bsize)->bb.prevfree == 0); - -#if BufStats - thr->numrel++; /* Increment number of brel() calls */ - thr->totalloc += (size_t)b->bh.bb.bsize; -#endif - - /* If the back link is nonzero, the previous buffer is free. */ - - if (b->bh.bb.prevfree != 0) { - /* The previous buffer is free. Consolidate this buffer with it by adding - the length of this buffer to the previous free buffer. Note that we - subtract the size in the buffer being released, since it's negative to - indicate that the buffer is allocated. */ - bufsize size = b->bh.bb.bsize; - - /* Make the previous buffer the one we're working on. */ - KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.prevfree)->bb.bsize == - b->bh.bb.prevfree); - b = BFH(((char *)b) - b->bh.bb.prevfree); - b->bh.bb.bsize -= size; - - /* unlink the buffer from the old freelist */ - __kmp_bget_remove_from_freelist(b); - } else { - /* The previous buffer isn't allocated. Mark this buffer size as positive - (i.e. free) and fall through to place the buffer on the free list as an - isolated free block. */ - b->bh.bb.bsize = -b->bh.bb.bsize; - } - - /* insert buffer back onto a new freelist */ - __kmp_bget_insert_into_freelist(thr, b); - - /* Now we look at the next buffer in memory, located by advancing from - the start of this buffer by its size, to see if that buffer is - free. If it is, we combine this buffer with the next one in - memory, dechaining the second buffer from the free list. */ - bn = BFH(((char *)b) + b->bh.bb.bsize); - if (bn->bh.bb.bsize > 0) { - - /* The buffer is free. Remove it from the free list and add - its size to that of our buffer. */ - KMP_DEBUG_ASSERT(BH((char *)bn + bn->bh.bb.bsize)->bb.prevfree == - bn->bh.bb.bsize); - - __kmp_bget_remove_from_freelist(bn); - - b->bh.bb.bsize += bn->bh.bb.bsize; - - /* unlink the buffer from the old freelist, and reinsert it into the new - * freelist */ - __kmp_bget_remove_from_freelist(b); - __kmp_bget_insert_into_freelist(thr, b); - - /* Finally, advance to the buffer that follows the newly - consolidated free block. We must set its backpointer to the - head of the consolidated free block. We know the next block - must be an allocated block because the process of recombination - guarantees that two free blocks will never be contiguous in - memory. 
*/ - bn = BFH(((char *)b) + b->bh.bb.bsize); - } -#ifdef FreeWipe - (void)memset(((char *)b) + sizeof(bfhead_t), 0x55, - (size_t)(b->bh.bb.bsize - sizeof(bfhead_t))); -#endif - KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0); - - /* The next buffer is allocated. Set the backpointer in it to point - to this buffer; the previous free buffer in memory. */ - - bn->bh.bb.prevfree = b->bh.bb.bsize; - - /* If a block-release function is defined, and this free buffer - constitutes the entire block, release it. Note that pool_len - is defined in such a way that the test will fail unless all - pool blocks are the same size. */ - if (thr->relfcn != 0 && - b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) { -#if BufStats - if (thr->numpblk != - 1) { /* Do not release the last buffer until finalization time */ -#endif - - KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0); - KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent); - KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree == - b->bh.bb.bsize); - - /* Unlink the buffer from the free list */ - __kmp_bget_remove_from_freelist(b); - - KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b)); - - (*thr->relfcn)(b); -#if BufStats - thr->numprel++; /* Nr of expansion block releases */ - thr->numpblk--; /* Total number of blocks */ - KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel); - - // avoid leaving stale last_pool pointer around if it is being dealloced - if (thr->last_pool == b) - thr->last_pool = 0; - } else { - thr->last_pool = b; - } -#endif /* BufStats */ - } -} - -/* BECTL -- Establish automatic pool expansion control */ -static void bectl(kmp_info_t *th, bget_compact_t compact, - bget_acquire_t acquire, bget_release_t release, - bufsize pool_incr) { - thr_data_t *thr = get_thr_data(th); - - thr->compfcn = compact; - thr->acqfcn = acquire; - thr->relfcn = release; - thr->exp_incr = pool_incr; -} - -/* BPOOL -- Add a region of memory to the buffer pool. */ -static void bpool(kmp_info_t *th, void *buf, bufsize len) { - /* int bin = 0; */ - thr_data_t *thr = get_thr_data(th); - bfhead_t *b = BFH(buf); - bhead_t *bn; - - __kmp_bget_dequeue(th); /* Release any queued buffers */ - -#ifdef SizeQuant - len &= ~(SizeQuant - 1); -#endif - if (thr->pool_len == 0) { - thr->pool_len = len; - } else if (len != thr->pool_len) { - thr->pool_len = -1; - } -#if BufStats - thr->numpget++; /* Number of block acquisitions */ - thr->numpblk++; /* Number of blocks total */ - KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel); -#endif /* BufStats */ - - /* Since the block is initially occupied by a single free buffer, - it had better not be (much) larger than the largest buffer - whose size we can store in bhead.bb.bsize. */ - KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize)ESent + 1)); - - /* Clear the backpointer at the start of the block to indicate that - there is no free block prior to this one. That blocks - recombination when the first block in memory is released. */ - b->bh.bb.prevfree = 0; - - /* Create a dummy allocated buffer at the end of the pool. This dummy - buffer is seen when a buffer at the end of the pool is released and - blocks recombination of the last buffer with the dummy buffer at - the end. The length in the dummy buffer is set to the largest - negative number to denote the end of the pool for diagnostic - routines (this specific value is not counted on by the actual - allocation and release functions). 
*/ - len -= sizeof(bhead_t); - b->bh.bb.bsize = (bufsize)len; - /* Set the owner of this buffer */ - TCW_PTR(b->bh.bb.bthr, - (kmp_info_t *)((kmp_uintptr_t)th | - 1)); // mark the buffer as allocated address - - /* Chain the new block to the free list. */ - __kmp_bget_insert_into_freelist(thr, b); - -#ifdef FreeWipe - (void)memset(((char *)b) + sizeof(bfhead_t), 0x55, - (size_t)(len - sizeof(bfhead_t))); -#endif - bn = BH(((char *)b) + len); - bn->bb.prevfree = (bufsize)len; - /* Definition of ESent assumes two's complement! */ - KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0)); - - bn->bb.bsize = ESent; -} - -/* BFREED -- Dump the free lists for this thread. */ -static void bfreed(kmp_info_t *th) { - int bin = 0, count = 0; - int gtid = __kmp_gtid_from_thread(th); - thr_data_t *thr = get_thr_data(th); - -#if BufStats - __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC - " get=%" KMP_INT64_SPEC " rel=%" KMP_INT64_SPEC - " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC - " prel=%" KMP_INT64_SPEC " dget=%" KMP_INT64_SPEC - " drel=%" KMP_INT64_SPEC "\n", - gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget, - (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk, - (kmp_int64)thr->numpget, (kmp_int64)thr->numprel, - (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel); -#endif - - for (bin = 0; bin < MAX_BGET_BINS; ++bin) { - bfhead_t *b; - - for (b = thr->freelist[bin].ql.flink; b != &thr->freelist[bin]; - b = b->ql.flink) { - bufsize bs = b->bh.bb.bsize; - - KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b); - KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b); - KMP_DEBUG_ASSERT(bs > 0); - - count += 1; - - __kmp_printf_no_lock( - "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b, - (long)bs); -#ifdef FreeWipe - { - char *lerr = ((char *)b) + sizeof(bfhead_t); - if ((bs > sizeof(bfhead_t)) && - ((*lerr != 0x55) || - (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) != - 0))) { - __kmp_printf_no_lock("__kmp_printpool: T#%d (Contents of above " - "free block have been overstored.)\n", - gtid); - } - } -#endif - } - } - - if (count == 0) - __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid); -} - -void __kmp_initialize_bget(kmp_info_t *th) { - KMP_DEBUG_ASSERT(SizeQuant >= sizeof(void *) && (th != 0)); - - set_thr_data(th); - - bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free, - (bufsize)__kmp_malloc_pool_incr); -} - -void __kmp_finalize_bget(kmp_info_t *th) { - thr_data_t *thr; - bfhead_t *b; - - KMP_DEBUG_ASSERT(th != 0); - -#if BufStats - thr = (thr_data_t *)th->th.th_local.bget_data; - KMP_DEBUG_ASSERT(thr != NULL); - b = thr->last_pool; - - /* If a block-release function is defined, and this free buffer constitutes - the entire block, release it. Note that pool_len is defined in such a way - that the test will fail unless all pool blocks are the same size. 
*/ - - // Deallocate the last pool if one exists because we no longer do it in brel() - if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 && - b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) { - KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0); - KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent); - KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree == - b->bh.bb.bsize); - - /* Unlink the buffer from the free list */ - __kmp_bget_remove_from_freelist(b); - - KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b)); - - (*thr->relfcn)(b); - thr->numprel++; /* Nr of expansion block releases */ - thr->numpblk--; /* Total number of blocks */ - KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel); - } -#endif /* BufStats */ - - /* Deallocate bget_data */ - if (th->th.th_local.bget_data != NULL) { - __kmp_free(th->th.th_local.bget_data); - th->th.th_local.bget_data = NULL; - } -} - -void kmpc_set_poolsize(size_t size) { - bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc, - (bget_release_t)free, (bufsize)size); -} - -size_t kmpc_get_poolsize(void) { - thr_data_t *p; - - p = get_thr_data(__kmp_get_thread()); - - return p->exp_incr; -} - -void kmpc_set_poolmode(int mode) { - thr_data_t *p; - - if (mode == bget_mode_fifo || mode == bget_mode_lifo || - mode == bget_mode_best) { - p = get_thr_data(__kmp_get_thread()); - p->mode = (bget_mode_t)mode; - } -} - -int kmpc_get_poolmode(void) { - thr_data_t *p; - - p = get_thr_data(__kmp_get_thread()); - - return p->mode; -} - -void kmpc_get_poolstat(size_t *maxmem, size_t *allmem) { - kmp_info_t *th = __kmp_get_thread(); - bufsize a, b; - - __kmp_bget_dequeue(th); /* Release any queued buffers */ - - bcheck(th, &a, &b); - - *maxmem = a; - *allmem = b; -} - -void kmpc_poolprint(void) { - kmp_info_t *th = __kmp_get_thread(); - - __kmp_bget_dequeue(th); /* Release any queued buffers */ - - bfreed(th); -} - -#endif // #if KMP_USE_BGET - -void *kmpc_malloc(size_t size) { - void *ptr; - ptr = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr))); - if (ptr != NULL) { - // save allocated pointer just before one returned to user - *(void **)ptr = ptr; - ptr = (void **)ptr + 1; - } - return ptr; -} - -#define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0) - -void *kmpc_aligned_malloc(size_t size, size_t alignment) { - void *ptr; - void *ptr_allocated; - KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big - if (!IS_POWER_OF_TWO(alignment)) { - // AC: do we need to issue a warning here? - errno = EINVAL; - return NULL; - } - size = size + sizeof(void *) + alignment; - ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size); - if (ptr_allocated != NULL) { - // save allocated pointer just before one returned to user - ptr = (void *)(((kmp_uintptr_t)ptr_allocated + sizeof(void *) + alignment) & - ~(alignment - 1)); - *((void **)ptr - 1) = ptr_allocated; - } else { - ptr = NULL; - } - return ptr; -} - -void *kmpc_calloc(size_t nelem, size_t elsize) { - void *ptr; - ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize + sizeof(ptr))); - if (ptr != NULL) { - // save allocated pointer just before one returned to user - *(void **)ptr = ptr; - ptr = (void **)ptr + 1; - } - return ptr; -} - -void *kmpc_realloc(void *ptr, size_t size) { - void *result = NULL; - if (ptr == NULL) { - // If pointer is NULL, realloc behaves like malloc. 
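/* A minimal model (hypothetical names, plain malloc/free standing in for
   bget/brel) of the pointer-stashing scheme used by kmpc_malloc() and
   kmpc_free() above: over-allocate by sizeof(void *), stash the raw pointer
   just before the block handed to the user, and recover it on free:

     void *my_malloc(size_t size) {
       void **raw = (void **)malloc(size + sizeof(void *));
       if (raw == NULL)
         return NULL;
       *raw = raw;     // raw pointer lives right before the user block
       return raw + 1; // user sees the memory after the stash
     }

     void my_free(void *ptr) {
       if (ptr != NULL)
         free(*((void **)ptr - 1)); // read back the stashed raw pointer
     }
*/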
- result = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr))); - // save allocated pointer just before one returned to user - if (result != NULL) { - *(void **)result = result; - result = (void **)result + 1; - } - } else if (size == 0) { - // If size is 0, realloc behaves like free. - // The thread must have been registered by a previous call to - // kmpc_malloc() or kmpc_calloc(). - // So it should be safe to call __kmp_get_thread(), not - // __kmp_entry_thread(). - KMP_ASSERT(*((void **)ptr - 1)); - brel(__kmp_get_thread(), *((void **)ptr - 1)); - } else { - result = bgetr(__kmp_entry_thread(), *((void **)ptr - 1), - (bufsize)(size + sizeof(ptr))); - if (result != NULL) { - *(void **)result = result; - result = (void **)result + 1; - } - } - return result; -} - -// NOTE: the library must have already been initialized by a previous allocate -void kmpc_free(void *ptr) { - if (!__kmp_init_serial) { - return; - } - if (ptr != NULL) { - kmp_info_t *th = __kmp_get_thread(); - __kmp_bget_dequeue(th); /* Release any queued buffers */ - // extract allocated pointer and free it - KMP_ASSERT(*((void **)ptr - 1)); - brel(th, *((void **)ptr - 1)); - } -} - -void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL) { - void *ptr; - KE_TRACE(30, ("-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th, - (int)size KMP_SRC_LOC_PARM)); - ptr = bget(th, (bufsize)size); - KE_TRACE(30, ("<- __kmp_thread_malloc() returns %p\n", ptr)); - return ptr; -} - -void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem, - size_t elsize KMP_SRC_LOC_DECL) { - void *ptr; - KE_TRACE(30, ("-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th, - (int)nelem, (int)elsize KMP_SRC_LOC_PARM)); - ptr = bgetz(th, (bufsize)(nelem * elsize)); - KE_TRACE(30, ("<- __kmp_thread_calloc() returns %p\n", ptr)); - return ptr; -} - -void *___kmp_thread_realloc(kmp_info_t *th, void *ptr, - size_t size KMP_SRC_LOC_DECL) { - KE_TRACE(30, ("-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th, - ptr, (int)size KMP_SRC_LOC_PARM)); - ptr = bgetr(th, ptr, (bufsize)size); - KE_TRACE(30, ("<- __kmp_thread_realloc() returns %p\n", ptr)); - return ptr; -} - -void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL) { - KE_TRACE(30, ("-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th, - ptr KMP_SRC_LOC_PARM)); - if (ptr != NULL) { - __kmp_bget_dequeue(th); /* Release any queued buffers */ - brel(th, ptr); - } - KE_TRACE(30, ("<- __kmp_thread_free()\n")); -} - -#if OMP_50_ENABLED -/* OMP 5.0 Memory Management support */ -static int (*p_hbw_check)(void); -static void *(*p_hbw_malloc)(size_t); -static void (*p_hbw_free)(void *); -static int (*p_hbw_set_policy)(int); -static const char *kmp_mk_lib_name; -static void *h_memkind; - -void __kmp_init_memkind() { -#if KMP_OS_UNIX && KMP_DYNAMIC_LIB - kmp_mk_lib_name = "libmemkind.so"; - h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY); - if (h_memkind) { - p_hbw_check = (int (*)())dlsym(h_memkind, "hbw_check_available"); - p_hbw_malloc = (void *(*)(size_t))dlsym(h_memkind, "hbw_malloc"); - p_hbw_free = (void (*)(void *))dlsym(h_memkind, "hbw_free"); - p_hbw_set_policy = (int (*)(int))dlsym(h_memkind, "hbw_set_policy"); - if (p_hbw_check && p_hbw_malloc && p_hbw_free && p_hbw_set_policy) { - __kmp_memkind_available = 1; - if (p_hbw_check() == 0) { - p_hbw_set_policy(1); // return NULL if not enough memory - __kmp_hbw_mem_available = 1; // found HBW memory available - } - return; // success - all symbols resolved - } - dlclose(h_memkind); // failure - h_memkind
= NULL; - } - p_hbw_check = NULL; - p_hbw_malloc = NULL; - p_hbw_free = NULL; - p_hbw_set_policy = NULL; -#else - kmp_mk_lib_name = ""; - h_memkind = NULL; - p_hbw_check = NULL; - p_hbw_malloc = NULL; - p_hbw_free = NULL; - p_hbw_set_policy = NULL; -#endif -} - -void __kmp_fini_memkind() { -#if KMP_OS_UNIX && KMP_DYNAMIC_LIB - if (h_memkind) { - dlclose(h_memkind); - h_memkind = NULL; - } - p_hbw_check = NULL; - p_hbw_malloc = NULL; - p_hbw_free = NULL; - p_hbw_set_policy = NULL; -#endif -} - -void __kmpc_set_default_allocator(int gtid, const omp_allocator_t *allocator) { - if (allocator == OMP_NULL_ALLOCATOR) - allocator = omp_default_mem_alloc; - KMP_DEBUG_ASSERT( - allocator == omp_default_mem_alloc || - allocator == omp_large_cap_mem_alloc || - allocator == omp_const_mem_alloc || allocator == omp_high_bw_mem_alloc || - allocator == omp_low_lat_mem_alloc || allocator == omp_cgroup_mem_alloc || - allocator == omp_pteam_mem_alloc || allocator == omp_thread_mem_alloc); - __kmp_threads[gtid]->th.th_def_allocator = allocator; -} -const omp_allocator_t *__kmpc_get_default_allocator(int gtid) { - return __kmp_threads[gtid]->th.th_def_allocator; -} - -typedef struct kmp_mem_desc { // Memory block descriptor - void *ptr_alloc; // Pointer returned by allocator - size_t size_a; // Size of allocated memory block (initial+descriptor+align) - void *ptr_align; // Pointer to aligned memory, returned - const omp_allocator_t *allocator; // allocator -} kmp_mem_desc_t; -static int alignment = sizeof(void *); // let's align to pointer size - -void *__kmpc_alloc(int gtid, size_t size, const omp_allocator_t *allocator) { - KMP_DEBUG_ASSERT(__kmp_init_serial); - if (allocator == OMP_NULL_ALLOCATOR) - allocator = __kmp_threads[gtid]->th.th_def_allocator; - - int sz_desc = sizeof(kmp_mem_desc_t); - void *ptr = NULL; - kmp_mem_desc_t desc; - kmp_uintptr_t addr; // address returned by allocator - kmp_uintptr_t addr_align; // address to return to caller - kmp_uintptr_t addr_descr; // address of memory block descriptor - - KE_TRACE(25, ("__kmpc_alloc: T#%d (%d, %p)\n", gtid, (int)size, allocator)); - - desc.size_a = size + sz_desc + alignment; - if (allocator == omp_default_mem_alloc) - ptr = __kmp_allocate(desc.size_a); - if (allocator == omp_high_bw_mem_alloc && __kmp_hbw_mem_available) { - KMP_DEBUG_ASSERT(p_hbw_malloc != NULL); - ptr = p_hbw_malloc(desc.size_a); - } - - KE_TRACE(10, ("__kmpc_alloc: T#%d %p=alloc(%d) hbw %d\n", gtid, ptr, - desc.size_a, __kmp_hbw_mem_available)); - if (ptr == NULL) - return NULL; - - addr = (kmp_uintptr_t)ptr; - addr_align = (addr + sz_desc + alignment - 1) & ~(alignment - 1); - addr_descr = addr_align - sz_desc; - - desc.ptr_alloc = ptr; - desc.ptr_align = (void *)addr_align; - desc.allocator = allocator; - *((kmp_mem_desc_t *)addr_descr) = desc; // save descriptor contents - KMP_MB(); - - KE_TRACE(25, ("__kmpc_alloc returns %p, T#%d\n", desc.ptr_align, gtid)); - return desc.ptr_align; -} - -void __kmpc_free(int gtid, void *ptr, const omp_allocator_t *allocator) { - KE_TRACE(25, ("__kmpc_free: T#%d free(%p,%p)\n", gtid, ptr, allocator)); - if (ptr == NULL) - return; - - kmp_mem_desc_t desc; - kmp_uintptr_t addr_align; // address to return to caller - kmp_uintptr_t addr_descr; // address of memory block descriptor - - addr_align = (kmp_uintptr_t)ptr; - addr_descr = addr_align - sizeof(kmp_mem_desc_t); - desc = *((kmp_mem_desc_t *)addr_descr); // read descriptor - - KMP_DEBUG_ASSERT(desc.ptr_align == ptr); - if (allocator) { - KMP_DEBUG_ASSERT(desc.allocator == allocator); - } 
else { - allocator = desc.allocator; - } - KMP_DEBUG_ASSERT(allocator); - - if (allocator == omp_default_mem_alloc) - __kmp_free(desc.ptr_alloc); - if (allocator == omp_high_bw_mem_alloc && __kmp_hbw_mem_available) { - KMP_DEBUG_ASSERT(p_hbw_free != NULL); - p_hbw_free(desc.ptr_alloc); - } - KE_TRACE(10, ("__kmpc_free: T#%d freed %p (%p)\n", gtid, desc.ptr_alloc, - allocator)); -} - -#endif - -/* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes - memory leaks, but it may be useful for debugging memory corruption, use of - freed pointers, etc. */ -/* #define LEAK_MEMORY */ -struct kmp_mem_descr { // Memory block descriptor. - void *ptr_allocated; // Pointer returned by malloc(), subject for free(). - size_t size_allocated; // Size of allocated memory block. - void *ptr_aligned; // Pointer to aligned memory, to be used by client code. - size_t size_aligned; // Size of aligned memory block. -}; -typedef struct kmp_mem_descr kmp_mem_descr_t; - -/* Allocate memory on requested boundary, fill allocated memory with 0x00. - NULL is NEVER returned, __kmp_abort() is called in case of memory allocation - error. Must use __kmp_free when freeing memory allocated by this routine! */ -static void *___kmp_allocate_align(size_t size, - size_t alignment KMP_SRC_LOC_DECL) { - /* __kmp_allocate() allocates (by a call to malloc()) a bigger memory block - than requested in order to return a properly aligned pointer. The original - pointer returned by malloc() and the size of the allocated block are saved - in a descriptor just before the aligned pointer. This information is used - by __kmp_free() -- it has to pass the original pointer, not the aligned - one, to free(). - - +---------+------------+-----------------------------------+---------+ - | padding | descriptor | aligned block | padding | - +---------+------------+-----------------------------------+---------+ - ^ ^ - | | - | +- Aligned pointer returned to caller - +- Pointer returned by malloc() - - Aligned block is filled with zeros, paddings are filled with 0xEF. */ - - kmp_mem_descr_t descr; - kmp_uintptr_t addr_allocated; // Address returned by malloc(). - kmp_uintptr_t addr_aligned; // Aligned address to return to caller. - kmp_uintptr_t addr_descr; // Address of memory block descriptor. - - KE_TRACE(25, ("-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n", - (int)size, (int)alignment KMP_SRC_LOC_PARM)); - - KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big - KMP_DEBUG_ASSERT(sizeof(void *) <= sizeof(kmp_uintptr_t)); - // Make sure kmp_uintptr_t is enough to store addresses.
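/* A worked example with assumed numbers (LP64, where sizeof(kmp_mem_descr_t)
   is 32): for alignment == 64 and malloc() returning 0x1008,
     addr_aligned = (0x1008 + 32 + 64) & ~63 = 0x1068 & ~63 = 0x1040
     addr_descr   = 0x1040 - 32 = 0x1020
   so the descriptor sits entirely inside the allocation, the aligned block
   starts immediately after it, and every KMP_DEBUG_ASSERT below holds. */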
- - descr.size_aligned = size; - descr.size_allocated = - descr.size_aligned + sizeof(kmp_mem_descr_t) + alignment; - -#if KMP_DEBUG - descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_); -#else - descr.ptr_allocated = malloc_src_loc(descr.size_allocated KMP_SRC_LOC_PARM); -#endif - KE_TRACE(10, (" malloc( %d ) returned %p\n", (int)descr.size_allocated, - descr.ptr_allocated)); - if (descr.ptr_allocated == NULL) { - KMP_FATAL(OutOfHeapMemory); - } - - addr_allocated = (kmp_uintptr_t)descr.ptr_allocated; - addr_aligned = - (addr_allocated + sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1); - addr_descr = addr_aligned - sizeof(kmp_mem_descr_t); - - descr.ptr_aligned = (void *)addr_aligned; - - KE_TRACE(26, (" ___kmp_allocate_align: " - "ptr_allocated=%p, size_allocated=%d, " - "ptr_aligned=%p, size_aligned=%d\n", - descr.ptr_allocated, (int)descr.size_allocated, - descr.ptr_aligned, (int)descr.size_aligned)); - - KMP_DEBUG_ASSERT(addr_allocated <= addr_descr); - KMP_DEBUG_ASSERT(addr_descr + sizeof(kmp_mem_descr_t) == addr_aligned); - KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <= - addr_allocated + descr.size_allocated); - KMP_DEBUG_ASSERT(addr_aligned % alignment == 0); -#ifdef KMP_DEBUG - memset(descr.ptr_allocated, 0xEF, descr.size_allocated); -// Fill allocated memory block with 0xEF. -#endif - memset(descr.ptr_aligned, 0x00, descr.size_aligned); - // Fill the aligned memory block (which is intended for use by the caller) - // with 0x00. Do not - // put this filling under KMP_DEBUG condition! Many callers expect zeroed - // memory. (Padding - // bytes remain filled with 0xEF in debugging library.) - *((kmp_mem_descr_t *)addr_descr) = descr; - - KMP_MB(); - - KE_TRACE(25, ("<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned)); - return descr.ptr_aligned; -} // func ___kmp_allocate_align - -/* Allocate memory on cache line boundary, fill allocated memory with 0x00. - Do not call this func directly! Use __kmp_allocate macro instead. - NULL is NEVER returned, __kmp_abort() is called in case of memory allocation - error. Must use __kmp_free when freeing memory allocated by this routine! */ -void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL) { - void *ptr; - KE_TRACE(25, ("-> __kmp_allocate( %d ) called from %s:%d\n", - (int)size KMP_SRC_LOC_PARM)); - ptr = ___kmp_allocate_align(size, __kmp_align_alloc KMP_SRC_LOC_PARM); - KE_TRACE(25, ("<- __kmp_allocate() returns %p\n", ptr)); - return ptr; -} // func ___kmp_allocate - -/* Allocate memory on page boundary, fill allocated memory with 0x00. - Do not call this func directly! Use __kmp_page_allocate macro instead. - NULL is NEVER returned, __kmp_abort() is called in case of memory allocation - error. Must use __kmp_free when freeing memory allocated by this routine! */ -void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL) { - int page_size = 8 * 1024; - void *ptr; - - KE_TRACE(25, ("-> __kmp_page_allocate( %d ) called from %s:%d\n", - (int)size KMP_SRC_LOC_PARM)); - ptr = ___kmp_allocate_align(size, page_size KMP_SRC_LOC_PARM); - KE_TRACE(25, ("<- __kmp_page_allocate( %d ) returns %p\n", (int)size, ptr)); - return ptr; -} // ___kmp_page_allocate - -/* Free memory allocated by __kmp_allocate() and __kmp_page_allocate(). - In debug mode, fill the memory block with 0xEF before the call to free(). */ -void ___kmp_free(void *ptr KMP_SRC_LOC_DECL) { - kmp_mem_descr_t descr; - kmp_uintptr_t addr_allocated; // Address returned by malloc(). - kmp_uintptr_t addr_aligned; // Aligned address passed by caller.
- - KE_TRACE(25, - ("-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM)); - KMP_ASSERT(ptr != NULL); - - descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t)); - - KE_TRACE(26, (" __kmp_free: " - "ptr_allocated=%p, size_allocated=%d, " - "ptr_aligned=%p, size_aligned=%d\n", - descr.ptr_allocated, (int)descr.size_allocated, - descr.ptr_aligned, (int)descr.size_aligned)); - - addr_allocated = (kmp_uintptr_t)descr.ptr_allocated; - addr_aligned = (kmp_uintptr_t)descr.ptr_aligned; - - KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0); - KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr); - KMP_DEBUG_ASSERT(addr_allocated + sizeof(kmp_mem_descr_t) <= addr_aligned); - KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated); - KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <= - addr_allocated + descr.size_allocated); - -#ifdef KMP_DEBUG - memset(descr.ptr_allocated, 0xEF, descr.size_allocated); -// Fill memory block with 0xEF, it helps catch using freed memory. -#endif - -#ifndef LEAK_MEMORY - KE_TRACE(10, (" free( %p )\n", descr.ptr_allocated)); -#ifdef KMP_DEBUG - _free_src_loc(descr.ptr_allocated, _file_, _line_); -#else - free_src_loc(descr.ptr_allocated KMP_SRC_LOC_PARM); -#endif -#endif - KMP_MB(); - KE_TRACE(25, ("<- __kmp_free() returns\n")); -} // func ___kmp_free - -#if USE_FAST_MEMORY == 3 -// Allocate fast memory by first scanning the thread's free lists -// If a chunk the right size exists, grab it off the free list. -// Otherwise allocate normally using kmp_thread_malloc. - -// AC: How to choose the limit? Just get 16 for now... -#define KMP_FREE_LIST_LIMIT 16 - -// Always use 128 bytes for determining buckets for caching memory blocks -#define DCACHE_LINE 128 - -void *___kmp_fast_allocate(kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL) { - void *ptr; - int num_lines; - int idx; - int index; - void *alloc_ptr; - size_t alloc_size; - kmp_mem_descr_t *descr; - - KE_TRACE(25, ("-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n", - __kmp_gtid_from_thread(this_thr), (int)size KMP_SRC_LOC_PARM)); - - num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE; - idx = num_lines - 1; - KMP_DEBUG_ASSERT(idx >= 0); - if (idx < 2) { - index = 0; // idx is [ 0, 1 ], use first free list - num_lines = 2; // 1, 2 cache lines or less than cache line - } else if ((idx >>= 2) == 0) { - index = 1; // idx is [ 2, 3 ], use second free list - num_lines = 4; // 3, 4 cache lines - } else if ((idx >>= 2) == 0) { - index = 2; // idx is [ 4, 15 ], use third free list - num_lines = 16; // 5, 6, ..., 16 cache lines - } else if ((idx >>= 2) == 0) { - index = 3; // idx is [ 16, 63 ], use fourth free list - num_lines = 64; // 17, 18, ..., 64 cache lines - } else { - goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists - } - - ptr = this_thr->th.th_free_lists[index].th_free_list_self; - if (ptr != NULL) { - // pop the head of no-sync free list - this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr); - KMP_DEBUG_ASSERT( - this_thr == - ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t))) - ->ptr_aligned); - goto end; - } - ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync); - if (ptr != NULL) { - // no-sync free list is empty, use sync free list (filled in by other - // threads only) - // pop the head of the sync free list, push NULL instead - while (!KMP_COMPARE_AND_STORE_PTR( - &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, nullptr)) { - KMP_CPU_PAUSE(); - ptr = 
-      ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
-    }
-    // Push the rest of the chain onto the no-sync free list (it can be NULL
-    // if this was the only block).
-    this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
-    KMP_DEBUG_ASSERT(
-        this_thr ==
-        ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t)))
-            ->ptr_aligned);
-    goto end;
-  }
-
-alloc_call:
-  // No block found in the free lists, so allocate one.
-  size = num_lines * DCACHE_LINE;
-
-  alloc_size = size + sizeof(kmp_mem_descr_t) + DCACHE_LINE;
-  KE_TRACE(25, ("__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with "
-                "alloc_size %d\n",
-                __kmp_gtid_from_thread(this_thr), alloc_size));
-  alloc_ptr = bget(this_thr, (bufsize)alloc_size);
-
-  // align ptr to DCACHE_LINE
-  ptr = (void *)((((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) +
-                  DCACHE_LINE) &
-                 ~(DCACHE_LINE - 1));
-  descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
-
-  descr->ptr_allocated = alloc_ptr; // remember the allocated pointer
-  // (size_allocated is not needed here)
-  descr->ptr_aligned = (void *)this_thr; // remember the allocating thread
-  // (it is already saved in the bget buffer, but we may want to use another
-  // allocator in the future)
-  descr->size_aligned = size;
-
-end:
-  KE_TRACE(25, ("<- __kmp_fast_allocate( T#%d ) returns %p\n",
-                __kmp_gtid_from_thread(this_thr), ptr));
-  return ptr;
-} // func __kmp_fast_allocate
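The shift cascade above rounds every request up to one of four bucket sizes
(2, 4, 16, or 64 cache lines of DCACHE_LINE = 128 bytes); anything beyond 64
lines (8KB) skips the free lists entirely. The same mapping restated as a
self-contained helper, which makes the bucket boundaries easy to check
(hypothetical function, not part of the runtime; like the runtime, it assumes
size > 0):

#include <cstddef>

// Mirror of the bucket selection in ___kmp_fast_allocate. Returns the
// free-list index and stores the rounded block size in cache lines, or
// returns -1 when the request must go straight to the allocator.
static int bucket_index(size_t size, int *rounded_lines) {
  const size_t line = 128; // DCACHE_LINE
  int idx = (int)((size + line - 1) / line) - 1;
  if (idx < 2) { *rounded_lines = 2; return 0; }           // up to 2 lines
  if ((idx >>= 2) == 0) { *rounded_lines = 4; return 1; }  // 3..4 lines
  if ((idx >>= 2) == 0) { *rounded_lines = 16; return 2; } // 5..16 lines
  if ((idx >>= 2) == 0) { *rounded_lines = 64; return 3; } // 17..64 lines
  return -1; // 65 or more lines ( > 8KB )
}

For example, with int n declared, bucket_index(128, &n) is 0 with n == 2,
bucket_index(8 * 1024, &n) is 3 with n == 64, and bucket_index(8 * 1024 + 1,
&n) is -1.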
-
-// Free fast memory and place it on the thread's free list if it is of
-// the correct size.
-void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL) {
-  kmp_mem_descr_t *descr;
-  kmp_info_t *alloc_thr;
-  size_t size;
-  size_t idx;
-  int index;
-
-  KE_TRACE(25, ("-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
-                __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM));
-  KMP_ASSERT(ptr != NULL);
-
-  descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
-
-  KE_TRACE(26, ("   __kmp_fast_free: size_aligned=%d\n",
-                (int)descr->size_aligned));
-
-  size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines
-
-  idx = DCACHE_LINE * 2; // 2 cache lines is the minimal size of a block
-  if (idx == size) {
-    index = 0; // 2 cache lines
-  } else if ((idx <<= 1) == size) {
-    index = 1; // 4 cache lines
-  } else if ((idx <<= 2) == size) {
-    index = 2; // 16 cache lines
-  } else if ((idx <<= 2) == size) {
-    index = 3; // 64 cache lines
-  } else {
-    KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64);
-    goto free_call; // 65 or more cache lines ( > 8KB )
-  }
-
-  alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
-  if (alloc_thr == this_thr) {
-    // push block onto the self no-sync free list, linking previous head (LIFO)
-    *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
-    this_thr->th.th_free_lists[index].th_free_list_self = ptr;
-  } else {
-    void *head = this_thr->th.th_free_lists[index].th_free_list_other;
-    if (head == NULL) {
-      // Create a new free list
-      this_thr->th.th_free_lists[index].th_free_list_other = ptr;
-      *((void **)ptr) = NULL; // mark the tail of the list
-      descr->size_allocated = (size_t)1; // head of the list keeps its length
-    } else {
-      // need to check the existing "other" list's owner thread and queue size
-      kmp_mem_descr_t *dsc =
-          (kmp_mem_descr_t *)((char *)head - sizeof(kmp_mem_descr_t));
-      // allocating thread, same for all queue nodes
-      kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned);
-      size_t q_sz =
-          dsc->size_allocated + 1; // new size in case we add the current task
-      if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) {
-        // we can add the current task to the "other" list, no sync needed
-        *((void **)ptr) = head;
-        descr->size_allocated = q_sz;
-        this_thr->th.th_free_lists[index].th_free_list_other = ptr;
-      } else {
-        // Either the owner of the queued blocks is changing or the size limit
-        // was exceeded: return the old queue to its allocating thread (q_th)
-        // synchronously, and start a new list for alloc_thr's tasks.
-        void *old_ptr;
-        void *tail = head;
-        void *next = *((void **)head);
-        while (next != NULL) {
-          KMP_DEBUG_ASSERT(
-              // queue size should decrease by 1 each step through the list
-              ((kmp_mem_descr_t *)((char *)next - sizeof(kmp_mem_descr_t)))
-                      ->size_allocated +
-                  1 ==
-              ((kmp_mem_descr_t *)((char *)tail - sizeof(kmp_mem_descr_t)))
-                  ->size_allocated);
-          tail = next; // remember tail node
-          next = *((void **)next);
-        }
-        KMP_DEBUG_ASSERT(q_th != NULL);
-        // push the block onto the owner's sync free list
-        old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
-        /* the next pointer must be set before setting free_list to ptr to avoid
-           exposing a broken list to other threads, even for an instant. */
-        *((void **)tail) = old_ptr;
-
-        while (!KMP_COMPARE_AND_STORE_PTR(
-            &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) {
-          KMP_CPU_PAUSE();
-          old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
-          *((void **)tail) = old_ptr;
-        }
-
-        // start a new list of not-self tasks
-        this_thr->th.th_free_lists[index].th_free_list_other = ptr;
-        *((void **)ptr) = NULL;
-        descr->size_allocated = (size_t)1; // head of queue keeps its length
-      }
-    }
-  }
-  goto end;
-
-free_call:
-  KE_TRACE(25, ("__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
-                __kmp_gtid_from_thread(this_thr), size));
-  __kmp_bget_dequeue(this_thr); /* Release any queued buffers */
-  brel(this_thr, descr->ptr_allocated);
-
-end:
-  KE_TRACE(25, ("<- __kmp_fast_free() returns\n"));
-
-} // func __kmp_fast_free
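TCR_PTR and KMP_COMPARE_AND_STORE_PTR above are the runtime's volatile-read
and compare-and-swap primitives, and the cross-thread branch is a classic
lock-free LIFO push: the chain's tail is linked to the current head before the
head is republished, so no reader ever sees a half-linked list. A portable
sketch of the same pattern with std::atomic (illustrative only; the real code
pushes a whole queue at once via the tail pointer, and the allocate path pops
by CAS-ing NULL in rather than by exchange()):

#include <atomic>

struct block { block *next; };

// Push one block onto another thread's sync free list without locks.
static void sync_list_push(std::atomic<block *> &list, block *b) {
  block *old_head = list.load(std::memory_order_relaxed);
  do {
    // Link before publishing, never exposing a broken list.
    b->next = old_head;
  } while (!list.compare_exchange_weak(old_head, b,
                                       std::memory_order_release,
                                       std::memory_order_relaxed));
}

// Owner side: atomically take the whole list, leaving it empty.
static block *sync_list_take_all(std::atomic<block *> &list) {
  return list.exchange(nullptr, std::memory_order_acquire);
}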
-
-// Initialize the thread free lists related to fast memory
-// Only do this when a thread is initially created.
-void __kmp_initialize_fast_memory(kmp_info_t *this_thr) {
-  KE_TRACE(10, ("__kmp_initialize_fast_memory: Called from th %p\n", this_thr));
-
-  memset(this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof(kmp_free_list_t));
-}
-
-// Free the memory in the thread free lists related to fast memory
-// Only do this when a thread is being reaped (destroyed).
-void __kmp_free_fast_memory(kmp_info_t *th) {
-  // Assuming BGET is the underlying allocator, walk through its structures...
-  int bin;
-  thr_data_t *thr = get_thr_data(th);
-  void **lst = NULL;
-
-  KE_TRACE(
-      5, ("__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th)));
-
-  __kmp_bget_dequeue(th); // Release any queued buffers
-
-  // Dig through the free lists and extract all allocated blocks
-  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
-    bfhead_t *b = thr->freelist[bin].ql.flink;
-    while (b != &thr->freelist[bin]) {
-      if ((kmp_uintptr_t)b->bh.bb.bthr & 1) { // low bit set: buffer is allocated
-        *((void **)b) =
-            lst; // link the list (overwrites bthr but keeps flink for now)
-        lst = (void **)b; // push b onto lst
-      }
-      b = b->ql.flink; // get the next buffer
-    }
-  }
-  while (lst != NULL) {
-    void *next = *lst;
-    KE_TRACE(10, ("__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
-                  lst, next, th, __kmp_gtid_from_thread(th)));
-    (*thr->relfcn)(lst);
-#if BufStats
-    // count blocks to prevent problems in __kmp_finalize_bget()
-    thr->numprel++; /* Nr of expansion block releases */
-    thr->numpblk--; /* Total number of blocks */
-#endif
-    lst = (void **)next;
-  }
-
-  KE_TRACE(
-      5, ("__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th)));
-}
-
-#endif // USE_FAST_MEMORY

Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_alloc.cpp
___________________________________________________________________
Deleted: svn:eol-style
## -1 +0,0 ##
-native
\ No newline at end of property
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Deleted: svn:mime-type
## -1 +0,0 ##
-text/plain
\ No newline at end of property
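One detail of __kmp_free_fast_memory() above worth calling out: it reuses the
first word of each still-allocated BGET buffer to thread a temporary
singly-linked list through the blocks themselves, then walks that list calling
the release function, so thread teardown needs no auxiliary memory. The idiom
reduced to its essentials (hypothetical sketch; the real code walks bfhead_t
bins and recognizes allocated buffers via the low bit of their thread pointer):

#include <cstdlib>

// Link raw blocks into an intrusive list by overwriting their first word,
// then release them all; the bookkeeping lives inside the blocks being freed.
static void release_all(void *blocks[], int count) {
  void **lst = nullptr;
  for (int i = 0; i < count; ++i) {
    *(void **)blocks[i] = lst; // first word now points at the previous head
    lst = (void **)blocks[i];
  }
  while (lst != nullptr) {
    void *next = *lst;
    std::free(lst); // the runtime calls (*thr->relfcn)(lst) here
    lst = (void **)next;
  }
}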
Index: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_ftn_stdcall.cpp
===================================================================
--- vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_ftn_stdcall.cpp (revision 348960)
+++ vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_ftn_stdcall.cpp (nonexistent)
@@ -1,33 +0,0 @@
-/*
- * kmp_ftn_stdcall.cpp -- Fortran __stdcall linkage support for OpenMP.
- */
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "kmp.h"
-
-// Note: This string is not printed when KMP_VERSION=1.
-char const __kmp_version_ftnstdcall[] =
-    KMP_VERSION_PREFIX "Fortran __stdcall OMP support: "
-#ifdef USE_FTN_STDCALL
-    "yes";
-#else
-    "no";
-#endif
-
-#ifdef USE_FTN_STDCALL
-
-#define FTN_STDCALL KMP_STDCALL
-#define KMP_FTN_ENTRIES USE_FTN_STDCALL
-
-#include "kmp_ftn_entry.h"
-#include "kmp_ftn_os.h"
-
-#endif /* USE_FTN_STDCALL */

Property changes on: vendor/llvm-openmp/openmp-release80-r363030/runtime/src/kmp_ftn_stdcall.cpp
___________________________________________________________________
Deleted: svn:eol-style
## -1 +0,0 ##
-native
\ No newline at end of property
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Deleted: svn:mime-type
## -1 +0,0 ##
-text/plain
\ No newline at end of property
Index: vendor/llvm-openmp/openmp-release80-r363030/CREDITS.txt
===================================================================
--- vendor/llvm-openmp/openmp-release80-r363030/CREDITS.txt (revision 348960)
+++ vendor/llvm-openmp/openmp-release80-r363030/CREDITS.txt (nonexistent)
@@ -1,61 +0,0 @@
-This file is a partial list of people who have contributed to the LLVM/openmp
-project. If you have contributed a patch or made some other contribution to
-LLVM/openmp, please submit a patch to this file to add yourself, and it will be
-done!
-
-The list is sorted by surname and formatted to allow easy grepping and
-beautification by scripts. The fields are: name (N), email (E), web-address
-(W), PGP key ID and fingerprint (P), description (D), and snail-mail address
-(S).
-
-N: Adam Azarchs
-W: 10xgenomics.com
-D: Bug fix for lock code
-
-N: Carlo Bertolli
-W: http://ibm.com
-D: IBM contributor to PowerPC support in CMake files and elsewhere.
-
-N: Diego Caballero
-E: diego.l.caballero@gmail.com
-D: Fork performance improvements
-
-N: Sunita Chandrasekaran
-D: Contributor to testsuite from OpenUH
-
-N: Barbara Chapman
-D: Contributor to testsuite from OpenUH
-
-N: University of Houston
-W: http://web.cs.uh.edu/~openuh/download/
-D: OpenUH test suite
-
-N: Intel Corporation OpenMP runtime team
-W: http://openmprtl.org
-D: Created the runtime.
-
-N: John Mellor-Crummey and other members of the OpenMP Tools Working Group
-E: johnmc@rice.edu
-D: OpenMP Tools Interface (OMPT)
-
-N: Matthias Muller
-D: Contributor to testsuite from OpenUH
-
-N: Tal Nevo
-E: tal@scalemp.com
-D: ScaleMP contributor to improve runtime performance there.
-W: http://scalemp.com
-
-N: Pavel Neytchev
-D: Contributor to testsuite from OpenUH
-
-N: Steven Noonan
-E: steven@uplinklabs.net
-D: Patches for the ARM architecture and removal of several inconsistencies.
-
-N: Alp Toker
-E: alp@nuanti.com
-D: Making build work for FreeBSD.
- -N: Cheng Wang -D: Contributor to testsuite from OpenUH Property changes on: vendor/llvm-openmp/openmp-release80-r363030/CREDITS.txt ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: vendor/llvm-openmp/openmp-release80-r363030/LICENSE.txt =================================================================== --- vendor/llvm-openmp/openmp-release80-r363030/LICENSE.txt (revision 348960) +++ vendor/llvm-openmp/openmp-release80-r363030/LICENSE.txt (nonexistent) @@ -1,174 +0,0 @@ -============================================================================== - -The software contained in this directory tree is dual licensed under both the -University of Illinois "BSD-Like" license and the MIT license. As a user of -this code you may choose to use it under either license. As a contributor, -you agree to allow your code to be used under both. The full text of the -relevant licenses is included below. - -In addition, a license agreement from the copyright/patent holders of the -software contained in this directory tree is included below. - -============================================================================== - -University of Illinois/NCSA -Open Source License - -Copyright (c) 1997-2019 Intel Corporation - -All rights reserved. - -Developed by: - OpenMP Runtime Team - Intel Corporation - http://www.openmprtl.org - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal with -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimers. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimers in the - documentation and/or other materials provided with the distribution. - - * Neither the names of Intel Corporation OpenMP Runtime Team nor the - names of its contributors may be used to endorse or promote products - derived from this Software without specific prior written permission. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE -SOFTWARE. 
- -============================================================================== - -Copyright (c) 1997-2019 Intel Corporation - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. - -============================================================================== - -Intel Corporation - -Software Grant License Agreement ("Agreement") - -Except for the license granted herein to you, Intel Corporation ("Intel") reserves -all right, title, and interest in and to the Software (defined below). - -Definition - -"Software" means the code and documentation as well as any original work of -authorship, including any modifications or additions to an existing work, that -is intentionally submitted by Intel to llvm.org (http://llvm.org) ("LLVM") for -inclusion in, or documentation of, any of the products owned or managed by LLVM -(the "Work"). For the purposes of this definition, "submitted" means any form of -electronic, verbal, or written communication sent to LLVM or its -representatives, including but not limited to communication on electronic -mailing lists, source code control systems, and issue tracking systems that are -managed by, or on behalf of, LLVM for the purpose of discussing and improving -the Work, but excluding communication that is conspicuously marked otherwise. - -1. Grant of Copyright License. Subject to the terms and conditions of this - Agreement, Intel hereby grants to you and to recipients of the Software - distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge, - royalty-free, irrevocable copyright license to reproduce, prepare derivative - works of, publicly display, publicly perform, sublicense, and distribute the - Software and such derivative works. - -2. Grant of Patent License. Subject to the terms and conditions of this - Agreement, Intel hereby grants you and to recipients of the Software - distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge, - royalty-free, irrevocable (except as stated in this section) patent license - to make, have made, use, offer to sell, sell, import, and otherwise transfer - the Work, where such license applies only to those patent claims licensable - by Intel that are necessarily infringed by Intel's Software alone or by - combination of the Software with the Work to which such Software was - submitted. 
If any entity institutes patent litigation against Intel or any - other entity (including a cross-claim or counterclaim in a lawsuit) alleging - that Intel's Software, or the Work to which Intel has contributed constitutes - direct or contributory patent infringement, then any patent licenses granted - to that entity under this Agreement for the Software or Work shall terminate - as of the date such litigation is filed. - -Unless required by applicable law or agreed to in writing, the software is -provided on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -either express or implied, including, without limitation, any warranties or -conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A -PARTICULAR PURPOSE. - -============================================================================== - -ARM Limited - -Software Grant License Agreement ("Agreement") - -Except for the license granted herein to you, ARM Limited ("ARM") reserves all -right, title, and interest in and to the Software (defined below). - -Definition - -"Software" means the code and documentation as well as any original work of -authorship, including any modifications or additions to an existing work, that -is intentionally submitted by ARM to llvm.org (http://llvm.org) ("LLVM") for -inclusion in, or documentation of, any of the products owned or managed by LLVM -(the "Work"). For the purposes of this definition, "submitted" means any form of -electronic, verbal, or written communication sent to LLVM or its -representatives, including but not limited to communication on electronic -mailing lists, source code control systems, and issue tracking systems that are -managed by, or on behalf of, LLVM for the purpose of discussing and improving -the Work, but excluding communication that is conspicuously marked otherwise. - -1. Grant of Copyright License. Subject to the terms and conditions of this - Agreement, ARM hereby grants to you and to recipients of the Software - distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge, - royalty-free, irrevocable copyright license to reproduce, prepare derivative - works of, publicly display, publicly perform, sublicense, and distribute the - Software and such derivative works. - -2. Grant of Patent License. Subject to the terms and conditions of this - Agreement, ARM hereby grants you and to recipients of the Software - distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge, - royalty-free, irrevocable (except as stated in this section) patent license - to make, have made, use, offer to sell, sell, import, and otherwise transfer - the Work, where such license applies only to those patent claims licensable - by ARM that are necessarily infringed by ARM's Software alone or by - combination of the Software with the Work to which such Software was - submitted. If any entity institutes patent litigation against ARM or any - other entity (including a cross-claim or counterclaim in a lawsuit) alleging - that ARM's Software, or the Work to which ARM has contributed constitutes - direct or contributory patent infringement, then any patent licenses granted - to that entity under this Agreement for the Software or Work shall terminate - as of the date such litigation is filed. 
- -Unless required by applicable law or agreed to in writing, the software is -provided on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -either express or implied, including, without limitation, any warranties or -conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A -PARTICULAR PURPOSE. - -============================================================================== Property changes on: vendor/llvm-openmp/openmp-release80-r363030/LICENSE.txt ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property