Commit f730a720 authored by Stepan Nassyr
Browse files

CANN TensorFlow 1.15.0 (compiles and imports fine)

parent 228953f1
Rename gettid to sys_gettid to avoid name clash with gettid of glibc 2.30+
https://github.com/grpc/grpc/commit/de6255941a5e1c2fb2d50e57f84e38c09f45023d
https://github.com/grpc/grpc/commit/57586a1ca7f17b1916aed3dea4ff8de872dbf853
diff --git a/third_party/grpc/src/core/lib/gpr/log_linux.cc b/third_party/grpc/src/core/lib/gpr/log_linux.cc
index 561276f0c2..8b597b4cf2 100644
--- a/third_party/grpc/src/core/lib/gpr/log_linux.cc
+++ b/third_party/grpc/src/core/lib/gpr/log_linux.cc
@@ -40,7 +40,7 @@
#include <time.h>
#include <unistd.h>
-static long gettid(void) { return syscall(__NR_gettid); }
+static long sys_gettid(void) { return syscall(__NR_gettid); }
void gpr_log(const char* file, int line, gpr_log_severity severity,
const char* format, ...) {
@@ -70,7 +70,7 @@ void gpr_default_log(gpr_log_func_args* args) {
gpr_timespec now = gpr_now(GPR_CLOCK_REALTIME);
struct tm tm;
static __thread long tid = 0;
- if (tid == 0) tid = gettid();
+ if (tid == 0) tid = sys_gettid();
timer = static_cast<time_t>(now.tv_sec);
final_slash = strrchr(args->file, '/');
diff --git a/third_party/grpc/src/core/lib/gpr/log_posix.cc b/third_party/grpc/src/core/lib/gpr/log_posix.cc
index 0acb225572..cd0b702b94 100644
--- a/third_party/grpc/src/core/lib/gpr/log_posix.cc
+++ b/third_party/grpc/src/core/lib/gpr/log_posix.cc
@@ -30,7 +30,7 @@
#include <string.h>
#include <time.h>
-static intptr_t gettid(void) { return (intptr_t)pthread_self(); }
+static intptr_t sys_gettid(void) { return (intptr_t)pthread_self(); }
void gpr_log(const char* file, int line, gpr_log_severity severity,
const char* format, ...) {
@@ -85,7 +85,7 @@ void gpr_default_log(gpr_log_func_args* args) {
char* prefix;
gpr_asprintf(&prefix, "%s%s.%09d %7tu %s:%d]",
gpr_log_severity_string(args->severity), time_buffer,
- (int)(now.tv_nsec), gettid(), display_file, args->line);
+ (int)(now.tv_nsec), sys_gettid(), display_file, args->line);
fprintf(stderr, "%-70s %s\n", prefix, args->message);
gpr_free(prefix);
diff --git a/third_party/grpc/src/core/lib/iomgr/ev_epollex_linux.cc b/third_party/grpc/src/core/lib/iomgr/ev_epollex_linux.cc
index 7a4870db78..4258ded8a0 100644
--- a/third_party/grpc/src/core/lib/iomgr/ev_epollex_linux.cc
+++ b/third_party/grpc/src/core/lib/iomgr/ev_epollex_linux.cc
@@ -1150,7 +1150,7 @@ static void end_worker(grpc_pollset* pollset, grpc_pollset_worker* worker,
}
#ifndef NDEBUG
-static long gettid(void) { return syscall(__NR_gettid); }
+static long sys_gettid(void) { return syscall(__NR_gettid); }
#endif
/* pollset->mu lock must be held by the caller before calling this.
@@ -1170,7 +1170,7 @@ static grpc_error* pollset_work(grpc_pollset* pollset,
#define WORKER_PTR (&worker)
#endif
#ifndef NDEBUG
- WORKER_PTR->originator = gettid();
+ WORKER_PTR->originator = sys_gettid();
#endif
if (grpc_polling_trace.enabled()) {
gpr_log(GPR_INFO,
# EasyBuild easyconfig for Bazel 0.26.1, built from the upstream "dist"
# archive with the GCCcore 9.3.0 toolchain.
name = 'Bazel'
version = '0.26.1'

homepage = 'https://bazel.io/'
description = """Bazel is a build tool that builds code quickly and reliably.
It is used to build the majority of Google's software."""

toolchain = {'name': 'GCCcore', 'version': '9.3.0'}

# The release archive is fetched from the GitHub release page of this version.
source_urls = ['https://github.com/bazelbuild/bazel/releases/download/%(version)s']
sources = ['%(namelower)s-%(version)s-dist.zip']

# The patch file name carries 0.25.2 rather than %(version)s.
patches = ['%(name)s-0.25.2_rename_gettid.patch']

checksums = [
    # bazel-0.26.1-dist.zip
    'c0e94f8f818759f3f67af798c38683520c540f469cb41aea8f5e5a0e43f11600',
    # rename_gettid patch from `patches`.
    # NOTE(review): this entry was previously labelled
    # "Bazel-0.26.1_rename_gettid.patch", which does not match the 0.25.2
    # file name listed in `patches` -- confirm which file is actually shipped.
    '8639129941a6db079015ea7e04e7f5b6b24da3c963e7eb0488df34439d628f0e',
]

# Needed only while building.
builddependencies = [
    ('binutils', '2.36.1'),
    ('Python', '3.7.5'),
    ('Zip', '3.0'),
]

# Java is given as a 4-tuple (name, version, versionsuffix, True).
# NOTE(review): the trailing True is presumably EasyBuild's "system toolchain"
# marker for this dependency -- confirm against the EasyBuild version in use.
dependencies = [('Java', '8.292.10', '', True)]

moduleclass = 'devel'
# Stepan Nassyr (JSC)
#
# EasyBuild easyconfig that builds the CANN TensorFlow plugin (npu_bridge)
# from a git checkout, using CMake with the Ninja generator.
easyblock = 'CMakeNinja'

name = 'CANN-tfplugin'
version = '1.7.0'

homepage = 'https://e.huawei.com/en/products/cloud-computing-dc/atlas/cann'
description = 'CANN plugin for TensorFlow'

versionsuffix = '-Python-%(pyver)s'
toolchain = {'name': 'goolf', 'version': '2021a.9'}

# The source is not a release tarball: the repository is cloned at a fixed
# commit (including submodules, keeping the .git directory) and packed into
# the tarball named by 'filename'.
sources = [{
    'filename': '%(name)s-%(version)s.tar.gz',
    'git_config': {
        'url': 'https://gitee.com/ascend/',
        'repo_name': 'tensorflow',
        'commit': '341e07f',
        'recursive': True,
        'keep_git_dir': True,
    },
}]

patches = [
    'tfplugin-1.7.0-secure_c-checksum.patch',
    'tfplugin-1.7.0-use-swig-properly.patch',
    'tfplugin-1.7.0-install-in-cmake.patch',
    'tfplugin-1.7.0-flag-fix-plus-keep-cxx11abi.patch',
    'tfplugin-1.7.0-ge-cxx11abi-shim.patch',
]

builddependencies = [
    ('CMake', '3.20.0'),
    ('Ninja', '1.10.2'),
    ('SWIG', '4.0.2'),
]

# Helper value, not an easyconfig parameter. It carries the 'local_' prefix so
# that EasyBuild does not report it as an unknown easyconfig parameter.
local_cann_version = "5.0.2.alpha005"

dependencies = [
    ('Python', '3.7.5'),
    ('CANN-Toolkit', local_cann_version, versionsuffix),
    ('SciPy-Stack', '2021a', versionsuffix),
    ('TensorFlow-CANN', '1.15.0', versionsuffix),
]

# Before CMake is invoked: remember the current directory, run the project's
# own ./configure inside %(builddir)s/tensorflow with ASCEND_CUSTOM_PATH
# pointing at the CANN toolkit of the matching version, then change back.
# '%%' keeps the %(builddir)s template intact through the '%' formatting.
preconfigopts = "olddir=$(pwd) && cd %%(builddir)s/tensorflow && ASCEND_CUSTOM_PATH=$EBROOTCANNMINTOOLKIT/ascend-toolkit/%s/ ./configure && cd $olddir &&" % local_cann_version
configopts = "-DENABLE_OPEN_SRC=True"

sanity_check_paths = {
    'files': ['lib/python/site-packages/npu_bridge/tf_adapter.py'],
    'dirs': ['lib/python/site-packages/npu_bridge'],
}

# Expose the installed npu_bridge package to Python when the module is loaded.
modextrapaths = {
    'PYTHONPATH': ['lib/python/site-packages'],
}

moduleclass = 'lib'
# Stepan Nassyr (JSC)
#
# EasyBuild easyconfig that installs the prebuilt CANN TensorFlow plugin from
# the vendor's self-extracting .run installer, using the 'cann_package'
# easyblock.
easyblock = 'cann_package'

name = 'CANN-tfplugin'
version = '5.0.2.alpha005'

homepage = 'https://e.huawei.com/en/products/cloud-computing-dc/atlas/cann'
description = 'CANN plugin for TensorFlow'

versionsuffix = '-Python-%(pyver)s'
toolchain = {'name': 'goolf', 'version': '2021a.9'}

import platform as local_platform

# Machine type of the build host; it selects the installer, its checksum and
# the architecture-specific install directory below.
local_arch_suf = local_platform.machine()

# Directory prefix used inside the installed tree (<prefix>-linux).
# 'arm64' for aarch64 is what this easyconfig has always used.
# NOTE(review): 'x86_64' for x86_64 is assumed by analogy -- confirm against
# an actual x86_64 installation.
local_arch_dirs = {
    'x86_64': 'x86_64',
    'aarch64': 'arm64',
}

# SHA256 of the .run installer, per machine type.
local_arch_sums = {
    'x86_64': 'd3a787f91b2ee77828c455406dd9c2a468980a1b47492fdd3c59f37b936c7ef4',
    'aarch64': '410ad8d43b8ffa865ed6064f04ece80717af42885a1801eb6ebd3b9b3bfa0f6d',
}

# '%%' keeps the %(version)s template intact through the '%' formatting.
sources = [{
    'source_urls': ['https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/%(version)s/'],
    'filename': 'Ascend-cann-tfplugin_%%(version)s_linux-%s.run' % local_arch_suf,
}]
checksums = [
    local_arch_sums[local_arch_suf],
]

dependencies = [
    ('CMake', '3.20.0'),
    ('Python', '3.7.5'),
    ('CANN-Toolkit', version, versionsuffix),
    ('SciPy-Stack', '2021a', versionsuffix),
]

# NOTE(review): 'cann_installers' and 'subdir_symlinks' are presumably custom
# parameters consumed by the 'cann_package' easyblock -- their exact meaning
# is defined there, not here.
cann_installers = [
    ('Ascend-tfplugin', 'tfplugin/script/install.sh', '--pylocal'),
]
subdir_symlinks = []

# site-packages directory of the plugin, relative to the install prefix.
# It follows the machine type instead of hard-coding 'arm64-linux', so that the
# sanity check and PYTHONPATH match the installer that was actually selected.
local_site_packages = 'ascend-toolkit/%%(version)s/%s-linux/tfplugin/python/site-packages' % local_arch_dirs[local_arch_suf]

sanity_check_paths = {
    'files': [local_site_packages + '/npu_bridge/tf_adapter.py'],
    'dirs': [local_site_packages + '/npu_bridge'],
}

# Expose the installed npu_bridge package to Python when the module is loaded.
modextrapaths = {
    'PYTHONPATH': [local_site_packages],
}

moduleclass = 'lib'
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6c3c0b0..1b39479 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,10 +10,8 @@ option(ENABLE_OPEN_SRC "Enable graphengine compile in opensource." FALSE)
include(UseSWIG)
set(CMAKE_CXX_STANDARD 11)
- set(CMAKE_C_FLAGS "-O2 -DNDEBUG -Wno-deprecated-declarations -Wall -fPIC -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack -s -pipe ${CMAKE_C_FLAGS}")
- set(CMAKE_CXX_FLAGS "-std=c++11 -O2 -DNDEBUG -Wno-deprecated-declarations -Wall -fPIC -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack -s -pipe ${CMAKE_CXX_FLAGS}")
-
- add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
+ set(CMAKE_C_FLAGS "-DNDEBUG -Wfloat-equal -Wextra -Wno-deprecated-declarations -Wall -fPIC -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack -s -pipe ${CMAKE_C_FLAGS}")
+ set(CMAKE_CXX_FLAGS "-DNDEBUG -Wfloat-equal -Wextra -Wno-deprecated-declarations -Wall -fPIC -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack -s -pipe ${CMAKE_CXX_FLAGS}")
# build external prjects
if(DEFINED ENV{D_PKG_SERVER})
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6c3c0b0..9889527 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -88,6 +94,9 @@ if (ENABLE_OPEN_SRC)
COMMAND cd ${CMAKE_BINARY_DIR}/dist/python/ && ${PYTHON_BIN_PATH} setup.py bdist_wheel
VERBATIM
)
+
+ install(DIRECTORY ${CMAKE_BINARY_DIR}/dist/python/build/lib/npu_bridge
+ DESTINATION lib/python/site-packages)
else()
set(BASE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(CMAKE_VERBOSE_MAKEFILE ON)
diff --color -urN tensorflow-unpatched/cmake/secure_c.cmake tensorflow/cmake/secure_c.cmake
--- tensorflow-unpatched/cmake/secure_c.cmake 2022-02-04 09:27:50.579972570 +0000
+++ tensorflow/cmake/secure_c.cmake 2022-02-04 09:28:34.230256920 +0000
@@ -5,13 +5,13 @@
FetchContent_Declare(
secure_c
URL ${_json_url}
- URL_HASH MD5=ae4865cec1bfb52f7dca03f5c05ac98a
+ URL_HASH MD5=f3db321939ae17527b8939651f7e1c8b
)
else()
FetchContent_Declare(
secure_c
URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz
- URL_HASH MD5=ae4865cec1bfb52f7dca03f5c05ac98a
+ URL_HASH MD5=f3db321939ae17527b8939651f7e1c8b
)
endif()
FetchContent_GetProperties(secure_c)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6c3c0b0..9889527 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,8 +3,11 @@ option(ENABLE_OPEN_SRC "Enable graphengine compile in opensource." FALSE)
set(TFADAPTER_DIR ${CMAKE_CURRENT_LIST_DIR})
if (ENABLE_OPEN_SRC)
- cmake_minimum_required(VERSION 3.14)
+ cmake_minimum_required(VERSION 3.18)
project(TFAdapter)
+
+ find_package(SWIG 4.0 COMPONENTS python)
+ include(UseSWIG)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_C_FLAGS "-O2 -DNDEBUG -Wfloat-equal -Wextra -Wno-deprecated-declarations -Wall -fPIC -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack -s -pipe ${CMAKE_C_FLAGS}")
@@ -54,8 +57,10 @@ if (ENABLE_OPEN_SRC)
file(COPY ${CMAKE_CURRENT_LIST_DIR}/tf_adapter/python DESTINATION ${CMAKE_BINARY_DIR}/dist)
file(COPY ${CMAKE_CURRENT_LIST_DIR}/convert_tf2npu DESTINATION ${CMAKE_BINARY_DIR}/dist/python/npu_bridge)
file(COPY ${CMAKE_CURRENT_LIST_DIR}/tf_adapter/swig DESTINATION ${CMAKE_BINARY_DIR}/dist)
- execute_process(COMMAND ${SWIG_BIN_PATH} -python -c++ -threads ${CMAKE_BINARY_DIR}/dist/swig/ge_plugin.i)
- file(COPY ${CMAKE_BINARY_DIR}/dist/swig/tf_adapter.py DESTINATION ${CMAKE_BINARY_DIR}/dist/python/npu_bridge)
+
+ set_property(SOURCE ${CMAKE_BINARY_DIR}/dist/swig/ge_plugin.i PROPERTY CPLUSPLUS ON)
+ # execute_process(COMMAND ${SWIG_BIN_PATH} -python -c++ -threads ${CMAKE_BINARY_DIR}/dist/swig/ge_plugin.i)
+ #file(COPY ${CMAKE_BINARY_DIR}/dist/swig/tf_adapter.py DESTINATION ${CMAKE_BINARY_DIR}/dist/python/npu_bridge)
file(GLOB_RECURSE SOURCES
${CMAKE_CURRENT_LIST_DIR}/tf_adapter/common/*.cc
@@ -64,7 +69,8 @@ if (ENABLE_OPEN_SRC)
${CMAKE_CURRENT_LIST_DIR}/tf_adapter/optimizers/*.cc
${CMAKE_CURRENT_LIST_DIR}/tf_adapter/util/*.cc
)
- add_library(_tf_adapter SHARED ${SOURCES} ${CMAKE_BINARY_DIR}/dist/swig/ge_plugin_wrap.cxx)
+ swig_add_library(_tf_adapter TYPE SHARED LANGUAGE python SOURCES ${SOURCES} ${CMAKE_BINARY_DIR}/dist/swig/ge_plugin.i OUTPUT_DIR ${CMAKE_BINARY_DIR}/dist/python/npu_bridge)
+ set_property(TARGET _tf_adapter PROPERTY SWIG_COMPILE_OPTIONS -threads)
foreach (COMPILE_FLAG ${COMPILE_FLAGS})
target_compile_options(_tf_adapter PUBLIC "${COMPILE_FLAG}")
# EasyBuild easyconfig for nsync 1.22.0 with an additional local patch,
# configured by CMake and built with Ninja on the GCCcore 9.3.0 toolchain.
easyblock = 'CMakeNinja'

name = 'nsync-CANN'
version = '1.22.0'

homepage = 'https://github.com/google/nsync'
description = """nsync is a C library that exports various synchronization primitives, such as mutexes"""

toolchain = {'name': 'GCCcore', 'version': '9.3.0'}

# The upstream tag archive is named after the bare version number.
source_urls = ['https://github.com/google/nsync/archive/refs/tags/']
sources = ['%(version)s.tar.gz']
patches = ['nsync-cann.patch']

# NOTE(review): a single checksum is listed for one source plus one patch;
# presumably it belongs to the source tarball -- confirm, and consider adding
# the patch checksum.
checksums = ['caf32e6b3d478b78cff6c2ba009c3400f8251f646804bcb65465666a9cea93c4']

builddependencies = [
    ('binutils', '2.36.1'),
    ('CMake', '3.20.0'),
    ('Ninja', '1.10.2'),
]

# The install must provide the public header and both static libraries.
sanity_check_paths = {
    'files': [
        'include/nsync.h',
        'lib/libnsync.a',
        'lib/libnsync_cpp.a',
    ],
    'dirs': [],
}

moduleclass = 'devel'
diff --color -urN nsync-1.22.0-unpatched/platform/c++11/atomic.h nsync-1.22.0/platform/c++11/atomic.h
--- nsync-1.22.0-unpatched/platform/c++11/atomic.h 2022-02-03 09:37:29.403289592 +0000
+++ nsync-1.22.0/platform/c++11/atomic.h 2022-02-03 09:39:49.644194758 +0000
@@ -65,21 +65,31 @@
NSYNC_CPP_START_
+#define ATM_CB_() __sync_synchronize()
+
static INLINE int atm_cas_nomb_u32_ (nsync_atomic_uint32_ *p, uint32_t o, uint32_t n) {
- return (std::atomic_compare_exchange_strong_explicit (NSYNC_ATOMIC_UINT32_PTR_ (p), &o, n,
+ int result = (std::atomic_compare_exchange_strong_explicit (NSYNC_ATOMIC_UINT32_PTR_ (p), &o, n,
std::memory_order_relaxed, std::memory_order_relaxed));
+ ATM_CB_();
+ return result;
}
static INLINE int atm_cas_acq_u32_ (nsync_atomic_uint32_ *p, uint32_t o, uint32_t n) {
- return (std::atomic_compare_exchange_strong_explicit (NSYNC_ATOMIC_UINT32_PTR_ (p), &o, n,
+ int result = (std::atomic_compare_exchange_strong_explicit (NSYNC_ATOMIC_UINT32_PTR_ (p), &o, n,
std::memory_order_acquire, std::memory_order_relaxed));
+ ATM_CB_();
+ return result;
}
static INLINE int atm_cas_rel_u32_ (nsync_atomic_uint32_ *p, uint32_t o, uint32_t n) {
- return (std::atomic_compare_exchange_strong_explicit (NSYNC_ATOMIC_UINT32_PTR_ (p), &o, n,
+ int result = (std::atomic_compare_exchange_strong_explicit (NSYNC_ATOMIC_UINT32_PTR_ (p), &o, n,
std::memory_order_release, std::memory_order_relaxed));
+ ATM_CB_();
+ return result;
}
static INLINE int atm_cas_relacq_u32_ (nsync_atomic_uint32_ *p, uint32_t o, uint32_t n) {
- return (std::atomic_compare_exchange_strong_explicit (NSYNC_ATOMIC_UINT32_PTR_ (p), &o, n,
+ int result = (std::atomic_compare_exchange_strong_explicit (NSYNC_ATOMIC_UINT32_PTR_ (p), &o, n,
std::memory_order_acq_rel, std::memory_order_relaxed));
+ ATM_CB_();
+ return result;
}
#define ATM_CAS_HELPER_(barrier, p, o, n) (atm_cas_##barrier##_u32_ ((p), (o), (n)))
fix for "undeclared inclusion(s) in rule '//tensorflow/contrib/mpi_collectives:python/ops/_mpi_ops.so'"
see https://github.com/tensorflow/tensorflow/issues/17437
--- tensorflow-1.14.0/tensorflow/contrib/mpi_collectives/BUILD.orig 2019-08-15 11:20:32.933089345 +0200
+++ tensorflow-1.14.0/tensorflow/contrib/mpi_collectives/BUILD 2019-08-15 11:21:04.923134714 +0200
@@ -53,6 +53,7 @@
":mpi_defines",
":mpi_message_proto_cc",
"//third_party/mpi",
+ "//tensorflow/stream_executor",
],
)
--- tensorflow-1.14.0/tensorflow/tensorflow.bzl.orig 2019-08-15 11:20:32.933089345 +0200
+++ tensorflow-1.14.0/tensorflow/tensorflow.bzl 2019-08-15 11:21:46.833194234 +0200
@@ -1771,7 +1771,7 @@
name = name + "_check_deps",
disallowed_deps = [
clean_dep("//tensorflow/core:framework"),
- clean_dep("//tensorflow/core:lib"),
+ # clean_dep("//tensorflow/core:lib"),
],
deps = deps + if_cuda_is_configured_compat(cuda_deps) + if_rocm_is_configured(rocm_deps),
)
fix for SWIG existing with 'exit 1' when "SWIGing tensorflow/python/tensorflow.i"
see also https://github.com/bazelbuild/bazel/issues/4053
author: Kenneth Hoste (HPC-UGent)
--- tensorflow-1.14.0/tensorflow/tensorflow.bzl.orig 2019-08-15 09:50:47.915105028 +0200
+++ tensorflow-1.14.0/tensorflow/tensorflow.bzl 2019-08-15 09:53:06.305323690 +0200
@@ -1555,6 +1555,7 @@
args += [src.path]
outputs = [ctx.outputs.cc_out, ctx.outputs.py_out]
ctx.actions.run(
+ use_default_shell_env = True,
executable = ctx.executable._swig,
arguments = args,
inputs = inputs.to_list(),
diff --color -urN TensorFlow-unpatched/tensorflow-1.15.0/tensorflow/python/lib/core/bfloat16.cc TensorFlow/tensorflow-1.15.0/tensorflow/python/lib/core/bfloat16.cc
--- TensorFlow-unpatched/tensorflow-1.15.0/tensorflow/python/lib/core/bfloat16.cc 2022-02-03 15:16:08.426280419 +0000
+++ TensorFlow/tensorflow-1.15.0/tensorflow/python/lib/core/bfloat16.cc 2022-02-03 15:20:01.737820983 +0000
@@ -490,7 +490,7 @@
}
template <typename InType, typename OutType, typename Functor>
-void BinaryUFunc(char** args, npy_intp* dimensions, npy_intp* steps,
+void BinaryUFunc(char** args, npy_intp const* dimensions, npy_intp const* steps,
void* data) {
const char* i0 = args[0];
const char* i1 = args[1];
@@ -506,7 +506,7 @@
}
template <typename Functor>
-void CompareUFunc(char** args, npy_intp* dimensions, npy_intp* steps,
+void CompareUFunc(char** args, npy_intp const* dimensions, npy_intp const* steps,
void* data) {
BinaryUFunc<bfloat16, npy_bool, Functor>(args, dimensions, steps, data);
}
diff --color -urN TensorFlow-unpatched/tensorflow-1.15.0/tensorflow/lite/tools/make/targets/aarch64_makefile.inc TensorFlow/tensorflow-1.15.0/tensorflow/lite/tools/make/targets/aarch64_makefile.inc
--- TensorFlow-unpatched/tensorflow-1.15.0/tensorflow/lite/tools/make/targets/aarch64_makefile.inc 2022-02-03 11:56:02.687293394 +0000
+++ TensorFlow/tensorflow-1.15.0/tensorflow/lite/tools/make/targets/aarch64_makefile.inc 2022-02-03 11:57:39.927933499 +0000
@@ -9,12 +9,16 @@
-march=armv8-a \
-funsafe-math-optimizations \
-ftree-vectorize \
+ -flax-vector-conversions \
+ -fomit-frame-pointer \
-fPIC
CFLAGS += \
-march=armv8-a \
-funsafe-math-optimizations \
-ftree-vectorize \
+ -flax-vector-conversions \
+ -fomit-frame-pointer \
-fPIC
LDFLAGS := \
diff --color -urN TensorFlow-unpatched/tensorflow-1.15.0/tensorflow/lite/tools/make/Makefile TensorFlow/tensorflow-1.15.0/tensorflow/lite/tools/make/Makefile
--- TensorFlow-unpatched/tensorflow-1.15.0/tensorflow/lite/tools/make/Makefile 2022-02-03 12:45:44.026824096 +0000
+++ TensorFlow/tensorflow-1.15.0/tensorflow/lite/tools/make/Makefile 2022-02-03 12:47:12.867404425 +0000
@@ -55,7 +55,7 @@
# There are no rules for compiling objects for the host system (since we don't
# generate things like the protobuf compiler that require that), so all of
# these settings are for the target compiler.
-CXXFLAGS := -O3 -DNDEBUG -fPIC
+CXXFLAGS := -O3 -DNDEBUG -fPIC -flax-vector-conversions -fomit-frame-pointer
CXXFLAGS += $(EXTRA_CXXFLAGS)
CFLAGS := ${CXXFLAGS}
CXXFLAGS += --std=c++11
diff --color -urN TensorFlow-unpatched/tensorflow-1.15.0/tensorflow/lite/build_def.bzl TensorFlow/tensorflow-1.15.0/tensorflow/lite/build_def.bzl
--- TensorFlow-unpatched/tensorflow-1.15.0/tensorflow/lite/build_def.bzl 2022-02-03 14:16:02.502469933 +0000
+++ TensorFlow/tensorflow-1.15.0/tensorflow/lite/build_def.bzl 2022-02-03 14:19:00.703651438 +0000
@@ -14,6 +14,8 @@
] + select({
str(Label("//tensorflow:android_arm64")): [
"-O3",
+ "-flax-vector-conversions",
+ "-fomit-frame-pointer",
],
str(Label("//tensorflow:android_arm")): [
"-mfpu=neon",
diff --color -urN TensorFlow-unpatched/tensorflow-1.15.0/tensorflow/tensorflow.bzl TensorFlow/tensorflow-1.15.0/tensorflow/tensorflow.bzl
--- TensorFlow-unpatched/tensorflow-1.15.0/tensorflow/tensorflow.bzl 2022-02-03 14:40:47.322285097 +0000
+++ TensorFlow/tensorflow-1.15.0/tensorflow/tensorflow.bzl 2022-02-03 14:45:23.934108850 +0000
@@ -232,6 +232,12 @@
"//conditions:default": [],
})
+def if_linux_aarch64(a):
+ return select({
+ clean_dep("//tensorflow:linux_aarch64"): a,
+ "//conditions:default": [],
+ })
+
def if_override_eigen_strong_inline(a):
return select({
clean_dep("//tensorflow:override_eigen_strong_inline"): a,
@@ -300,6 +306,7 @@
if_ngraph(["-DINTEL_NGRAPH=1"]) +
if_android_arm([""]) +
if_linux_x86_64([""]) +
+ if_linux_aarch64(["-flax-vector-conversions","-fomit-frame-pointer"]) +
if_ios_x86_64([""]) +
select({
clean_dep("//tensorflow:framework_shared_object"): [],
diff --color -urN TensorFlow-unpatched/tensorflow-1.15.0/tensorflow/lite/build_def.bzl TensorFlow/tensorflow-1.15.0/tensorflow/lite/build_def.bzl
--- TensorFlow-unpatched/tensorflow-1.15.0/tensorflow/lite/build_def.bzl 2022-02-03 14:57:14.338794705 +0000
+++ TensorFlow/tensorflow-1.15.0/tensorflow/lite/build_def.bzl 2022-02-03 14:58:07.069142683 +0000
@@ -17,6 +17,11 @@
"-flax-vector-conversions",
"-fomit-frame-pointer",
],
+ str(Label("//tensorflow:linux_aarch64")): [
+ "-O3",
+ "-flax-vector-conversions",
+ "-fomit-frame-pointer",
+ ],
str(Label("//tensorflow:android_arm")): [
"-mfpu=neon",
"-O3",
# Required on CentOS6
# https://github.com/tensorflow/tensorflow/issues/15129
--- tensorflow-1.15.0/tensorflow/tensorflow.bzl.orig 2019-11-15 09:58:49.029380497 +0100
+++ tensorflow-1.15.0/tensorflow/tensorflow.bzl 2019-11-15 10:01:26.337767204 +0100
@@ -498,7 +498,7 @@
srcs = [],
deps = [],
data = [],
- linkopts = [],
+ linkopts = ["-lrt"],
framework_so = tf_binary_additional_srcs(),
soversion = None,
kernels = [],
@@ -602,7 +602,7 @@
srcs = [],
deps = [],
data = [],
- linkopts = [],
+ linkopts = ["-lrt"],
copts = tf_copts(),
kernels = [],
per_os_targets = False, # Generate targets with SHARED_LIBRARY_NAME_PATTERNS
@@ -693,7 +693,7 @@
tf_cc_binary(
name = tool,
copts = tf_copts(),
- linkopts = if_not_windows(["-lm", "-Wl,-ldl"]),
+ linkopts = if_not_windows(["-lm", "-Wl,-ldl", "-lrt"]),
linkstatic = 1, # Faster to link this one-time-use binary dynamically
deps = [op_gen] + deps,
)
@@ -866,7 +866,7 @@
hidden_file = None,
generated_target_name = None,
op_whitelist = [],
- cc_linkopts = [],
+ cc_linkopts = ["-lrt"],
api_def_srcs = []):
if (hidden or hidden_file) and op_whitelist:
fail("Cannot pass specify both hidden and op_whitelist.")
@@ -878,7 +878,7 @@
tf_cc_binary(
name = tool_name,
copts = tf_copts(),
- linkopts = if_not_windows(["-lm", "-Wl,-ldl"]) + cc_linkopts,
+ linkopts = if_not_windows(["-lm", "-Wl,-ldl", "-lrt"]) + cc_linkopts,
linkstatic = 1, # Faster to link this one-time-use binary dynamically
visibility = [clean_dep("//tensorflow:internal")],
deps = ([
@@ -1175,7 +1175,7 @@
tags = [],
size = "medium",
args = None,
- linkopts = [],
+ linkopts = ['-lrt'],
kernels = []):
for src in srcs:
tf_cc_test(
remove hardcoding of -msse4.2 & co since it overrules -xHost when using Intel compilers
author: Kenneth Hoste (HPC-UGent)