Commit 50114fd0 authored by Alexandre Strube

Spark and deps

parent 3f483f3d

# Arrow-8.0.0-foss-2022a.eb
easyblock = 'CMakeMake'
name = 'Arrow'
version = '8.0.0'
homepage = 'https://arrow.apache.org'
description = """Apache Arrow (incl. PyArrow Python bindings), a cross-language development platform
for in-memory data."""
toolchain = {'name': 'foss', 'version': '2022a'}
source_urls = ['https://archive.apache.org/dist/%(namelower)s/%(namelower)s-%(version)s']
sources = ['apache-arrow-%(version)s.tar.gz']
checksums = ['ad9a05705117c989c116bae9ac70492fe015050e1b80fb0e38fde4b5d863aaa3']
builddependencies = [
('CMake', '3.23.1'),
('Autotools', '20220317'),
('flex', '2.6.4'),
('Bison', '3.8.2'),
('pkgconf', '1.8.0'),
]
# Arrow strongly prefers its bundled jemalloc, so it is not included as a dependency
dependencies = [
('Python', '3.10.4'),
('SciPy-bundle', '2022.05', '', ('gcccoremkl', '11.3.0-2022.1.0')), # for numpy
('Boost', '1.79.0'),
('lz4', '1.9.3'),
('zlib', '1.2.12'),
('bzip2', '1.0.8'),
('zstd', '1.5.2'),
('snappy', '1.1.9'),
('RapidJSON', '1.1.0'),
('RE2', '2022-06-01'),
('utf8proc', '2.7.0'),
]
start_dir = 'cpp'
# see https://arrow.apache.org/docs/developers/python.html
configopts = "-DARROW_DATASET=on -DARROW_PYTHON=on -DARROW_PARQUET=ON -DARROW_WITH_SNAPPY=ON "
configopts += "-DCMAKE_INSTALL_LIBDIR=lib -DPython3_ROOT_DIR=$EBROOTPYTHON "
configopts += "-DARROW_WITH_ZLIB=ON -DARROW_WITH_BZ2=ON -DARROW_WITH_ZSTD=ON -DARROW_WITH_LZ4=ON "
configopts += "-DZSTD_ROOT=$EBROOTZSTD "
# also install Python bindings
local_install_pyarrow_cmds = "export PKG_CONFIG_PATH=%(installdir)s/lib/pkgconfig:$PKG_CONFIG_PATH && "
local_install_pyarrow_cmds += "export PYTHONPATH=%(installdir)s/lib/python%(pyshortver)s/site-packages:$PYTHONPATH && "
local_install_pyarrow_cmds += "cd %(builddir)s/*arrow-%(version)s/python && export XDG_CACHE_HOME=$TMPDIR && "
local_install_pyarrow_cmds += "sed -i 's/numpy==[0-9.]*/numpy/g' pyproject.toml && "
local_install_pyarrow_cmds += "Python3_ROOT_DIR=$EBROOTPYTHON "
local_install_pyarrow_cmds += "PYARROW_WITH_DATASET=1 PYARROW_WITH_PARQUET=1 pip install --prefix %(installdir)s ."
postinstallcmds = [local_install_pyarrow_cmds]
modextrapaths = {'PYTHONPATH': 'lib/python%(pyshortver)s/site-packages'}
sanity_check_paths = {
'files': ['lib/libarrow.a', 'lib/libarrow.%s' % SHLIB_EXT,
'lib/libarrow_python.a', 'lib/libarrow_python.%s' % SHLIB_EXT],
'dirs': ['include/arrow', 'lib/cmake/arrow', 'lib/pkgconfig', 'lib/python%(pyshortver)s/site-packages'],
}
sanity_check_commands = [
"python -c 'import pyarrow'",
"python -c 'import pyarrow.dataset'",
"python -c 'import pyarrow.parquet'",
]
moduleclass = 'data'
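
The configopts above enable exactly the dataset, Parquet and snappy features that the sanity checks exercise. A minimal usage sketch, assuming the resulting Arrow module is loaded (the /tmp paths are hypothetical):

    import os
    import pyarrow as pa
    import pyarrow.dataset as ds
    import pyarrow.parquet as pq

    table = pa.table({'x': [1, 2, 3], 'y': ['a', 'b', 'c']})

    # ARROW_PARQUET + ARROW_WITH_SNAPPY: snappy-compressed Parquet I/O
    os.makedirs('/tmp/arrow_demo', exist_ok=True)
    pq.write_table(table, '/tmp/arrow_demo/example.parquet', compression='snappy')

    # ARROW_DATASET: lazily scan a directory of files, with filter pushdown
    dataset = ds.dataset('/tmp/arrow_demo', format='parquet')
    print(dataset.to_table(filter=ds.field('x') > 1))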

# RE2-2022-06-01-GCCcore-11.3.0.eb
##
# Author: Robert Mijakovic <robert.mijakovic@lxp.lu>
##
easyblock = "CMakeMake"
name = 'RE2'
version = '2022-06-01'
homepage = 'https://github.com/google/re2'
description = """
RE2 is a fast, safe, thread-friendly alternative to backtracking regular
expression engines like those used in PCRE, Perl, and Python. It is a C++
library. """
toolchain = {'name': 'GCCcore', 'version': '11.3.0'}
toolchainopts = {'pic': True}
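# build position-independent code so the static library can be linked into shared objects (e.g. by Arrow)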
github_account = 'google'
source_urls = [GITHUB_SOURCE]
sources = ['%(version)s.tar.gz']
checksums = ['f89c61410a072e5cbcf8c27e3a778da7d6fd2f2b5b1445cd4f4508bee946ab0f']
builddependencies = [
('binutils', '2.38'),
('CMake', '3.23.1'),
]
sanity_check_paths = {
'files': ['lib/libre2.a'],
'dirs': ['include/re2'],
}
moduleclass = 'lib'

# Spark-3.3.1-foss-2022a-CUDA-11.7.eb
# Author: Denis Krišťák (INUITS)
easyblock = 'Tarball'
name = 'Spark'
version = '3.3.1'
versionsuffix = '-CUDA-%(cudaver)s'
homepage = 'https://spark.apache.org'
description = """Spark is Hadoop MapReduce done in memory"""
toolchain = {'name': 'foss', 'version': '2022a'}
source_urls = [
'https://archive.apache.org/dist/%(namelower)s/%(namelower)s-%(version)s/',
'https://downloads.apache.org/%(namelower)s/%(namelower)s-%(version)s/'
]
sources = ['%(namelower)s-%(version)s-bin-hadoop3.tgz']
checksums = ['91df3e3f73682d32261a8b245af6b0553dc5cf4af208ef82219283d23ee01ece']
dependencies = [
('Python', '3.10.4'),
('Java', '11', '', SYSTEM),
('CUDA', '11.7', '', SYSTEM),
('Arrow', '8.0.0'),
]
exts_defaultclass = 'PythonPackage'
exts_default_options = {
'source_urls': [PYPI_SOURCE],
'download_dep_fail': True,
'use_pip': True,
}
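# py4j implements the Python-to-JVM bridge that PySpark uses to talk to Spark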
exts_list = [
('py4j', '0.10.9.7', {
'checksums': ['0b6e5315bb3ada5cf62ac651d107bb2ebc02def3dee9d9548e3baac644ea8dbb'],
}),
]
sanity_check_paths = {
'files': ['bin/pyspark', 'bin/spark-shell'],
'dirs': ['python']
}
sanity_check_commands = [
"pyspark -h",
"python -c 'import pyspark'",
]
modextrapaths = {'PYTHONPATH': ['python', 'lib/python%(pyshortver)s/site-packages']}
modextravars = {'SPARK_HOME': '%(installdir)s'}
moduleclass = 'devel'
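
Arrow is a runtime dependency here because PySpark uses pyarrow for columnar data transfer between the JVM and Python, e.g. in DataFrame.toPandas() and pandas UDFs. A minimal sketch, assuming this Spark module is loaded:

    from pyspark.sql import SparkSession

    spark = (SparkSession.builder
             .appName('arrow-demo')
             .config('spark.sql.execution.arrow.pyspark.enabled', 'true')
             .getOrCreate())

    df = spark.range(1000)
    pdf = df.toPandas()  # transferred as Arrow record batches rather than pickled rows
    print(pdf.head())
    spark.stop()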

# utf8proc-2.7.0-GCCcore-11.3.0.eb
easyblock = 'CMakeMake'
name = 'utf8proc'
version = '2.7.0'
homepage = 'https://github.com/JuliaStrings/utf8proc'
description = """utf8proc is a small, clean C library that provides Unicode normalization, case-folding,
and other operations for data in the UTF-8 encoding."""
toolchain = {'name': 'GCCcore', 'version': '11.3.0'}
source_urls = ['https://github.com/JuliaStrings/utf8proc/archive/']
sources = ['v%(version)s.tar.gz']
checksums = ['4bb121e297293c0fd55f08f83afab6d35d48f0af4ecc07523ad8ec99aa2b12a1']
builddependencies = [
('binutils', '2.38'),
('CMake', '3.23.1'),
]
separate_build_dir = True
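# build twice: once with default options (static library), once with shared libraries enabled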
configopts = ['', '-DBUILD_SHARED_LIBS=true']
sanity_check_paths = {
'files': ['include/utf8proc.h', 'lib/libutf8proc.a', 'lib/libutf8proc.%s' % SHLIB_EXT],
'dirs': [],
}
moduleclass = 'lib'
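
The shared library from the second build pass can be exercised directly from Python via ctypes. A minimal sketch (assumptions: the module's lib directory is on the loader path, the plain libutf8proc.so symlink exists, and the buffer returned by utf8proc_NFC is deliberately leaked for brevity):

    import ctypes

    lib = ctypes.CDLL('libutf8proc.so')

    lib.utf8proc_version.restype = ctypes.c_char_p
    print(lib.utf8proc_version().decode())  # '2.7.0'

    # NFC-normalize 'e' + combining acute accent into the single code point 'é';
    # utf8proc_NFC returns a newly allocated NUL-terminated UTF-8 string
    lib.utf8proc_NFC.argtypes = [ctypes.c_char_p]
    lib.utf8proc_NFC.restype = ctypes.c_void_p
    ptr = lib.utf8proc_NFC('e\u0301'.encode())
    print(ctypes.string_at(ptr).decode())  # 'é'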