Commit 50114fd0 authored by Alexandre Strube

Spark and deps

parent 3f483f3d

# Arrow-8.0.0-foss-2022a.eb
easyblock = 'CMakeMake'
name = 'Arrow'
version = '8.0.0'
homepage = 'https://arrow.apache.org'
description = """Apache Arrow (incl. PyArrow Python bindings), a cross-language development platform
for in-memory data."""
toolchain = {'name': 'foss', 'version': '2022a'}
source_urls = ['https://archive.apache.org/dist/%(namelower)s/%(namelower)s-%(version)s']
sources = ['apache-arrow-%(version)s.tar.gz']
checksums = ['ad9a05705117c989c116bae9ac70492fe015050e1b80fb0e38fde4b5d863aaa3']
builddependencies = [
('CMake', '3.23.1'),
('Autotools', '20220317'),
('flex', '2.6.4'),
('Bison', '3.8.2'),
('pkgconf', '1.8.0'),
]
# Arrow strongly prefers its bundled jemalloc, so it is not included as a dependency
dependencies = [
('Python', '3.10.4'),
('SciPy-bundle', '2022.05', '', ('gcccoremkl', '11.3.0-2022.1.0')), # for numpy
('Boost', '1.79.0'),
('lz4', '1.9.3'),
('zlib', '1.2.12'),
('bzip2', '1.0.8'),
('zstd', '1.5.2'),
('snappy', '1.1.9'),
('RapidJSON', '1.1.0'),
('RE2', '2022-06-01'),
('utf8proc', '2.7.0'),
]
start_dir = 'cpp'
# see https://arrow.apache.org/docs/developers/python.html
configopts = "-DARROW_DATASET=on -DARROW_PYTHON=on -DARROW_PARQUET=ON -DARROW_WITH_SNAPPY=ON "
configopts += "-DCMAKE_INSTALL_LIBDIR=lib -DPython3_ROOT_DIR=$EBROOTPYTHON "
configopts += "-DARROW_WITH_ZLIB=ON -DARROW_WITH_BZ2=ON -DARROW_WITH_ZSTD=ON -DARROW_WITH_LZ4=ON "
configopts += "-DZSTD_ROOT=$EBROOTZSTD "
# also install Python bindings
local_install_pyarrow_cmds = "export PKG_CONFIG_PATH=%(installdir)s/lib/pkgconfig:$PKG_CONFIG_PATH && "
local_install_pyarrow_cmds += "export PYTHONPATH=%(installdir)s/lib/python%(pyshortver)s/site-packages:$PYTHONPATH && "
local_install_pyarrow_cmds += "cd %(builddir)s/*arrow-%(version)s/python && export XDG_CACHE_HOME=$TMPDIR && "
local_install_pyarrow_cmds += "sed -i 's/numpy==[0-9.]*/numpy/g' pyproject.toml && "
local_install_pyarrow_cmds += "Python3_ROOT_DIR=$EBROOTPYTHON "
local_install_pyarrow_cmds += "PYARROW_WITH_DATASET=1 PYARROW_WITH_PARQUET=1 pip install --prefix %(installdir)s ."
postinstallcmds = [local_install_pyarrow_cmds]
modextrapaths = {'PYTHONPATH': 'lib/python%(pyshortver)s/site-packages'}
sanity_check_paths = {
'files': ['lib/libarrow.a', 'lib/libarrow.%s' % SHLIB_EXT,
'lib/libarrow_python.a', 'lib/libarrow_python.%s' % SHLIB_EXT],
'dirs': ['include/arrow', 'lib/cmake/arrow', 'lib/pkgconfig', 'lib/python%(pyshortver)s/site-packages'],
}
sanity_check_commands = [
"python -c 'import pyarrow'",
"python -c 'import pyarrow.dataset'",
"python -c 'import pyarrow.parquet'",
]
moduleclass = 'data'
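
The configopts above enable exactly the dataset, Parquet and snappy features that the sanity checks exercise. A minimal usage sketch, assuming the resulting Arrow module is loaded (the /tmp paths are hypothetical):

    import os
    import pyarrow as pa
    import pyarrow.dataset as ds
    import pyarrow.parquet as pq

    table = pa.table({'x': [1, 2, 3], 'y': ['a', 'b', 'c']})

    # ARROW_PARQUET + ARROW_WITH_SNAPPY: snappy-compressed Parquet I/O
    os.makedirs('/tmp/arrow_demo', exist_ok=True)
    pq.write_table(table, '/tmp/arrow_demo/example.parquet', compression='snappy')

    # ARROW_DATASET: lazily scan a directory of files, with filter pushdown
    dataset = ds.dataset('/tmp/arrow_demo', format='parquet')
    print(dataset.to_table(filter=ds.field('x') > 1))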

# RE2-2022-06-01-GCCcore-11.3.0.eb
##
# Author: Robert Mijakovic <robert.mijakovic@lxp.lu>
##
easyblock = "CMakeMake"
name = 'RE2'
version = '2022-06-01'
homepage = 'https://github.com/google/re2'
description = """
RE2 is a fast, safe, thread-friendly alternative to backtracking regular
expression engines like those used in PCRE, Perl, and Python. It is a C++
library. """
toolchain = {'name': 'GCCcore', 'version': '11.3.0'}
toolchainopts = {'pic': True}
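# build position-independent code so the static library can be linked into shared objects (e.g. by Arrow)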
github_account = 'google'
source_urls = [GITHUB_SOURCE]
sources = ['%(version)s.tar.gz']
checksums = ['f89c61410a072e5cbcf8c27e3a778da7d6fd2f2b5b1445cd4f4508bee946ab0f']
builddependencies = [
('binutils', '2.38'),
('CMake', '3.23.1'),
]
sanity_check_paths = {
'files': ['lib/libre2.a'],
'dirs': ['include/re2'],
}
moduleclass = 'lib'

# Spark-3.3.1-foss-2022a-CUDA-11.7.eb
# Author: Denis Krišťák (INUITS)
easyblock = 'Tarball'
name = 'Spark'
version = '3.3.1'
versionsuffix = '-CUDA-%(cudaver)s'
homepage = 'https://spark.apache.org'
description = """Spark is Hadoop MapReduce done in memory"""
toolchain = {'name': 'foss', 'version': '2022a'}
source_urls = [
'https://archive.apache.org/dist/%(namelower)s/%(namelower)s-%(version)s/',
'https://downloads.apache.org/%(namelower)s/%(namelower)s-%(version)s/'
]
sources = ['%(namelower)s-%(version)s-bin-hadoop3.tgz']
checksums = ['91df3e3f73682d32261a8b245af6b0553dc5cf4af208ef82219283d23ee01ece']
dependencies = [
('Python', '3.10.4'),
('Java', '11', '', SYSTEM),
('CUDA', '11.7', '', SYSTEM),
('Arrow', '8.0.0'),
]
exts_defaultclass = 'PythonPackage'
exts_default_options = {
'source_urls': [PYPI_SOURCE],
'download_dep_fail': True,
'use_pip': True,
}
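# py4j implements the Python-to-JVM bridge that PySpark uses to talk to Spark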
exts_list = [
('py4j', '0.10.9.7', {
'checksums': ['0b6e5315bb3ada5cf62ac651d107bb2ebc02def3dee9d9548e3baac644ea8dbb'],
}),
]
sanity_check_paths = {
'files': ['bin/pyspark', 'bin/spark-shell'],
'dirs': ['python']
}
sanity_check_commands = [
"pyspark -h",
"python -c 'import pyspark'",
]
modextrapaths = {'PYTHONPATH': ['python', 'lib/python%(pyshortver)s/site-packages']}
modextravars = {'SPARK_HOME': '%(installdir)s'}
moduleclass = 'devel'
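
Arrow is a runtime dependency here because PySpark uses pyarrow for columnar data transfer between the JVM and Python, e.g. in DataFrame.toPandas() and pandas UDFs. A minimal sketch, assuming this Spark module is loaded:

    from pyspark.sql import SparkSession

    spark = (SparkSession.builder
             .appName('arrow-demo')
             .config('spark.sql.execution.arrow.pyspark.enabled', 'true')
             .getOrCreate())

    df = spark.range(1000)
    pdf = df.toPandas()  # transferred as Arrow record batches rather than pickled rows
    print(pdf.head())
    spark.stop()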

# utf8proc-2.7.0-GCCcore-11.3.0.eb
easyblock = 'CMakeMake'
name = 'utf8proc'
version = '2.7.0'
homepage = 'https://github.com/JuliaStrings/utf8proc'
description = """utf8proc is a small, clean C library that provides Unicode normalization, case-folding,
and other operations for data in the UTF-8 encoding."""
toolchain = {'name': 'GCCcore', 'version': '11.3.0'}
source_urls = ['https://github.com/JuliaStrings/utf8proc/archive/']
sources = ['v%(version)s.tar.gz']
checksums = ['4bb121e297293c0fd55f08f83afab6d35d48f0af4ecc07523ad8ec99aa2b12a1']
builddependencies = [
('binutils', '2.38'),
('CMake', '3.23.1'),
]
separate_build_dir = True
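# build twice: once with default options (static library), once with shared libraries enabled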
configopts = ['', '-DBUILD_SHARED_LIBS=true']
sanity_check_paths = {
'files': ['include/utf8proc.h', 'lib/libutf8proc.a', 'lib/libutf8proc.%s' % SHLIB_EXT],
'dirs': [],
}
moduleclass = 'lib'
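
The shared library from the second build pass can be exercised directly from Python via ctypes. A minimal sketch (assumptions: the module's lib directory is on the loader path, the plain libutf8proc.so symlink exists, and the buffer returned by utf8proc_NFC is deliberately leaked for brevity):

    import ctypes

    lib = ctypes.CDLL('libutf8proc.so')

    lib.utf8proc_version.restype = ctypes.c_char_p
    print(lib.utf8proc_version().decode())  # '2.7.0'

    # NFC-normalize 'e' + combining acute accent into the single code point 'é';
    # utf8proc_NFC returns a newly allocated NUL-terminated UTF-8 string
    lib.utf8proc_NFC.argtypes = [ctypes.c_char_p]
    lib.utf8proc_NFC.restype = ctypes.c_void_p
    ptr = lib.utf8proc_NFC('e\u0301'.encode())
    print(ctypes.string_at(ptr).decode())  # 'é'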