From 073ba868eae1fc38f9f90502af2f7311bd468f23 Mon Sep 17 00:00:00 2001
From: Stefan Kesselheim <s.kesselheim@fz-juelich.de>
Date: Thu, 12 Jan 2023 18:19:46 +0100
Subject: [PATCH] added example notebook

---
 PySpark_PI.ipynb | 90 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)
 create mode 100644 PySpark_PI.ipynb

diff --git a/PySpark_PI.ipynb b/PySpark_PI.ipynb
new file mode 100644
index 0000000..ca10262
--- /dev/null
+++ b/PySpark_PI.ipynb
@@ -0,0 +1,90 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "0d4c4bff-43ef-4574-8252-d309f872de3a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON)\n",
+      " 6526639 develboos spark-cl kesselhe R 24:48 2 jwb[0097,0117]\n"
+     ]
+    }
+   ],
+   "source": [
+    "!squeue --me"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "67b9bb6e-6dbb-4c0f-80fa-3aea2afcce28",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pyspark.sql import SparkSession\n",
+    "import os\n",
+    "import random\n",
+    "home = os.environ[\"HOME\"]\n",
+    "spark_master=\"spark://jwb0097i.juwels\"\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5b2ee080-70e8-42b4-935f-3c8fa9634c85",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "# This is required to add a \"i\" to the hostname\n",
+    "tmp=os.environ[\"HOSTNAME\"].split(\".\"); tmp[0]+=\"i\"; spark_driver_hostname=\".\".join(tmp)\n",
+    "\n",
+    "spark = SparkSession \\\n",
+    "    .builder \\\n",
+    "    .appName(\"My SparkSession\") \\\n",
+    "    .config(\"spark.master\", spark_master) \\\n",
+    "    .config(\"spark.driver.memory\", \"10g\") \\\n",
+    "    .config(\"spark.driver.host\", spark_driver_hostname) \\\n",
+    "    .config(\"spark.executor.memory\", \"400g\") \\\n",
+    "    .getOrCreate()\n",
+    "\n",
+    "sc=spark.sparkContext\n",
+    "\n",
+    "def inside(p):\n",
+    "    x, y = random.random(), random.random()\n",
+    "    return x*x + y*y < 1\n",
+    "\n",
+    "NUM_SAMPLES=10000000\n",
+    "count = sc.parallelize(range(0, NUM_SAMPLES)) \\\n",
+    "    .filter(inside).count()\n",
+    "print(\"Pi is roughly %f\" % (4.0 * count / NUM_SAMPLES))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "spark_env",
+   "language": "python",
+   "name": "spark_env"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
-- 
GitLab