Skip to content
Snippets Groups Projects
Introduction-to-Pandas--master.ipynb 673 KiB
Newer Older
Andreas Herten's avatar
Andreas Herten committed
       "      <th>Locke</th>\n",
       "      <td>Terry O'Quinn</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sawyer</th>\n",
       "      <td>Josh Holloway</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Walt</th>\n",
       "      <td>Malcolm David Kelley</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                          Actor  Main Cast\n",
       "Character                                 \n",
       "Hurley             Jorge Garcia       True\n",
       "Jack                Matthew Fox       True\n",
       "Kate           Evangeline Lilly       True\n",
       "Locke             Terry O'Quinn       True\n",
       "Sawyer            Josh Holloway       True\n",
       "Walt       Malcolm David Kelley      False"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_json(\"lost.json\").set_index(\"Character\").sort_index()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "exercise": "task",
Andreas Herten's avatar
Andreas Herten committed
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "## Task 2\n",
    "<a name=\"task2\"></a>\n",
Andreas Herten's avatar
Andreas Herten committed
    "\n",
    "* Read in `nest-data.csv` to `DataFrame`; call it `df`\n",
    "* Get to know it and play a bit with it\n",
    "* Tell me when you're done: [pollev.com/aherten538](https://pollev.com/aherten538)"
Andreas Herten's avatar
Andreas Herten committed
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "exercise": "task"
   },
Andreas Herten's avatar
Andreas Herten committed
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "id,Nodes,Tasks/Node,Threads/Task,Runtime Program / s,Scale,Plastic,Avg. Neuron Build Time / s,Min. Edge Build Time / s,Max. Edge Build Time / s,Min. Init. Time / s,Max. Init. Time / s,Presim. Time / s,Sim. Time / s,Virt. Memory (Sum) / kB,Local Spike Counter (Sum),Average Rate (Sum),Number of Neurons,Number of Connections,Min. Delay,Max. Delay\n",
Andreas Herten's avatar
Andreas Herten committed
      "5,1,2,4,420.42,10,true,0.29,88.12,88.18,1.14,1.20,17.26,311.52,46560664.00,825499,7.48,112500,1265738500,1.5,1.5\n",
      "5,1,4,4,200.84,10,true,0.15,46.03,46.34,0.70,1.01,7.87,142.97,46903088.00,802865,7.03,112500,1265738500,1.5,1.5\n"
     ]
    }
   ],
   "source": [
    "!cat nest-data.csv | head -3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
Andreas Herten's avatar
Andreas Herten committed
   "metadata": {
    "exercise": "solution",
Andreas Herten's avatar
Andreas Herten committed
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>Nodes</th>\n",
       "      <th>Tasks/Node</th>\n",
       "      <th>Threads/Task</th>\n",
       "      <th>Runtime Program / s</th>\n",
       "      <th>Scale</th>\n",
       "      <th>Plastic</th>\n",
       "      <th>Avg. Neuron Build Time / s</th>\n",
       "      <th>Min. Edge Build Time / s</th>\n",
       "      <th>Max. Edge Build Time / s</th>\n",
       "      <th>...</th>\n",
       "      <th>Max. Init. Time / s</th>\n",
       "      <th>Presim. Time / s</th>\n",
       "      <th>Sim. Time / s</th>\n",
       "      <th>Virt. Memory (Sum) / kB</th>\n",
       "      <th>Local Spike Counter (Sum)</th>\n",
       "      <th>Average Rate (Sum)</th>\n",
       "      <th>Number of Neurons</th>\n",
       "      <th>Number of Connections</th>\n",
       "      <th>Min. Delay</th>\n",
       "      <th>Max. Delay</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>420.42</td>\n",
       "      <td>10</td>\n",
       "      <td>True</td>\n",
       "      <td>0.29</td>\n",
       "      <td>88.12</td>\n",
       "      <td>88.18</td>\n",
       "      <td>...</td>\n",
       "      <td>1.20</td>\n",
       "      <td>17.26</td>\n",
       "      <td>311.52</td>\n",
       "      <td>46560664.0</td>\n",
       "      <td>825499</td>\n",
       "      <td>7.48</td>\n",
       "      <td>112500</td>\n",
       "      <td>1265738500</td>\n",
       "      <td>1.5</td>\n",
       "      <td>1.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>200.84</td>\n",
       "      <td>10</td>\n",
       "      <td>True</td>\n",
       "      <td>0.15</td>\n",
       "      <td>46.03</td>\n",
       "      <td>46.34</td>\n",
       "      <td>...</td>\n",
       "      <td>1.01</td>\n",
       "      <td>7.87</td>\n",
       "      <td>142.97</td>\n",
       "      <td>46903088.0</td>\n",
       "      <td>802865</td>\n",
       "      <td>7.03</td>\n",
       "      <td>112500</td>\n",
       "      <td>1265738500</td>\n",
       "      <td>1.5</td>\n",
       "      <td>1.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>8</td>\n",
       "      <td>202.15</td>\n",
       "      <td>10</td>\n",
       "      <td>True</td>\n",
       "      <td>0.28</td>\n",
       "      <td>47.98</td>\n",
       "      <td>48.48</td>\n",
       "      <td>...</td>\n",
       "      <td>1.20</td>\n",
       "      <td>7.95</td>\n",
       "      <td>142.81</td>\n",
       "      <td>47699384.0</td>\n",
       "      <td>802865</td>\n",
       "      <td>7.03</td>\n",
       "      <td>112500</td>\n",
       "      <td>1265738500</td>\n",
       "      <td>1.5</td>\n",
       "      <td>1.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>8</td>\n",
       "      <td>89.57</td>\n",
       "      <td>10</td>\n",
       "      <td>True</td>\n",
       "      <td>0.15</td>\n",
       "      <td>20.41</td>\n",
       "      <td>23.21</td>\n",
       "      <td>...</td>\n",
       "      <td>3.04</td>\n",
       "      <td>3.19</td>\n",
       "      <td>60.31</td>\n",
       "      <td>46813040.0</td>\n",
       "      <td>821491</td>\n",
       "      <td>7.23</td>\n",
       "      <td>112500</td>\n",
       "      <td>1265738500</td>\n",
       "      <td>1.5</td>\n",
       "      <td>1.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>164.16</td>\n",
       "      <td>10</td>\n",
       "      <td>True</td>\n",
       "      <td>0.20</td>\n",
       "      <td>40.03</td>\n",
       "      <td>41.09</td>\n",
       "      <td>...</td>\n",
       "      <td>1.58</td>\n",
       "      <td>6.08</td>\n",
       "      <td>114.88</td>\n",
       "      <td>46937216.0</td>\n",
       "      <td>802865</td>\n",
       "      <td>7.03</td>\n",
       "      <td>112500</td>\n",
       "      <td>1265738500</td>\n",
       "      <td>1.5</td>\n",
       "      <td>1.5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   id  Nodes  Tasks/Node  Threads/Task  Runtime Program / s  Scale  Plastic  \\\n",
       "0   5      1           2             4               420.42     10     True   \n",
       "1   5      1           4             4               200.84     10     True   \n",
       "2   5      1           2             8               202.15     10     True   \n",
       "3   5      1           4             8                89.57     10     True   \n",
       "4   5      2           2             4               164.16     10     True   \n",
       "\n",
       "   Avg. Neuron Build Time / s  Min. Edge Build Time / s  \\\n",
       "0                        0.29                     88.12   \n",
       "1                        0.15                     46.03   \n",
       "2                        0.28                     47.98   \n",
       "3                        0.15                     20.41   \n",
       "4                        0.20                     40.03   \n",
       "\n",
       "   Max. Edge Build Time / s  ...  Max. Init. Time / s  Presim. Time / s  \\\n",
       "0                     88.18  ...                 1.20             17.26   \n",
       "1                     46.34  ...                 1.01              7.87   \n",
       "2                     48.48  ...                 1.20              7.95   \n",
       "3                     23.21  ...                 3.04              3.19   \n",
       "4                     41.09  ...                 1.58              6.08   \n",
       "\n",
       "   Sim. Time / s  Virt. Memory (Sum) / kB  Local Spike Counter (Sum)  \\\n",
       "0         311.52               46560664.0                     825499   \n",
       "1         142.97               46903088.0                     802865   \n",
       "2         142.81               47699384.0                     802865   \n",
       "3          60.31               46813040.0                     821491   \n",
       "4         114.88               46937216.0                     802865   \n",
       "\n",
       "   Average Rate (Sum)  Number of Neurons  Number of Connections  Min. Delay  \\\n",
       "0                7.48             112500             1265738500         1.5   \n",
       "1                7.03             112500             1265738500         1.5   \n",
       "2                7.03             112500             1265738500         1.5   \n",
       "3                7.23             112500             1265738500         1.5   \n",
       "4                7.03             112500             1265738500         1.5   \n",
       "\n",
       "   Max. Delay  \n",
       "0         1.5  \n",
       "1         1.5  \n",
       "2         1.5  \n",
       "3         1.5  \n",
       "4         1.5  \n",
       "\n",
       "[5 rows x 21 columns]"
      ]
     },
     "execution_count": 31,
Andreas Herten's avatar
Andreas Herten committed
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv(\"nest-data.csv\")\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "## Read CSV Options\n",
    "\n",
    "* See also full [API documentation](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html)\n",
    "* Important parameters\n",
    "    - `sep`: Set separator (for example `:` instead of `,`)\n",
    "    - `header`: Specify info about headers for columns; able to use multi-index for columns!\n",
    "    - `names`: Alternative to `header` – provide your own column titles\n",
    "    - `usecols`: Don't read whole set of columns, but only these; works with any list (`range(0:20:2)`)…\n",
    "    - `skiprows`: Don't read in these rows\n",
    "    - `na_values`: What string(s) to recognize as `N/A` values (which will be ignored during operations on data frame)\n",
    "    - `parse_dates`: Try to parse dates in CSV; different behaviours as to provided data structure; optionally used together with `date_parser`\n",
    "    - `compression`: Treat input file as compressed file (\"infer\", \"gzip\", \"zip\", …)\n",
    "    - `decimal`: Decimal point divider – for German data…\n",
    "    \n",
    "```python\n",
    "pandas.read_csv(filepath_or_buffer, sep=', ', delimiter=None, header='infer', names=None, index_col=None, usecols=None, squeeze=False, prefix=None, mangle_dupe_cols=True, dtype=None, engine=None, converters=None, true_values=None, false_values=None, skipinitialspace=False, skiprows=None, skipfooter=0, nrows=None, na_values=None, keep_default_na=True, na_filter=True, verbose=False, skip_blank_lines=True, parse_dates=False, infer_datetime_format=False, keep_date_col=False, date_parser=None, dayfirst=False, iterator=False, chunksize=None, compression='infer', thousands=None, decimal=b'.', lineterminator=None, quotechar='\"', quoting=0, doublequote=True, escapechar=None, comment=None, encoding=None, dialect=None, tupleize_cols=None, error_bad_lines=True, warn_bad_lines=True, delim_whitespace=False, low_memory=True, memory_map=False, float_precision=None)\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "## Slicing of Data Frames\n",
    "\n",
    "### Slicing Columns\n",
    "\n",
    "* Use square-bracket operators to slice data frame: `[]`\n",
    "    * Use column name to select column\n",
    "    * Also: Slice horizontally\n",
    "* Example: Select only columnn `C` from `df_demo`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
Andreas Herten's avatar
Andreas Herten committed
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>E</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.2</td>\n",
       "      <td>2018-02-26</td>\n",
Andreas Herten's avatar
Andreas Herten committed
       "      <td>-2.718282</td>\n",
       "      <td>This</td>\n",
       "      <td>Same</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.2</td>\n",
       "      <td>2018-02-26</td>\n",
Andreas Herten's avatar
Andreas Herten committed
       "      <td>1.718282</td>\n",
       "      <td>column</td>\n",
       "      <td>Same</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.2</td>\n",
       "      <td>2018-02-26</td>\n",
Andreas Herten's avatar
Andreas Herten committed
       "      <td>-1.304068</td>\n",
       "      <td>has</td>\n",
       "      <td>Same</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     A          B         C       D     E\n",
       "0  1.2 2018-02-26 -2.718282    This  Same\n",
       "1  1.2 2018-02-26  1.718282  column  Same\n",
       "2  1.2 2018-02-26 -1.304068     has  Same"
     "execution_count": 32,
Andreas Herten's avatar
Andreas Herten committed
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_demo.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
Andreas Herten's avatar
Andreas Herten committed
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0   -2.718282\n",
       "1    1.718282\n",
       "2   -1.304068\n",
       "3    0.986231\n",
       "4   -0.718282\n",
       "Name: C, dtype: float64"
      ]
     },
     "execution_count": 33,
Andreas Herten's avatar
Andreas Herten committed
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_demo[\"C\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "source": [
    "* Select more than one column by providing list `[]` to slice operator `[]`\n",
    "* *You usually end up forgett one of the brackets…*\n",
    "* Example: Select list of columns `A` and `C`, `[\"A\", \"C\"]` from `df_demo`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
Andreas Herten's avatar
Andreas Herten committed
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.2</td>\n",
       "      <td>-2.718282</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.2</td>\n",
       "      <td>1.718282</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.2</td>\n",
       "      <td>-1.304068</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.2</td>\n",
       "      <td>0.986231</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.2</td>\n",
       "      <td>-0.718282</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     A         C\n",
       "0  1.2 -2.718282\n",
       "1  1.2  1.718282\n",
       "2  1.2 -1.304068\n",
       "3  1.2  0.986231\n",
       "4  1.2 -0.718282"
      ]
     },
     "execution_count": 34,
Andreas Herten's avatar
Andreas Herten committed
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_demo[[\"A\", \"C\"]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "source": [
    "## Slicing of Data Frames\n",
    "\n",
    "### Slicing rows\n",
    "\n",
    "* Use numberical values to slice into rows\n",
    "* Use ranges just like with Python lists"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
Andreas Herten's avatar
Andreas Herten committed
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>E</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.2</td>\n",
       "      <td>2018-02-26</td>\n",
Andreas Herten's avatar
Andreas Herten committed
       "      <td>1.718282</td>\n",
       "      <td>column</td>\n",
       "      <td>Same</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.2</td>\n",
       "      <td>2018-02-26</td>\n",
Andreas Herten's avatar
Andreas Herten committed
       "      <td>-1.304068</td>\n",
       "      <td>has</td>\n",
       "      <td>Same</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     A          B         C       D     E\n",
       "1  1.2 2018-02-26  1.718282  column  Same\n",
       "2  1.2 2018-02-26 -1.304068     has  Same"
     "execution_count": 35,
Andreas Herten's avatar
Andreas Herten committed
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_demo[1:3]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "source": [
    "* Get a certain range as **per the current sort structure**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
Andreas Herten's avatar
Andreas Herten committed
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>E</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.2</td>\n",
       "      <td>2018-02-26</td>\n",
       "      <td>1.718282</td>\n",
       "      <td>column</td>\n",
       "      <td>Same</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.2</td>\n",
       "      <td>2018-02-26</td>\n",
       "      <td>-1.304068</td>\n",
       "      <td>has</td>\n",
       "      <td>Same</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     A          B         C       D     E\n",
       "1  1.2 2018-02-26  1.718282  column  Same\n",
       "2  1.2 2018-02-26 -1.304068     has  Same"
      ]
     },
     "execution_count": 36,
Andreas Herten's avatar
Andreas Herten committed
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_demo.iloc[1:3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
Andreas Herten's avatar
Andreas Herten committed
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>E</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.2</td>\n",
       "      <td>2018-02-26</td>\n",
       "      <td>1.718282</td>\n",
       "      <td>column</td>\n",
       "      <td>Same</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.2</td>\n",
       "      <td>2018-02-26</td>\n",
       "      <td>0.986231</td>\n",
       "      <td>entries</td>\n",
       "      <td>Same</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     A          B         C        D     E\n",
       "1  1.2 2018-02-26  1.718282   column  Same\n",
       "3  1.2 2018-02-26  0.986231  entries  Same"
      ]
     },
     "execution_count": 37,
Andreas Herten's avatar
Andreas Herten committed
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_demo.iloc[1:6:2]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "source": [
    "* Attention: `.iloc[]` location might change after re-sorting!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
Andreas Herten's avatar
Andreas Herten committed
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>E</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.2</td>\n",
       "      <td>2018-02-26</td>\n",
Andreas Herten's avatar
Andreas Herten committed
       "      <td>-1.304068</td>\n",
       "      <td>has</td>\n",
       "      <td>Same</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.2</td>\n",
       "      <td>2018-02-26</td>\n",
Andreas Herten's avatar
Andreas Herten committed
       "      <td>-0.718282</td>\n",
       "      <td>entries</td>\n",
       "      <td>Same</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     A          B         C        D     E\n",
       "2  1.2 2018-02-26 -1.304068      has  Same\n",
       "4  1.2 2018-02-26 -0.718282  entries  Same"
     "execution_count": 38,
Andreas Herten's avatar
Andreas Herten committed
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_demo.sort_values(\"C\").iloc[1:3]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "source": [
    "* One more row-slicing option: `.loc[]`\n",
    "* See the difference with a *proper* index (and not the auto-generated default index from before)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
Andreas Herten's avatar
Andreas Herten committed
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>E</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>This</th>\n",
       "      <td>1.2</td>\n",
       "      <td>2018-02-26</td>\n",
Andreas Herten's avatar
Andreas Herten committed
       "      <td>-2.718282</td>\n",
       "      <td>Same</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>column</th>\n",
       "      <td>1.2</td>\n",
       "      <td>2018-02-26</td>\n",
Andreas Herten's avatar
Andreas Herten committed
       "      <td>1.718282</td>\n",
       "      <td>Same</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>has</th>\n",
       "      <td>1.2</td>\n",
       "      <td>2018-02-26</td>\n",
Andreas Herten's avatar
Andreas Herten committed
       "      <td>-1.304068</td>\n",
       "      <td>Same</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>entries</th>\n",
       "      <td>1.2</td>\n",
       "      <td>2018-02-26</td>\n",
Andreas Herten's avatar
Andreas Herten committed
       "      <td>0.986231</td>\n",
       "      <td>Same</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>entries</th>\n",
       "      <td>1.2</td>\n",
       "      <td>2018-02-26</td>\n",
Andreas Herten's avatar
Andreas Herten committed
       "      <td>-0.718282</td>\n",
       "      <td>Same</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           A          B         C     E\n",
       "D                                      \n",
       "This     1.2 2018-02-26 -2.718282  Same\n",
       "column   1.2 2018-02-26  1.718282  Same\n",
       "has      1.2 2018-02-26 -1.304068  Same\n",
       "entries  1.2 2018-02-26  0.986231  Same\n",
       "entries  1.2 2018-02-26 -0.718282  Same"
     "execution_count": 39,
Andreas Herten's avatar
Andreas Herten committed
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_demo_indexed = df_demo.set_index(\"D\")\n",
    "df_demo_indexed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
Andreas Herten's avatar
Andreas Herten committed
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",