In [2]:
import pandas as pd
In [3]:
# for visualizations, don't worry about these for now
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

Datasets¶

Age of First Marriage¶
In [4]:
# Load the CSV using its first column as the index, then collapse the single
# remaining column into a Series.
age_marriage = (
    pd.read_csv("age_at_mar.csv", index_col=0)
    .squeeze("columns")
)

# Preview the first few rows.
age_marriage.head()
Out[4]:
1    32
2    25
3    24
4    26
5    32
Name: age, dtype: int64
In [5]:
# Dimensions of the series: a 1-tuple holding the number of rows.
age_marriage.shape
Out[5]:
(5534,)
In [6]:
# Histogram of the age-of-marriage series, drawn on an explicit axes object.
fig, ax = plt.subplots(figsize=(14, 7))
sns.histplot(data=age_marriage, ax=ax)
Out[6]:
<Axes: xlabel='age', ylabel='Count'>
No description has been provided for this image
S&P Returns 1990s¶
In [7]:
# Load the CSV using its first column as the index, then collapse the single
# remaining column into a Series.
sp500 = (
    pd.read_csv("SP500.csv", index_col=0)
    .squeeze("columns")
)
# Preview the first few rows.
sp500.head()
Out[7]:
1   -0.258891
2   -0.865031
3   -0.980414
4    0.450432
5   -1.185667
Name: dat, dtype: float64
In [8]:
# Dimensions of the series: a 1-tuple holding the number of rows.
sp500.shape
Out[8]:
(2780,)
In [9]:
# Histogram of the S&P 500 returns series, drawn on an explicit axes object.
fig, ax = plt.subplots(figsize=(14, 7))
sns.histplot(data=sp500, ax=ax)
Out[9]:
<Axes: xlabel='dat', ylabel='Count'>
No description has been provided for this image

Activities¶

1. Rename the series¶
In [12]:
# Give each series a descriptive name in place of the CSV column header.
age_marriage.name = "Age of First Marriage"
sp500.name = "S&P500 Returns 90s"

Basic Analysis¶

2. What's the maximum Age of marriage?¶
In [14]:
# Largest value in the age-of-marriage series.
age_marriage.max()
Out[14]:
43
3. What's the median Age of Marriage?¶
In [15]:
# Median of the age-of-marriage series.
age_marriage.median()
Out[15]:
23.0
4. What's the minimum return from S&P500?¶
In [16]:
# Smallest (most negative) value in the returns series.
sp500.min()
Out[16]:
-7.11274461287603

Simple Selection and Filtering¶

5. How many Women marry at age 21?¶
In [20]:
# Keep only the entries equal to 21, then count them.
age_marriage.loc[age_marriage.eq(21)].count()
Out[20]:
495
6. How many Women marry at 39y/o or older?¶
In [22]:
# Keep only the entries at 39 or above (inclusive), then count them.
age_marriage.loc[age_marriage.ge(39)].count()
Out[22]:
39
7. How many positive S&P500 returns are there?¶

The following visualization shows a red vertical line at 0; we're looking for everything to the right of that line:

In [23]:
# Histogram of returns with a red vertical marker at x = 0.
ax = sns.histplot(data=sp500)
ax.axvline(x=0, color="red")
Out[23]:
<matplotlib.lines.Line2D at 0x72d3665df950>
No description has been provided for this image
In [24]:
# Keep only the strictly positive returns, then count them.
sp500.loc[sp500.gt(0)].count()
Out[24]:
1474
8. How many returns are less than or equal to -2?¶

(To the left of the red line)

In [25]:
# Histogram of returns with a red vertical marker at x = -2.
ax = sns.histplot(data=sp500)
ax.axvline(x=-2, color="red")
Out[25]:
<matplotlib.lines.Line2D at 0x72d366053650>
No description has been provided for this image
In [26]:
# Count the returns that are less than or equal to -2. The comparison must be
# inclusive (<=): a strict < would drop any return exactly equal to -2.
sp500.loc[sp500 <= -2].count()
Out[26]:
63

Advanced Selection with Boolean Operators¶

9. Select all women below 20 or above 39¶

The segments depicted below:

In [27]:
# Histogram of ages with two translucent red bands marking the segments of
# interest: one starting at x=10 (9 wide) and one starting at x=39 (5 wide).
fig, ax = plt.subplots(figsize=(14, 7))
sns.histplot(data=age_marriage, ax=ax)
ax.add_patch(Rectangle(xy=(10, 0), width=9, height=450, alpha=0.3, color="red"))
ax.add_patch(Rectangle(xy=(39, 0), width=5, height=450, alpha=0.3, color="red"))
Out[27]:
<matplotlib.patches.Rectangle at 0x72d3668cddd0>
No description has been provided for this image
In [36]:
# Despite its name, this holds the ages OUTSIDE the 20-39 range:
# strictly below 20 or strictly above 39.
age_20_39 = age_marriage.loc[age_marriage.lt(20) | age_marriage.gt(39)]
10. Select all women whose ages are even, and are older than 30 y/o¶
In [38]:
# Ages strictly greater than 30 that are also even (remainder 0 when divided by 2).
age_30_even = age_marriage.loc[age_marriage.gt(30) & age_marriage.mod(2).eq(0)]
11. Select the S&P500 returns between 1.5 and 3¶

The ones depicted below:

In [ ]:
# Histogram of returns with a translucent red band over the target range,
# i.e. returns between 1.5 and 3.
fig, ax = plt.subplots(figsize=(14, 7))
sns.histplot(sp500, ax=ax)
# Rectangle takes the (x, y) anchor corner, then width and height. Anchoring at
# x=1.5 with width 1.5 makes the band cover 1.5 to 3; anchoring at x=1 would
# cover 1.0 to 2.5 and not match the range being selected.
ax.add_patch(Rectangle((1.5, 0), 1.5, 250, alpha=.3, color='red'))
In [40]:
# Returns strictly between 1.5 and 3 (both endpoints excluded).
sp_15_to_3 = sp500.loc[sp500.between(1.5, 3, inclusive="neither")]
In [ ]: