In [2]:
import pandas as pd
In [3]:
# for visualizations, don't worry about these for now
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
Datasets¶
Age of First Marriage¶
In [4]:
# Read the CSV using its first column as the row index, then squeeze the
# column axis so a one-column frame collapses into a Series.
age_marriage = (
    pd.read_csv("age_at_mar.csv", index_col=0)
    .squeeze("columns")
)
# Preview the first few entries.
age_marriage.head()
Out[4]:
1 32 2 25 3 24 4 26 5 32 Name: age, dtype: int64
In [5]:
# A Series is one-dimensional, so its shape is a 1-tuple holding the number of entries.
age_marriage.shape
Out[5]:
(5534,)
In [6]:
# Wide figure so the age distribution is easy to read.
fig, ax = plt.subplots(figsize=(14, 7))
# Draw the histogram of ages onto the axes created above.
sns.histplot(data=age_marriage, ax=ax)
Out[6]:
<Axes: xlabel='age', ylabel='Count'>
S&P 500 Returns, 1990s¶
In [7]:
# Read the CSV using its first column as the row index, then squeeze the
# column axis so a one-column frame collapses into a Series.
sp500 = (
    pd.read_csv('SP500.csv', index_col=0)
    .squeeze("columns")
)
# Preview the first few entries.
sp500.head()
Out[7]:
1 -0.258891 2 -0.865031 3 -0.980414 4 0.450432 5 -1.185667 Name: dat, dtype: float64
In [8]:
# A Series is one-dimensional, so its shape is a 1-tuple holding the number of entries.
sp500.shape
Out[8]:
(2780,)
In [9]:
# Wide figure so the distribution of returns is easy to read.
fig, ax = plt.subplots(figsize=(14, 7))
# Draw the histogram of returns onto the axes created above.
sns.histplot(data=sp500, ax=ax)
Out[9]:
<Axes: xlabel='dat', ylabel='Count'>
Activities¶
1. Rename the series¶
In [12]:
# Give each series a descriptive name by assigning to its `name` attribute;
# this relabels the existing objects in place rather than creating copies.
age_marriage.name = 'Age of First Marriage'
sp500.name = 'S&P500 Returns 90s'
Basic Analysis¶
2. What's the maximum Age of marriage?¶
In [14]:
# Largest value in the series, i.e. the highest age at first marriage in the data.
age_marriage.max()
Out[14]:
43
3. What's the median Age of Marriage?¶
In [15]:
# Middle value of the series: the median age at first marriage.
age_marriage.median()
Out[15]:
23.0
4. What's the minimum return from S&P500?¶
In [16]:
# Smallest value in the series, i.e. the lowest return in the data.
sp500.min()
Out[16]:
-7.11274461287603
Simple Selection and Filtering¶
5. How many Women marry at age 21?¶
In [20]:
# Boolean mask that is True where the age is exactly 21 ...
married_at_21 = age_marriage == 21
# ... then select those rows and count them.
age_marriage.loc[married_at_21].count()
Out[20]:
495
6. How many Women marry at 39y/o or older?¶
In [22]:
# Boolean mask that is True where the age is 39 or more (inclusive) ...
married_at_39_plus = age_marriage >= 39
# ... then select those rows and count them.
age_marriage.loc[married_at_39_plus].count()
Out[22]:
39
7. How many positive S&P500 returns are there?¶
The following visualization shows a red vertical line at 0; we're looking for everything to the right of that line:
In [23]:
# Histogram of the returns; seaborn returns the axes it drew on.
ax = sns.histplot(data=sp500)
# Red vertical marker at zero, the boundary between negative and positive returns.
ax.axvline(x=0, color="red")
Out[23]:
<matplotlib.lines.Line2D at 0x72d3665df950>
In [24]:
# Boolean mask that is True for strictly positive returns (zero is excluded) ...
is_positive_return = sp500 > 0
# ... then select those rows and count them.
sp500.loc[is_positive_return].count()
Out[24]:
1474
8. How many returns are less than or equal to -2?¶
(To the left of the red line)
In [25]:
# Histogram of the returns; seaborn returns the axes it drew on.
ax = sns.histplot(data=sp500)
# Red vertical marker at -2, the threshold used in this question.
ax.axvline(x=-2, color="red")
Out[25]:
<matplotlib.lines.Line2D at 0x72d366053650>
In [26]:
# The question asks for returns at or below -2, so the comparison must be
# inclusive (<=); a strict < would silently drop a return of exactly -2.
sp500.loc[sp500 <= -2].count()
Out[26]:
63
Advanced Selection with Boolean Operators¶
9. Select all women below 20 or above 39¶
The segments depicted below:
In [27]:
# Histogram of ages on a wide figure.
fig, ax = plt.subplots(figsize=(14, 7))
sns.histplot(data=age_marriage, ax=ax)
# Translucent red band over the lower tail: anchored at x=10, 9 wide (10 to 19).
ax.add_patch(Rectangle(xy=(10, 0), width=9, height=450, alpha=.3, color='red'))
# Translucent red band over the upper tail: anchored at x=39, 5 wide (39 to 44).
ax.add_patch(Rectangle(xy=(39, 0), width=5, height=450, alpha=.3, color='red'))
Out[27]:
<matplotlib.patches.Rectangle at 0x72d3668cddd0>
In [36]:
# Two strict conditions, one per tail of the distribution.
below_20 = age_marriage < 20
above_39 = age_marriage > 39
# Keep a row when it falls in either tail (element-wise OR).
age_20_39 = age_marriage.loc[below_20 | above_39]
10. Select all women whose ages are even, and are older than 30 y/o¶
In [38]:
# Strictly older than 30 ...
older_than_30 = age_marriage > 30
# ... and an even age (remainder 0 when divided by 2).
has_even_age = (age_marriage % 2) == 0
# Keep a row only when both conditions hold (element-wise AND).
age_30_even = age_marriage.loc[older_than_30 & has_even_age]
11. Select the S&P500 returns between 1.5 and 3¶
The ones depicted below:
In [ ]:
# Histogram of returns on a wide figure.
fig, ax = plt.subplots(figsize=(14, 7))
sns.histplot(sp500, ax=ax)
# Shade the 1.5-to-3 band: the rectangle is anchored at x=1.5 and is 1.5 wide.
# (Anchoring it at x=1 would shade 1 to 2.5, which does not match the band
# this activity asks for.)
ax.add_patch(Rectangle((1.5, 0), 1.5, 250, alpha=.3, color='red'))
In [40]:
# Both bounds are strict: returns equal to 1.5 or 3 are excluded.
above_lower_bound = sp500 > 1.5
below_upper_bound = sp500 < 3
# Keep a row only when it lies inside the band (element-wise AND).
sp_15_to_3 = sp500.loc[above_lower_bound & below_upper_bound]
In [ ]: