In [3]:
import pandas as pd
In [4]:
# Load the word list; the "Word" column becomes the row index so individual
# words can be looked up by label.
df = pd.read_csv("words.csv", index_col="Word")
In [5]:
# Preview the first five rows to check the columns and the Word index.
df.head(n=5)
Out[5]:
Char Count | Value | |
---|---|---|
Word | ||
aa | 2 | 2 |
aah | 3 | 10 |
aahed | 5 | 19 |
aahing | 6 | 40 |
aahs | 4 | 29 |
Activities¶
How many elements does this dataframe have?¶
In [6]:
# shape is a (rows, columns) tuple; each row is one word.
df.shape
Out[6]:
(172821, 2)
What is the value of the word `microspectrophotometries`?¶
In [7]:
# Scalar lookup by row label (the word) and column label.
df.at["microspectrophotometries", "Value"]
Out[7]:
317
What is the highest possible value of a word?¶
In [12]:
# Largest entry in the Value column.
df.loc[:, "Value"].max()
Out[12]:
319
Which of the following words have a Char Count of `15`?¶
In [ ]:
What is the highest possible length of a word?¶
In [13]:
# Summary statistics for each numeric column; the `max` row of `Char Count`
# gives the length of the longest word.
df.describe()
Out[13]:
Char Count | Value | |
---|---|---|
count | 172821.000000 | 172821.000000 |
mean | 9.087628 | 107.754179 |
std | 2.818285 | 39.317452 |
min | 2.000000 | 2.000000 |
25% | 7.000000 | 80.000000 |
50% | 9.000000 | 103.000000 |
75% | 11.000000 | 131.000000 |
max | 28.000000 | 319.000000 |
What is the word with the value of `319`?¶
In [15]:
# Keep only the rows whose Value is exactly 319.
df[df["Value"].eq(319)]
Out[15]:
Char Count | Value | |
---|---|---|
Word | ||
reinstitutionalizations | 23 | 319 |
What is the most common value?¶
In [19]:
# value_counts() sorts by frequency (descending), so the first five entries
# are the five most common values; the top one answers the question.
df["Value"].value_counts().iloc[:5]
Out[19]:
Value 93 1965 100 1921 95 1915 99 1907 92 1902 Name: count, dtype: int64
What is the shortest word with value `274`?¶
In [25]:
# Shortest word(s) with Value 274: select rows whose Value is 274 AND whose
# Char Count equals the minimum Char Count found among the Value-274 rows.
# Column labels are case-sensitive: the column is "Value", not "value"
# (the lowercase spelling raises KeyError).
df.loc[
    (df["Value"] == 274) &
    (df["Char Count"] == df.loc[df["Value"] == 274, "Char Count"].min())
]
Create a column `Ratio` which represents the 'Value Ratio' of a word (its `Value` divided by its `Char Count`)¶
In [26]:
# Look at the first five rows of the frame.
df.head(n=5)
Out[26]:
Char Count | Value | |
---|---|---|
Word | ||
aa | 2 | 2 |
aah | 3 | 10 |
aahed | 5 | 19 |
aahing | 6 | 40 |
aahs | 4 | 29 |
In [27]:
# Ratio = Value per character (element-wise division of the two columns).
df["Ratio"] = df["Value"].div(df["Char Count"])
In [28]:
# Show the first five rows, including the Ratio column.
df.head(n=5)
Out[28]:
Char Count | Value | Ratio | |
---|---|---|---|
Word | |||
aa | 2 | 2 | 1.000000 |
aah | 3 | 10 | 3.333333 |
aahed | 5 | 19 | 3.800000 |
aahing | 6 | 40 | 6.666667 |
aahs | 4 | 29 | 7.250000 |
What is the maximum value of `Ratio`?¶
In [30]:
# Largest entry in the Ratio column.
df.loc[:, "Ratio"].max()
Out[30]:
22.5
What word is the one with the highest `Ratio`?¶
In [31]:
# Select the row(s) whose Ratio equals the column maximum. Computing the
# maximum here (instead of hard-coding a float copied from an earlier output)
# keeps the cell correct if the data changes and avoids comparing against a
# hand-typed float literal.
df.loc[df["Ratio"] == df["Ratio"].max()]
Out[31]:
Char Count | Value | Ratio | |
---|---|---|---|
Word | |||
xu | 2 | 45 | 22.5 |
How many words have a `Ratio` of `10`?¶
In [32]:
# Count how many words share each Ratio; the count for Ratio 10.0 is the
# answer to the question above.
df.loc[:, "Ratio"].value_counts()
Out[32]:
Ratio 12.000000 3751 11.000000 3428 13.000000 3272 10.000000 2604 14.000000 2357 ... 17.545455 1 16.466667 1 19.125000 1 14.823529 1 18.083333 1 Name: count, Length: 1333, dtype: int64
What is the maximum `Value` of all the words with a `Ratio` of `10`?¶
In [33]:
# Restrict to words whose Ratio is exactly 10, then take the largest Value.
df.query("Ratio == 10")["Value"].max()
Out[33]:
240
Of those words with a `Value` of `260`, what is the lowest `Char Count` found?¶
In [37]:
# Restrict to words whose Value is 260, then take the smallest Char Count.
df.query("Value == 260")["Char Count"].min()
Out[37]:
17
Based on the previous task, what word is it?¶
In [38]:
# NOTE(review): this is the smallest Char Count across ALL words (no
# Value == 260 filter), so it does not identify the word asked about in the
# heading above.
df["Char Count"].min()
Out[38]:
2
In [44]:
# All words with Value 260, shortest first; the top row is the word with the
# lowest Char Count.
df.loc[df["Value"] == 260].sort_values("Char Count")
Out[44]:
Char Count | Value | Ratio | |
---|---|---|---|
Word | |||
hydroxytryptamine | 17 | 260 | 15.294118 |
neuropsychologists | 18 | 260 | 14.444444 |
psychophysiologist | 18 | 260 | 14.444444 |
revolutionarinesses | 19 | 260 | 13.684211 |
countermobilizations | 20 | 260 | 13.000000 |
underrepresentations | 20 | 260 | 13.000000 |
In [ ]: