In [3]:
import pandas as pd
In [4]:
df = pd.read_csv('words.csv', index_col='Word')
In [5]:
df.head()
Out[5]:
Char Count Value
Word
aa 2 2
aah 3 10
aahed 5 19
aahing 6 40
aahs 4 29

Activities¶

How many elements does this dataframe have?¶
In [6]:
df.shape
Out[6]:
(172821, 2)
What is the value of the word microspectrophotometries?¶
In [7]:
df.loc["microspectrophotometries","Value"]
Out[7]:
317
What is the highest possible value of a word?¶
In [12]:
df["Value"].max()
Out[12]:
319
Which of the following words have a Char Count of 15?¶
In [ ]:
 
What is the highest possible length of a word?¶
In [13]:
df.describe()
Out[13]:
Char Count Value
count 172821.000000 172821.000000
mean 9.087628 107.754179
std 2.818285 39.317452
min 2.000000 2.000000
25% 7.000000 80.000000
50% 9.000000 103.000000
75% 11.000000 131.000000
max 28.000000 319.000000
What is the word with the value of 319?¶
In [15]:
df.loc[df["Value"]== 319]
Out[15]:
Char Count Value
Word
reinstitutionalizations 23 319
What is the most common value?¶
In [19]:
df["Value"].value_counts().head()
Out[19]:
Value
93     1965
100    1921
95     1915
99     1907
92     1902
Name: count, dtype: int64
What is the shortest word with value 274?¶
In [25]:
df.loc[
    (df["Value"] == 274) &
    (df["Char Count"] == df.loc[df["value"] == 274 , "Char Count"].min())
]
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File /usr/local/lib/python3.11/site-packages/pandas/core/indexes/base.py:3805, in Index.get_loc(self, key)
   3804 try:
-> 3805     return self._engine.get_loc(casted_key)
   3806 except KeyError as err:

File index.pyx:167, in pandas._libs.index.IndexEngine.get_loc()

File index.pyx:196, in pandas._libs.index.IndexEngine.get_loc()

File pandas/_libs/hashtable_class_helper.pxi:7081, in pandas._libs.hashtable.PyObjectHashTable.get_item()

File pandas/_libs/hashtable_class_helper.pxi:7089, in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'value'

The above exception was the direct cause of the following exception:

KeyError                                  Traceback (most recent call last)
Cell In[25], line 3
      1 df.loc[
      2     (df["Value"] == 274) &
----> 3     (df["Char Count"] == df.loc[df["value"] == 274 , "Char Count"].min())
      4 ]

File /usr/local/lib/python3.11/site-packages/pandas/core/frame.py:4102, in DataFrame.__getitem__(self, key)
   4100 if self.columns.nlevels > 1:
   4101     return self._getitem_multilevel(key)
-> 4102 indexer = self.columns.get_loc(key)
   4103 if is_integer(indexer):
   4104     indexer = [indexer]

File /usr/local/lib/python3.11/site-packages/pandas/core/indexes/base.py:3812, in Index.get_loc(self, key)
   3807     if isinstance(casted_key, slice) or (
   3808         isinstance(casted_key, abc.Iterable)
   3809         and any(isinstance(x, slice) for x in casted_key)
   3810     ):
   3811         raise InvalidIndexError(key)
-> 3812     raise KeyError(key) from err
   3813 except TypeError:
   3814     # If we have a listlike key, _check_indexing_error will raise
   3815     #  InvalidIndexError. Otherwise we fall through and re-raise
   3816     #  the TypeError.
   3817     self._check_indexing_error(key)

KeyError: 'value'
Create a column Ratio which represents the 'Value Ratio' of a word¶
In [26]:
df.head()
Out[26]:
Char Count Value
Word
aa 2 2
aah 3 10
aahed 5 19
aahing 6 40
aahs 4 29
In [27]:
df["Ratio"] = df["Value"]/df["Char Count"]
In [28]:
df.head()
Out[28]:
Char Count Value Ratio
Word
aa 2 2 1.000000
aah 3 10 3.333333
aahed 5 19 3.800000
aahing 6 40 6.666667
aahs 4 29 7.250000
What is the maximum value of Ratio?¶
In [30]:
df["Ratio"].max()
Out[30]:
22.5
What word is the one with the highest Ratio?¶
In [31]:
df.loc[df["Ratio"] == 22.5]
Out[31]:
Char Count Value Ratio
Word
xu 2 45 22.5
How many words have a Ratio of 10?¶
In [32]:
df["Ratio"].value_counts()
Out[32]:
Ratio
12.000000    3751
11.000000    3428
13.000000    3272
10.000000    2604
14.000000    2357
             ... 
17.545455       1
16.466667       1
19.125000       1
14.823529       1
18.083333       1
Name: count, Length: 1333, dtype: int64
What is the maximum Value of all the words with a Ratio of 10?¶
In [33]:
df.loc[df["Ratio"] == 10 ,"Value"].max()
Out[33]:
240
Of those words with a Value of 260, what is the lowest Char Count found?¶
In [37]:
df.loc[df["Value"] == 260 ,"Char Count"].min()
Out[37]:
17
Based on the previous task, what word is it?¶
In [38]:
df["Char Count"].min()
Out[38]:
2
In [44]:
df.query("Value == 260").sort_values(by="Char Count")
Out[44]:
Char Count Value Ratio
Word
hydroxytryptamine 17 260 15.294118
neuropsychologists 18 260 14.444444
psychophysiologist 18 260 14.444444
revolutionarinesses 19 260 13.684211
countermobilizations 20 260 13.000000
underrepresentations 20 260 13.000000
In [ ]: