In [1]:
import pandas as pd
In [2]:
df = pd.read_csv('words.csv', index_col='Word')
In [3]:
df.head()
Out[3]:
Char Count Value
Word
aa 2 2
aah 3 10
aahed 5 19
aahing 6 40
aahs 4 29

Activities¶

How many elements does this dataframe have?¶
In [4]:
df.shape
Out[4]:
(172821, 2)
What is the value of the word microspectrophotometries?¶
In [ ]:
 
What is the highest possible value of a word?¶
In [ ]:
 
Which of the following words have a Char Count of 15?¶
In [ ]:
 
What is the highest possible length of a word?¶
In [ ]:
 
What is the word with the value of 319?¶
In [ ]:
 
What is the most common value?¶
In [7]:
df["Value"].value_counts().head()
Out[7]:
Value
93     1965
100    1921
95     1915
99     1907
92     1902
Name: count, dtype: int64
What is the shortest word with value 274?¶
In [12]:
df.loc[df["Value"] == 274, 'Char Count'].min()
Out[12]:
20
Create a column Ratio which represents the 'Value Ratio' of a word¶
In [16]:
df["Ratio"] = df["Value"]/df["Char Count"]
df.head()
Out[16]:
Char Count Value ValuRatio Ratio
Word
aa 2 2 1.000000 1.000000
aah 3 10 3.333333 3.333333
aahed 5 19 3.800000 3.800000
aahing 6 40 6.666667 6.666667
aahs 4 29 7.250000 7.250000
What is the maximum value of Ratio?¶
In [18]:
df["Ratio"].max()
Out[18]:
22.5
What word is the one with the highest Ratio?¶
In [20]:
df.loc[df["Ratio"] == 22.5]
Out[20]:
Char Count Value ValuRatio Ratio
Word
xu 2 45 22.5 22.5
In [23]:
df.sort_values(by="Ratio",ascending = False).head(2)
Out[23]:
Char Count Value ValuRatio Ratio
Word
xu 2 45 22.5 22.5
muzzy 5 111 22.2 22.2
How many words have a Ratio of 10?¶
In [30]:
df.loc[df["Ratio"] == 10].shape
Out[30]:
(2604, 4)
What is the maximum Value of all the words with a Ratio of 10?¶
In [33]:
df.loc[df["Ratio"] == 10,"Value"]
Out[33]:
Word
aardwolf       80
abatements    100
abducts        70
abetment       80
abettals       80
             ... 
ycleped        70
yodeled        70
zamia          50
zebecs         60
zwieback       80
Name: Value, Length: 2604, dtype: int64
Of those words with a Value of 260, what is the lowest Char Count found?¶
In [36]:
df.loc[df["Value"] == 260, "Char Count"].sort_values(ascending = True)
Out[36]:
Word
hydroxytryptamine       17
neuropsychologists      18
psychophysiologist      18
revolutionarinesses     19
countermobilizations    20
underrepresentations    20
Name: Char Count, dtype: int64
Based on the previous task, what word is it?¶
In [38]:
df.loc[df["Value"] == 260, df["Char Count"].min()]
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File /usr/local/lib/python3.11/site-packages/pandas/core/indexes/base.py:3805, in Index.get_loc(self, key)
   3804 try:
-> 3805     return self._engine.get_loc(casted_key)
   3806 except KeyError as err:

File index.pyx:167, in pandas._libs.index.IndexEngine.get_loc()

File index.pyx:196, in pandas._libs.index.IndexEngine.get_loc()

File pandas/_libs/hashtable_class_helper.pxi:7081, in pandas._libs.hashtable.PyObjectHashTable.get_item()

File pandas/_libs/hashtable_class_helper.pxi:7089, in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 2

The above exception was the direct cause of the following exception:

KeyError                                  Traceback (most recent call last)
Cell In[38], line 1
----> 1 df.loc[df["Value"] == 260, df["Char Count"].min()]

File /usr/local/lib/python3.11/site-packages/pandas/core/indexing.py:1184, in _LocationIndexer.__getitem__(self, key)
   1182     if self._is_scalar_access(key):
   1183         return self.obj._get_value(*key, takeable=self._takeable)
-> 1184     return self._getitem_tuple(key)
   1185 else:
   1186     # we by definition only have the 0th axis
   1187     axis = self.axis or 0

File /usr/local/lib/python3.11/site-packages/pandas/core/indexing.py:1368, in _LocIndexer._getitem_tuple(self, tup)
   1366 with suppress(IndexingError):
   1367     tup = self._expand_ellipsis(tup)
-> 1368     return self._getitem_lowerdim(tup)
   1370 # no multi-index, so validate all of the indexers
   1371 tup = self._validate_tuple_indexer(tup)

File /usr/local/lib/python3.11/site-packages/pandas/core/indexing.py:1065, in _LocationIndexer._getitem_lowerdim(self, tup)
   1061 for i, key in enumerate(tup):
   1062     if is_label_like(key):
   1063         # We don't need to check for tuples here because those are
   1064         #  caught by the _is_nested_tuple_indexer check above.
-> 1065         section = self._getitem_axis(key, axis=i)
   1067         # We should never have a scalar section here, because
   1068         #  _getitem_lowerdim is only called after a check for
   1069         #  is_scalar_access, which that would be.
   1070         if section.ndim == self.ndim:
   1071             # we're in the middle of slicing through a MultiIndex
   1072             # revise the key wrt to `section` by inserting an _NS

File /usr/local/lib/python3.11/site-packages/pandas/core/indexing.py:1431, in _LocIndexer._getitem_axis(self, key, axis)
   1429 # fall thru to straight lookup
   1430 self._validate_key(key, axis)
-> 1431 return self._get_label(key, axis=axis)

File /usr/local/lib/python3.11/site-packages/pandas/core/indexing.py:1381, in _LocIndexer._get_label(self, label, axis)
   1379 def _get_label(self, label, axis: AxisInt):
   1380     # GH#5567 this will fail if the label is not present in the axis.
-> 1381     return self.obj.xs(label, axis=axis)

File /usr/local/lib/python3.11/site-packages/pandas/core/generic.py:4287, in NDFrame.xs(self, key, axis, level, drop_level)
   4285 if axis == 1:
   4286     if drop_level:
-> 4287         return self[key]
   4288     index = self.columns
   4289 else:

File /usr/local/lib/python3.11/site-packages/pandas/core/frame.py:4102, in DataFrame.__getitem__(self, key)
   4100 if self.columns.nlevels > 1:
   4101     return self._getitem_multilevel(key)
-> 4102 indexer = self.columns.get_loc(key)
   4103 if is_integer(indexer):
   4104     indexer = [indexer]

File /usr/local/lib/python3.11/site-packages/pandas/core/indexes/base.py:3812, in Index.get_loc(self, key)
   3807     if isinstance(casted_key, slice) or (
   3808         isinstance(casted_key, abc.Iterable)
   3809         and any(isinstance(x, slice) for x in casted_key)
   3810     ):
   3811         raise InvalidIndexError(key)
-> 3812     raise KeyError(key) from err
   3813 except TypeError:
   3814     # If we have a listlike key, _check_indexing_error will raise
   3815     #  InvalidIndexError. Otherwise we fall through and re-raise
   3816     #  the TypeError.
   3817     self._check_indexing_error(key)

KeyError: 2
In [ ]: