In [1]:
import pandas as pd
In [2]:
df = pd.read_csv('words.csv', index_col='Word')
In [3]:
df.head()
Out[3]:
| Char Count | Value | |
|---|---|---|
| Word | ||
| aa | 2 | 2 |
| aah | 3 | 10 |
| aahed | 5 | 19 |
| aahing | 6 | 40 |
| aahs | 4 | 29 |
Activities¶
How many elements does this dataframe have?¶
In [4]:
df.shape
Out[4]:
(172821, 2)
What is the value of the word microspectrophotometries?¶
In [ ]:
What is the highest possible value of a word?¶
In [ ]:
Which of the following words have a Char Count of 15?¶
In [ ]:
What is the highest possible length of a word?¶
In [ ]:
What is the word with the value of 319?¶
In [ ]:
What is the most common value?¶
In [7]:
df["Value"].value_counts().head()
Out[7]:
Value 93 1965 100 1921 95 1915 99 1907 92 1902 Name: count, dtype: int64
What is the shortest word with value 274?¶
In [12]:
df.loc[df["Value"] == 274, 'Char Count'].min()
Out[12]:
20
Create a column Ratio which represents the 'Value Ratio' of a word¶
In [16]:
df["Ratio"] = df["Value"]/df["Char Count"]
df.head()
Out[16]:
| Char Count | Value | ValuRatio | Ratio | |
|---|---|---|---|---|
| Word | ||||
| aa | 2 | 2 | 1.000000 | 1.000000 |
| aah | 3 | 10 | 3.333333 | 3.333333 |
| aahed | 5 | 19 | 3.800000 | 3.800000 |
| aahing | 6 | 40 | 6.666667 | 6.666667 |
| aahs | 4 | 29 | 7.250000 | 7.250000 |
What is the maximum value of Ratio?¶
In [18]:
df["Ratio"].max()
Out[18]:
22.5
What word is the one with the highest Ratio?¶
In [20]:
df.loc[df["Ratio"] == 22.5]
Out[20]:
| Char Count | Value | ValuRatio | Ratio | |
|---|---|---|---|---|
| Word | ||||
| xu | 2 | 45 | 22.5 | 22.5 |
In [23]:
df.sort_values(by="Ratio",ascending = False).head(2)
Out[23]:
| Char Count | Value | ValuRatio | Ratio | |
|---|---|---|---|---|
| Word | ||||
| xu | 2 | 45 | 22.5 | 22.5 |
| muzzy | 5 | 111 | 22.2 | 22.2 |
How many words have a Ratio of 10?¶
In [30]:
df.loc[df["Ratio"] == 10].shape
Out[30]:
(2604, 4)
What is the maximum Value of all the words with a Ratio of 10?¶
In [33]:
df.loc[df["Ratio"] == 10,"Value"]
Out[33]:
Word
aardwolf 80
abatements 100
abducts 70
abetment 80
abettals 80
...
ycleped 70
yodeled 70
zamia 50
zebecs 60
zwieback 80
Name: Value, Length: 2604, dtype: int64
Of those words with a Value of 260, what is the lowest Char Count found?¶
In [36]:
df.loc[df["Value"] == 260, "Char Count"].sort_values(ascending = True)
Out[36]:
Word hydroxytryptamine 17 neuropsychologists 18 psychophysiologist 18 revolutionarinesses 19 countermobilizations 20 underrepresentations 20 Name: Char Count, dtype: int64
Based on the previous task, what word is it?¶
In [38]:
df.loc[df["Value"] == 260, df["Char Count"].min()]
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) File /usr/local/lib/python3.11/site-packages/pandas/core/indexes/base.py:3805, in Index.get_loc(self, key) 3804 try: -> 3805 return self._engine.get_loc(casted_key) 3806 except KeyError as err: File index.pyx:167, in pandas._libs.index.IndexEngine.get_loc() File index.pyx:196, in pandas._libs.index.IndexEngine.get_loc() File pandas/_libs/hashtable_class_helper.pxi:7081, in pandas._libs.hashtable.PyObjectHashTable.get_item() File pandas/_libs/hashtable_class_helper.pxi:7089, in pandas._libs.hashtable.PyObjectHashTable.get_item() KeyError: 2 The above exception was the direct cause of the following exception: KeyError Traceback (most recent call last) Cell In[38], line 1 ----> 1 df.loc[df["Value"] == 260, df["Char Count"].min()] File /usr/local/lib/python3.11/site-packages/pandas/core/indexing.py:1184, in _LocationIndexer.__getitem__(self, key) 1182 if self._is_scalar_access(key): 1183 return self.obj._get_value(*key, takeable=self._takeable) -> 1184 return self._getitem_tuple(key) 1185 else: 1186 # we by definition only have the 0th axis 1187 axis = self.axis or 0 File /usr/local/lib/python3.11/site-packages/pandas/core/indexing.py:1368, in _LocIndexer._getitem_tuple(self, tup) 1366 with suppress(IndexingError): 1367 tup = self._expand_ellipsis(tup) -> 1368 return self._getitem_lowerdim(tup) 1370 # no multi-index, so validate all of the indexers 1371 tup = self._validate_tuple_indexer(tup) File /usr/local/lib/python3.11/site-packages/pandas/core/indexing.py:1065, in _LocationIndexer._getitem_lowerdim(self, tup) 1061 for i, key in enumerate(tup): 1062 if is_label_like(key): 1063 # We don't need to check for tuples here because those are 1064 # caught by the _is_nested_tuple_indexer check above. -> 1065 section = self._getitem_axis(key, axis=i) 1067 # We should never have a scalar section here, because 1068 # _getitem_lowerdim is only called after a check for 1069 # is_scalar_access, which that would be. 1070 if section.ndim == self.ndim: 1071 # we're in the middle of slicing through a MultiIndex 1072 # revise the key wrt to `section` by inserting an _NS File /usr/local/lib/python3.11/site-packages/pandas/core/indexing.py:1431, in _LocIndexer._getitem_axis(self, key, axis) 1429 # fall thru to straight lookup 1430 self._validate_key(key, axis) -> 1431 return self._get_label(key, axis=axis) File /usr/local/lib/python3.11/site-packages/pandas/core/indexing.py:1381, in _LocIndexer._get_label(self, label, axis) 1379 def _get_label(self, label, axis: AxisInt): 1380 # GH#5567 this will fail if the label is not present in the axis. -> 1381 return self.obj.xs(label, axis=axis) File /usr/local/lib/python3.11/site-packages/pandas/core/generic.py:4287, in NDFrame.xs(self, key, axis, level, drop_level) 4285 if axis == 1: 4286 if drop_level: -> 4287 return self[key] 4288 index = self.columns 4289 else: File /usr/local/lib/python3.11/site-packages/pandas/core/frame.py:4102, in DataFrame.__getitem__(self, key) 4100 if self.columns.nlevels > 1: 4101 return self._getitem_multilevel(key) -> 4102 indexer = self.columns.get_loc(key) 4103 if is_integer(indexer): 4104 indexer = [indexer] File /usr/local/lib/python3.11/site-packages/pandas/core/indexes/base.py:3812, in Index.get_loc(self, key) 3807 if isinstance(casted_key, slice) or ( 3808 isinstance(casted_key, abc.Iterable) 3809 and any(isinstance(x, slice) for x in casted_key) 3810 ): 3811 raise InvalidIndexError(key) -> 3812 raise KeyError(key) from err 3813 except TypeError: 3814 # If we have a listlike key, _check_indexing_error will raise 3815 # InvalidIndexError. Otherwise we fall through and re-raise 3816 # the TypeError. 3817 self._check_indexing_error(key) KeyError: 2
In [ ]: