# `pd` is the conventional alias for Pandas, as `np` is for NumPy
import pandas as pd
Series, DataFrames, and Indices¶
Series, DataFrames, and Indices are fundamental pandas
data structures for storing tabular data and processing the data using vectorized operations.
s = pd.Series(["welcome", "to", "data 100"])
s
0 welcome 1 to 2 data 100 dtype: object
s.values
array(['welcome', 'to', 'data 100'], dtype=object)
s.index
RangeIndex(start=0, stop=3, step=1)
In the example above, pandas automatically generated an Index of integer labels (a RangeIndex). We can also create a Series object by providing a custom Index.
s = pd.Series([-1, 10, 2], index=["a", "b", "c"])
s
a -1 b 10 c 2 dtype: int64
s.values
array([-1, 10, 2])
s.index
Index(['a', 'b', 'c'], dtype='object')
After it has been created, we can reassign the Index of a Series
to a new Index.
s.index = ["first", "second", "third"]
s
first -1 second 10 third 2 dtype: int64
Selection in Series¶
We can select a single value or a set of values in a Series
using:
- A single label
- A list of labels
- A filtering condition
s = pd.Series([4, -2, 0, 6], index=["a", "b", "c", "d"])
s
a 4 b -2 c 0 d 6 dtype: int64
Selection using one or more label(s)
# Selection using a single label
# Notice how the return value is a single array element
s["a"]
4
# Selection using a list of labels
# Notice how the return value is another Series
s[["a", "c"]]
a 4 c 0 dtype: int64
Selection using a filter condition
# Filter condition: select all elements greater than 0
s>0
a True b False c False d True dtype: bool
# Use the Boolean filter to select data from the original Series
s[s>0]
a 4 d 6 dtype: int64
Instructor Note: Return to slides!
DataFrame¶
A DataFrame
is a 2-D tabular data structure with both row and column labels. In this lecture, we will see how a DataFrame
can be created from scratch or loaded from a file.
Creating a new DataFrame
object¶
We can create a DataFrame in a variety of ways. Here, we cover the following:
- From a CSV file
- Using a list and column names
- From a dictionary
- From a
Series
Creating a DataFrame
from a CSV file¶
For loading data into a DataFrame
, pandas
has a number of very useful file reading tools. We'll be using read_csv
today to load data from a CSV file into a DataFrame
object.
elections = pd.read_csv("data/elections.csv")
elections
Year | Candidate | Party | Popular vote | Result | % | |
---|---|---|---|---|---|---|
0 | 1824 | Andrew Jackson | Democratic-Republican | 151271 | loss | 57.210122 |
1 | 1824 | John Quincy Adams | Democratic-Republican | 113142 | win | 42.789878 |
2 | 1828 | Andrew Jackson | Democratic | 642806 | win | 56.203927 |
3 | 1828 | John Quincy Adams | National Republican | 500897 | loss | 43.796073 |
4 | 1832 | Andrew Jackson | Democratic | 702735 | win | 54.574789 |
... | ... | ... | ... | ... | ... | ... |
182 | 2024 | Donald Trump | Republican | 77303568 | win | 49.808629 |
183 | 2024 | Kamala Harris | Democratic | 75019230 | loss | 48.336772 |
184 | 2024 | Jill Stein | Green | 861155 | loss | 0.554864 |
185 | 2024 | Robert Kennedy | Independent | 756383 | loss | 0.487357 |
186 | 2024 | Chase Oliver | Libertarian Party | 650130 | loss | 0.418895 |
187 rows × 6 columns
By passing a column name to the index_col parameter of read_csv, we can set that column as the Index when the DataFrame is created.
elections = pd.read_csv("data/elections.csv", index_col="Candidate")
elections
Year | Party | Popular vote | Result | % | |
---|---|---|---|---|---|
Candidate | |||||
Andrew Jackson | 1824 | Democratic-Republican | 151271 | loss | 57.210122 |
John Quincy Adams | 1824 | Democratic-Republican | 113142 | win | 42.789878 |
Andrew Jackson | 1828 | Democratic | 642806 | win | 56.203927 |
John Quincy Adams | 1828 | National Republican | 500897 | loss | 43.796073 |
Andrew Jackson | 1832 | Democratic | 702735 | win | 54.574789 |
... | ... | ... | ... | ... | ... |
Donald Trump | 2024 | Republican | 77303568 | win | 49.808629 |
Kamala Harris | 2024 | Democratic | 75019230 | loss | 48.336772 |
Jill Stein | 2024 | Green | 861155 | loss | 0.554864 |
Robert Kennedy | 2024 | Independent | 756383 | loss | 0.487357 |
Chase Oliver | 2024 | Libertarian Party | 650130 | loss | 0.418895 |
187 rows × 5 columns
elections = pd.read_csv("data/elections.csv", index_col="Year")
elections
Candidate | Party | Popular vote | Result | % | |
---|---|---|---|---|---|
Year | |||||
1824 | Andrew Jackson | Democratic-Republican | 151271 | loss | 57.210122 |
1824 | John Quincy Adams | Democratic-Republican | 113142 | win | 42.789878 |
1828 | Andrew Jackson | Democratic | 642806 | win | 56.203927 |
1828 | John Quincy Adams | National Republican | 500897 | loss | 43.796073 |
1832 | Andrew Jackson | Democratic | 702735 | win | 54.574789 |
... | ... | ... | ... | ... | ... |
2024 | Donald Trump | Republican | 77303568 | win | 49.808629 |
2024 | Kamala Harris | Democratic | 75019230 | loss | 48.336772 |
2024 | Jill Stein | Green | 861155 | loss | 0.554864 |
2024 | Robert Kennedy | Independent | 756383 | loss | 0.487357 |
2024 | Chase Oliver | Libertarian Party | 650130 | loss | 0.418895 |
187 rows × 5 columns
Creating a DataFrame
using a list and column names¶
# Creating a single-column DataFrame using a list
df_list_1 = pd.DataFrame([1, 2, 3],
columns=["Number"])
display(df_list_1)
Number | |
---|---|
0 | 1 |
1 | 2 |
2 | 3 |
# Creating a multi-column DataFrame using a list of lists
df_list_2 = pd.DataFrame([[1, "one"], [2, "two"]],
columns=["Number", "Description"])
df_list_2
Number | Description | |
---|---|---|
0 | 1 | one |
1 | 2 | two |
Creating a DataFrame
from a dictionary¶
# Creating a DataFrame from a dictionary of columns
df_dict_1 = pd.DataFrame({"Fruit":["Strawberry", "Orange"],
"Price":[5.49, 3.99]})
df_dict_1
Fruit | Price | |
---|---|---|
0 | Strawberry | 5.49 |
1 | Orange | 3.99 |
# Creating a DataFrame from a list of row dictionaries
df_dict_2 = pd.DataFrame([{"Fruit":"Strawberry", "Price":5.49},
{"Fruit":"Orange", "Price":3.99}])
df_dict_2
Fruit | Price | |
---|---|---|
0 | Strawberry | 5.49 |
1 | Orange | 3.99 |
Creating a DataFrame
from a Series
¶
# In the examples below, we create a DataFrame from a Series
s_a = pd.Series(["a1", "a2", "a3"], index=["r1", "r2", "r3"])
s_b = pd.Series(["b1", "b2", "b3"], index=["r1", "r2", "r3"])
# Passing Series objects for columns
df_ser = pd.DataFrame({"A-column":s_a, "B-column":s_b})
df_ser
A-column | B-column | |
---|---|---|
r1 | a1 | b1 |
r2 | a2 | b2 |
r3 | a3 | b3 |
# Passing a Series to the DataFrame constructor to make a one-column DataFrame
df_ser = pd.DataFrame(s_a)
df_ser
0 | |
---|---|
r1 | a1 |
r2 | a2 |
r3 | a3 |
# Using to_frame() to convert a Series to DataFrame
ser_to_df = s_a.to_frame()
ser_to_df
0 | |
---|---|
r1 | a1 |
r2 | a2 |
r3 | a3 |
# Creating a DataFrame from a CSV file and specifying the Index column
elections = pd.read_csv("data/elections.csv", index_col="Candidate")
elections.head(5) # Using `.head` shows only the first 5 rows to save space
Year | Party | Popular vote | Result | % | |
---|---|---|---|---|---|
Candidate | |||||
Andrew Jackson | 1824 | Democratic-Republican | 151271 | loss | 57.210122 |
John Quincy Adams | 1824 | Democratic-Republican | 113142 | win | 42.789878 |
Andrew Jackson | 1828 | Democratic | 642806 | win | 56.203927 |
John Quincy Adams | 1828 | National Republican | 500897 | loss | 43.796073 |
Andrew Jackson | 1832 | Democratic | 702735 | win | 54.574789 |
elections.reset_index(inplace=True) # Need to reset the Index to keep 'Candidate' as one of the DataFrame columns
elections.set_index("Party", inplace=True) # This sets the Index to the "Party" column
elections
Candidate | Year | Popular vote | Result | % | |
---|---|---|---|---|---|
Party | |||||
Democratic-Republican | Andrew Jackson | 1824 | 151271 | loss | 57.210122 |
Democratic-Republican | John Quincy Adams | 1824 | 113142 | win | 42.789878 |
Democratic | Andrew Jackson | 1828 | 642806 | win | 56.203927 |
National Republican | John Quincy Adams | 1828 | 500897 | loss | 43.796073 |
Democratic | Andrew Jackson | 1832 | 702735 | win | 54.574789 |
... | ... | ... | ... | ... | ... |
Republican | Donald Trump | 2024 | 77303568 | win | 49.808629 |
Democratic | Kamala Harris | 2024 | 75019230 | loss | 48.336772 |
Green | Jill Stein | 2024 | 861155 | loss | 0.554864 |
Independent | Robert Kennedy | 2024 | 756383 | loss | 0.487357 |
Libertarian Party | Chase Oliver | 2024 | 650130 | loss | 0.418895 |
187 rows × 5 columns
DataFrame attributes: index, columns, and shape¶
elections.index
Index(['Democratic-Republican', 'Democratic-Republican', 'Democratic', 'National Republican', 'Democratic', 'National Republican', 'Anti-Masonic', 'Whig', 'Democratic', 'Whig', ... 'Green', 'Democratic', 'Republican', 'Libertarian', 'Green', 'Republican', 'Democratic', 'Green', 'Independent', 'Libertarian Party'], dtype='object', name='Party', length=187)
elections.columns
Index(['Candidate', 'Year', 'Popular vote', 'Result', '%'], dtype='object')
The Index can be reset to the default integer labels (0, 1, 2, ...) by calling reset_index() on a DataFrame. The previous Index is kept as a regular column.
elections.reset_index(inplace=True) # Revert the Index back to its default numeric labeling
elections
Party | Candidate | Year | Popular vote | Result | % | |
---|---|---|---|---|---|---|
0 | Democratic-Republican | Andrew Jackson | 1824 | 151271 | loss | 57.210122 |
1 | Democratic-Republican | John Quincy Adams | 1824 | 113142 | win | 42.789878 |
2 | Democratic | Andrew Jackson | 1828 | 642806 | win | 56.203927 |
3 | National Republican | John Quincy Adams | 1828 | 500897 | loss | 43.796073 |
4 | Democratic | Andrew Jackson | 1832 | 702735 | win | 54.574789 |
... | ... | ... | ... | ... | ... | ... |
182 | Republican | Donald Trump | 2024 | 77303568 | win | 49.808629 |
183 | Democratic | Kamala Harris | 2024 | 75019230 | loss | 48.336772 |
184 | Green | Jill Stein | 2024 | 861155 | loss | 0.554864 |
185 | Independent | Robert Kennedy | 2024 | 756383 | loss | 0.487357 |
186 | Libertarian Party | Chase Oliver | 2024 | 650130 | loss | 0.418895 |
187 rows × 6 columns
elections.shape
(187, 6)
Instructor Note: Return to slides!
Slicing in DataFrames¶
We can use .head to return only the first few rows of a DataFrame.
# Loading DataFrame again to keep the original ordering of columns
elections = pd.read_csv("data/elections.csv")
elections.head() # By default, calling .head with no argument will show the first 5 rows
Year | Candidate | Party | Popular vote | Result | % | |
---|---|---|---|---|---|---|
0 | 1824 | Andrew Jackson | Democratic-Republican | 151271 | loss | 57.210122 |
1 | 1824 | John Quincy Adams | Democratic-Republican | 113142 | win | 42.789878 |
2 | 1828 | Andrew Jackson | Democratic | 642806 | win | 56.203927 |
3 | 1828 | John Quincy Adams | National Republican | 500897 | loss | 43.796073 |
4 | 1832 | Andrew Jackson | Democratic | 702735 | win | 54.574789 |
elections.head(3)
Year | Candidate | Party | Popular vote | Result | % | |
---|---|---|---|---|---|---|
0 | 1824 | Andrew Jackson | Democratic-Republican | 151271 | loss | 57.210122 |
1 | 1824 | John Quincy Adams | Democratic-Republican | 113142 | win | 42.789878 |
2 | 1828 | Andrew Jackson | Democratic | 642806 | win | 56.203927 |
We can also use .tail to return only the last few rows.
elections.tail(5)
Year | Candidate | Party | Popular vote | Result | % | |
---|---|---|---|---|---|---|
182 | 2024 | Donald Trump | Republican | 77303568 | win | 49.808629 |
183 | 2024 | Kamala Harris | Democratic | 75019230 | loss | 48.336772 |
184 | 2024 | Jill Stein | Green | 861155 | loss | 0.554864 |
185 | 2024 | Robert Kennedy | Independent | 756383 | loss | 0.487357 |
186 | 2024 | Chase Oliver | Libertarian Party | 650130 | loss | 0.418895 |
Label-Based Extraction Using loc¶
Arguments to .loc
can be:
- A list.
- A slice (syntax is inclusive of the right-hand side of the slice).
- A single value.
loc
selects items by row and column label.
# Selection by a list
elections.loc[[87, 25, 179], ["Year", "Candidate", "Result"]]
Year | Candidate | Result | |
---|---|---|---|
87 | 1932 | Herbert Hoover | loss |
25 | 1860 | John C. Breckinridge | loss |
179 | 2020 | Donald Trump | loss |
# Selection by a list and a slice of columns
elections.loc[[87, 25, 179], "Popular vote":"%"]
Popular vote | Result | % | |
---|---|---|---|
87 | 15761254 | loss | 39.830594 |
25 | 848019 | loss | 18.138998 |
179 | 74216154 | loss | 46.858542 |
# Extracting all rows using a colon
elections.loc[:, ["Year", "Candidate", "Result"]]
Year | Candidate | Result | |
---|---|---|---|
0 | 1824 | Andrew Jackson | loss |
1 | 1824 | John Quincy Adams | win |
2 | 1828 | Andrew Jackson | win |
3 | 1828 | John Quincy Adams | loss |
4 | 1832 | Andrew Jackson | win |
... | ... | ... | ... |
182 | 2024 | Donald Trump | win |
183 | 2024 | Kamala Harris | loss |
184 | 2024 | Jill Stein | loss |
185 | 2024 | Robert Kennedy | loss |
186 | 2024 | Chase Oliver | loss |
187 rows × 3 columns
# Extracting all columns using a colon
elections.loc[[87, 25, 179], :]
Year | Candidate | Party | Popular vote | Result | % | |
---|---|---|---|---|---|---|
87 | 1932 | Herbert Hoover | Republican | 15761254 | loss | 39.830594 |
25 | 1860 | John C. Breckinridge | Southern Democratic | 848019 | loss | 18.138998 |
179 | 2020 | Donald Trump | Republican | 74216154 | loss | 46.858542 |
# Selection by a list and a single-column label
elections.loc[[87, 25, 179], "Popular vote"]
87 15761254 25 848019 179 74216154 Name: Popular vote, dtype: int64
# Note that if we pass "Popular vote" in a list, the output will be a DataFrame
elections.loc[[87, 25, 179], ["Popular vote"]]
Popular vote | |
---|---|
87 | 15761254 |
25 | 848019 |
179 | 74216154 |
# Selection by a row label and a column label
elections.loc[0, "Candidate"]
'Andrew Jackson'
Integer-Based Extraction Using iloc
¶
iloc
selects items by row and column integer position.
Arguments to .iloc
can be:
- A list.
- A slice (syntax is exclusive of the right-hand side of the slice).
- A single value.
# Select the rows at positions 1, 2, and 3.
# Select the columns at positions 0, 1, and 2.
# Remember that Python indexing begins at position 0!
elections.iloc[[1, 2, 3], [0, 1, 2]]
Year | Candidate | Party | |
---|---|---|---|
1 | 1824 | John Quincy Adams | Democratic-Republican |
2 | 1828 | Andrew Jackson | Democratic |
3 | 1828 | John Quincy Adams | National Republican |
# Index-based extraction using a list of rows and a slice of column indices
elections.iloc[[1, 2, 3], 0:3]
Year | Candidate | Party | |
---|---|---|---|
1 | 1824 | John Quincy Adams | Democratic-Republican |
2 | 1828 | Andrew Jackson | Democratic |
3 | 1828 | John Quincy Adams | National Republican |
# Selecting all rows using a colon
elections.iloc[:, 0:3]
Year | Candidate | Party | |
---|---|---|---|
0 | 1824 | Andrew Jackson | Democratic-Republican |
1 | 1824 | John Quincy Adams | Democratic-Republican |
2 | 1828 | Andrew Jackson | Democratic |
3 | 1828 | John Quincy Adams | National Republican |
4 | 1832 | Andrew Jackson | Democratic |
... | ... | ... | ... |
182 | 2024 | Donald Trump | Republican |
183 | 2024 | Kamala Harris | Democratic |
184 | 2024 | Jill Stein | Green |
185 | 2024 | Robert Kennedy | Independent |
186 | 2024 | Chase Oliver | Libertarian Party |
187 rows × 3 columns
elections.iloc[[1, 2, 3], 1]
1 John Quincy Adams 2 Andrew Jackson 3 John Quincy Adams Name: Candidate, dtype: object
# Extracting the value at row 0 and the second column
elections.iloc[0,1]
'Andrew Jackson'
Context-dependent Extraction using []
¶
We could technically do anything we want using loc or iloc. However, in practice, the [] operator is often used instead to yield more concise code.
[] is a bit trickier to understand than loc or iloc, but it achieves essentially the same functionality. The difference is that [] is context-dependent.
[]
only takes one argument, which may be:
- A slice of row integers.
- A list of column labels.
- A single column label.
If we provide a slice of row numbers, we get the numbered rows.
elections[3:7]
Year | Candidate | Party | Popular vote | Result | % | |
---|---|---|---|---|---|---|
3 | 1828 | John Quincy Adams | National Republican | 500897 | loss | 43.796073 |
4 | 1832 | Andrew Jackson | Democratic | 702735 | win | 54.574789 |
5 | 1832 | Henry Clay | National Republican | 484205 | loss | 37.603628 |
6 | 1832 | William Wirt | Anti-Masonic | 100715 | loss | 7.821583 |
If we provide a list of column names, we get the listed columns.
elections[["Year", "Candidate", "Result"]]
Year | Candidate | Result | |
---|---|---|---|
0 | 1824 | Andrew Jackson | loss |
1 | 1824 | John Quincy Adams | win |
2 | 1828 | Andrew Jackson | win |
3 | 1828 | John Quincy Adams | loss |
4 | 1832 | Andrew Jackson | win |
... | ... | ... | ... |
182 | 2024 | Donald Trump | win |
183 | 2024 | Kamala Harris | loss |
184 | 2024 | Jill Stein | loss |
185 | 2024 | Robert Kennedy | loss |
186 | 2024 | Chase Oliver | loss |
187 rows × 3 columns
And if we provide a single column name we get back just that column, stored as a Series
.
elections["Candidate"]
0 Andrew Jackson 1 John Quincy Adams 2 Andrew Jackson 3 John Quincy Adams 4 Andrew Jackson ... 182 Donald Trump 183 Kamala Harris 184 Jill Stein 185 Robert Kennedy 186 Chase Oliver Name: Candidate, Length: 187, dtype: object
example = pd.Series([4, 5, 6], index=["one", "two", "three"])
example[example > 4].values
array([5, 6])
Question 2
We are expecting to get the following output:
df1 = pd.DataFrame([["A", "B"], [84, 79]], columns=["Group", "Score"])
df1
Group | Score | |
---|---|---|
0 | A | B |
1 | 84 | 79 |
df2 = pd.DataFrame([["A", 84], ["B", 79]], columns=["Group", "Score"])
df2
Group | Score | |
---|---|---|
0 | A | 84 |
1 | B | 79 |
df3 = pd.DataFrame({"A": 84, "B": 79}, columns=["Group", "Score"])
df3
Group | Score |
---|
df4 = pd.DataFrame({"Group": ["A", "B"], "Score": [84, 79]})
df4
Group | Score | |
---|---|---|
0 | A | 84 |
1 | B | 79 |
df5 = pd.DataFrame([{"Group": "A", "Score": 84}, {"Group": "B", "Score": 79}])
df5
Group | Score | |
---|---|---|
0 | A | 84 |
1 | B | 79 |
df = pd.DataFrame({"c1":[1, 2, 3, 4], "c2":[2, 4, 6, 8]})
df.columns
Index(['c1', 'c2'], dtype='object')
Questions 3, 4, and 5
Which of the following statements correctly return the value "blue fish" from the "weird" DataFrame?
weird = pd.DataFrame({"a":["one fish", "two fish"],
"b":["red fish", "blue fish"]})
weird
a | b | |
---|---|---|
0 | one fish | red fish |
1 | two fish | blue fish |
weird.iloc[1, 1]
'blue fish'
weird.loc[1, 1]
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/core/indexes/base.py:3812, in Index.get_loc(self, key) 3811 try: -> 3812 return self._engine.get_loc(casted_key) 3813 except KeyError as err: File pandas/_libs/index.pyx:167, in pandas._libs.index.IndexEngine.get_loc() File pandas/_libs/index.pyx:196, in pandas._libs.index.IndexEngine.get_loc() File pandas/_libs/hashtable_class_helper.pxi:7088, in pandas._libs.hashtable.PyObjectHashTable.get_item() File pandas/_libs/hashtable_class_helper.pxi:7096, in pandas._libs.hashtable.PyObjectHashTable.get_item() KeyError: 1 The above exception was the direct cause of the following exception: KeyError Traceback (most recent call last) Cell In[58], line 1 ----> 1 weird.loc[1, 1] File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/core/indexing.py:1183, in _LocationIndexer.__getitem__(self, key) 1181 key = tuple(com.apply_if_callable(x, self.obj) for x in key) 1182 if self._is_scalar_access(key): -> 1183 return self.obj._get_value(*key, takeable=self._takeable) 1184 return self._getitem_tuple(key) 1185 else: 1186 # we by definition only have the 0th axis File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/core/frame.py:4219, in DataFrame._get_value(self, index, col, takeable) 4216 series = self._ixs(col, axis=1) 4217 return series._values[index] -> 4219 series = self._get_item_cache(col) 4220 engine = self.index._engine 4222 if not isinstance(self.index, MultiIndex): 4223 # CategoricalIndex: Trying to use the engine fastpath may give incorrect 4224 # results if our categories are integers that dont match our codes 4225 # IntervalIndex: IntervalTree has no get_loc File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/core/frame.py:4643, in DataFrame._get_item_cache(self, item) 4638 res = cache.get(item) 4639 if res is None: 4640 # All places that call 
_get_item_cache have unique columns, 4641 # pending resolution of GH#33047 -> 4643 loc = self.columns.get_loc(item) 4644 res = self._ixs(loc, axis=1) 4646 cache[item] = res File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/core/indexes/base.py:3819, in Index.get_loc(self, key) 3814 if isinstance(casted_key, slice) or ( 3815 isinstance(casted_key, abc.Iterable) 3816 and any(isinstance(x, slice) for x in casted_key) 3817 ): 3818 raise InvalidIndexError(key) -> 3819 raise KeyError(key) from err 3820 except TypeError: 3821 # If we have a listlike key, _check_indexing_error will raise 3822 # InvalidIndexError. Otherwise we fall through and re-raise 3823 # the TypeError. 3824 self._check_indexing_error(key) KeyError: 1
weird
a | b | |
---|---|---|
0 | one fish | red fish |
1 | two fish | blue fish |
weird.loc[1, 'b']
'blue fish'
weird
a | b | |
---|---|---|
0 | one fish | red fish |
1 | two fish | blue fish |
weird.loc[[1,1]]
a | b | |
---|---|---|
1 | two fish | blue fish |
1 | two fish | blue fish |
weird
a | b | |
---|---|---|
0 | one fish | red fish |
1 | two fish | blue fish |
weird.loc[[1,'b']]
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) Cell In[64], line 1 ----> 1 weird.loc[[1,'b']] File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/core/indexing.py:1191, in _LocationIndexer.__getitem__(self, key) 1189 maybe_callable = com.apply_if_callable(key, self.obj) 1190 maybe_callable = self._check_deprecated_callable_usage(key, maybe_callable) -> 1191 return self._getitem_axis(maybe_callable, axis=axis) File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/core/indexing.py:1420, in _LocIndexer._getitem_axis(self, key, axis) 1417 if hasattr(key, "ndim") and key.ndim > 1: 1418 raise ValueError("Cannot index with multidimensional key") -> 1420 return self._getitem_iterable(key, axis=axis) 1422 # nested tuple slicing 1423 if is_nested_tuple(key, labels): File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/core/indexing.py:1360, in _LocIndexer._getitem_iterable(self, key, axis) 1357 self._validate_key(key, axis) 1359 # A collection of keys -> 1360 keyarr, indexer = self._get_listlike_indexer(key, axis) 1361 return self.obj._reindex_with_indexers( 1362 {axis: [keyarr, indexer]}, copy=True, allow_dups=True 1363 ) File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/core/indexing.py:1558, in _LocIndexer._get_listlike_indexer(self, key, axis) 1555 ax = self.obj._get_axis(axis) 1556 axis_name = self.obj._get_axis_name(axis) -> 1558 keyarr, indexer = ax._get_indexer_strict(key, axis_name) 1560 return keyarr, indexer File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/core/indexes/base.py:6212, in Index._get_indexer_strict(self, key, axis_name) 6209 else: 6210 keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr) -> 6212 self._raise_if_missing(keyarr, indexer, axis_name) 6214 keyarr = self.take(indexer) 6215 if isinstance(key, Index): 6216 # GH 42790 - Preserve name from an Index File 
/srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/core/indexes/base.py:6264, in Index._raise_if_missing(self, key, indexer, axis_name) 6261 raise KeyError(f"None of [{key}] are in the [{axis_name}]") 6263 not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique()) -> 6264 raise KeyError(f"{not_found} not in index") KeyError: "['b'] not in index"
weird
a | b | |
---|---|---|
0 | one fish | red fish |
1 | two fish | blue fish |
weird.iloc[1, 1]
'blue fish'
weird
a | b | |
---|---|---|
0 | one fish | red fish |
1 | two fish | blue fish |
weird.iloc[1, :]
a two fish b blue fish Name: 1, dtype: object
weird
a | b | |
---|---|---|
0 | one fish | red fish |
1 | two fish | blue fish |
weird.iloc[2,2]
--------------------------------------------------------------------------- IndexError Traceback (most recent call last) Cell In[70], line 1 ----> 1 weird.iloc[2,2] File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/core/indexing.py:1183, in _LocationIndexer.__getitem__(self, key) 1181 key = tuple(com.apply_if_callable(x, self.obj) for x in key) 1182 if self._is_scalar_access(key): -> 1183 return self.obj._get_value(*key, takeable=self._takeable) 1184 return self._getitem_tuple(key) 1185 else: 1186 # we by definition only have the 0th axis File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/core/frame.py:4216, in DataFrame._get_value(self, index, col, takeable) 4197 """ 4198 Quickly retrieve single value at passed column and index. 4199 (...) 4213 `self.columns._index_as_unique`; Caller is responsible for checking. 4214 """ 4215 if takeable: -> 4216 series = self._ixs(col, axis=1) 4217 return series._values[index] 4219 series = self._get_item_cache(col) File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/core/frame.py:4013, in DataFrame._ixs(self, i, axis) 4009 return result 4011 # icol 4012 else: -> 4013 label = self.columns[i] 4015 col_mgr = self._mgr.iget(i) 4016 result = self._box_col_values(col_mgr, i) File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/core/indexes/base.py:5401, in Index.__getitem__(self, key) 5398 if is_integer(key) or is_float(key): 5399 # GH#44051 exclude bool, which would return a 2d ndarray 5400 key = com.cast_scalar_indexer(key) -> 5401 return getitem(key) 5403 if isinstance(key, slice): 5404 # This case is separated from the conditional above to avoid 5405 # pessimization com.is_bool_indexer and ndim checks. 5406 return self._getitem_slice(key) IndexError: index 2 is out of bounds for axis 0 with size 2
weird
a | b | |
---|---|---|
0 | one fish | red fish |
1 | two fish | blue fish |
weird.iloc[0,'b']
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/core/indexing.py:966, in _LocationIndexer._validate_tuple_indexer(self, key) 965 try: --> 966 self._validate_key(k, i) 967 except ValueError as err: File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/core/indexing.py:1614, in _iLocIndexer._validate_key(self, key, axis) 1613 else: -> 1614 raise ValueError(f"Can only index by location with a [{self._valid_types}]") ValueError: Can only index by location with a [integer, integer slice (START point is INCLUDED, END point is EXCLUDED), listlike of integers, boolean array] The above exception was the direct cause of the following exception: ValueError Traceback (most recent call last) Cell In[72], line 1 ----> 1 weird.iloc[0,'b'] File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/core/indexing.py:1184, in _LocationIndexer.__getitem__(self, key) 1182 if self._is_scalar_access(key): 1183 return self.obj._get_value(*key, takeable=self._takeable) -> 1184 return self._getitem_tuple(key) 1185 else: 1186 # we by definition only have the 0th axis 1187 axis = self.axis or 0 File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/core/indexing.py:1690, in _iLocIndexer._getitem_tuple(self, tup) 1689 def _getitem_tuple(self, tup: tuple): -> 1690 tup = self._validate_tuple_indexer(tup) 1691 with suppress(IndexingError): 1692 return self._getitem_lowerdim(tup) File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/core/indexing.py:968, in _LocationIndexer._validate_tuple_indexer(self, key) 966 self._validate_key(k, i) 967 except ValueError as err: --> 968 raise ValueError( 969 "Location based indexing can only have " 970 f"[{self._valid_types}] types" 971 ) from err 972 return key ValueError: Location based indexing can only have [integer, integer slice (START point is INCLUDED, END point is 
EXCLUDED), listlike of integers, boolean array] types
weird
a | b | |
---|---|---|
0 | one fish | red fish |
1 | two fish | blue fish |
weird[1]['b']
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/core/indexes/base.py:3812, in Index.get_loc(self, key) 3811 try: -> 3812 return self._engine.get_loc(casted_key) 3813 except KeyError as err: File pandas/_libs/index.pyx:167, in pandas._libs.index.IndexEngine.get_loc() File pandas/_libs/index.pyx:196, in pandas._libs.index.IndexEngine.get_loc() File pandas/_libs/hashtable_class_helper.pxi:7088, in pandas._libs.hashtable.PyObjectHashTable.get_item() File pandas/_libs/hashtable_class_helper.pxi:7096, in pandas._libs.hashtable.PyObjectHashTable.get_item() KeyError: 1 The above exception was the direct cause of the following exception: KeyError Traceback (most recent call last) Cell In[74], line 1 ----> 1 weird[1]['b'] File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/core/frame.py:4107, in DataFrame.__getitem__(self, key) 4105 if self.columns.nlevels > 1: 4106 return self._getitem_multilevel(key) -> 4107 indexer = self.columns.get_loc(key) 4108 if is_integer(indexer): 4109 indexer = [indexer] File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/core/indexes/base.py:3819, in Index.get_loc(self, key) 3814 if isinstance(casted_key, slice) or ( 3815 isinstance(casted_key, abc.Iterable) 3816 and any(isinstance(x, slice) for x in casted_key) 3817 ): 3818 raise InvalidIndexError(key) -> 3819 raise KeyError(key) from err 3820 except TypeError: 3821 # If we have a listlike key, _check_indexing_error will raise 3822 # InvalidIndexError. Otherwise we fall through and re-raise 3823 # the TypeError. 3824 self._check_indexing_error(key) KeyError: 1
weird
a | b | |
---|---|---|
0 | one fish | red fish |
1 | two fish | blue fish |
weird['b'][1]
'blue fish'
weird
a | b | |
---|---|---|
0 | one fish | red fish |
1 | two fish | blue fish |
weird[1,'b']
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/core/indexes/base.py:3812, in Index.get_loc(self, key) 3811 try: -> 3812 return self._engine.get_loc(casted_key) 3813 except KeyError as err: File pandas/_libs/index.pyx:167, in pandas._libs.index.IndexEngine.get_loc() File pandas/_libs/index.pyx:196, in pandas._libs.index.IndexEngine.get_loc() File pandas/_libs/hashtable_class_helper.pxi:7088, in pandas._libs.hashtable.PyObjectHashTable.get_item() File pandas/_libs/hashtable_class_helper.pxi:7096, in pandas._libs.hashtable.PyObjectHashTable.get_item() KeyError: (1, 'b') The above exception was the direct cause of the following exception: KeyError Traceback (most recent call last) Cell In[78], line 1 ----> 1 weird[1,'b'] File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/core/frame.py:4107, in DataFrame.__getitem__(self, key) 4105 if self.columns.nlevels > 1: 4106 return self._getitem_multilevel(key) -> 4107 indexer = self.columns.get_loc(key) 4108 if is_integer(indexer): 4109 indexer = [indexer] File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/core/indexes/base.py:3819, in Index.get_loc(self, key) 3814 if isinstance(casted_key, slice) or ( 3815 isinstance(casted_key, abc.Iterable) 3816 and any(isinstance(x, slice) for x in casted_key) 3817 ): 3818 raise InvalidIndexError(key) -> 3819 raise KeyError(key) from err 3820 except TypeError: 3821 # If we have a listlike key, _check_indexing_error will raise 3822 # InvalidIndexError. Otherwise we fall through and re-raise 3823 # the TypeError. 3824 self._check_indexing_error(key) KeyError: (1, 'b')
weird
a | b | |
---|---|---|
0 | one fish | red fish |
1 | two fish | blue fish |
weird[[1,'b']]
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) Cell In[80], line 1 ----> 1 weird[[1,'b']] File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/core/frame.py:4113, in DataFrame.__getitem__(self, key) 4111 if is_iterator(key): 4112 key = list(key) -> 4113 indexer = self.columns._get_indexer_strict(key, "columns")[1] 4115 # take() does not accept boolean indexers 4116 if getattr(indexer, "dtype", None) == bool: File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/core/indexes/base.py:6212, in Index._get_indexer_strict(self, key, axis_name) 6209 else: 6210 keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr) -> 6212 self._raise_if_missing(keyarr, indexer, axis_name) 6214 keyarr = self.take(indexer) 6215 if isinstance(key, Index): 6216 # GH 42790 - Preserve name from an Index File /srv/conda/envs/notebook/lib/python3.11/site-packages/pandas/core/indexes/base.py:6264, in Index._raise_if_missing(self, key, indexer, axis_name) 6261 raise KeyError(f"None of [{key}] are in the [{axis_name}]") 6263 not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique()) -> 6264 raise KeyError(f"{not_found} not in index") KeyError: '[1] not in index'