import pandas as pd
import sqlalchemy
engine = sqlalchemy.create_engine("sqlite:///nautical.db")
conn = engine.connect()
For lecture I am also going to use a postgres database.
import sqlalchemy
engine = sqlalchemy.create_engine("postgres://jegonzal:@localhost:5432/data100")
conn = engine.connect()
script = """
DROP TABLE IF EXISTS sailors;
DROP TABLE IF EXISTS boats;
DROP TABLE IF EXISTS reserves;
CREATE TABLE sailors (
sid INTEGER,
name CHAR(20),
PRIMARY KEY (sid));
CREATE TABLE boats (
bid INTEGER,
name CHAR (20),
PRIMARY KEY (bid));
CREATE TABLE reserves (
bid INTEGER,
sid INTEGER,
day DATE);
INSERT INTO sailors VALUES
(22, 'Alice'),
(31, 'Bob'),
(95, 'Nora');
INSERT INTO boats VALUES
(101, 'Titanic'),
(102, 'Beagle'),
(103, 'Bismarck'),
(104, 'Clipper');
INSERT INTO reserves VALUES
(101, 22, '10/10/96'),
(103, 95, '11/12/96'),
(41, 38, '8/11/18');
"""
for line in script.split(";"):
if len(line.strip()) > 0: conn.execute(line)
pd.read_sql("""
SELECT * FROM sailors
""", conn)
boats = pd.read_sql("""
SELECT * FROM boats
""", conn)
boats
reserves = pd.read_sql("""
SELECT * FROM reserves
""", conn)
reserves
The normal join only keeps rows that match the join predicate.
bid | name | |
---|---|---|
0 | 101 | Titanic |
1 | 102 | Beagle |
2 | 103 | Bismarck |
3 | 104 | Clipper |
bid | sid | day | |
---|---|---|---|
0 | 101 | 22 | 1996-10-10 |
1 | 103 | 95 | 1996-11-12 |
2 | 41 | 38 | 2018-08-11 |
pd.read_sql("""
SELECT *
FROM boats b, reserves r
WHERE b.bid = r.bid
""", conn)
pd.read_sql("""
SELECT *
FROM boats b INNER JOIN reserves r
ON b.bid = r.bid
""", conn)
Same as inner join but if there is a row on the left table that failed to join with a row on the right table it is also included with null values in place of all entries in the right table.
bid | name | |
---|---|---|
0 | 101 | Titanic |
1 | 102 | Beagle |
2 | 103 | Bismarck |
3 | 104 | Clipper |
bid | sid | day | |
---|---|---|---|
0 | 101 | 22 | 1996-10-10 |
1 | 103 | 95 | 1996-11-12 |
2 | 41 | 38 | 2018-08-11 |
pd.read_sql("""
SELECT *
FROM boats b LEFT JOIN reserves r
ON b.bid = r.bid
""", conn, )
Just like left join but keeping values on the right side.
bid | name | |
---|---|---|
0 | 101 | Titanic |
1 | 102 | Beagle |
2 | 103 | Bismarck |
3 | 104 | Clipper |
bid | sid | day | |
---|---|---|---|
0 | 101 | 22 | 1996-10-10 |
1 | 103 | 95 | 1996-11-12 |
2 | 41 | 38 | 2018-08-11 |
pd.read_sql("""
SELECT *
FROM boats b RIGHT JOIN reserves r
ON b.bid = r.bid
""", conn)
pd.read_sql("""
SELECT *
FROM reserves r LEFT JOIN boats b
ON b.bid = r.bid
""", conn)
The outer ensures that at least one row from both tables appears in the final result even if there are no matches by adding null values.
SQLite Note
Full outer joins are not support in sqlite.
bid | name | |
---|---|---|
0 | 101 | Titanic |
1 | 102 | Beagle |
2 | 103 | Bismarck |
3 | 104 | Clipper |
bid | sid | day | |
---|---|---|---|
0 | 101 | 22 | 1996-10-10 |
1 | 103 | 95 | 1996-11-12 |
2 | 41 | 38 | 2018-08-11 |
pd.read_sql("""
SELECT *
FROM boats b FULL JOIN reserves r
ON b.bid = r.bid
""", conn)