import pickle
# Protocol 0 is pickle's original text-oriented format, so the payload is printable ASCII.
pickle.dumps(1234, protocol=0)

b'I1234\n.'


# With no protocol argument the interpreter's default binary protocol is used
# (the \x80\x04 header in the recorded output marks protocol 4).
pickle.dumps([1])

b'\x80\x04\x95\x06\x00\x00\x00\x00\x00\x00\x00]\x94K\x01a.'


# A heterogeneous sample: numbers, a string, a set and a dict holding a complex value.
a = [
    1,
    1.5,
    "hello",
    {3, 4},
    {'int': 9, 'real': 9.0, 'complex': 9j},
]

# Pickles are bytes, so the file is opened in binary mode for writing ...
with open('data.pkl', 'wb') as f:
    f.write(pickle.dumps(a))

# ... and again in binary mode to rebuild the object from those bytes.
with open('data.pkl', 'rb') as f:
    b = pickle.loads(f.read())

b

[1, 1.5, 'hello', {3, 4}, {'int': 9, 'real': 9.0, 'complex': 9j}]


import shelve

x = 1234

# A shelf is a persistent mapping: each value is pickled and stored under its string key.
with shelve.open('spam') as db:
    db.update({
        'eggs': 'eggs',
        'numbers': [1, 2, 3, 9.99, 1j],
        'xx': x,
    })


# List the files backing the shelf; how many files appear and which suffixes
# they carry depends on the dbm backend shelve picked on this machine.
%ls -l spam.*

-rw-r--r-- 1 jovyan jovyan   53 Nov  3 07:08 spam.bak
-rw-r--r-- 1 jovyan jovyan 1030 Nov  3 07:08 spam.dat
-rw-r--r-- 1 jovyan jovyan   53 Nov  3 07:08 spam.dir


# Reopen the shelf and pull two of the stored values back into memory.
with shelve.open('spam') as db:
    e, n = db['eggs'], db['numbers']

# The "=" f-string specifier echoes the expression text next to its repr.
print(f'{e = }', f'{n = }', sep='\n')

e = 'eggs'
n = [1, 2, 3, 9.99, 1j]


# Open the shelf without a context manager: the handle stays open after this
# statement and has to be closed explicitly later.
db = shelve.open('spam')


# A shelf supports the mapping API, so items() yields each key with its stored value.
for var, data in db.items():
    print(f'{var} = {data}')

eggs = eggs
numbers = [1, 2, 3, 9.99, 1j]
xx = 1234


# Close the shelf that was opened by hand with shelve.open().
db.close()


import numpy as np

# Round-trip a single array through NumPy's .npy format.
fname = 'arr.npy'
a = np.array([1, 2, 3.4])
np.save(fname, a)
b = np.load(fname)

# Element-wise comparison reduced to one truth value: did every entry survive?
np.all(a == b)

True


fname = 'arrays.npz'
# Each keyword name becomes the key under which that value is stored in the archive.
# NOTE(review): np.random.normal(10) draws ONE sample with mean 10 (the first
# argument is `loc`, not a count). If ten samples were intended this should be
# np.random.normal(size=10) — confirm.
np.savez(fname, a=a, b=np.random.normal(10), c=3.4)


# Loading an .npz archive gives a dict-like object; .files lists the stored keys.
# NOTE(review): the archive handle is left open here — consider closing it
# (or using it in a `with` block) once the arrays have been read.
arrays = np.load(fname)
arrays.files

['a', 'b', 'c']


# Fetch a stored array by the keyword it was saved under.
arrays['a']

array([1. , 2. , 3.4])


# The plain float saved as c=3.4 comes back as a zero-dimensional array, not a Python scalar.
arrays['c']

array(3.4)


import json
# A nested structure made only of list, dict, str, None, float and int values.
a = ['foo', {'bar': ['baz', None, 1.0, 2]}]
# dumps() returns the JSON text as a str; None is written as null.
json.dumps(a)

'["foo", {"bar": ["baz", null, 1.0, 2]}]'


# JSON files are conventionally UTF-8, so pin the encoding instead of inheriting
# whatever the platform locale happens to be.
with open('test.json', 'w', encoding='utf-8') as f:
    json.dump(a, f)

# Read the file back with the same encoding and rebuild the Python object.
with open('test.json', 'r', encoding='utf-8') as f:
    b = json.load(f)

b

['foo', {'bar': ['baz', None, 1.0, 2]}]


# For this value the object read back from the JSON file compares equal to the original.
a == b

True


# Same structure as before, but the inner sequence is a tuple instead of a list.
c = ['foo', {'bar': ('baz', None, 1.0, 2)}]

# JSON files are conventionally UTF-8, so pin the encoding instead of inheriting
# whatever the platform locale happens to be.
with open('test2.json', 'w', encoding='utf-8') as f:
    json.dump(c, f)

with open('test2.json', 'r', encoding='utf-8') as f:
    d = json.load(f)

# JSON has a single array type, so the tuple is written as an array and read
# back as a list; the round trip therefore does not compare equal.
c == d

False

# Inspect what was read back: the tuple has become a list.
d

['foo', {'bar': ['baz', None, 1.0, 2]}]


from IPython.display import JSON

# Wrap the object in IPython's JSON display class so a supporting front-end can render it.
JSON(c)

<IPython.core.display.JSON object>


# A GeoJSON "Feature": one Point geometry plus free-form properties.
classroom = {
    "type": "Feature",
    "geometry": {
        "type": "Point",
        # GeoJSON positions are written longitude first, then latitude.
        "coordinates": [-122.25915, 37.87125],
    },
    "properties": {
        "name": "Wheeler Hall Auditorium",
    },
}

# Show the feature through the generic JSON display class.
JSON(classroom)

<IPython.core.display.JSON object>


from IPython.display import GeoJSON

# Show the same dict through IPython's GeoJSON display class instead of the generic JSON one.
GeoJSON(classroom)

<IPython.display.GeoJSON object>


from pathlib import Path

import pandas as pd

# Load the monthly CO2 table from the shared data folder under the user's home directory.
# NOTE(review): the displayed rows contain -99.99 in every data column for some
# months, which looks like a missing-value sentinel — confirm, and consider
# passing na_values if so.
df = pd.read_csv(
    Path.home() / "shared" / "climate-data" / "monthly_in_situ_co2_mlo_cleaned.csv"
)
df


# Write the frame out in Feather format, then list the file to see its size on disk.
df.to_feather("co2.fth")
%ls -l co2*

-rw-r--r-- 1 jovyan jovyan 32218 Nov  3 07:10 co2.fth


# Read the Feather file back into a new DataFrame and display it.
df2 = pd.read_feather("co2.fth")
df2


from pathlib import Path
import xarray as xr

# Shared data folder under the user's home directory.
DATA_DIR = Path.home() / 'shared' / 'climate-data'

# Open the ERA5 monthly NetCDF file as an xarray Dataset and display its summary.
ds = xr.open_dataset(DATA_DIR.joinpath("era5_monthly_2deg_aws_v20210920.nc"))
ds

<xarray.Dataset>
Dimensions:                                                                                   (
                                                                                               time: 504,
                                                                                               latitude: 90,
                                                                                               longitude: 180)
Coordinates:
  * time                                                                                      (time) datetime64[ns] ...
  * latitude                                                                                  (latitude) float32 ...
  * longitude                                                                                 (longitude) float32 ...
Data variables: (12/15)
    air_pressure_at_mean_sea_level                                                            (time, latitude, longitude) float32 ...
    air_temperature_at_2_metres                                                               (time, latitude, longitude) float32 ...
    air_temperature_at_2_metres_1hour_Maximum                                                 (time, latitude, longitude) float32 ...
    air_temperature_at_2_metres_1hour_Minimum                                                 (time, latitude, longitude) float32 ...
    dew_point_temperature_at_2_metres                                                         (time, latitude, longitude) float32 ...
    eastward_wind_at_100_metres                                                               (time, latitude, longitude) float32 ...
    ...                                                                                        ...
    northward_wind_at_100_metres                                                              (time, latitude, longitude) float32 ...
    northward_wind_at_10_metres                                                               (time, latitude, longitude) float32 ...
    precipitation_amount_1hour_Accumulation                                                   (time, latitude, longitude) float32 ...
    sea_surface_temperature                                                                   (time, latitude, longitude) float32 ...
    snow_density                                                                              (time, latitude, longitude) float32 ...
    surface_air_pressure                                                                      (time, latitude, longitude) float32 ...
Attributes:
    institution:  ECMWF
    source:       Reanalysis
    title:        ERA5 forecasts

array(['1979-01-16T11:30:00.000000000', '1979-02-14T23:30:00.000000000',
       '1979-03-16T11:30:00.000000000', ..., '2020-10-16T11:30:00.000000000',
       '2020-11-15T23:30:00.000000000', '2020-12-16T11:30:00.000000000'],
      dtype='datetime64[ns]')

array([-88.875, -86.875, -84.875, -82.875, -80.875, -78.875, -76.875, -74.875,
       -72.875, -70.875, -68.875, -66.875, -64.875, -62.875, -60.875, -58.875,
       -56.875, -54.875, -52.875, -50.875, -48.875, -46.875, -44.875, -42.875,
       -40.875, -38.875, -36.875, -34.875, -32.875, -30.875, -28.875, -26.875,
       -24.875, -22.875, -20.875, -18.875, -16.875, -14.875, -12.875, -10.875,
        -8.875,  -6.875,  -4.875,  -2.875,  -0.875,   1.125,   3.125,   5.125,
         7.125,   9.125,  11.125,  13.125,  15.125,  17.125,  19.125,  21.125,
        23.125,  25.125,  27.125,  29.125,  31.125,  33.125,  35.125,  37.125,
        39.125,  41.125,  43.125,  45.125,  47.125,  49.125,  51.125,  53.125,
        55.125,  57.125,  59.125,  61.125,  63.125,  65.125,  67.125,  69.125,
        71.125,  73.125,  75.125,  77.125,  79.125,  81.125,  83.125,  85.125,
        87.125,  89.125], dtype=float32)

array([  0.875,   2.875,   4.875,   6.875,   8.875,  10.875,  12.875,  14.875,
        16.875,  18.875,  20.875,  22.875,  24.875,  26.875,  28.875,  30.875,
        32.875,  34.875,  36.875,  38.875,  40.875,  42.875,  44.875,  46.875,
        48.875,  50.875,  52.875,  54.875,  56.875,  58.875,  60.875,  62.875,
        64.875,  66.875,  68.875,  70.875,  72.875,  74.875,  76.875,  78.875,
        80.875,  82.875,  84.875,  86.875,  88.875,  90.875,  92.875,  94.875,
        96.875,  98.875, 100.875, 102.875, 104.875, 106.875, 108.875, 110.875,
       112.875, 114.875, 116.875, 118.875, 120.875, 122.875, 124.875, 126.875,
       128.875, 130.875, 132.875, 134.875, 136.875, 138.875, 140.875, 142.875,
       144.875, 146.875, 148.875, 150.875, 152.875, 154.875, 156.875, 158.875,
       160.875, 162.875, 164.875, 166.875, 168.875, 170.875, 172.875, 174.875,
       176.875, 178.875, 180.875, 182.875, 184.875, 186.875, 188.875, 190.875,
       192.875, 194.875, 196.875, 198.875, 200.875, 202.875, 204.875, 206.875,
       208.875, 210.875, 212.875, 214.875, 216.875, 218.875, 220.875, 222.875,
       224.875, 226.875, 228.875, 230.875, 232.875, 234.875, 236.875, 238.875,
       240.875, 242.875, 244.875, 246.875, 248.875, 250.875, 252.875, 254.875,
       256.875, 258.875, 260.875, 262.875, 264.875, 266.875, 268.875, 270.875,
       272.875, 274.875, 276.875, 278.875, 280.875, 282.875, 284.875, 286.875,
       288.875, 290.875, 292.875, 294.875, 296.875, 298.875, 300.875, 302.875,
       304.875, 306.875, 308.875, 310.875, 312.875, 314.875, 316.875, 318.875,
       320.875, 322.875, 324.875, 326.875, 328.875, 330.875, 332.875, 334.875,
       336.875, 338.875, 340.875, 342.875, 344.875, 346.875, 348.875, 350.875,
       352.875, 354.875, 356.875, 358.875], dtype=float32)

[8164800 values with dtype=float32]

[8164800 values with dtype=float32]

[8164800 values with dtype=float32]

[8164800 values with dtype=float32]

[8164800 values with dtype=float32]

[8164800 values with dtype=float32]


%%time
# Zarr store addressed by an HTTPS URL on S3 (no local download step in this cell).
file_aws = "https://mur-sst.s3.us-west-2.amazonaws.com/zarr-v1"
# consolidated=True tells xarray to use the store's consolidated metadata.
# The recorded repr shows the data variables as chunked dask arrays.
ds_sst = xr.open_zarr(file_aws, consolidated=True)
ds_sst

CPU times: user 1.28 s, sys: 119 ms, total: 1.4 s
Wall time: 3.18 s

<xarray.Dataset>
Dimensions:           (time: 6443, lat: 17999, lon: 36000)
Coordinates:
  * lat               (lat) float32 -89.99 -89.98 -89.97 ... 89.97 89.98 89.99
  * lon               (lon) float32 -180.0 -180.0 -180.0 ... 180.0 180.0 180.0
  * time              (time) datetime64[ns] 2002-06-01T09:00:00 ... 2020-01-2...
Data variables:
    analysed_sst      (time, lat, lon) float32 dask.array<chunksize=(5, 1799, 3600), meta=np.ndarray>
    analysis_error    (time, lat, lon) float32 dask.array<chunksize=(5, 1799, 3600), meta=np.ndarray>
    mask              (time, lat, lon) float32 dask.array<chunksize=(5, 1799, 3600), meta=np.ndarray>
    sea_ice_fraction  (time, lat, lon) float32 dask.array<chunksize=(5, 1799, 3600), meta=np.ndarray>
Attributes: (12/47)
    Conventions:                CF-1.7
    Metadata_Conventions:       Unidata Observation Dataset v1.0
    acknowledgment:             Please acknowledge the use of these data with...
    cdm_data_type:              grid
    comment:                    MUR = "Multi-scale Ultra-high Resolution"
    creator_email:              ghrsst@podaac.jpl.nasa.gov
    ...                         ...
    summary:                    A merged, multi-sensor L4 Foundation SST anal...
    time_coverage_end:          20200116T210000Z
    time_coverage_start:        20200115T210000Z
    title:                      Daily MUR SST, Final product
    uuid:                       27665bc0-d5fc-11e1-9b23-0800200c9a66
    westernmost_longitude:      -180.0

array([-89.99, -89.98, -89.97, ...,  89.97,  89.98,  89.99], dtype=float32)

array([-179.99, -179.98, -179.97, ...,  179.98,  179.99,  180.  ],
      dtype=float32)

array(['2002-06-01T09:00:00.000000000', '2002-06-02T09:00:00.000000000',
       '2002-06-03T09:00:00.000000000', ..., '2020-01-18T09:00:00.000000000',
       '2020-01-19T09:00:00.000000000', '2020-01-20T09:00:00.000000000'],
      dtype='datetime64[ns]')


# No `group` argument is given, so only the file's top level is opened; the
# recorded repr shows a single variable, UTC_time.
# NOTE: this rebinds `ds`, replacing the ERA5 dataset assigned to it earlier.
ds = xr.open_dataset("data/test_hgroups.nc")
ds

<xarray.Dataset>
Dimensions:   (recNum: 74)
Dimensions without coordinates: recNum
Data variables:
    UTC_time  (recNum) object '2012-03-04 03:54:19' ... '2012-03-04 04:24:10'

array(['2012-03-04 03:54:19', '2012-03-04 03:54:42', '2012-03-04 03:54:59',
       '2012-03-04 03:55:20', '2012-03-04 03:55:43', '2012-03-04 03:56:09',
       '2012-03-04 03:56:41', '2012-03-04 03:57:12', '2012-03-04 03:57:27',
       '2012-03-04 03:57:41', '2012-03-04 03:57:56', '2012-03-04 03:58:10',
       '2012-03-04 03:58:23', '2012-03-04 03:58:35', '2012-03-04 03:58:54',
       '2012-03-04 03:59:18', '2012-03-04 03:59:30', '2012-03-04 03:59:43',
       '2012-03-04 03:59:55', '2012-03-04 04:00:07', '2012-03-04 04:00:20',
       '2012-03-04 04:00:39', '2012-03-04 04:01:03', '2012-03-04 04:01:28',
       '2012-03-04 04:01:43', '2012-03-04 04:01:57', '2012-03-04 04:02:12',
       '2012-03-04 04:02:24', '2012-03-04 04:02:41', '2012-03-04 04:02:57',
       '2012-03-04 04:03:15', '2012-03-04 04:03:28', '2012-03-04 04:03:45',
       '2012-03-04 04:04:02', '2012-03-04 04:04:20', '2012-03-04 04:04:39',
       '2012-03-04 04:04:57', '2012-03-04 04:05:20', '2012-03-04 04:05:36',
       '2012-03-04 04:05:55', '2012-03-04 04:06:16', '2012-03-04 04:06:32',
       '2012-03-04 04:06:49', '2012-03-04 04:07:08', '2012-03-04 04:07:29',
       '2012-03-04 04:07:50', '2012-03-04 04:08:12', '2012-03-04 04:08:33',
       '2012-03-04 04:08:54', '2012-03-04 04:09:22', '2012-03-04 04:09:50',
       '2012-03-04 04:10:20', '2012-03-04 04:10:49', '2012-03-04 04:11:20',
       '2012-03-04 04:11:47', '2012-03-04 04:12:23', '2012-03-04 04:12:55',
       '2012-03-04 04:13:31', '2012-03-04 04:14:07', '2012-03-04 04:14:48',
       '2012-03-04 04:15:40', '2012-03-04 04:16:37', '2012-03-04 04:17:26',
       '2012-03-04 04:17:53', '2012-03-04 04:18:16', '2012-03-04 04:18:45',
       '2012-03-04 04:19:27', '2012-03-04 04:20:13', '2012-03-04 04:21:01',
       '2012-03-04 04:21:46', '2012-03-04 04:22:27', '2012-03-04 04:23:05',
       '2012-03-04 04:23:43', '2012-03-04 04:24:10'], dtype=object)


import netCDF4 as nc

# Open the same file with the netCDF4 library; its repr also lists the groups the file contains.
# NOTE(review): this handle is not closed anywhere in the visible code — consider
# dsn.close() once it is no longer needed.
dsn = nc.Dataset("data/test_hgroups.nc")
dsn

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    dimensions(sizes): recNum(74)
    variables(dimensions): <class 'str'> UTC_time(recNum)
    groups: mozaic_flight_2012030403540535_ascent, mozaic_flight_2012030321335035_descent, mozaic_flight_2012030403540535_descent, mozaic_flight_2012030412545335_ascent, mozaic_flight_2012030419144751_ascent, mozaic_flight_2012030319051051_descent, mozaic_flight_2012030421382353_ascent


# Open one named group of the file rather than its top level.
# NOTE(review): O3 is -99.0 throughout the displayed output and CO contains a
# run of -99.0 values; this looks like a missing-value sentinel — confirm
# before using these columns numerically.
ds4 = xr.open_dataset(
    "data/test_hgroups.nc",
    group="mozaic_flight_2012030403540535_ascent",
)
ds4

<xarray.Dataset>
Dimensions:    (recNum: 74)
Dimensions without coordinates: recNum
Data variables:
    air_press  (recNum) float64 1.008e+05 9.851e+04 ... 2.31e+04 2.275e+04
    CO         (recNum) float64 216.0 189.0 167.0 159.0 ... 80.0 77.0 85.0 73.0
    O3         (recNum) float64 -99.0 -99.0 -99.0 -99.0 ... -99.0 -99.0 -99.0
    altitude   (recNum) float64 46.4 237.0 387.1 ... 1.087e+04 1.097e+04
    lat        float64 32.01
    lon        float64 34.89
Attributes:
    airport_dep:  TLV
    flight:       2012030403540535
    level:        calibrated
    airport_arr:  FRA
    mission:      mozaic
    time_dep:     2012-03-04 03:54:05
    aircraft:     3
    link:         http://www.iagos.fr/extract
    phase:        ascent
    time_arr:     2012-03-04 08:01:44

array([100769.,  98510.,  96760.,  94785.,  93579.,  91608.,  90241.,  88630.,
        87143.,  85338.,  83717.,  82227.,  80790.,  79227.,  77753.,  76087.,
        74987.,  73690.,  72107.,  70650.,  69238.,  68014.,  66993.,  65708.,
        64417.,  63048.,  61755.,  60698.,  59370.,  58163.,  56995.,  56113.,
        55038.,  53938.,  52813.,  51738.,  50720.,  49490.,  48613.,  47626.,
        46578.,  45748.,  44848.,  43913.,  42976.,  42094.,  41198.,  40382.,
        39590.,  38627.,  37851.,  37011.,  36230.,  35364.,  34701.,  33965.,
        33261.,  32480.,  31767.,  31139.,  30408.,  29762.,  29124.,  28362.,
        27668.,  27220.,  26549.,  25899.,  25353.,  24783.,  24209.,  23646.,
        23102.,  22747.])

array([216., 189., 167., 159., 153., 146., 139., 138., 134., 125., 121., 120.,
       119., 121., 126., 129., 130., 129., 125., 121., 124., 131., 131., 130.,
       131., 129., 127., 126., 120., 114., 106., 102., 101., -99., -99., -99.,
       -99., -99., -99., -99.,  79.,  81.,  82.,  82.,  82.,  84.,  86.,  87.,
        86.,  84.,  82.,  84.,  86.,  81.,  81.,  84.,  80.,  71.,  68.,  74.,
        75.,  82.,  76.,  80.,  78.,  74.,  79.,  82.,  85.,  81.,  80.,  77.,
        85.,  73.])

array([-99., -99., -99., -99., -99., -99., -99., -99., -99., -99., -99., -99.,
       -99., -99., -99., -99., -99., -99., -99., -99., -99., -99., -99., -99.,
       -99., -99., -99., -99., -99., -99., -99., -99., -99., -99., -99., -99.,
       -99., -99., -99., -99., -99., -99., -99., -99., -99., -99., -99., -99.,
       -99., -99., -99., -99., -99., -99., -99., -99., -99., -99., -99., -99.,
       -99., -99., -99., -99., -99., -99., -99., -99., -99., -99., -99., -99.,
       -99., -99.])

array([   46.4,   237. ,   387.1,   559.2,   665.8,   842.2,   966.4,  1114.8,
        1253.7,  1425. ,  1581.2,  1727. ,  1869.7,  2027.2,  2178.1,  2351.5,
        2467.6,  2606.3,  2778.4,  2939.5,  3098.2,  3237.8,  3355.9,  3506.7,
        3660.5,  3826.4,  3985.7,  4118.1,  4286.9,  4443.2,  4596.8,  4714.6,
        4860.1,  5011.4,  5168.8,  5321.6,  5468.9,  5650. ,  5781.4,  5931.5,
        6093.8,  6224.4,  6368.1,  6520. ,  6674.8,  6823.1,  6976.3,  7118.2,
        7258.2,  7431.5,  7573.6,  7730.2,  7878.5,  8045.9,  8176.3,  8323.5,
        8466.6,  8628.5,  8779. ,  8913.7,  9073.3,  9217.3,  9361.7,  9537.6,
        9701.2,  9808.6,  9972.2, 10133.8, 10272.2, 10419.2, 10570.1, 10720.8,
       10869.3, 10967.9])

array(32.011391)

array(34.886665)

	year	month	date_index	fraction_date	c02	data_adjusted_season	data_fit	data_adjusted_seasonally_fit	data_filled	data_adjusted_seasonally_filed
0	1958	1	21200	1958.0411	-99.99	-99.99	-99.99	-99.99	-99.99	-99.99
1	1958	2	21231	1958.1260	-99.99	-99.99	-99.99	-99.99	-99.99	-99.99
2	1958	3	21259	1958.2027	315.70	314.43	316.19	314.90	315.70	314.43
3	1958	4	21290	1958.2877	317.45	315.16	317.30	314.98	317.45	315.16
4	1958	5	21320	1958.3699	317.51	314.71	317.86	315.06	317.51	314.71
...	...	...	...	...	...	...	...	...	...	...
763	2021	8	44423	2021.6219	-99.99	-99.99	-99.99	-99.99	-99.99	-99.99
764	2021	9	44454	2021.7068	-99.99	-99.99	-99.99	-99.99	-99.99	-99.99
765	2021	10	44484	2021.7890	-99.99	-99.99	-99.99	-99.99	-99.99	-99.99
766	2021	11	44515	2021.8740	-99.99	-99.99	-99.99	-99.99	-99.99	-99.99
767	2021	12	44545	2021.9562	-99.99	-99.99	-99.99	-99.99	-99.99	-99.99

	year	month	date_index	fraction_date	c02	data_adjusted_season	data_fit	data_adjusted_seasonally_fit	data_filled	data_adjusted_seasonally_filed
0	1958	1	21200	1958.0411	-99.99	-99.99	-99.99	-99.99	-99.99	-99.99
1	1958	2	21231	1958.1260	-99.99	-99.99	-99.99	-99.99	-99.99	-99.99
2	1958	3	21259	1958.2027	315.70	314.43	316.19	314.90	315.70	314.43
3	1958	4	21290	1958.2877	317.45	315.16	317.30	314.98	317.45	315.16
4	1958	5	21320	1958.3699	317.51	314.71	317.86	315.06	317.51	314.71
...	...	...	...	...	...	...	...	...	...	...
763	2021	8	44423	2021.6219	-99.99	-99.99	-99.99	-99.99	-99.99	-99.99
764	2021	9	44454	2021.7068	-99.99	-99.99	-99.99	-99.99	-99.99	-99.99
765	2021	10	44484	2021.7890	-99.99	-99.99	-99.99	-99.99	-99.99	-99.99
766	2021	11	44515	2021.8740	-99.99	-99.99	-99.99	-99.99	-99.99	-99.99
767	2021	12	44545	2021.9562	-99.99	-99.99	-99.99	-99.99	-99.99	-99.99

Data Serialization

Pickle and Shelve

What can be pickled and unpickled?

A shelf of pickles

Numpy: npy, npz

JSON

Dataframes: CSVs and Feather

HDF5, NetCDF, and Xarray