Native Python serialization
The following types can be pickled:

- None, True, and False;
- integers, floating-point numbers, complex numbers;
- strings, bytes, bytearrays;
- tuples, lists, sets, and dictionaries containing only picklable objects.
For our purposes, the list stops here. In reality, the following can also be pickled, but I do not recommend doing so for now. There are scenarios where pickling these things makes sense, but they are more advanced and we will not discuss them here:

- functions (built-in and user-defined) accessible from the top level of a module;
- classes accessible from the top level of a module;
- instances of such classes whose __dict__ or the result of calling __getstate__() is picklable.

Warning:
The pickle module is not secure! Only unpickle data you trust.
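To see why this warning matters, here is a minimal sketch (the class name and message are purely illustrative): pickle reconstructs objects by calling whatever __reduce__ tells it to, so a malicious payload can run arbitrary code the moment you unpickle it.

import pickle

class Innocent:
    # __reduce__ tells pickle how to rebuild this object;
    # a hostile payload can put *any* callable here.
    def __reduce__(self):
        return (print, ('this code ran during unpickling!',))

payload = pickle.dumps(Innocent())
pickle.loads(payload)  # prints the message -- imagine os.system instead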
import pickle
pickle.dumps(1234, protocol=0)
b'I1234\n.'
pickle.dumps([1])
b'\x80\x04\x95\x06\x00\x00\x00\x00\x00\x00\x00]\x94K\x01a.'
a = [1, 1.5, "hello", {3, 4}, {'int': 9, 'real': 9.0, 'complex': 9j}]
with open('data.pkl', 'wb') as f:
    pickle.dump(a, f)
with open('data.pkl', 'rb') as f:
    b = pickle.load(f)
b
[1, 1.5, 'hello', {3, 4}, {'int': 9, 'real': 9.0, 'complex': 9j}]
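The restriction to module-level functions mentioned above is real: pickle serializes functions by reference to an importable name, and a lambda has none. A quick illustrative check (the exact exception message varies by Python version):

try:
    pickle.dumps(lambda x: x + 1)
except Exception as err:
    # typically pickle.PicklingError: attribute lookup <lambda> ... failed
    print(type(err).__name__, err)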
The shelve module provides a disk-stored object that behaves (mostly) like a dict, whose keys are strings and whose values are anything that can be pickled.
import shelve
x = 1234
with shelve.open('spam') as db:
    db['eggs'] = 'eggs'
    db['numbers'] = [1, 2, 3, 9.99, 1j]
    db['xx'] = x
%ls -l spam.*
-rw-r--r-- 1 jovyan jovyan   53 Apr  6 17:52 spam.bak
-rw-r--r-- 1 jovyan jovyan 1030 Apr  6 17:52 spam.dat
-rw-r--r-- 1 jovyan jovyan   53 Apr  6 17:52 spam.dir
Note:
Do not rely on the shelf being closed automatically; always call close() explicitly when you don't need it any more, or use shelve.open() as a context manager, as shown above.
with shelve.open('spam') as db:
    e = db['eggs']
    n = db['numbers']
    print(f'{e = }')
    print(f'{n = }')
e = 'eggs'
n = [1, 2, 3, 9.99, 1j]
db = shelve.open('spam')
for var, data in db.items():
    print(f'{var} = {data}')
eggs = eggs
numbers = [1, 2, 3, 9.99, 1j]
xx = 1234
db.close()
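One classic shelve gotcha (documented in the standard library): mutating a value fetched from the shelf does not write the change back, because you are modifying a temporary unpickled copy. A sketch of the behavior and the fix:

with shelve.open('spam') as db:
    db['numbers'].append(4)   # mutates a temporary copy only
with shelve.open('spam') as db:
    print(db['numbers'])      # still [1, 2, 3, 9.99, 1j]

# Fix: reassign the value explicitly, or open with writeback=True,
# which caches values and writes them back on close (at a memory cost):
with shelve.open('spam', writeback=True) as db:
    db['numbers'].append(4)   # persisted when the shelf closes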
How to save and load NumPy objects provides more details, and all input/output APIs in NumPy are described in the I/O routines reference.
As a minimum starter, you should know that:

- Single arrays can be saved to disk as .npy files. These are portable across machines and versions of NumPy.
- Multiple arrays (or scalars) can be saved together in a single .npz file.

The relationship between npy and npz files is somewhat similar to that between single pickles and shelve objects in Python.
import numpy as np
a = np.array([1, 2, 3.4])
fname = 'arr.npy'
np.save(fname, a)
b = np.load(fname)
(a == b).all()
True
Multiple arrays (or scalar data) can be saved in a shelve-like object with the np.savez() function, which writes .npz files:
fname = 'arrays.npz'
np.savez(fname, a=a, b=np.random.normal(10), c=3.4)
arrays = np.load(fname)
arrays.files
['a', 'b', 'c']
arrays['a']
array([1. , 2. , 3.4])
arrays['c']
array(3.4)
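If your arrays are large and contain redundancy, np.savez_compressed() works exactly like np.savez() but zip-compresses each array. A small illustrative comparison (the file names are arbitrary):

big = np.zeros(100_000)
np.savez('big.npz', big=big)                       # uncompressed: ~800 kB of zeros
np.savez_compressed('big_compressed.npz', big=big) # much smaller for redundant data
%ls -l big*.npz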
JSON stands for JavaScript Object Notation. It is a human-readable format (it looks kind of like a Python dict; Jupyter notebooks are JSON files on disk) that can represent native JavaScript types. In some cases it can be an alternative to pickle, with the advantage of being natively portable to the Web and the JavaScript ecosystem. From the Python pickle docs, this quick comparison is useful for our purposes:
JSON is a text serialization format (it outputs unicode text, although most of the time it is then encoded to utf-8), while pickle is a binary serialization format;
JSON is human-readable, while pickle is not;
JSON is interoperable and widely used outside of the Python ecosystem, while pickle is Python-specific;
JSON, by default, can only represent a subset of the Python built-in types, and no custom classes; pickle can represent an extremely large number of Python types (many of them automatically, by clever usage of Python’s introspection facilities; complex cases can be tackled by implementing specific object APIs);
Unlike pickle, deserializing untrusted JSON does not in itself create an arbitrary code execution vulnerability.
import json
a = ['foo', {'bar': ['baz', None, 1.0, 2]}]
json.dumps(a)
'["foo", {"bar": ["baz", null, 1.0, 2]}]'
with open('test.json', 'w') as f:
    json.dump(a, f)
with open('test.json', 'r') as f:
    b = json.load(f)
b
['foo', {'bar': ['baz', None, 1.0, 2]}]
a == b
True
But be careful: JSON has no tuple type, so tuples are silently converted to lists on the way in:
c = ['foo', {'bar': ('baz', None, 1.0, 2)}]
with open('test2.json', 'w') as f:
    json.dump(c, f)
with open('test2.json', 'r') as f:
    d = json.load(f)
c == d
False
d
['foo', {'bar': ['baz', None, 1.0, 2]}]
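Also note that many Python types are not JSON-serializable at all: json.dumps(1j) raises a TypeError. The json module lets you supply a default hook that maps unknown types to something representable. A minimal sketch (the dict layout for complex numbers here is just a convention I made up):

def encode_complex(obj):
    # called only for objects json does not know how to serialize
    if isinstance(obj, complex):
        return {'__complex__': True, 're': obj.real, 'im': obj.imag}
    raise TypeError(f'not serializable: {obj!r}')

json.dumps({'z': 3 + 4j}, default=encode_complex)
# '{"z": {"__complex__": true, "re": 3.0, "im": 4.0}}'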
from IPython.display import JSON
JSON(c)
<IPython.core.display.JSON object>
GeoJSON: the meaning of a schema.
classroom = {
    "type": "Feature",
    "geometry": {
        "type": "Point",
        "coordinates": [-122.25915, 37.87125]
    },
    "properties": {
        "name": "Wheeler Hall Auditorium"
    }
}
JSON(classroom)
<IPython.core.display.JSON object>
from IPython.display import GeoJSON
GeoJSON(classroom)
<IPython.display.GeoJSON object>
Some useful performance comparisons regarding various ways of saving dataframes.
from pathlib import Path
import pandas as pd
df = pd.read_csv(Path.home()/"shared/climate-data/monthly_in_situ_co2_mlo_cleaned.csv")
df
| | year | month | date_index | fraction_date | c02 | data_adjusted_season | data_fit | data_adjusted_seasonally_fit | data_filled | data_adjusted_seasonally_filed |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1958 | 1 | 21200 | 1958.0411 | -99.99 | -99.99 | -99.99 | -99.99 | -99.99 | -99.99 |
| 1 | 1958 | 2 | 21231 | 1958.1260 | -99.99 | -99.99 | -99.99 | -99.99 | -99.99 | -99.99 |
| 2 | 1958 | 3 | 21259 | 1958.2027 | 315.70 | 314.43 | 316.19 | 314.90 | 315.70 | 314.43 |
| 3 | 1958 | 4 | 21290 | 1958.2877 | 317.45 | 315.16 | 317.30 | 314.98 | 317.45 | 315.16 |
| 4 | 1958 | 5 | 21320 | 1958.3699 | 317.51 | 314.71 | 317.86 | 315.06 | 317.51 | 314.71 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 763 | 2021 | 8 | 44423 | 2021.6219 | -99.99 | -99.99 | -99.99 | -99.99 | -99.99 | -99.99 |
| 764 | 2021 | 9 | 44454 | 2021.7068 | -99.99 | -99.99 | -99.99 | -99.99 | -99.99 | -99.99 |
| 765 | 2021 | 10 | 44484 | 2021.7890 | -99.99 | -99.99 | -99.99 | -99.99 | -99.99 | -99.99 |
| 766 | 2021 | 11 | 44515 | 2021.8740 | -99.99 | -99.99 | -99.99 | -99.99 | -99.99 | -99.99 |
| 767 | 2021 | 12 | 44545 | 2021.9562 | -99.99 | -99.99 | -99.99 | -99.99 | -99.99 | -99.99 |

768 rows × 10 columns
%ls -l ~/shared/climate-data/monthly_in_situ_co2_mlo_cleaned.csv
-rw-r--r-- 1 jovyan jovyan 50201 Nov 3 07:10 /home/jovyan/shared/climate-data/monthly_in_situ_co2_mlo_cleaned.csv
df.to_feather("co2.fth")
%ls -l co2*
-rw-r--r-- 1 jovyan jovyan 32218 Apr 6 17:52 co2.fth
df2 = pd.read_feather("co2.fth")
df2
| | year | month | date_index | fraction_date | c02 | data_adjusted_season | data_fit | data_adjusted_seasonally_fit | data_filled | data_adjusted_seasonally_filed |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1958 | 1 | 21200 | 1958.0411 | -99.99 | -99.99 | -99.99 | -99.99 | -99.99 | -99.99 |
| 1 | 1958 | 2 | 21231 | 1958.1260 | -99.99 | -99.99 | -99.99 | -99.99 | -99.99 | -99.99 |
| 2 | 1958 | 3 | 21259 | 1958.2027 | 315.70 | 314.43 | 316.19 | 314.90 | 315.70 | 314.43 |
| 3 | 1958 | 4 | 21290 | 1958.2877 | 317.45 | 315.16 | 317.30 | 314.98 | 317.45 | 315.16 |
| 4 | 1958 | 5 | 21320 | 1958.3699 | 317.51 | 314.71 | 317.86 | 315.06 | 317.51 | 314.71 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 763 | 2021 | 8 | 44423 | 2021.6219 | -99.99 | -99.99 | -99.99 | -99.99 | -99.99 | -99.99 |
| 764 | 2021 | 9 | 44454 | 2021.7068 | -99.99 | -99.99 | -99.99 | -99.99 | -99.99 | -99.99 |
| 765 | 2021 | 10 | 44484 | 2021.7890 | -99.99 | -99.99 | -99.99 | -99.99 | -99.99 | -99.99 |
| 766 | 2021 | 11 | 44515 | 2021.8740 | -99.99 | -99.99 | -99.99 | -99.99 | -99.99 | -99.99 |
| 767 | 2021 | 12 | 44545 | 2021.9562 | -99.99 | -99.99 | -99.99 | -99.99 | -99.99 | -99.99 |

768 rows × 10 columns
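Feather is not the only binary columnar format pandas supports: Parquet (via pyarrow or fastparquet, if installed) is a popular alternative that also compresses well. A quick sketch with an arbitrary file name:

df.to_parquet("co2.parquet")
df3 = pd.read_parquet("co2.parquet")
df3.equals(df)   # True if the round trip preserved the data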
Here is a nice introduction to HDF5 from our NERSC friends, and this is a good intro tutorial with code examples. The docs for the h5py Python library have more technical details.
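As a taste of the h5py API mentioned above, here is a minimal sketch (file and dataset names are arbitrary) showing the two core ideas: named datasets and folder-like groups.

import h5py
import numpy as np

# write: datasets can live at the root or inside nested groups
with h5py.File('demo.h5', 'w') as f:
    f.create_dataset('x', data=np.arange(10))
    grp = f.create_group('measurements')
    grp.create_dataset('temperature', data=np.random.normal(20, 1, 100))

# read: the file behaves like a nested dict of arrays
with h5py.File('demo.h5', 'r') as f:
    print(list(f.keys()))                    # ['measurements', 'x']
    print(f['measurements/temperature'][:5])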
In brief (oversimplifying, but OK for our purposes):

- HDF5 is a binary file format (and data model) for storing large, hierarchically organized numerical datasets together with their metadata.
- NetCDF is a data format and data model for array-oriented scientific data that, in its current version, is implemented on top of HDF5.
Note:
When we say NetCDF, we will strictly mean NetCDF4. There's an older version 3 that wasn't based on HDF5, and that we will not discuss further.
Today, most NetCDF files you encounter use the HDF5 binary format for storage, but as of 2020 NetCDF data can also be stored using the Zarr format, which is better suited for cloud storage than HDF5 (which was mostly designed for supercomputers).
So, the picture is: for small datasets it doesn't matter much whether the bytes live in h5 or zarr, but for larger data it does.

from pathlib import Path
import xarray as xr
DATA_DIR = Path.home()/Path('shared/climate-data')
ds = xr.open_dataset(DATA_DIR / "era5_monthly_2deg_aws_v20210920.nc")
ds
<xarray.Dataset>
Dimensions:                                    (time: 504, latitude: 90, longitude: 180)
Coordinates:
  * time                                       (time) datetime64[ns] ...
  * latitude                                   (latitude) float32 ...
  * longitude                                  (longitude) float32 ...
Data variables: (12/15)
    air_pressure_at_mean_sea_level             (time, latitude, longitude) float32 ...
    air_temperature_at_2_metres                (time, latitude, longitude) float32 ...
    air_temperature_at_2_metres_1hour_Maximum  (time, latitude, longitude) float32 ...
    air_temperature_at_2_metres_1hour_Minimum  (time, latitude, longitude) float32 ...
    dew_point_temperature_at_2_metres          (time, latitude, longitude) float32 ...
    eastward_wind_at_100_metres                (time, latitude, longitude) float32 ...
    ...                                         ...
    northward_wind_at_100_metres               (time, latitude, longitude) float32 ...
    northward_wind_at_10_metres                (time, latitude, longitude) float32 ...
    precipitation_amount_1hour_Accumulation    (time, latitude, longitude) float32 ...
    sea_surface_temperature                    (time, latitude, longitude) float32 ...
    snow_density                               (time, latitude, longitude) float32 ...
    surface_air_pressure                       (time, latitude, longitude) float32 ...
Attributes:
    institution:  ECMWF
    source:       Reanalysis
    title:        ERA5 forecasts
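Since xarray abstracts over the storage backend, the same Dataset can be written back out in Zarr format. An illustrative sketch using one variable to keep it cheap (the output path is arbitrary):

# write one variable (plus its coordinates) as a Zarr store -- a directory of chunks
ds[["sea_surface_temperature"]].to_zarr("era5_sst.zarr", mode="w")
sst_back = xr.open_zarr("era5_sst.zarr")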
%%time
file_aws = "https://mur-sst.s3.us-west-2.amazonaws.com/zarr-v1"
ds_sst = xr.open_zarr(file_aws, consolidated=True)
ds_sst
CPU times: user 1.28 s, sys: 115 ms, total: 1.4 s
Wall time: 3.01 s
<xarray.Dataset>
Dimensions:           (time: 6443, lat: 17999, lon: 36000)
Coordinates:
  * lat               (lat) float32 -89.99 -89.98 -89.97 ... 89.97 89.98 89.99
  * lon               (lon) float32 -180.0 -180.0 -180.0 ... 180.0 180.0 180.0
  * time              (time) datetime64[ns] 2002-06-01T09:00:00 ... 2020-01-2...
Data variables:
    analysed_sst      (time, lat, lon) float32 dask.array<chunksize=(5, 1799, 3600), meta=np.ndarray>
    analysis_error    (time, lat, lon) float32 dask.array<chunksize=(5, 1799, 3600), meta=np.ndarray>
    mask              (time, lat, lon) float32 dask.array<chunksize=(5, 1799, 3600), meta=np.ndarray>
    sea_ice_fraction  (time, lat, lon) float32 dask.array<chunksize=(5, 1799, 3600), meta=np.ndarray>
Attributes: (12/47)
    Conventions:            CF-1.7
    Metadata_Conventions:   Unidata Observation Dataset v1.0
    acknowledgment:         Please acknowledge the use of these data with...
    cdm_data_type:          grid
    comment:                MUR = "Multi-scale Ultra-high Resolution"
    creator_email:          ghrsst@podaac.jpl.nasa.gov
    ...                     ...
    summary:                A merged, multi-sensor L4 Foundation SST anal...
    time_coverage_end:      20200116T210000Z
    time_coverage_start:    20200115T210000Z
    title:                  Daily MUR SST, Final product
    uuid:                   27665bc0-d5fc-11e1-9b23-0800200c9a66
    westernmost_longitude:  -180.0
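Note the dask.array entries in the repr above: the data is loaded lazily, which is why opening this multi-terabyte dataset took only a few seconds. Chunks are downloaded only when a computation actually needs them. A sketch (the date and the lat/lon box are just an example):

sst_box = ds_sst['analysed_sst'].sel(
    time='2002-06-02',
    lat=slice(35, 36),
    lon=slice(-125, -124),
)
sst_box.mean().compute()   # triggers the download of just these chunks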
Warning:
The above picture is incomplete...
ds = xr.open_dataset("data/test_hgroups.nc")
ds
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
    [... long traceback through the xarray and netCDF4 backends elided ...]
FileNotFoundError: [Errno 2] No such file or directory: b'/home/jovyan/sp23-dev/lec/lec22/data/test_hgroups.nc'
import netCDF4 as nc
dsn = nc.Dataset("data/test_hgroups.nc")
dsn
ds4 = xr.open_dataset("data/test_hgroups.nc",
                      group="mozaic_flight_2012030403540535_ascent")
ds4
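If you don't know the group names in advance, they can be listed from the netCDF4 handle opened above (the names shown are just what this particular file happens to contain):

list(dsn.groups)   # e.g. ['mozaic_flight_2012030403540535_ascent', ...]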