Streaming data from DANDI#
This script shows how to stream data from the DANDI Archive all the way to pynapple.
Prelude#
The data used in this tutorial were used in this publication: Sargolini, Francesca, et al. “Conjunctive representation of position, direction, and velocity in entorhinal cortex.” Science 312.5774 (2006): 758-762. The data can be found on the DANDI Archive in Dandiset 000582.
DANDI#
DANDI allows you to stream data without downloading all the files. In this case the data extracted from the NWB file are stored in the nwb-cache folder.
from pynwb import NWBHDF5IO
from dandi.dandiapi import DandiAPIClient
import fsspec
from fsspec.implementations.cached import CachingFileSystem
import h5py
# ecephys
dandiset_id, filepath = (
"000582",
"sub-10073/sub-10073_ses-17010302_behavior+ecephys.nwb",
)
with DandiAPIClient() as client:
asset = client.get_dandiset(dandiset_id, "draft").get_asset_by_path(filepath)
s3_url = asset.get_content_url(follow_redirects=1, strip_query=True)
# first, create a virtual filesystem based on the http protocol
fs = fsspec.filesystem("http")
# create a cache to save downloaded data to disk (optional)
fs = CachingFileSystem(
fs=fs,
cache_storage="nwb-cache", # Local folder for the cache
)
# next, open the file
file = h5py.File(fs.open(s3_url, "rb"))
io = NWBHDF5IO(file=file, load_namespaces=True)
print(io)
<pynwb.NWBHDF5IO object at 0x7fc190a2eb30>
/home/runner/.local/lib/python3.10/site-packages/hdmf/spec/namespace.py:535: UserWarning: Ignoring cached namespace 'core' version 2.6.0-alpha because version 2.7.0 is already loaded.
warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
Pynapple#
If opening the NWB works, you can start streaming data straight into pynapple with the NWBFile
class.
import pynapple as nap
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
custom_params = {"axes.spines.right": False, "axes.spines.top": False}
sns.set_theme(style="ticks", palette="colorblind", font_scale=1.5, rc=custom_params)
nwb = nap.NWBFile(io.read())
print(nwb)
17010302
┍━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━┑
│ Keys │ Type │
┝━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━┥
│ units │ TsGroup │
│ ElectricalSeriesLFP │ Tsd │
│ SpatialSeriesLED1 │ TsdFrame │
│ ElectricalSeries │ Tsd │
┕━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━┙
We can load the spikes as a TsGroup for inspection.
units = nwb["units"]
print(units)
Index rate unit_name histology hemisphere depth
------- ------- ----------- ----------- ------------ -------
0 2.93217 t1c1 MEC LII 0.0024
1 1.50193 t2c1 MEC LII 0.0024
2 2.57878 t2c3 MEC LII 0.0024
3 1.13186 t3c1 MEC LII 0.0024
4 1.29356 t3c2 MEC LII 0.0024
5 1.35857 t3c3 MEC LII 0.0024
6 2.8855 t3c4 MEC LII 0.0024
7 1.46525 t4c1 MEC LII 0.0024
As well as the position
position = nwb["SpatialSeriesLED1"]
Here we compute the 2d tuning curves
tc, binsxy = nap.compute_2d_tuning_curves(units, position, 20)
/home/runner/.local/lib/python3.10/site-packages/pynapple/process/tuning_curves.py:269: RuntimeWarning: invalid value encountered in divide
count = count / occupancy
Let’s plot the tuning curves
plt.figure(figsize=(15, 7))
for i in tc.keys():
plt.subplot(2, 4, i + 1)
plt.imshow(tc[i], origin="lower", aspect="auto")
plt.title("Unit {}".format(i))
plt.tight_layout()
plt.show()
Let’s plot the spikes of unit 1 who has a nice grid
Here I use the function value_from
to assign to each spike the closest position in time.
plt.figure(figsize=(15, 6))
plt.subplot(121)
extent = (
np.min(position["x"]),
np.max(position["x"]),
np.min(position["y"]),
np.max(position["y"]),
)
plt.imshow(tc[1], origin="lower", extent=extent, aspect="auto")
plt.xlabel("x")
plt.ylabel("y")
plt.subplot(122)
plt.plot(position["y"], position["x"], color="grey")
spk_pos = units[1].value_from(position)
plt.plot(spk_pos["y"], spk_pos["x"], "o", color="red", markersize=5, alpha=0.5)
plt.xlabel("x")
plt.ylabel("y")
plt.tight_layout()
plt.show()