scrape-dandi/scrape_dandi/metadata.py
2023-10-30 12:18:07 -07:00

44 lines
No EOL
1 KiB
Python

import io
import json
import sys
from contextlib import redirect_stdout
from multiprocessing import Pool

from click.exceptions import ClickException
from dandi.cli.cmd_ls import ls
from tqdm import trange

from scrape_dandi import DANDI_ID, MAX_DANDISET
def dset_ls(dset: int) -> dict:
    """Return the metadata that the ``ls`` command reports for one dandiset.

    Runs the ``ls`` click command in-process on the identifier built with
    ``DANDI_ID.format(dset)`` and ``--format json``, captures what it prints
    to stdout, and returns the first element of the decoded JSON.

    Args:
        dset: Number substituted into ``DANDI_ID`` to build the identifier.

    Returns:
        The first record of the JSON output, or an empty dict when the
        command raises a ``ClickException`` or prints no usable JSON.
    """
    capturer = io.StringIO()
    try:
        # standalone_mode=False makes click raise its exceptions instead of
        # handling them and exiting, so a failed lookup can be caught here.
        with redirect_stdout(capturer):
            ls.main([DANDI_ID.format(dset), '--format', 'json'], standalone_mode=False)
    except ClickException:
        return {}

    try:
        return json.loads(capturer.getvalue())[0]
    except (json.JSONDecodeError, IndexError) as err:
        # Empty or non-JSON output means "no metadata" for this dandiset; it
        # should not abort a scan over many dandisets.
        print(f'dandiset {dset}: unusable ls output ({err!r})', file=sys.stderr)
        return {}
def get_all_meta() -> dict:
    """Collect metadata for every dandiset number below ``MAX_DANDISET``.

    Calls ``dset_ls`` for each ``i`` in ``range(MAX_DANDISET)`` (shown with a
    tqdm progress bar) and keeps only the non-empty results.

    The scan is best-effort: if it is interrupted with Ctrl-C or a lookup
    raises, the scan stops, the cause is reported on stderr, and whatever was
    gathered so far is still returned.

    Returns:
        Mapping of dandiset number to the metadata dict from ``dset_ls``.
    """
    meta = {}
    try:
        for i in trange(MAX_DANDISET):
            this_meta = dset_ls(i)
            if this_meta:
                meta[i] = this_meta
    except (KeyboardInterrupt, Exception) as err:
        # Keep the partial result on purpose, but say why the scan ended.
        # SystemExit and GeneratorExit are not caught and still propagate.
        print(f'metadata scan stopped early: {err!r}', file=sys.stderr)
    return meta
def get_and_save_meta(file: str = 'metadata.json'):
    """Gather all dandiset metadata and write it to ``file`` as JSON.

    Args:
        file: Path of the JSON file to create or overwrite.
    """
    collected = get_all_meta()
    with open(file, 'w') as out:
        json.dump(collected, out)
# Script entry point: scrape all metadata into the default output file.
if __name__ == "__main__":
    get_and_save_meta()