"""Bulk-download DANDI dandisets that contain NWB assets.

Walks the numeric dandiset identifiers below ``MAX_DANDISET``, probes each
one with :func:`check_nwb`, downloads the ones that qualify, and appends a
timestamped ``SKIP`` / ``GET`` line to ``LOG_TXT`` for every identifier.
"""
from datetime import datetime
from pathlib import Path

from tqdm import trange

from dandi.consts import ZARR_EXTENSIONS, metadata_all_fields
from dandi.dandiarchive import DandisetURL, _dandi_url_parser, parse_dandi_url
from dandi.cli.cmd_download import download

#OUT_DIR = Path('/mnt/seedbank/p2p/dandi/')
OUT_DIR = '.'
LOG_TXT = 'log.txt'
# Zero-padded dandiset numbers that must never be downloaded.
SKIP_DANDISETS = [
    '000108'  # humongous 372 human light sheet imaging
]
# Template turning an integer into a ``DANDI:NNNNNN`` identifier.
DANDI_ID = 'DANDI:{:06d}'
MAX_DANDISET = 683


def check_nwb(dandiset: int) -> bool:
    """Return whether *dandiset* should be downloaded.

    A dandiset qualifies when it is not listed in ``SKIP_DANDISETS``, its
    resolved version identifier is not ``'draft'``, and at least one of its
    asset paths ends with ``'nwb'``.

    Args:
        dandiset: Numeric dandiset identifier (e.g. ``108`` for ``000108``).

    Returns:
        ``True`` if the dandiset qualifies, ``False`` otherwise. Any error
        raised while resolving or inspecting the dandiset is treated as
        "does not qualify" and also yields ``False``.
    """
    if any(dandiset == int(skipped) for skipped in SKIP_DANDISETS):
        return False

    identifier = DANDI_ID.format(dandiset)
    try:
        url = parse_dandi_url(identifier)
        with url.navigate(strict=True) as (_client, remote, assets):
            # Check the version first so the asset listing is only walked
            # for dandisets that could qualify.
            if remote.version.identifier == 'draft':
                return False
            # Generator form stops at the first matching asset instead of
            # materialising the whole listing.
            return any(asset.path.endswith('nwb') for asset in assets)
    except Exception:
        # Best-effort probe: a dandiset that cannot be resolved or inspected
        # is skipped. KeyboardInterrupt / SystemExit are deliberately NOT
        # caught so the run can still be interrupted.
        return False


def _log(index: int, status: str) -> None:
    """Append one timestamped ``<index> - <status>`` line to ``LOG_TXT``."""
    with open(LOG_TXT, 'a') as lfile:
        lfile.write(f"{datetime.now().isoformat()} - {index:03d} - {status}\n")


def main():
    """Download every qualifying dandiset and log the outcome per identifier.

    Identifiers rejected by :func:`check_nwb` are logged as ``SKIP``;
    downloaded ones are logged as ``GET`` after the download returns.
    Errors raised by the download command propagate to the caller.
    """
    # NOTE(review): this iterates 0 .. MAX_DANDISET - 1, so MAX_DANDISET
    # itself is never visited -- confirm whether the bound is meant to be
    # inclusive.
    for i in trange(MAX_DANDISET):
        if not check_nwb(i):
            _log(i, 'SKIP')
            continue

        # ``download`` is a click command. Calling it directly runs it in
        # standalone mode, which exits the interpreter when the command
        # finishes and would end this loop after the first dandiset.
        # ``standalone_mode=False`` makes it return (or raise) instead.
        download.main(
            args=[
                DANDI_ID.format(i),
                '-o', str(OUT_DIR),
                '--existing', 'refresh',
                '--jobs', '24',
            ],
            standalone_mode=False,
        )
        _log(i, 'GET')