# Git unified diff for scrape_dandi/__init__.py (index fb64942..d930b04).
#
# NOTE: the line breaks of this patch have been collapsed into single spaces,
# so it cannot be applied as-is; the original line structure has to be
# restored first. The patch text below is left untouched.
#
# What the three hunks change:
#   1. Imports: adds `import sys` and `import click`.
#   2. Constants: the SKIP_DANDISETS entry changes from the string '000108'
#      to the int 108, and a new constant JOBS = 64 is added.
#   3. main(): the loop now also skips any `i` found in SKIP_DANDISETS (an
#      int membership test, matching the new int entry), replaces the direct
#      `download([...])` call with
#      `download.main([...], standalone_mode=False)`, passes `str(JOBS)`
#      instead of the hard-coded '24' for `--jobs`, and wraps the call in
#      try/except: success appends a "GET" line to LOG_TXT,
#      KeyboardInterrupt calls sys.exit(1), and any other Exception appends
#      an "ERROR" line followed by str(e).
#
# NOTE(review): `click` is imported but not referenced anywhere in the
#   visible hunks.
# NOTE(review): with standalone_mode=False, Click is documented to re-raise
#   Ctrl-C as click.exceptions.Abort rather than KeyboardInterrupt. If so,
#   the `except KeyboardInterrupt` branch will not fire for interrupts inside
#   the download, and `except Exception` will log ERROR and move on to the
#   next dandiset instead of exiting -- confirm against the Click version in
#   use.
# NOTE(review): `lfile.write(str(e))` has no trailing newline, so the next
#   log entry is appended to the same line as the error text.
# NOTE(review): `print('\nafterdl')` and `print('\nexception')` look like
#   leftover debug output; `id` shadows the builtin; the trailing
#   `# continue` is commented-out code.
diff --git a/scrape_dandi/__init__.py b/scrape_dandi/__init__.py index fb64942..d930b04 100644 --- a/scrape_dandi/__init__.py +++ b/scrape_dandi/__init__.py @@ -1,7 +1,8 @@ from pathlib import Path from tqdm import trange from datetime import datetime - +import sys +import click from dandi.consts import ZARR_EXTENSIONS, metadata_all_fields from dandi.dandiarchive import DandisetURL, _dandi_url_parser, parse_dandi_url from dandi.cli.cmd_download import download @@ -11,11 +12,11 @@ OUT_DIR = Path('/mnt/seedbank/p2p/dandi/') #OUT_DIR = '.' LOG_TXT = 'log.txt' SKIP_DANDISETS = [ - '000108' # humongous 372 human light sheet imaging + 108 # humongous 372 human light sheet imaging ] DANDI_ID = 'DANDI:{:06d}' MAX_DANDISET = 683 - +JOBS = 64 def check_nwb(dandiset:int) -> bool: @@ -36,22 +37,31 @@ def check_nwb(dandiset:int) -> bool: def main(): for i in trange(MAX_DANDISET): - - if not check_nwb(i): + if not check_nwb(i) or i in SKIP_DANDISETS: with open(LOG_TXT, 'a') as lfile: lfile.write(f"{datetime.now().isoformat()} - {i:03d} - SKIP\n") continue id = DANDI_ID.format(i) - download( - [ - id, - '-o', str(OUT_DIR), - '--existing', 'refresh', - '--jobs', '24' - ] - ) - with open(LOG_TXT, 'a') as lfile: - lfile.write(f"{datetime.now().isoformat()} - {i:03d} - GET\n") + try: + download.main([ + + id, + '-o', str(OUT_DIR), + '--existing', 'refresh', + '--jobs', str(JOBS) + ], standalone_mode=False + ) + print('\nafterdl') + with open(LOG_TXT, 'a') as lfile: + lfile.write(f"{datetime.now().isoformat()} - {i:03d} - GET\n") + except KeyboardInterrupt: + sys.exit(1) + except Exception as e: + print('\nexception') + with open(LOG_TXT, 'a') as lfile: + lfile.write(f"{datetime.now().isoformat()} - {i:03d} - ERROR\n") + lfile.write(str(e)) + # continue