From 76c8d086f9fc32cf47e6a900de56d93f3faab64b Mon Sep 17 00:00:00 2001
From: jonny
Date: Tue, 24 Oct 2023 00:08:45 -0700
Subject: [PATCH] some updates

---
 scrape_dandi/__init__.py | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/scrape_dandi/__init__.py b/scrape_dandi/__init__.py
index fb64942..3fdc4f5 100644
--- a/scrape_dandi/__init__.py
+++ b/scrape_dandi/__init__.py
@@ -11,11 +11,11 @@ OUT_DIR = Path('/mnt/seedbank/p2p/dandi/')
 #OUT_DIR = '.'
 LOG_TXT = 'log.txt'
 SKIP_DANDISETS = [
-    '000108' # humongous 372 human light sheet imaging
+    108 # humongous 372 human light sheet imaging
 ]
 DANDI_ID = 'DANDI:{:06d}'
 MAX_DANDISET = 683
-
+JOBS = 64
 
 
 def check_nwb(dandiset:int) -> bool:
@@ -36,20 +36,26 @@ def check_nwb(dandiset:int) -> bool:
 
 def main():
     for i in trange(MAX_DANDISET):
-
-        if not check_nwb(i):
+        
+        if not check_nwb(i) or i in SKIP_DANDISETS:
             with open(LOG_TXT, 'a') as lfile:
                 lfile.write(f"{datetime.now().isoformat()} - {i:03d} - SKIP\n")
             continue
         id = DANDI_ID.format(i)
-        download(
-            [
-                id,
-                '-o', str(OUT_DIR),
-                '--existing', 'refresh',
-                '--jobs', '24'
-            ]
-        )
+        try:
+            download(
+                [
+                    id,
+                    '-o', str(OUT_DIR),
+                    '--existing', 'refresh',
+                    '--jobs', str(JOBS)
+                ]
+            )
+        except Exception as e:
+            with open(LOG_TXT, 'a') as lfile:
+                lfile.write(f"{datetime.now().isoformat()} - {i:03d} - ERROR\n")
+                lfile.write(str(e))
+            continue
 
         with open(LOG_TXT, 'a') as lfile:
             lfile.write(f"{datetime.now().isoformat()} - {i:03d} - GET\n")