From 76c8d086f9fc32cf47e6a900de56d93f3faab64b Mon Sep 17 00:00:00 2001 From: jonny Date: Tue, 24 Oct 2023 00:08:45 -0700 Subject: [PATCH 1/2] some updates --- scrape_dandi/__init__.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/scrape_dandi/__init__.py b/scrape_dandi/__init__.py index fb64942..3fdc4f5 100644 --- a/scrape_dandi/__init__.py +++ b/scrape_dandi/__init__.py @@ -11,11 +11,11 @@ OUT_DIR = Path('/mnt/seedbank/p2p/dandi/') #OUT_DIR = '.' LOG_TXT = 'log.txt' SKIP_DANDISETS = [ - '000108' # humongous 372 human light sheet imaging + 108 # humongous 372 human light sheet imaging ] DANDI_ID = 'DANDI:{:06d}' MAX_DANDISET = 683 - +JOBS = 64 def check_nwb(dandiset:int) -> bool: @@ -36,20 +36,26 @@ def check_nwb(dandiset:int) -> bool: def main(): for i in trange(MAX_DANDISET): - - if not check_nwb(i): + + if not check_nwb(i) or i in SKIP_DANDISETS: with open(LOG_TXT, 'a') as lfile: lfile.write(f"{datetime.now().isoformat()} - {i:03d} - SKIP\n") continue id = DANDI_ID.format(i) - download( - [ - id, - '-o', str(OUT_DIR), - '--existing', 'refresh', - '--jobs', '24' - ] - ) + try: + download( + [ + id, + '-o', str(OUT_DIR), + '--existing', 'refresh', + '--jobs', str(JOBS) + ] + ) + except Exception as e: + with open(LOG_TXT, 'a') as lfile: + lfile.write(f"{datetime.now().isoformat()} - {i:03d} - ERROR\n") + lfile.write(str(e)) + continue with open(LOG_TXT, 'a') as lfile: lfile.write(f"{datetime.now().isoformat()} - {i:03d} - GET\n") From 05240668a83b9358e415ffbe5f16649bc8000454 Mon Sep 17 00:00:00 2001 From: jonny Date: Thu, 26 Oct 2023 20:11:01 -0700 Subject: [PATCH 2/2] fix click probs --- scrape_dandi/__init__.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/scrape_dandi/__init__.py b/scrape_dandi/__init__.py index 3fdc4f5..d930b04 100644 --- a/scrape_dandi/__init__.py +++ b/scrape_dandi/__init__.py @@ -1,7 +1,8 @@ from pathlib import Path from tqdm import trange from datetime import datetime - +import sys +import click from dandi.consts import ZARR_EXTENSIONS, metadata_all_fields from dandi.dandiarchive import DandisetURL, _dandi_url_parser, parse_dandi_url from dandi.cli.cmd_download import download @@ -36,28 +37,31 @@ def check_nwb(dandiset:int) -> bool: def main(): for i in trange(MAX_DANDISET): - if not check_nwb(i) or i in SKIP_DANDISETS: with open(LOG_TXT, 'a') as lfile: lfile.write(f"{datetime.now().isoformat()} - {i:03d} - SKIP\n") continue id = DANDI_ID.format(i) try: - download( - [ + download.main([ + id, '-o', str(OUT_DIR), '--existing', 'refresh', '--jobs', str(JOBS) - ] + ], standalone_mode=False ) + print('\nafterdl') + with open(LOG_TXT, 'a') as lfile: + lfile.write(f"{datetime.now().isoformat()} - {i:03d} - GET\n") + except KeyboardInterrupt: + sys.exit(1) except Exception as e: + print('\nexception') with open(LOG_TXT, 'a') as lfile: lfile.write(f"{datetime.now().isoformat()} - {i:03d} - ERROR\n") lfile.write(str(e)) - continue - with open(LOG_TXT, 'a') as lfile: - lfile.write(f"{datetime.now().isoformat()} - {i:03d} - GET\n") + # continue