Merge branch 'main' of https://git.jon-e.net/jonny/scrape-dandi
This commit is contained in:
commit
e89e8cf913
1 changed file with 25 additions and 15 deletions
|
@ -1,7 +1,8 @@
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from tqdm import trange
|
from tqdm import trange
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
import sys
|
||||||
|
import click
|
||||||
from dandi.consts import ZARR_EXTENSIONS, metadata_all_fields
|
from dandi.consts import ZARR_EXTENSIONS, metadata_all_fields
|
||||||
from dandi.dandiarchive import DandisetURL, _dandi_url_parser, parse_dandi_url
|
from dandi.dandiarchive import DandisetURL, _dandi_url_parser, parse_dandi_url
|
||||||
from dandi.cli.cmd_download import download
|
from dandi.cli.cmd_download import download
|
||||||
|
@ -11,11 +12,11 @@ OUT_DIR = Path('/mnt/seedbank/p2p/dandi/')
|
||||||
#OUT_DIR = '.'
|
#OUT_DIR = '.'
|
||||||
LOG_TXT = 'log.txt'
|
LOG_TXT = 'log.txt'
|
||||||
SKIP_DANDISETS = [
|
SKIP_DANDISETS = [
|
||||||
'000108' # humongous 372 human light sheet imaging
|
108 # humongous 372 human light sheet imaging
|
||||||
]
|
]
|
||||||
DANDI_ID = 'DANDI:{:06d}'
|
DANDI_ID = 'DANDI:{:06d}'
|
||||||
MAX_DANDISET = 683
|
MAX_DANDISET = 683
|
||||||
|
JOBS = 64
|
||||||
|
|
||||||
|
|
||||||
def check_nwb(dandiset:int) -> bool:
|
def check_nwb(dandiset:int) -> bool:
|
||||||
|
@ -36,22 +37,31 @@ def check_nwb(dandiset:int) -> bool:
|
||||||
def main():
|
def main():
|
||||||
|
|
||||||
for i in trange(MAX_DANDISET):
|
for i in trange(MAX_DANDISET):
|
||||||
|
if not check_nwb(i) or i in SKIP_DANDISETS:
|
||||||
if not check_nwb(i):
|
|
||||||
with open(LOG_TXT, 'a') as lfile:
|
with open(LOG_TXT, 'a') as lfile:
|
||||||
lfile.write(f"{datetime.now().isoformat()} - {i:03d} - SKIP\n")
|
lfile.write(f"{datetime.now().isoformat()} - {i:03d} - SKIP\n")
|
||||||
continue
|
continue
|
||||||
id = DANDI_ID.format(i)
|
id = DANDI_ID.format(i)
|
||||||
download(
|
try:
|
||||||
[
|
download.main([
|
||||||
id,
|
|
||||||
'-o', str(OUT_DIR),
|
id,
|
||||||
'--existing', 'refresh',
|
'-o', str(OUT_DIR),
|
||||||
'--jobs', '24'
|
'--existing', 'refresh',
|
||||||
]
|
'--jobs', str(JOBS)
|
||||||
)
|
], standalone_mode=False
|
||||||
with open(LOG_TXT, 'a') as lfile:
|
)
|
||||||
lfile.write(f"{datetime.now().isoformat()} - {i:03d} - GET\n")
|
print('\nafterdl')
|
||||||
|
with open(LOG_TXT, 'a') as lfile:
|
||||||
|
lfile.write(f"{datetime.now().isoformat()} - {i:03d} - GET\n")
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
sys.exit(1)
|
||||||
|
except Exception as e:
|
||||||
|
print('\nexception')
|
||||||
|
with open(LOG_TXT, 'a') as lfile:
|
||||||
|
lfile.write(f"{datetime.now().isoformat()} - {i:03d} - ERROR\n")
|
||||||
|
lfile.write(str(e))
|
||||||
|
# continue
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue