59 lines
1.4 KiB
Python
59 lines
1.4 KiB
Python
|
from pathlib import Path
|
||
|
from tqdm import trange
|
||
|
from datetime import datetime
|
||
|
|
||
|
from dandi.consts import ZARR_EXTENSIONS, metadata_all_fields
|
||
|
from dandi.dandiarchive import DandisetURL, _dandi_url_parser, parse_dandi_url
|
||
|
from dandi.cli.cmd_download import download
|
||
|
|
||
|
|
||
|
# Destination directory passed to the downloader through its ``-o`` option.
# Alternative location used previously, kept for reference:
#   OUT_DIR = Path('/mnt/seedbank/p2p/dandi/')
OUT_DIR = '.'

# Text file that status lines are appended to (opened in append mode).
LOG_TXT = 'log.txt'

# Zero-padded dandiset numbers intended to be left out of the download run.
SKIP_DANDISETS = [
    '000108',  # humongous 372 human light sheet imaging
]

# Template that turns an integer into a ``DANDI:NNNNNN`` identifier string.
DANDI_ID = 'DANDI:{:06d}'

# Bound handed to ``trange`` when scanning dandiset numbers.
MAX_DANDISET = 683
|
||
|
|
||
|
|
||
|
|
||
|
def check_nwb(dandiset: int) -> bool:
    """Decide whether a dandiset is worth downloading.

    A dandiset qualifies when it is not listed in ``SKIP_DANDISETS``, the
    version the archive resolves for it is not the ``draft`` version, and at
    least one of its assets has a path ending in ``.nwb``.

    Args:
        dandiset: Integer dandiset number (e.g. ``108`` for ``DANDI:000108``).

    Returns:
        ``True`` if the dandiset qualifies, ``False`` otherwise. Any ordinary
        error raised while resolving or querying the dandiset is treated as
        "does not qualify" and also yields ``False``.
    """
    # SKIP_DANDISETS stores zero-padded strings, so compare in that form
    # rather than hard-coding individual numbers here.
    if f"{dandiset:06d}" in SKIP_DANDISETS:
        return False

    dandi_id = DANDI_ID.format(dandiset)
    try:
        url = parse_dandi_url(dandi_id)
        # Distinct names for the navigated objects so the integer parameter
        # is not rebound to the remote dandiset object.
        with url.navigate(strict=True) as (_client, remote, assets):
            # Check the cheap condition first: a draft never qualifies, so
            # there is no need to walk its assets at all.
            if remote.version.identifier == 'draft':
                return False
            # Generator (not a list) so the scan stops at the first NWB file
            # instead of fetching every asset of the dandiset.
            return any(asset.path.endswith('.nwb') for asset in assets)
    except Exception:
        # Best-effort check: missing dandisets, network failures, etc. just
        # mean "skip". KeyboardInterrupt/SystemExit are deliberately NOT
        # caught so the run can still be interrupted.
        return False
|
||
|
|
||
|
|
||
|
def _log_status(index: int, status: str) -> None:
    """Append one ``<ISO timestamp> - <index> - <status>`` line to LOG_TXT."""
    with open(LOG_TXT, 'a') as lfile:
        lfile.write(f"{datetime.now().isoformat()} - {index:03d} - {status}\n")


def main():
    """Scan dandiset numbers and download every one that passes check_nwb.

    For each number produced by ``trange(MAX_DANDISET)`` a line is appended
    to ``LOG_TXT``: ``SKIP`` when ``check_nwb`` rejects the dandiset, ``GET``
    after its download call has returned.

    Raises:
        Exception: errors raised by the download command propagate to the
            caller (they are no longer converted into ``sys.exit``).
    """
    # NOTE(review): trange(MAX_DANDISET) yields 0 .. MAX_DANDISET - 1, so the
    # value MAX_DANDISET itself is never attempted — confirm whether the bound
    # is meant to be inclusive.
    for i in trange(MAX_DANDISET):
        if not check_nwb(i):
            _log_status(i, 'SKIP')
            continue

        dandi_id = DANDI_ID.format(i)
        # ``download`` is invoked CLI-style with an argument list, i.e. as a
        # click command. In click's default standalone mode such a call ends
        # with sys.exit(), which would terminate this loop after the first
        # download and skip the GET log line. standalone_mode=False makes the
        # call return normally instead.
        download(
            [
                dandi_id,
                '-o', str(OUT_DIR),
                '--existing', 'refresh',
                '--jobs', '24',
            ],
            standalone_mode=False,
        )
        _log_status(i, 'GET')
|
||
|
|
||
|
|
||
|
|
||
|
|