2023-10-24 06:57:44 +00:00
|
|
|
from pathlib import Path
|
|
|
|
from tqdm import trange
|
|
|
|
from datetime import datetime
|
|
|
|
|
|
|
|
from dandi.consts import ZARR_EXTENSIONS, metadata_all_fields
|
|
|
|
from dandi.dandiarchive import DandisetURL, _dandi_url_parser, parse_dandi_url
|
|
|
|
from dandi.cli.cmd_download import download
|
|
|
|
|
|
|
|
|
2023-10-24 06:58:07 +00:00
|
|
|
# Destination directory handed to the download command via ``-o``.
# (Alternative kept from the original for local runs: OUT_DIR = '.')
OUT_DIR = Path('/mnt/seedbank/p2p/dandi/')

# File that receives one status line per dandiset processed by ``main``.
LOG_TXT = 'log.txt'

# Dandisets that must never be downloaded, as zero-padded six-digit strings.
SKIP_DANDISETS = [
    '000108',  # humongous 372 human light sheet imaging
]

# Template that turns an integer into a ``DANDI:NNNNNN`` identifier.
DANDI_ID = 'DANDI:{:06d}'

# Bound of the integer range that ``main`` iterates over.
MAX_DANDISET = 683
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def check_nwb(dandiset: int) -> bool:
    """Decide whether a dandiset should be downloaded.

    A dandiset qualifies when it is not listed in ``SKIP_DANDISETS``, the
    version it resolves to is not identified as ``'draft'``, and at least one
    of its asset paths ends with ``'nwb'``.

    Args:
        dandiset: Integer dandiset number, e.g. ``108`` for ``DANDI:000108``.

    Returns:
        ``True`` if the dandiset qualifies. ``False`` otherwise, including
        when parsing the identifier or navigating the dandiset raises an
        exception (best-effort: such dandisets are simply skipped).
    """
    # Consult the module-level skip list (zero-padded strings) rather than
    # hard-coding a single dandiset number here.
    if f'{dandiset:06d}' in SKIP_DANDISETS:
        return False

    dandi_id = DANDI_ID.format(dandiset)
    try:
        url = parse_dandi_url(dandi_id)
        with url.navigate(strict=True) as (_client, remote_dandiset, assets):
            # Check the version first so the assets are not iterated at all
            # when the answer is already known to be False.
            if remote_dandiset.version.identifier == 'draft':
                return False
            # Generator form lets any() stop at the first matching asset.
            return any(asset.path.endswith('nwb') for asset in assets)
    except Exception:
        # Deliberately best-effort, but no longer a bare ``except:`` so that
        # KeyboardInterrupt / SystemExit still stop the script.
        return False
|
|
|
|
|
|
|
|
|
|
|
|
def _log_status(dandiset: int, status: str) -> None:
    """Append a ``<iso timestamp> - <dandiset> - <status>`` line to ``LOG_TXT``."""
    with open(LOG_TXT, 'a') as lfile:
        lfile.write(f"{datetime.now().isoformat()} - {dandiset:03d} - {status}\n")


def main():
    """Download every qualifying dandiset into ``OUT_DIR`` and log the outcome.

    Iterates over ``trange(MAX_DANDISET)``. Each number for which
    ``check_nwb`` returns ``False`` is logged as ``SKIP``; every other number
    is downloaded through the dandi ``download`` command and then logged as
    ``GET``.

    Raises:
        Whatever the ``download`` command raises; a failed download is not
        caught here and stops the run.
    """
    # NOTE(review): trange(MAX_DANDISET) covers 0 .. MAX_DANDISET - 1. If
    # MAX_DANDISET is meant to be the highest existing dandiset number, the
    # last one is never visited - confirm the intended bound.
    for i in trange(MAX_DANDISET):
        if not check_nwb(i):
            _log_status(i, 'SKIP')
            continue

        # ``download`` is invoked as a click command with an argv-style list.
        # In click's default standalone mode the command calls sys.exit()
        # when it finishes, which would end this loop after the first
        # download and skip the GET log line. standalone_mode=False makes
        # the call return (and lets errors propagate as exceptions) instead.
        download(
            [
                DANDI_ID.format(i),
                '-o', str(OUT_DIR),
                '--existing', 'refresh',
                '--jobs', '24',
            ],
            standalone_mode=False,
        )
        _log_status(i, 'GET')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|