NEANIAS Gitlab

Commit 37f62428 authored by Carlos H. Brandt's avatar Carlos H. Brandt
Browse files

Get data sizes writing size tables

parent bc0e1630
......@@ -14,6 +14,7 @@ information of size, for instance, is kept. Size values are given in Megabytes.
* PDS stands for Planetary Data System
"""
import os
import itertools
import pandas
......@@ -88,6 +89,8 @@ def build_urls(template, **kwargs):
yield dict(zip(keys, instance))
urls = [ template.format(**d) for d in list(product_dict(**kwargs)) ]
# TODO:
# - remove repeated '/' in case a kwarg is an empty string list ([""])
return urls
def read_urls(urls, match=None):
......@@ -97,14 +100,31 @@ def read_urls(urls, match=None):
tabs.append(tab)
return tabs
def write_tabs(url, tab, outdir='dset_tabs'):
    """Write `tab` as 'indexdf.csv' in a directory mirroring the path of `url`.

    The path component of `url` is appended to `outdir`; the resulting
    directory is created when missing (parents included) and `tab` is
    written there through its ``to_csv`` method.

    Parameters
    ----------
    url : str
        URL the table was read from; only its path component is used.
    tab : object
        Table exposing ``to_csv(path)`` (e.g. a pandas DataFrame).
    outdir : str
        Root directory under which the URL path is reproduced.

    Returns
    -------
    str
        Path of the CSV file that was written.
    """
    from urllib.parse import urlparse
    import pathlib

    # Drop leading slashes so the URL path is always joined *below* `outdir`.
    # Plain string concatenation glued the two together whenever the parsed
    # path had no leading '/' (e.g. a URL given without a scheme).
    relative_path = urlparse(url).path.lstrip('/')
    target_dir = pathlib.Path(outdir) / relative_path
    print(target_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    filename = target_dir / 'indexdf.csv'
    tab.to_csv(filename)
    return str(filename)
def run():
    """Read the index tables of a few MRO/CTX volumes and write them to disk.

    URLs are generated from `template` for ``i`` in ``range(5)`` (volumes
    ``mrox_0000`` to ``mrox_0004``), passed to `read_urls` together with
    `match`, and every table that is not ``None`` is saved with `write_tabs`.
    """
    template = 'https://pds-imaging.jpl.nasa.gov/data/mro/ctx/mrox_{i:04d}/data/'
    # NOTE(review): presumably selects the image entries in each listing --
    # confirm against `read_urls`.
    match = 'IMG'

    # A single URL list is built; an earlier `range(10)` call was overwritten
    # by this one before ever being used.
    urls = build_urls(template, i=range(5))
    tabs = read_urls(urls, match)
    print(tabs)

    for url, tab in zip(urls, tabs):
        if tab is None:
            print("URL {} has no table".format(url))
            continue
        print("Writing tab from {}".format(url))
        write_tabs(url, tab)


if __name__ == '__main__':
    run()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment