NEANIAS Gitlab
Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Carlos H. Brandt
DM
Commits
37f62428
Commit
37f62428
authored
Apr 23, 2020
by
Carlos H. Brandt
Browse files
Get data sizes writing size tables
parent
bc0e1630
Changes
1
Hide whitespace changes
Inline
Side-by-side
datasets/code/get_datasets_sizes.py
View file @
37f62428
...
...
@@ -14,6 +14,7 @@ information of size, for instance, is kept. Size values are given in Megabytes.
* PDS stands for Planetary Data Systems
"""
import
os
import
itertools
import
pandas
...
...
@@ -88,6 +89,8 @@ def build_urls(template, **kwargs):
yield
dict
(
zip
(
keys
,
instance
))
urls
=
[
template
.
format
(
**
d
)
for
d
in
list
(
product_dict
(
**
kwargs
))
]
# TODO:
# - remove repeated '/' in case a kwarg is an empty string list ([""])
return
urls
def
read_urls
(
urls
,
match
=
None
):
...
...
@@ -97,14 +100,31 @@ def read_urls(urls, match=None):
tabs
.
append
(
tab
)
return
tabs
def write_tabs(url, tab, outdir='dset_tabs'):
    """
    Write `tab` as 'indexdf.csv' in a directory tree mirroring the path of `url`.

    The path component of `url` is recreated under `outdir` (missing
    directories are created) and the table is written there through its
    `to_csv` method.

    Input:
     - url : string
        URL the table was read from; only its path component is used
     - tab : object providing `to_csv(filename)`
        presumably a pandas DataFrame -- confirm against `read_urls`
     - outdir : string
        Root directory for the output tree

    Output:
     - Path (string) of the CSV file written
    """
    from urllib.parse import urlparse
    import pathlib

    # Strip leading '/' before joining: plain string concatenation glued a
    # path without a leading slash (e.g. a URL given without scheme) onto the
    # directory *name* ('dset_tabsexample.org/...') instead of nesting it.
    url_path = urlparse(url).path.lstrip('/')
    path_ = pathlib.Path(outdir, url_path)
    print(path_)

    path_.mkdir(parents=True, exist_ok=True)

    filename = str(path_ / 'indexdf.csv')
    tab.to_csv(filename)
    return filename
def run():
    """
    Read the tables found at a set of MRO/CTX data URLs and write them to disk.

    The URLs are generated by `build_urls` from a template over `i=range(5)`
    (the '{i:04d}' field is the zero-padded index). They are handed to
    `read_urls` together with the match string 'IMG', and every table that
    comes back is saved with `write_tabs`. Entries that are None are reported
    and skipped.

    NOTE(review): pairing `urls` with `tabs` through `zip` assumes `read_urls`
    returns exactly one entry per URL, in the same order -- confirm.
    """
    template = 'https://pds-imaging.jpl.nasa.gov/data/mro/ctx/mrox_{i:04d}/data/'
    match = 'IMG'

    # A single build_urls call: the earlier range(10) result was overwritten
    # before ever being used, so only the effective range(5) call is kept.
    urls = build_urls(template, i=range(5))

    tabs = read_urls(urls, match)
    print(tabs)

    for url, tab in zip(urls, tabs):
        if tab is None:
            print("URL {} has no table".format(url))
            continue
        print("Writing tab from {}".format(url))
        write_tabs(url, tab)
# Script entry point: call run() only when the file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    run()
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment