Commit 67d3a83f authored by Jonathan Schaeffer
Browse files

Tentative pour évaluer l'archive PH5

Reste à tester correctement
parent d604c009
......@@ -11,6 +11,9 @@ volumes: # list of directories to scan
- name: path2
path: /data/all/example
type: bud
- name: path3
path: /data/other
type: node
postgres:
host: postgres-server # Host serving postgres
port: 5432 # Postgres port
......
......@@ -10,6 +10,7 @@ from datetime import datetime, date, timedelta
import yaml
import psycopg2
import click
import h5py
from fdsnextender import FdsnExtender
from . import __version__
logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', level=logging.INFO)
......@@ -42,7 +43,7 @@ def scan_volume(path):
except ValueError:
logger.info("%s is probably not a normal path. Skip it.", path)
continue
if re.match('[2-9][0-9]{3}', path[0]):
if re.match('[1-9][0-9]{3}', path[0]):
data.append({'year': path[0], 'network': path[1], 'station': path[2],
'channel': channel, 'quality': quality, 'size': size})
else:
def scan_node_volume(path):
    """
    Build usage statistics for a directory holding node data (PH5 archive).

    Such a directory has to be analysed differently from a regular volume:
    the station list is read from ``master.ph5`` and the cumulated size of
    every ``ph5`` file found under ``path`` is split evenly between those
    stations, since the real per-station size is not known here.

    :param path: directory of the PH5 archive. Its last component is used as
        the network name, and everything after its first two characters must
        be the year (presumably names like ``XX2019`` -- to be confirmed).
    :returns: a list with one dict per station, with the keys ``type``,
        ``year``, ``network``, ``station``, ``channel``, ``quality`` and
        ``size``. The list is empty when the year cannot be read from the
        path or when ``master.ph5`` lists no station.

    Errors raised by h5py while opening or reading ``master.ph5`` are not
    caught here.
    """
    data = []
    # The year is taken from the directory name. Ugly, but should work...
    network = os.path.split(os.path.normpath(path))[1]
    try:
        year = int(network[2:])
    except ValueError:
        # Nothing sensible can be stored without a year: skip this volume.
        logger.error("Unable to get year from path %s. Ignoring this one", path)
        return data

    # Station names are the entries of Experiment_g/Maps_g named "Das_g_<station>".
    # The file is opened read-only and closed as soon as the names are extracted.
    das_prefix = 'Das_g_'
    with h5py.File(f"{path}/master.ph5", 'r') as h5data:
        stations = [sta[len(das_prefix):]
                    for sta in h5data['Experiment_g']['Maps_g']
                    if sta.startswith(das_prefix)]
    if not stations:
        # Avoid a division by zero below; there is nothing to report anyway.
        logger.warning("No station found in %s/master.ph5. Ignoring this one", path)
        return data

    # Cumulate the size of every ph5 file of the tree. The file name yielded by
    # os.walk is relative to dirpath, so it must be joined before calling getsize.
    total = 0
    for dirpath, _, filenames in os.walk(path):
        for filename in filenames:
            if filename.endswith('ph5'):
                total += os.path.getsize(os.path.join(dirpath, filename))

    # Make a statistic array with those stations, dividing the total size
    # equally between them.
    per_station_size = total / len(stations)
    for sta in stations:
        data.append({'type': 'node', 'year': year, 'network': network, 'station': sta,
                     'channel': None, 'quality': None, 'size': per_station_size})
    return data
def scan_volumes(volumes):
# volumes is a complex data type :
......@@ -71,11 +96,14 @@ def scan_volumes(volumes):
for volume in volumes:
logger.debug("Preparing scan of volume %s", volume['path'])
if 'path' in volume:
stats = scan_volume(volume['path'])
# On rajoute le type comme un élément de chaque statistique
if 'type' in volume:
for s in stats:
s['type'] = volume['type']
if 'type' in volume and volume['type'] == "node":
stats = scan_node_volume(volume['path'])
else:
stats = scan_volume(volume['path'])
# On rajoute le type comme un élément de chaque statistique
if 'type' in volume:
for s in stats:
s['type'] = volume['type']
if 'name' in volume:
for s in stats:
s['volume'] = volume['name']
......@@ -134,6 +162,8 @@ def cli(configfile, force_scan, dryrun, verbose, version):
statistics = scan_volumes(cfg['volumes'])
statistics.append(scan_node_volumes(cfg['node_volumes']))
# add the network_type (is the network permanent or not) to the statistic
# also insert the extended network code.
extender = FdsnExtender()
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment