Commit b79368cd authored by Jonathan Schaeffer
Browse files

Evaluating PH5 archive

parent acb508a6
......@@ -53,38 +53,50 @@ def scan_volume(path):
logger.debug("Volume scanned in %s", datetime.now() - starttime)
return data
def _ph5_stations(path):
    """Return the station names declared in ``path``/master.ph5.

    Stations are the ``Das_g_*`` members of ``Experiment_g/Maps_g`` with the
    ``Das_g_`` prefix removed. When the file cannot be read, or declares no
    station, a single anonymous station ``[""]`` is returned so that the
    caller always has at least one station to attribute the size to.
    """
    master = os.path.join(path, "master.ph5")
    try:
        # Context manager: the HDF5 handle is released as soon as we are done.
        with h5py.File(master, 'r') as h5data:
            maps = h5data['Experiment_g']['Maps_g']
            logger.debug("Master PH5 stations: %s", maps)
            stations = [sta[6:] for sta in maps if sta.startswith('Das_g_')]
    except Exception as err:
        # Deliberately broad: the scan is best effort and must carry on
        # whatever goes wrong with this particular master.ph5.
        logger.error("Unable to read stations from %s (%s). "
                     "Let's assume there is one station", master, err)
        return [""]
    # An empty list would make the per-station division fail.
    return stations or [""]


def _ph5_total_size(path, network):
    """Return the cumulated size in bytes of every ``*ph5`` file under ``path``."""
    total = 0
    for dirpath, _dirnames, filenames in os.walk(path):
        for name in filenames:
            logger.debug("Scanning %s: file %s", network, name)
            if name.endswith('ph5'):
                # Join with dirpath (not the network root) so that files
                # sitting in sub-directories are found, and accumulate.
                total += os.path.getsize(os.path.join(dirpath, name))
    return total


def scan_ph5_volume(volpath):
    """
    Scan a volume of PH5 (node) archives and build size statistics.

    A directory holding node data has to be analysed differently from a
    regular volume: the disk usage of each network directory is measured
    and then split evenly between the stations found in its master.ph5.

    Every entry of ``volpath`` is treated as a network directory whose name
    carries the year from its third character on (``int(network[2:])``).
    Entries whose name does not fit are logged and skipped.

    :param volpath: path of the volume holding one directory per network.
    :return: list of dicts with the keys ``type``, ``year``, ``network``,
             ``station``, ``channel``, ``quality`` and ``size`` (integer
             share, in bytes, of the network total).
    """
    data = []
    volume = os.path.realpath(volpath)
    logger.debug("Volume %s", volume)
    starttime = datetime.now()
    try:
        # Same listing a plain `ls` gives: sorted, hidden entries left out.
        networks = sorted(entry for entry in os.listdir(volume)
                          if not entry.startswith('.'))
    except OSError as err:
        logger.error("Unable to list volume %s: %s", volume, err)
        return data
    for network in networks:
        path = os.path.join(volume, network)
        logger.debug("Scanned %s", network)
        try:
            year = int(network[2:])
        except ValueError:
            # No usable year in the directory name: skip this entry.
            logger.error("Unable to get year from path %s. Ignoring this one", path)
            continue
        # Looked up per network, so nothing leaks from the previous one.
        stations = _ph5_stations(path)
        total = _ph5_total_size(path, network)
        # Build the statistics by splitting the total size evenly.
        per_station_size = total // len(stations)
        for sta in stations:
            data.append({'type': 'ph5_validated', 'year': year, 'network': network, 'station': sta,
                         'channel': None, 'quality': None, 'size': per_station_size})
    logger.debug("Volume scanned in %s", datetime.now() - starttime)
    return data
def scan_volumes(volumes):
# volumes is a complex data type :
# List of dictionaries of 2 elements (path and type)
......@@ -96,8 +108,8 @@ def scan_volumes(volumes):
for volume in volumes:
logger.debug("Preparing scan of volume %s", volume['path'])
if 'path' in volume:
if 'type' in volume and volume['type'] == "node":
stats = scan_node_volume(volume['path'])
if 'type' in volume and volume['type'] == "ph5":
stats = scan_ph5_volume(volume['path'])
else:
stats = scan_volume(volume['path'])
# Add the type as an element of each statistic
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment