Commit d604c009 authored by Jonathan Schaeffer
Browse files

code simplification, version management

parent ecf5694e
......@@ -6,13 +6,12 @@ import sys
import io
import subprocess
import re
from time import gmtime, strftime
from datetime import datetime, date, timedelta
import yaml
import psycopg2
import click
from fdsnextender import FdsnExtender
from . import __version__
logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', level=logging.INFO)
logger = logging.getLogger(__name__)
......@@ -27,7 +26,6 @@ def scan_volume(path):
data = []
volume = os.path.realpath(path)+'/'
logger.debug("Volume %s", volume)
# TODO: make the depth level (2) an option
starttime =
proc = subprocess.Popen(["du", "--exclude", ".snapshot", "-b", "-d4", volume], stdout=subprocess.PIPE)
for l in io.TextIOWrapper(proc.stdout, encoding='utf-8'):
......@@ -41,9 +39,9 @@ def scan_volume(path):
logger.debug("path: %s, size: %s", path, size)
(channel, quality) = path[3].split('.')
except ValueError as e:
logger.warning("Probably not a normal path. Skip it")
except ValueError:"%s is probably not a normal path. Skip it.", path)
if re.match('[2-9][0-9]{3}', path[0]):
data.append({'year': path[0], 'network': path[1], 'station': path[2],
'channel': channel, 'quality': quality, 'size': size})
......@@ -54,6 +52,13 @@ def scan_volume(path):
logger.debug("Volume scanned in %s", - starttime)
return data
def scan_node_volume(path):
    """
    Placeholder for scanning a directory that contains node data.

    A directory containing node data has to be analysed differently:
    - at a minimum, run a `du` on the directory and store the information
      for the network only;
    - otherwise, analyse the ph5 volumes, although it is not clear that
      this is really needed.

    Not implemented yet: the function currently does nothing and returns None.
    """
def scan_volumes(volumes):
# volumes is a complex data type :
......@@ -85,12 +90,14 @@ def scan_volumes(volumes):
@click.option("--version", flag_value=True, default=False, help="Print version and exit")
@click.option('--config-file', 'configfile', type=click.File(), help='Configuration file path', envvar='CONFIG_FILE', show_default=True,
@click.option('--force-scan', flag_value=True, default=False, help='Force scanning of the archive')
@click.option('--dryrun', flag_value=True, default=False, help="Do not send metrics to database")
@click.option("--verbose", flag_value=True, default=False, help="Verbose mode")
def cli(configfile, force_scan, dryrun, verbose):
def cli(configfile, force_scan, dryrun, verbose, version):
Command line interface. Serves as the main entry point.
......@@ -99,11 +106,12 @@ def cli(configfile, force_scan, dryrun, verbose):"Starting")
cfg = yaml.load(configfile, Loader=yaml.SafeLoader)
print(f"Error reading file {configfile}")
except yaml.YAMLError as err:
logger.error("Could not parse %s", configfile)
# At this point we ensure that configuration is sane.
statistics = []
today ="%Y-%m-%d")
......@@ -114,9 +122,10 @@ def cli(configfile, force_scan, dryrun, verbose):
cur = conn.cursor()
cur.execute('select distinct date from dataholdings order by date desc limit 1;')
last_stat_date = cur.fetchone()[0]"Last report: %s", last_stat_date)
if - last_stat_date > timedelta(days=(cfg['cache_ttl'])):"Cache is old, let's scan volumes")"Last report is old enough. Let's get the job done.")
"Last data report made at %s. Younger than %s. Don't scan",
......@@ -142,18 +151,10 @@ def cli(configfile, force_scan, dryrun, verbose):
stat['date'] = today
# Open dump file and write the stats.
with open(os.path.split([0]+"/data.yaml", 'w') as outfile:
yaml.dump({'date': today,
'volumes': cfg['volumes'],
outfile, default_flow_style=False)
logger.error("Error writing data to cache")
if dryrun:"Dryrun mode, exit")"Dryrun mode, dump stats and exit")
for stat in statistics:
# Write to postgres database
if 'postgres' in cfg:
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment