Commit 90b926a9 authored by Jerome Touvier's avatar Jerome Touvier Committed by Jonathan Schaeffer
Browse files

gros commit

parent 5ae287bd
**/logs
**/__pycache__
database_config.ini
......@@ -5,7 +5,7 @@ RUN pip install --no-cache-dir -r /requirements.txt
RUN pip install --no-cache-dir gunicorn
WORKDIR /app
COPY start*.py log_init.py ./
COPY start*.py ./
COPY apps ./apps/
COPY templates ./templates/
COPY static ./static/
......
import argparse
import configparser
import json
import logging
import os
......@@ -14,6 +13,7 @@ levels = [logging.CRITICAL, logging.ERROR, logging.WARNING, logging.INFO, loggin
## valid request parameters
url_keys = ("network", "station", "channel", "location", "net", "sta", "cha", "loc")
def request_parser(params, url):
for pairs in re.findall(r"[\w]+=[\w?*-]+", url):
logging.debug(pairs)
......@@ -36,7 +36,7 @@ def records_to_dictlist(data):
def is_like_or_equal(params, key):
""" Built the condition for the specified key in the "where" clause taking into account lists or wilcards """
""" Builds the condition for the specified key in the "where" clause taking into account lists or wildcards. """
subquery = list()
for param in params[key].split(","):
......@@ -47,7 +47,7 @@ def is_like_or_equal(params, key):
def sql_request(params):
""" Built the PostgreSQL request."""
""" Builds the PostgreSQL request."""
select = f"""SELECT DISTINCT network, s.station, location, channel FROM networks AS n, station AS s, channel AS c WHERE n.network_id = s.network_id AND s.station_id = c.station_id AND ({is_like_or_equal(params, "network")}) AND ({is_like_or_equal(params, "station")}) AND ({is_like_or_equal(params, "channel")}) AND ({is_like_or_equal(params, "location")})"""
......@@ -63,7 +63,8 @@ def collect_data(params):
conn = None
try:
logging.debug("Try to connect to the RESIF database.")
conn = psycopg2.connect(os.getenv('PG_DBURI')) # connect to the RESIF database using environment variable
# connect to the RESIF database using environment variable
conn = psycopg2.connect(os.getenv("PG_DBURI"))
cursor = conn.cursor() # cursor to execute SQL command
logging.debug(conn.get_dsn_parameters())
logging.debug(f"Postgres version : {conn.server_version}")
......@@ -91,7 +92,7 @@ def extend(args=None, path=None):
## parameters parsing ##
parser = argparse.ArgumentParser(
description=(
"Wilcards extender. Returns extended name of each channel containing wilcards (? and *) as atomic quadruplet (network, station, location, channel). Arguments can be provided individualy with the n(etwork), s(tation), l(ocation), c(hannel) parameters or directly parsed from an URL."
"Wildcards extender. Returns extended name of each channel containing wildcards (? and *) as atomic quadruplet (network, station, location, channel). Arguments can be provided individualy with the n(etwork), s(tation), l(ocation), c(hannel) parameters or directly parsed from an URL."
)
)
parser.add_argument(
......
import os
# global constants
FROM_CLIENT = False
FROM_CLIENT = True
FDSN_CLIENT = "RESIF"
DATA_MOUNT_POINT = os.getenv("DATADIR")
USER_AGENT_TIMESERIES = "resifws-timeseries"
......@@ -75,7 +76,7 @@ class Error:
This could be due to a temporary service outage, an invalid FDSN service address,\n\
an inactive internet connection or a blocking firewall rule."
OK_CONNECTION = "Connection OK. "
NO_DATA = "Your query doesn't match any data available."
NODATA = "Your query doesn't match any available data."
TIMEOUT = f"Your query exceeds timeout ({TIMEOUT} seconds)."
MISSING = "Missing parameter : "
BAD_VAL = " Invalid value: "
......@@ -114,7 +115,7 @@ class Error:
INVALID_OUTPUT_PARAM = (
"The option 'out(put)' is no longer valid. Use 'format' instead."
)
NO_WILCARDS = "Wilcards or lists are allowed only with plot or mseed output options (Invalid value for: "
NO_WILDCARDS = "Wildcards or lists are allowed only with plot or mseed output options (Invalid value for: "
NO_SELECTION = "Request contains no selections."
......
......@@ -75,10 +75,10 @@ Par exemple, pour le code des canaux : channel=EH?,BHZ
#### Détails sur la nomenclature des codes
- NETWORK = 1 à 2 caractères alphanumériques. Un groupe de points de mesures.
- STATION = 1 à 5 caractères alphanumériques. Un site de mesure dans un réseau.
- CHANNEL = 3 caractères qui désignent : la fréquence d'échantillonnage et la bande de fréquence du capteur, le type de l'instrument, l'orientation physique de la composante.
- LOCATION = 2 caractères qui permettent de distinguer plusieurs flux de données d'un même canal
- NETWORK : 1 à 2 caractères alphanumériques. Un groupe de points de mesures.
- STATION : 1 à 5 caractères alphanumériques. Un site de mesure dans un réseau.
- CHANNEL : 3 caractères alphanumériques. Le premier caractère indique la bande de fréquence du capteur, le second le type de l'instrument et le troisième l'orientation physique.
- LOCATION : 2 caractères alphanumériques. Ils permettent de distinguer plusieurs flux de données d'un même canal.
### Formats autorisés pour l'intervalle de temps
La définition de l'intervalle de temps peut prendre différentes formes :
......
......@@ -55,7 +55,7 @@ This service provides access to the time series data of the RESIF seismic networ
## Detailed descriptions of each query parameter
### Station codes details
### Station code details
The four parameters (network, station, location, channel) determine channels of interest.
| Parameters | Examples | Discussion |
......
......@@ -69,11 +69,11 @@ def check_parameters(params):
return error_param(params, Error.OUTPUT_TIMESERIES + str(params["format"]))
params["format"] = params["format"].lower()
# wilcards or list are allowed only with plot and mseed output options
# wildcards or list are allowed only with plot and mseed output options
if params["format"] not in ("plot", "mseed", "miniseed"):
for key in ("network", "station", "location", "channel"):
if re.search(r"[,*?]", params[key]):
return error_param(params, Error.NO_WILCARDS + key + ").")
return error_param(params, Error.NO_WILDCARDS + key + ").")
for key, val in params.items():
logging.debug(key + ": " + str(val))
......
import logging
import time
from tempfile import NamedTemporaryFile
from flask import make_response
from obspy.clients.fdsn import Client
from obspy.core import UTCDateTime
from obspy.signal.filter import envelope
from apps.globals import Error
from apps.globals import FDSN_CLIENT
from apps.globals import FROM_CLIENT
from apps.globals import MAX_DATA_POINTS
from apps.globals import MAX_DATA_POINTS_PROCESSING
from apps.globals import MAX_PLOTS
from apps.globals import USER_AGENT_TIMESERIES_INVENTORY
from apps.utils import error_500
from apps.utils import get_bounds
from apps.utils import get_signal
from apps.utils import get_signal_from_client
from apps.utils import get_periodogram
from apps.utils import get_response
from apps.utils import nodata_error
from apps.utils import overflow_error
from apps.utils import remove_response
from apps.utils import static_plots
from apps.utils import tictac
def get_processed_signal(st, params):
    """Apply the requested signal processings to each trace of the stream.

    :param st: obspy stream, modified in place
    :param params: dict of validated request parameters; params["request"]
        lists the processing options in the order they appeared in the URL
    :returns: the processed stream (same object as *st*)
    """
    tic = time.time()
    for n, tr in enumerate(st):
        logging.debug(f"Processing trace {n}...")
        for item in params["request"]:
            # Processings are applied only once and in the order given by the list params["request"].
            # To do this with boolean parameters (e.g. demean) we check if they are both true and in the request.
            if params["earthunits"] and item in ("earthunits", "correct"):
                remove_response(tr, params)
            elif params["demean"] and item == "demean":
                tr.detrend("demean")
                logging.debug("demean")
            elif params["detrend"] and item == "detrend":
                tr.detrend("linear")
                logging.debug("detrend")
            elif params["envelope"] and item == "envelope":
                tr.data = envelope(tr.data)
                logging.debug("envelope")
            elif params["diff"] and item == "diff":
                tr.differentiate(method="gradient")
                logging.debug("diff (method=gradient)")
            elif params["int"] and item == "int":
                tr.integrate(method="cumtrapz")
                logging.debug("int (method=cumtrapz)")
            elif item == "scale":
                tr.data = params["scale"] * tr.data
                logging.debug(f"scale: {params['scale']}")
            elif item == "divscale":
                tr.data = (1.0 / params["divscale"]) * tr.data
                logging.debug(f"divscale: {params['divscale']}")
            elif item in ("decimate", "deci"):
                tr.decimate(params["deci"], strict_length=False, no_filter=False)
                logging.debug("deci (strict_length=False, no_filter=False)")
            elif item == "taper":
                taper_trace(tr, params)
            elif item in ("lpfilter", "lp"):
                tr.filter("lowpass", freq=params["lp"], zerophase=params["zerophase"])
                logging.debug(f"lp: {params['lp']}, zerophase={params['zerophase']}")
            elif item in ("hpfilter", "hp"):
                tr.filter("highpass", freq=params["hp"], zerophase=params["zerophase"])
                logging.debug(f"hp: {params['hp']}, zerophase={params['zerophase']}")
            elif item in ("bpfilter", "bp"):
                tr.filter(
                    "bandpass",
                    freqmin=params["bp"][0],
                    freqmax=params["bp"][1],
                    zerophase=params["zerophase"],
                )
                logging.debug(f"bp: {params['bp']}, zerophase={params['zerophase']}")
    if params["spectrum"]:
        # switch every trace to the frequency domain (periodogram of its samples)
        for n, tr in enumerate(st):
            tr.time_array, tr.data = get_periodogram(tr.data)
    logging.info(f"Processed signal in {tictac(tic)} seconds.")
    return st
def taper_trace(tr, params):
    """Taper both ends of the trace with the window requested in params["taper"]."""
    max_pct, win_name = params["taper"][0], params["taper"][1]
    # obspy expects the scipy window name; the legacy "HANNING" label maps to "hann"
    window = "hann" if win_name == "HANNING" else win_name.lower()
    tr.taper(max_pct, type=window, max_length=None, side="both")
    logging.debug(f"Taper trace : max_percentage={max_pct}, type={window}")
def get_file_type(params):
    """Return the (obspy format, file extension) pair for the requested output format.

    :param params: dict with a "format" entry (already lower-cased by validation)
    :returns: tuple ``(file_type, file_ext)``
    :raises ValueError: for an unknown format — previously this fell through
        every branch and crashed with an obscure ``UnboundLocalError``
    """
    # (time, values) 2 columns
    if params["format"] in ("ascii", "tspair"):
        file_type, file_ext = "TSPAIR", ".csv"
    # (values) written from left to right (6 columns max)
    elif params["format"] == "slist":
        file_type, file_ext = "SLIST", ".csv"
    elif params["format"] in ("miniseed", "mseed"):
        file_type, file_ext = "MSEED", ".mseed"
    # little-endian SAC
    elif params["format"] == "sac":
        file_type, file_ext = "SAC", ".sac"
    else:
        # guard against formats that slipped past parameter validation
        raise ValueError(f"Unknown output format: {params['format']}")
    return (file_type, file_ext)
def set_sac_header(params, st):
    """Attach SAC header fields (component orientation, station coordinates)
    to every trace of the stream, fetched from the FDSN station service.

    :param params: Parameters object with url parameters (kept for signature
        consistency with the other writers; not read here)
    :param st: obspy stream whose traces receive a ``stats.sac`` dict
    :returns: None on success, a 500 error object on failure (callers may ignore it)
    """
    try:
        # The FDSN endpoint never changes per trace: build the client once
        # instead of once per trace as before.
        client = Client(FDSN_CLIENT, user_agent=USER_AGENT_TIMESERIES_INVENTORY)
        for tr in st:
            stats = tr.stats
            inventory = client.get_stations(
                network=stats["network"],
                station=stats["station"],
                location=stats["location"],
                channel=stats["channel"],
                level="channel",
            )
            inv_sta = inventory[0][0]
            inv_cha = inv_sta[0]
            stats.sac = {}
            # copy only the attributes actually present in the inventory
            if hasattr(inv_cha, "azimuth"):
                stats.sac["cmpaz"] = inv_cha.azimuth
            if hasattr(inv_cha, "dip"):
                stats.sac["cmpinc"] = inv_cha.dip
            if hasattr(inv_sta, "latitude"):
                stats.sac["stla"] = inv_sta.latitude
            if hasattr(inv_sta, "longitude"):
                stats.sac["stlo"] = inv_sta.longitude
            if hasattr(inv_sta, "depth"):
                stats.sac["stdp"] = inv_sta.depth
            if hasattr(inv_sta, "elevation"):
                stats.sac["stel"] = inv_sta.elevation
    except Exception as ex:
        logging.exception(str(ex))
        return error_500(Error.UNSPECIFIED)
def get_file(params, st):
    """Create a temporary timeseries file and wrap it in a flask response.

    The name is built according to the template:
    resifws-timeseries.2018-11-29T10_11_32.000Z.2018-11-29T23_42_56.000Z

    :param params: Parameters object with url parameters (network, station, ...)
    :param st: obspy stream
    :returns: response_class flask object containing the file, or a 500 error
    """
    tic = time.time()
    # Initialized up front: if an exception fires before the tempfile exists
    # (e.g. in get_file_type), the finally clause must not raise NameError
    # and mask the original error.
    tmp = None
    try:
        (file_type, file_ext) = get_file_type(params)
        (start, end) = get_bounds(st)
        # millisecond precision with a "Z" suffix (strftime yields microseconds)
        start = UTCDateTime(start).strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
        end = UTCDateTime(end).strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
        period = ".".join([start, end])
        fname = "resifws-timeseries"
        if len(st) == 1:
            # single-channel result: name the file after the channel code instead
            stat = st[0].stats
            fname = ".".join([stat.network, stat.station, stat.location, stat.channel])
        fname = fname + "." + period + file_ext
        headers = {"Content-Disposition": "attachment; filename=" + fname}
        tmp = NamedTemporaryFile(delete=True)
        if file_ext == ".sac":
            set_sac_header(params, st)
        st.write(tmp.name, format=file_type)
        response = make_response((tmp.read(), headers))
        if file_ext == ".csv":
            response.mimetype = "text/csv"
        else:
            response.mimetype = "application/octet-stream"
        return response
    except Exception as ex:
        logging.exception(str(ex))
        return error_500(Error.UNSPECIFIED)
    finally:
        if tmp is not None:
            tmp.close()
        logging.info(f"Response with file created in {tictac(tic)} seconds.")
def get_output(params):
    """Create timeseries plots or data files.

    :param params: Parameters object with url parameters (network, station, ...)
    :returns: static plot(s) or data file; None when no data matched
    :raises MemoryError: memory exception (converted to an overflow error)
    :raises ValueError: value exception (converted to an overflow error)
    """
    # Bound before the try so the finally clause can never hit an unbound
    # name when an exception fires early (previously `npoints` could be
    # referenced in finally before being assigned).
    tic0 = time.time()
    st = None
    response = None
    npoints = 0
    try:
        tic1 = time.time()
        if FROM_CLIENT:
            st = get_signal_from_client(params)
        else:
            st = get_signal(params)
        logging.info(f"Get data in {tictac(tic1)} seconds.")
        if st is None or len(st) == 0:
            return None
        npoints = sum(len(tr.data) for tr in st)
        # Size limits only apply to the plot renderer; file output streams
        # whatever matched. (NOTE(review): nesting inferred from the diff's
        # stripped indentation — confirm against the repository.)
        if params["format"] == "plot":
            if npoints > MAX_DATA_POINTS:
                return overflow_error(Error.TOO_MUCH_DATA)
            if npoints > MAX_DATA_POINTS_PROCESSING and params["earthunits"]:
                return overflow_error(Error.TOO_MUCH_DATA_PROCESSING)
            if len(st) > MAX_PLOTS:
                return overflow_error(Error.PLOTS)
        if params["earthunits"]:
            tic1 = time.time()
            try:
                st.attach_response(get_response(params))
            except Exception as ex:
                logging.debug(str(ex))
                return nodata_error(Error.RESPONSE)
            logging.info(f"Attach response in {tictac(tic1)} seconds.")
        st = get_processed_signal(st, params)
        if params["format"] == "plot":
            response = static_plots(params, st)
        else:
            response = get_file(params, st)
        return response
    except (MemoryError, ValueError) as plotex:
        logging.exception(str(plotex))
        return overflow_error(Error.PROCESSING)
    except Exception as ex:
        logging.exception(str(ex))
        return error_500(Error.UNSPECIFIED)
    finally:
        if st:
            delta = params["end"] - params["start"]
            logging.debug(f"Period of {delta} with {npoints} data points.")
        if response:
            # renamed from `bytes` to avoid shadowing the builtin
            nbytes = response.headers.get("Content-Length")
            logging.info(f"{nbytes} bytes rendered in {tictac(tic0)} seconds.")
......@@ -9,7 +9,7 @@ from apps.timeseries.constants import ALIAS_PARAMS
from apps.timeseries.constants import Args
from apps.timeseries.constants import PARAMS
from apps.timeseries.model import check_parameters
from apps.timeseries.plots import get_output
from apps.timeseries.output import get_output
from apps.utils import check_request
from apps.utils import error_request
......@@ -110,7 +110,7 @@ def timeseries(request):
resp = q.get(timeout=TIMEOUT)
if resp is not None:
return resp
result = {"msg": HTTP._202_, "details": Error.NO_DATA, "code": 202}
result = {"msg": HTTP._202_, "details": Error.NODATA, "code": 202}
except queue.Empty:
result = {"msg": HTTP._408_, "details": Error.TIMEOUT, "code": 408}
......
......@@ -63,10 +63,10 @@ Par exemple, pour le code des canaux : channel=EH?,BHZ
#### Détails sur la nomenclature des codes
- NETWORK = 1 à 2 caractères alphanumériques. Un groupe de points de mesures.
- STATION = 1 à 5 caractères alphanumériques. Un site de mesure dans un réseau.
- CHANNEL = 3 caractères qui désignent : la fréquence d'échantillonnage et la bande de fréquence du capteur, le type de l'instrument, l'orientation physique de la composante.
- LOCATION = 2 caractères qui permettent de distinguer plusieurs flux de données d'un même canal
- NETWORK : 1 à 2 caractères alphanumériques. Un groupe de points de mesures.
- STATION : 1 à 5 caractères alphanumériques. Un site de mesure dans un réseau.
- CHANNEL : 3 caractères alphanumériques. Le premier caractère indique la bande de fréquence du capteur, le second le type de l'instrument et le troisième l'orientation physique.
- LOCATION : 2 caractères alphanumériques. Ils permettent de distinguer plusieurs flux de données d'un même canal.
### Formats autorisés pour l'intervalle de temps
La définition de l'intervalle de temps peut prendre différentes formes :
......
......@@ -42,7 +42,7 @@ The timeseriesplot service returns a graphical representation of time series dat
## Detailed descriptions of each query parameter
### Station codes details
### Station code details
The four parameters (network, station, location, channel) determine channels of interest.
| Parameters | Examples | Discussion |
......
import logging
import re
import time
from bokeh.embed import file_html
from bokeh.layouts import column
from bokeh.models import DatetimeTickFormatter, HoverTool
from bokeh.plotting import figure
from bokeh.resources import CDN
from flask import make_response
from apps.globals import Error
from apps.globals import FROM_CLIENT
from apps.globals import MAX_DATA_POINTS
from apps.globals import MAX_DATA_POINTS_PROCESSING
from apps.globals import MAX_PLOTS
from apps.utils import error_500
from apps.utils import get_bounds
from apps.utils import get_signal
from apps.utils import get_signal_from_client
from apps.utils import get_periodogram
from apps.utils import get_response
from apps.utils import get_units
from apps.utils import nodata_error
from apps.utils import overflow_error
from apps.utils import remove_response
from apps.utils import static_plots
from apps.utils import tictac
def get_processed_signal(st, params):
    """Apply instrument correction (or demeaning) to every trace, then
    optionally replace each trace's data by its periodogram.

    :param st: obspy stream, modified in place
    :param params: dict of validated request parameters
    :returns: the processed stream (same object as *st*)
    """
    started = time.time()
    for idx, trace in enumerate(st):
        logging.debug(f"Processing trace {idx}...")
        # earthunits takes precedence: a corrected trace is not demeaned again
        if params["earthunits"]:
            remove_response(trace, params)
        elif params["demean"]:
            trace.detrend("demean")
            logging.debug("demean")
    if params["spectrum"]:
        for idx, trace in enumerate(st):
            trace.time_array, trace.data = get_periodogram(trace.data)
    logging.info(f"Processed signal in {tictac(started)} seconds.")
    return st
def date_tick_formatter():
    """Create an xaxis formatter according to different date and time scales.

    Every scale from milliseconds up to years uses the same two-line
    "date / time" layout, so the kwargs are generated rather than spelled out.
    (Sub-millisecond scales keep bokeh's defaults, as before.)
    """
    layout = ["%F ", "%T"]
    scales = (
        "milliseconds",
        "seconds",
        "minsec",
        "minutes",
        "hourmin",
        "hours",
        "days",
        "months",
        "years",
    )
    return DatetimeTickFormatter(**{scale: list(layout) for scale in scales})
# from bokeh.models import FuncTickFormatter
# return FuncTickFormatter(code="""
# var date = new Date(tick*1000).toISOString()
# if (index == 0) {
# return(date.replace("Z", ""));
# } else {
# return(date.split("T")[1].replace("Z", ""));
# } """)
def dynamic_plots(params, st):
    """Create dynamic timeseries plots.

    Builds one interactive bokeh figure per trace and embeds them all into a
    standalone HTML page returned as a flask response.

    :param params: Parameters object with url parameters (network, station, ...)
    :param st: obspy stream to plot
    :returns: response_class flask object containing dynamic timeseries plot(s)
    """
    tic = time.time()
    plots = list()
    (left, right) = get_bounds(st)
    # the monochrome option overrides the requested color
    pcolor = "black" if params["monochrome"] else "#" + params["color"]
    for tr in st:
        # define tools and tooltips
        units = get_units(params, tr.stats.channel)
        tooltips = [("Amplitude", "@y" + units), ("Date", "@x{%F %T.%3N}")]
        hover = HoverTool(tooltips=tooltips, formatters={"@x": "datetime"})
        tools = "crosshair, save, pan, wheel_zoom, box_zoom, zoom_in, zoom_out, reset"
        # create a new plot with the tools above
        plot = figure(
            tools=[hover, tools],
            plot_width=params["width"],
            plot_height=params["height"],
            # bokeh datetime axes are in milliseconds since epoch
            x_range=(1000 * left, 1000 * right),
            active_drag="box_zoom",
            # active_scroll="wheel_zoom",
        )
        plot.toolbar.logo = None
        plot.xaxis[0].ticker.desired_num_ticks = 4
        plot.xaxis[0].formatter = date_tick_formatter()
        plot.line([t * 1000 for t in tr.times("timestamp")], tr.data, color=pcolor)
        text = (tr.stats.network, tr.stats.station, tr.stats.location, tr.stats.channel)
        if params["showtitle"]:
            plot.title.text = "[ %s_%s_%s_%s ]" % text
        plots.append(plot)
    # Puts the result in a column.
    plots = column(plots)
    # Generate a complete HTML page embedding the Bokeh plot.
    html = file_html(plots, CDN)
    html = re.sub(r"<title>.*</title>", "<title>resifws-timeseriesplot</title>", html)
    # Returns the rendered HTML to the browser.
    logging.info(f"Response with dynamic plot created in {tictac(tic)} seconds.")
    return make_response(html)
def get_output(params):
"""Create timeseries plots.
:param params: Parameters object with url parameters (network, station, ...)
:returns: static or dynamic plot(s)
:raises MemoryError raises memory exception
:raises ValueError raises value exception
"""
try:
tic0 = time.time()
st = None
response = None
tic1 = time.time()
if FROM_CLIENT:
st = get_signal_from_client(params)
else:
st = get_signal(params)
logging.info(f"Get data in {tictac(tic1)} seconds.")
if st is None or len(st) == 0:
return None
npoints = sum([len(tr.data) for tr in st])
if npoints > MAX_DATA_POINTS:
return overflow_error(Error.TOO_MUCH_DATA)
if npoints > MAX_DATA_POINTS_PROCESSING and params["earthunits"]:
return overflow_error(Error.TOO_MUCH_DATA_PROCESSING)
if len(st) > MAX_PLOTS:
return overflow_error(Error.PLOTS)
if params["earthunits"]:
tic1 = time.time()
try:
st.attach_response(get_response(params))
except Exception as ex:
logging.debug(str(ex))
return nodata_error(Error.RESPONSE)
logging.info(f"Attach response in {tictac(tic1)} seconds.")
st = get_processed_signal(st, params)
if params["iplot"]:
response = dynamic_plots(params, st)
else:
response = static_plots(params, st)
return response
except (MemoryError, ValueError) as plotex:
logging.exception(str(plotex))
return overflow_error(Error.PROCESSING)
except Exception as ex:
logging.exception(str(ex))
return error_500(Error.UNSPECIFIED)
finally:
if st:
delta = params["end"] - params["start"]