Commit fc79fbd3 authored by Jonathan Schaeffer

Implement caching!

Scan results are now written to a YAML cache file along with the scan date
and the list of scanned volumes. Subsequent runs reuse the cache as long as
it is younger than cache_ttl days and the volume list is unchanged.

parent 6fbc9dca
#
# Configuration file for resif_data_reporter.py
#
logger_file: "logger.conf" # path for the logger configuration file
cache_file: "data.yml" # path to the cache file (yaml format)
cache_ttl: 15 # Cache validity time in days
volumes: # list of directories to scan
- /path/1
- /data/all/example
@@ -16,7 +18,7 @@ influxdb: # influxdb database to send reports
password: iiKa5GfvDzWDXyE797 # password for the user
database: sandbox # database name
tags: # list of tags to add to metrics, key values pairs
  - host: localhost
metadata: # Information about the gathered data. Used to tag the timeserie values
permanent_networks: # List all permanent networks here. Otherwise, they will be considered as temporary
- CL
@@ -6,9 +6,12 @@ import os
import subprocess
from time import gmtime, strftime
import yaml
import json
from influxdb import InfluxDBClient
from pprint import pprint, pformat
import datetime
logger = logging.getLogger(__name__)
def test_config_file(f="config.yml"):
@@ -44,6 +47,29 @@ def dict_dumper(dict):
except:
return dict.__dict__
def scan_volumes(volumes):
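    """Run `du -d2` on each volume and aggregate the reported sizes.

    Returns a nested dict keyed by the first two directory levels below
    each volume root, e.g. {'2011': {'G': 23, 'FR': 100}, '2012': {...}}.
    """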
data = {}
for volume in volumes:
volume = volume.rstrip('/')+'/'
        # TODO: make the scan depth (2) configurable
lines = subprocess.check_output(["du", "-d2", volume]).decode("utf-8").splitlines()
for l in lines:
logger.debug(l)
(size, path) = l.split('\t')
            # Keep only the path components below the volume root
            path = path.replace(volume, '').split('/')
            # Skip paths that are only one level deep
if len(path) == 2:
logger.debug(f"path : {path}")
logger.debug(f"size : {size}")
# data : {2011: {G: 23, FR: 100, ...} 2012: {G: 12, FR: 120, ...}, ...}
                if path[0] not in data:
                    data[path[0]] = {}
                if path[1] not in data[path[0]]:
                    data[path[0]][path[1]] = 0
data[path[0]][path[1]] += int(size)
return data
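# A hypothetical illustration of what scan_volumes() aggregates: for a
# volume "/data/all/example", `du -d2` output such as
#   102400  /data/all/example/2011/FR
#   23552   /data/all/example/2011/G
#   125952  /data/all/example/2011
#   125952  /data/all/example
# keeps only the two-level paths and yields
#   {'2011': {'FR': 102400, 'G': 23552}}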
def main():
config_path = get_config_file()
with open(config_path, 'r') as ymlfile:
@@ -54,37 +80,40 @@ def main():
logging.config.fileConfig(cfg["logger_file"])
logger = logging.getLogger("resif_data_reporter")
logger.info("Starting")
data = {}
    # Refresh the data or use the cache?
    # Try to open the cache file
try:
with open(cfg['cache_file'], 'r') as ymlfile:
            data = yaml.safe_load(ymlfile)
            # Compare the volumes in the configuration with those in the cache
            if set(cfg['volumes']) == set(data['volumes']):
                # Get the date of the previous run
                previous_run_date = datetime.datetime.strptime(data['date'], "%Y-%m-%d").date()
                # Compute the cache age
                if datetime.date.today() - previous_run_date > datetime.timedelta(days=cfg['cache_ttl']):
                    logger.info("Cache is too old, let's scan the volumes")
                    data = scan_volumes(cfg['volumes'])
                else:
                    logger.info("Cache is fresh, let's use it")
                    del data["date"]
                    del data["volumes"]
            else:
                logger.info("Volume list has changed, let's scan the volumes")
                data = scan_volumes(cfg['volumes'])
except FileNotFoundError:
logger.debug("Cache file %s not found, let's scan volumes."%cfg['cache_file'])
data = scan_volumes(cfg['volumes'])
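    # data now holds the aggregated per-year, per-network sizes,
    # either freshly scanned or loaded from the cache.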
influxdb_json_data = []
logger.info(pformat(data))
    # Dump the data to the cache file
    try:
        with open(cfg['cache_file'], 'w') as outfile:
yaml.dump({'date': datetime.datetime.now().strftime("%Y-%m-%d"),
'volumes': cfg['volumes'],
**data},
outfile, default_flow_style=False)
    except Exception as err:
        logger.error("Error writing data to cache: %s", err)
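For reference, a minimal sketch of the cache file this dump produces, assuming the two volumes from the example configuration and hypothetical sizes and date (yaml.dump sorts the top-level keys):

'2011':
  FR: 102400
  G: 23552
date: '2018-11-26'
volumes:
- /path/1
- /data/all/example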