Vous avez reçu un message "Your GitLab account has been locked ..." ? Pas d'inquiétude : lisez cet article https://docs.gricad-pages.univ-grenoble-alpes.fr/help/unlock/

Commit e0660bf5 authored by Jonathan Schaeffer's avatar Jonathan Schaeffer
Browse files

Parallélisation

ref #2
parent aa51d7a0
......@@ -5,6 +5,8 @@
# Redirect all output (stdout and stderr) through logger; -s makes logger also echo each message to stderr
exec 1> >(logger -s -t $(basename $0)) 2>&1
set -a
####################
#
# Zabbix sender
......@@ -77,9 +79,12 @@ irods_push(){
format_report $KEY $SIZEMB $(date +%Y-%m-%dT%H:%M:%S --date=@$start) $duration $throughput 'OK' >> $LOCAL_REPORT
irods_commit $KEY
fi
# Send report to irods
iput -f $LOCAL_REPORT $IRODS_REPORT
zabbix_ok "$KEY|${SIZEMB}MB|${duration}s|${throughput}MB/s"
# Send report to irods. Do some locking here
(
flock -e 200
iput -f $LOCAL_REPORT $IRODS_REPORT
) 200>/$RESIFDD_WORKDIR/report.lock
}
# In case of any problem, this function rolls back every remote file operation
......@@ -144,6 +149,62 @@ irods_commit(){
ils $KEY/previous_to_delete.tar 2>/dev/null && irm -f $KEY/previous_to_delete.tar
}
# Choose if the data should be processed or skipped
#######################################
# Pack one data directory into a tar archive and send it to iRODS.
#
# The last three components of the path are used as YEAR/NETWORK/STATION and
# combined into the key ${YEAR}_${NETWORK}_${STATION}.
#
# Globals (read): RECOVERY_FILE, LOCAL_REPORT, RESIFDD_WORKDIR
# Arguments:
#   $1 - path (absolute or relative) to the data; a trailing "/" is tolerated
#   $2 - optional, the parallel slot number as given by {%} (only logged)
# Outputs:
#   Progress messages on stdout; one line appended to $LOCAL_REPORT when the
#   data is skipped.
# Returns:
#   1 if no path was given or the archive could not be created,
#   2 if the path has fewer than 4 "/"-separated fields,
#   0 if the data was skipped,
#   otherwise the status of the final workspace cleanup.
#######################################
pack_and_send() {
  [ $# -eq 0 ] && echo "[pack_and_send] Need a path for data to send" && return 1

  # Keep the working variables out of the global scope. The original names are
  # kept on purpose: bash locals stay visible to the helpers called from here.
  local dir base_dir archive YEAR NETWORK STATION KEY SIZE local_sha irods_sha
  local -a YNS

  # Parse path to get year, station and network.
  # Drop one trailing slash, otherwise the prefix computation below fails.
  dir=${1%/}
  IFS='/' read -r -a YNS <<< "$dir"
  [ "${#YNS[@]}" -lt 4 ] && echo "[pack_and_send] Path $dir is not complete (${#YNS[@]} levels) " && return 2
  YEAR=${YNS[-3]}
  NETWORK=${YNS[-2]}
  STATION=${YNS[-1]}
  KEY=${YEAR}_${NETWORK}_${STATION}
  archive=$RESIFDD_WORKDIR/$KEY.tar
  # Directory containing YEAR/NETWORK/STATION; the suffix is quoted so that it
  # is removed literally and never interpreted as a glob pattern.
  base_dir=${dir%"$YEAR/$NETWORK/$STATION"}
  echo "[$KEY] Starting job $2"

  # Test if in recovery mode, we should send or not
  if [[ -r "$RECOVERY_FILE" ]]; then
    # grep -E replaces the obsolescent egrep (which warns on recent GNU grep)
    if grep -E -q -e ".*$YEAR.*$NETWORK.*$STATION.*( OK | Skipped ).*" "$RECOVERY_FILE"; then
      echo "[$KEY] Found OK or skipped in ${RECOVERY_FILE}. Skipping"
      format_report "$KEY" "-" "$(date +%Y-%m-%dT%H:%M:%S)" "-" "-" "Skipped" >> "$LOCAL_REPORT"
      return 0
    fi
  fi

  echo "[$KEY] Creating tar on $archive"
  echo "[$KEY] tar cf $archive -C $base_dir $YEAR/$NETWORK/$STATION"
  if ! tar cf "$archive" -C "$base_dir" "$YEAR/$NETWORK/$STATION"; then
    # Something went wrong creating archive. Exit
    echo "[$KEY] Error 007 creating tar"
    # Send key to zabbix_err
    zabbix_err "$KEY:Error 007"
    # Do not leave a partial archive behind in the work directory
    rm -f -- "$archive"
    return 1
  fi

  # Check if file exists on irods server
  if ils -L "$KEY/latest.tar" > /dev/null 2>&1; then
    echo "[$KEY] latest.tar already exists on iRODS server. Let's compare hashes"
    # Local digest is converted from hex to base64 before being compared with
    # the value printed after "sha2:" by ichksum.
    local_sha=$(sha256sum "$archive" | awk '{print $1}' | xxd -r -p | base64)
    irods_sha=$(ichksum "$KEY/latest.tar" | awk -F':' '/sha2:/ {print $2; exit;}')
    echo "[$KEY] local checksum: $local_sha"
    echo "[$KEY] irods checksum: $irods_sha"
    # If the hashes are identical there is nothing to push
    if [[ "$local_sha" = "$irods_sha" ]]; then
      echo "[$KEY] The archive on irods is the same as our version. Skipping."
      SIZE=$(stat -c %s "$archive")
      format_report "$KEY" "$((SIZE / 1024 / 1024))" "$(date +%Y-%m-%dT%H:%M:%S)" "-" "-" "Skipped" >> "$LOCAL_REPORT"
      # The archive is no longer needed; without this, skipped archives pile
      # up in the work directory.
      rm -f -- "$archive"
      return 0
    fi
  fi

  # Send latest archive file to IRODS
  irods_push "$KEY"

  # Clean this key's files only. "$KEY".* (instead of $KEY*) keeps the glob
  # from matching another job's key that merely starts with the same
  # characters (e.g. 2020_FR_ABC vs 2020_FR_ABCD) while jobs run in parallel.
  # NOTE(review): assumes every per-key work file is named "$KEY.<something>"
  # (e.g. "$KEY.tar") - confirm against irods_push.
  rm -- "$RESIFDD_WORKDIR/$KEY".*
}
export -f pack_and_send # Necessary for call with GNU parallel
####################
#
# Preliminary tests
......@@ -188,14 +249,15 @@ fi
# Header for the report :
IRODS_REPORT=reports/$(date +%Y%m%d-%H%M).csv
LOCAL_REPORT=$RESIFDD_WORKDIR/report.csv
LOCAL_REPORT=$RESIFDD_WORKDIR/report_$(date +%Y%m%d-%H%M).csv
format_report "Year_Network_Station" "Size(MB)" "Dumpdate" "Duration(s)" "Throughput(MB/s)" "Comment" > $LOCAL_REPORT
imkdir -p reports
iput -f $LOCAL_REPORT $IRODS_REPORT
rm $RESIFDD_WORKDIR/report.lock
##################
#
# Metadata
# Dump Metadata
#
##################
KEY="validated-seismic-metadata"
......@@ -205,14 +267,14 @@ if [[ -r $RECOVERY_FILE ]] && egrep -q -e ".*($KEY ).*( OK | Skipped ).*" $RECOV
else
# Get the snapshot name for this month
MONTH=$(date +%Y-%m)
SNAPSHOT_DIR=$(ls -d $RESIFDD_DATADIR/validated_seismic_metadata/.snapshot/monthly.${MONTH}*|tail -1)
SNAPSHOT_DIR=$(ls -d $RESIFDD_DATADIR/validated_seismic_metadata/.snapshot/weekly.${MONTH}*|tail -1)
if [[ ! -d $SNAPSHOT_DIR ]]; then
echo "Error 000 Snapshot directory $SNAPSHOT_DIR does not exist"
exit 1
fi
echo "[$KEY] Starting dump from ${SNAPSHOT_DIR}"
tar cf $RESIFDD_WORKDIR/$KEY.tar --exclude portalproducts -C $SNAPSHOT_DIR $SNAPSHOT_DIR
tar cf $RESIFDD_WORKDIR/$KEY.tar --exclude portalproducts -C $SNAPSHOT_DIR .
if [[ $? -ne 0 ]]; then
echo "[$KEY] Error 001 while creating tar archive."
zabbix_err "${KEY}:Error 001"
......@@ -235,52 +297,6 @@ if [[ ! -d $SNAPSHOT_DIR ]]; then
exit 1
fi
echo "Starting dump of validated data"
cd $SNAPSHOT_DIR
for dir in $(find . -maxdepth 3 -type d | sort); do
# First clean workspace
rm -rf $RESIFDD_WORKDIR/*.tar $RESIFDD_WORKDIR/*.restart
# Parse path to get year, station and network
IFS='/' read -r -a YNS <<< $dir
[ ! ${#YNS[@]} -eq 4 ] && continue
YEAR=${YNS[1]}
NETWORK=${YNS[2]}
STATION=${YNS[3]}
KEY=${YEAR}_${NETWORK}_${STATION}
# Test if in recovery mode, we should send or not
if [[ -r $RECOVERY_FILE ]]; then
if egrep -q -e ".*$YEAR.*$NETWORK.*$STATION.*( OK | Skipped ).*" $RECOVERY_FILE; then
echo "[$KEY] Found OK or skipped in ${RECOVERY_FILE}. Skipping"
format_report $KEY "-" $(date +%Y-%m-%dT%H:%M:%S) "-" "-" "Skipped" >> $LOCAL_REPORT
continue
fi
fi
echo "[$KEY] Creating tar on $RESIFDD_WORKDIR/$KEY.tar"
tar cf $RESIFDD_WORKDIR/$KEY.tar $dir
if [[ $? -ne 0 ]]; then
# Something went wrong creating archive. Exit
echo "[$KEY] Error 007 creating tar"
# Send key to zabbix_err
zabbix_err "$KEY:Error 007"
continue
fi
# Check if file exists on irods server
ils -L $KEY/latest.tar > /dev/null 2>&1
if [[ $? -eq 0 ]]; then
echo "[$KEY] latest.tar already exists on iRODS server. Let's compare hashes"
local_sha=$(sha256sum $RESIFDD_WORKDIR/$KEY.tar | awk '{print $1}' | xxd -r -p | base64)
irods_sha=$(ichksum $KEY/latest.tar | awk -F':' '/sha2:/ {print $2; exit;}')
echo "[$KEY] local checksum: $local_sha"
echo "[$KEY] irods checksum: $irods_sha"
# If the hashes differs, then move distant file and push this one
if [[ "$local_sha" = "$irods_sha" ]]; then
echo "[$KEY] The archive on irods is the same as our version. Skipping."
format_report $KEY "-" $(date +%Y-%m-%dT%H:%M:%S) "-" "-" "Skipped" >> $LOCAL_REPORT
continue
fi
fi
# Send latest archive file to IRODS
irods_push $KEY
done
echo "Starting dump of validated data with 4 jobs"
find $SNAPSHOT_DIR -maxdepth 3 -mindepth 3 -type d | sort | parallel --jobs 4 --max-args 1 pack_and_send {} {%}
echo "Dump of validated data done"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment