Vous avez reçu un message "Your GitLab account has been locked ..." ? Pas d'inquiétude : lisez cet article https://docs.gricad-pages.univ-grenoble-alpes.fr/help/unlock/

resifdatadump 11.8 KB
Newer Older
#!/bin/bash
# Author: Jonathan Schaeffer <jonathan.schaeffer@univ-grenoble-alpes.fr>
# This script dumps precious RESIF data and metadata to an IRODS server
#
# Environment variables read by this script:
#   RESIFDD_WORKDIR             writable directory for tar files, reports and the report lock
#   RESIFDD_DATADIR             RESIF mountpoint holding the .snapshot directories
#   RESIFDD_CONTINUE_FROM_FILE  optional previous report; entries marked OK/Skipped are not redone

# Redirect stdout and stderr of the whole script to logger, tagged with the
# script name. `logger -s` also copies every message to its own stderr.
exec 1> >(logger -s -t "$(basename "$0")") 2>&1

# Mark every variable and function defined from here on for export, so that the
# shells started by GNU parallel inherit the settings and the helper functions
# that pack_and_send calls.
set -a
####################
#
# Zabbix sender
#
###################

# Report a failure to the Zabbix server (item key resifdatadump.failed).
# Arguments: the message to send. Several arguments are joined with spaces into
# one value, so that extra words are never parsed as zabbix_sender options.
# Returns: the exit status of zabbix_sender.
zabbix_err(){
    zabbix_sender -k resifdatadump.failed -s "$(hostname -f)" -o "$*" -z monitoring.osug.fr
}

# Report a success to the Zabbix server (item key resifdatadump.ok).
# Arguments: the message to send. Several arguments are joined with spaces into
# one value, so that extra words are never parsed as zabbix_sender options.
# Returns: the exit status of zabbix_sender.
zabbix_ok(){
    zabbix_sender -k resifdatadump.ok -s "$(hostname -f)" -o "$*" -z monitoring.osug.fr
}

# Format a report line and print it on stdout.
# Arguments:
#   $1    key, either "YEAR_NETWORK_STATION" or any other string; a key that does
#         not split into exactly 3 "_"-separated parts is printed whole in the
#         first column, with "-" as network and station
#   $2    size
#   $3    dump date
#   $4    duration
#   $5    throughput
#   $6..  comment (remaining arguments are joined with spaces)
# Example:
#   format_report "2019_FR_PLOP" "123456" '2015-07-15T06:51:02' duration throughput "OK"
# Every argument is quoted when printed, so an empty value yields an empty
# column instead of shifting all the following columns to the left.
format_report(){
    local -a key_parts
    local rpt_year rpt_network rpt_station
    IFS='_' read -r -a key_parts <<< "$1"
    rpt_year=${key_parts[0]}
    if [[ ${#key_parts[@]} -eq 3 ]]; then
        rpt_network=${key_parts[1]}
        rpt_station=${key_parts[2]}
    else
        rpt_network="-"
        rpt_station="-"
    fi
    local rpt_size=$2
    local rpt_date=$3
    local rpt_duration=$4
    local rpt_throughput=$5
    local rpt_comment="${*:6}"
    # The format has one more %s than there are values: it prints as an empty
    # string and leaves " | " after the comment. The recovery-mode grep patterns
    # of this script (" OK " / " Skipped ") need that trailing space; keep it.
    printf " %6s | %9s | %9s | %13s | %16s | %6s | %6s | %18s | %s" \
        "$rpt_year" "$rpt_network" "$rpt_station" "$rpt_size" "$rpt_date" \
        "$rpt_duration" "$rpt_throughput" "$rpt_comment"
    echo
}

# Push data to irods as a staging file
# Arguments:
#   $1  KEY: the distant directory to push to; the local archive is
#       $RESIFDD_WORKDIR/$KEY.tar
#   $2  checksum of the local archive, in the form printed by ichksum after "sha2:"
# Globals read: RESIFDD_WORKDIR, LOCAL_REPORT, IRODS_REPORT
# If something goes wrong, roll back
# If everything goes fine, then validate staging data
# Returns: 1 when the directory creation, the transfer or the integrity check
# failed; otherwise the status of the final report upload.
irods_push(){
    local KEY=$1
    local LOCAL_SHA=$2
    local tarball="$RESIFDD_WORKDIR/$KEY.tar"
    local SIZE SIZEMB attempt s start duration throughput irods_sha report_rc
    local rc=0

    SIZE=$(stat -c %s "$tarball")
    SIZEMB=$((SIZE/1024/1024))

    # Create the distant directory: up to 5 attempts, 10 seconds apart.
    s=1
    for attempt in 1 2 3 4 5; do
        (( attempt > 1 )) && sleep 10
        imkdir -p "$KEY"
        s=$?
        (( s == 0 )) && break
    done
    if [[ $s -ne 0 ]]; then
        # NOTE(review): the log and zabbix say "Error 002" while the report line
        # says "Error 010" -- confirm which code is intended.
        echo "[$KEY] Error 002 creating remote directory. Manual action has to be taken."
        format_report "$KEY" "$SIZEMB" "$(date +%Y-%m-%dT%H:%M:%S)" - - 'Error 010. imkdir failed' >> "$LOCAL_REPORT"
        zabbix_err "${KEY}:Error 002"
        return 1
    fi

    echo "[$KEY] Cleaning old staging.tar if exists"
    irm -f "$KEY/staging.tar" > /dev/null 2>&1
    echo "[$KEY] Sending data to iRODS ($SIZEMB MB)"

    # The command is kept as an array: it is run without eval and can still be
    # printed as a single line in the error message.
    local -a iput_cmd=(iput --retries 5 -T -f -X "$RESIFDD_WORKDIR/${KEY}.restart" "$tarball" "$KEY/staging.tar")
    start=$(date +%s)
    if ! "${iput_cmd[@]}"; then
        echo "[$KEY] Error 011 sending file to irods. The command was: ${iput_cmd[*]}"
        format_report "$KEY" "$SIZEMB" "$(date +%Y-%m-%dT%H:%M:%S --date=@"$start")" - - 'Error 011. Transfer cancelled' >> "$LOCAL_REPORT"
        # Roll back
        irods_rollback "$KEY"
        # Alert to zabbix
        zabbix_err "${KEY}:Error 011"
        rc=1
    else
        duration=$(( $(date +%s) - start ))
        # A transfer shorter than one second has duration 0: divide by 1 instead
        # of aborting on a division by zero.
        throughput=$(( SIZEMB / (duration > 0 ? duration : 1) ))

        # Check integrity.
        # We do it separately from transfer because for big files it can be very long, and the network can cut
        irods_sha=$(ichksum "$KEY/staging.tar" | awk -F':' '/sha2:/ {print $2; exit;}')
        if [[ "$LOCAL_SHA" != "$irods_sha" ]]; then
            echo "[$KEY] Error 012 distant file is corrupted (localsha $LOCAL_SHA irodssha $irods_sha). Rollback"
            format_report "$KEY" "$SIZEMB" "$(date +%Y-%m-%dT%H:%M:%S --date=@"$start")" - - 'Error 012. Checksum mismatch' >> "$LOCAL_REPORT"
            irods_rollback "$KEY"
            zabbix_err "${KEY}:Error 012"
            # Do not commit nor report OK for a corrupted staging file.
            rc=1
        else
            echo "[$KEY] staging.tar data sent, let's commit everything on irods server"
            format_report "$KEY" "$SIZEMB" "$(date +%Y-%m-%dT%H:%M:%S --date=@"$start")" "$duration" "$throughput" 'OK' >> "$LOCAL_REPORT"
            irods_commit "$KEY"
            zabbix_ok "$KEY|${SIZEMB}MB|${duration}s|${throughput}MB/s"
        fi
    fi

    # Send report to irods. Do some locking here: an exclusive flock on file
    # descriptor 200, opened on report.lock, is held while the report is uploaded.
    (
      flock -e 200
      iput -f "$LOCAL_REPORT" "$IRODS_REPORT"
    ) 200>"$RESIFDD_WORKDIR/report.lock"
    report_rc=$?

    (( rc != 0 )) && return "$rc"
    return "$report_rc"
}
# In case of any problem, this function rolls every distant file operation back
# 0. removes staging.tar
# 1. tries to recover latest.tar from previous.tar
# 2. tries to recover previous.tar from previous_to_delete.tar
# Argument is the distant directory to work with
# Returns: 0 when every attempted move succeeded (or there was nothing to move),
# 1 as soon as one move fails; the remaining steps are then skipped.
# NOTE(review): when this runs before irods_commit has moved anything,
# latest.tar presumably still exists; how imv behaves on an existing
# destination is not visible from this script -- confirm.
irods_rollback(){
    local KEY=$1
    irm -f "${KEY}/staging.tar"

    echo "[$KEY] Rollback : try to recover latest.tar from previous"
    # A plain `if` is used rather than `ils ... && ( ... )`: a `return` placed
    # inside a ( ) subshell only leaves the subshell, not the function.
    if ils "${KEY}/previous.tar" 2>/dev/null; then
        if ! imv "${KEY}/previous.tar" "${KEY}/latest.tar"; then
            echo "[$KEY] Error 008 recovering latest.tar Exit 1"
            return 1
        fi
    fi
    echo "[$KEY] Rollback : OK"

    echo "[$KEY] Rollback : try to recover previous.tar from previous_to_delete"
    if ils "${KEY}/previous_to_delete.tar" 2>/dev/null; then
        if ! imv "${KEY}/previous_to_delete.tar" "${KEY}/previous.tar"; then
            echo "[$KEY] Error 009 recovering previous.tar Exit 1"
            return 1
        fi
    fi
    echo "[$KEY] Rollback : OK"
    return 0
}

# When the transfer to irods is successful, we can move files around remotely :
# 1. $KEY/previous.tar is moved to $KEY/previous_to_delete.tar
# 2. $KEY/latest.tar is moved to $KEY/previous.tar
# 3. $KEY/staging.tar is moved to $KEY/latest.tar
# 4. $KEY/previous_to_delete.tar is removed
# Argument is the distant directory to work with.
# Returns: 1 as soon as one move fails (zabbix is alerted and the following
# steps are not attempted); otherwise the status of the last command run.
irods_commit(){
    local KEY=$1

    echo "[$KEY] If previous.tar exists, move it around but keep it safe"
    # Plain `if` blocks are used rather than `ils ... && ( ... )`: a `return`
    # placed inside a ( ) subshell only leaves the subshell, not the function.
    if ils "$KEY/previous.tar" 2>/dev/null; then
        if ! imv "$KEY/previous.tar" "$KEY/previous_to_delete.tar"; then
            echo "[$KEY] Error 003 moving previous.tar around. Corrective action has to be taken manualy"
            zabbix_err "${KEY}:Error 003"
            return 1
        fi
    fi

    echo "[$KEY] If latest.tar exists, move it to previous.tar"
    if ils "$KEY/latest.tar" 2>/dev/null; then
        if ! imv "$KEY/latest.tar" "$KEY/previous.tar"; then
            echo "[$KEY] Error 004 moving latest.tar to previous.tar. Corrective action has to be taken manualy"
            zabbix_err "${KEY}:Error 004"
            return 1
        fi
    fi

    echo "[$KEY] Validate staging.tar by moving it to latest.tar"
    if ! imv "$KEY/staging.tar" "$KEY/latest.tar"; then
        echo "[$KEY] Error 005 moving statging.tar to latest.tar. Corrective action has to be taken manualy"
        zabbix_err "${KEY}:Error 005"
        return 1
    fi

    # The new archive is in place: the copy kept aside in step 1 can go.
    if ils "$KEY/previous_to_delete.tar" 2>/dev/null; then
        irm -f "$KEY/previous_to_delete.tar"
    fi
}

# Choose if the data should be processed or skipped
# Pack the data and send it to iRODS
# Arguments:
#   $1  path (abs or relative) to the data; its last three components are
#       taken as YEAR/NETWORK/STATION
#   $2  optional, the parallel slot number as given by {%} (only logged)
# Globals read: RESIFDD_WORKDIR, RECOVERY_FILE, LOCAL_REPORT
# Returns: 1 without argument or when tar fails, 2 when the path has fewer than
# 4 "/"-separated components, 0 when the data is skipped, otherwise the status
# of irods_push.
pack_and_send() {
    [ $# -eq 0 ] && echo "[pack_and_send] Need a path for data to send" && return 1
    # Parse path to get year, station and network
    local dir=$1
    local -a path_parts
    IFS='/' read -r -a path_parts <<< "$dir"
    [ ${#path_parts[@]} -lt 4 ] && echo "[pack_and_send] Path $dir is not complete (${#path_parts[@]} levels) " && return 2
    local YEAR=${path_parts[-3]}
    local NETWORK=${path_parts[-2]}
    local STATION=${path_parts[-1]}
    local KEY=${YEAR}_${NETWORK}_${STATION}
    local tarball="$RESIFDD_WORKDIR/$KEY.tar"
    local local_sha irods_sha push_rc
    echo "[$KEY] Starting job $2"

    # Test if in recovery mode, we should send or not
    if [[ -r $RECOVERY_FILE ]]; then
        # Match the three leading columns of a report line exactly, so that a
        # station is not skipped because of another entry whose name merely
        # contains the same characters (e.g. PLOP vs PLOPX).
        if grep -E -q -e "^ *${YEAR} \| +${NETWORK} \| +${STATION} \|.*( OK | Skipped )" "$RECOVERY_FILE"; then
            echo "[$KEY] Found OK or skipped in ${RECOVERY_FILE}. Skipping"
            format_report "$KEY" "-" "$(date +%Y-%m-%dT%H:%M:%S)" "-" "-" "Skipped" >> "$LOCAL_REPORT"
            return 0
        fi
    fi

    echo "[$KEY] Creating tar on $tarball"
    # Archive YEAR/NETWORK/STATION relative to the directory that contains YEAR.
    local -a tar_cmd=(tar cf "$tarball" -C "${dir%"$YEAR/$NETWORK/$STATION"}" "${YEAR}/${NETWORK}/${STATION}")
    echo "[$KEY] ${tar_cmd[*]}"
    if ! "${tar_cmd[@]}"; then
        # Something went wrong creating archive. Exit
        echo "[$KEY] Error 007 creating tar"
        # Send key to zabbix_err
        zabbix_err "$KEY:Error 007"
        rm -f -- "$tarball"
        return 1
    fi

    # The local checksum is always computed: irods_push needs it to verify the
    # upload, also when no latest.tar exists on the server yet.
    # (hex digest -> raw bytes -> base64, to compare with ichksum's sha2 value)
    local_sha=$(sha256sum "$tarball" | awk '{print $1}' | xxd -r -p | base64)

    # Check if file exists on irods server
    if ils -L "$KEY/latest.tar" > /dev/null 2>&1; then
        echo "[$KEY] latest.tar already exists on iRODS server. Let's compare hashes"
        irods_sha=$(ichksum "$KEY/latest.tar" | awk -F':' '/sha2:/ {print $2; exit;}')
        echo "[$KEY] local checksum: $local_sha"
        echo "[$KEY] irods checksum: $irods_sha"
        # If the hashes differ, then move distant file and push this one
        if [[ "$local_sha" = "$irods_sha" ]]; then
            echo "[$KEY] The archive on irods is the same as our version. Skipping."
            format_report "$KEY" "-" "$(date +%Y-%m-%dT%H:%M:%S)" "-" "-" "Skipped" >> "$LOCAL_REPORT"
            rm -f -- "$tarball"
            return 0
        fi
    fi

    # Send latest archive file to IRODS
    irods_push "$KEY" "$local_sha"
    push_rc=$?

    # Remove only this key's files: a "$KEY*" glob would also match the files of
    # another job whose key starts with the same characters.
    rm -f -- "$tarball" "$RESIFDD_WORKDIR/$KEY.restart"
    return "$push_rc"
}

# Export the function to the environment so that the shells started by
# `parallel` further down can call it by name.
export -f pack_and_send   # Necessary for call with GNU parallel
####################
#
# Preliminary tests
#
####################

# Checking IRODS environment: the file written by iinit must be present
if [[ ! -f ~/.irods/.irodsA ]] ; then
    echo "The irods scrambled password file is not present. Please run iinit and provide the password. Exit 1"
    exit 1
fi

# Test for working directory (also fails when RESIFDD_WORKDIR is unset or empty)
if [[ ! -w $RESIFDD_WORKDIR ]] ; then
    echo "RESIFDD_WORKDIR \"$RESIFDD_WORKDIR\" not writable. Check permissions. Exit 1"
    exit 1
fi

# Test the data directory where to dump everything from
if [[ -z $RESIFDD_DATADIR ]]; then
    echo "Variable RESIFDD_DATADIR must be set to the RESIF mountpoint. Exit 1"
    exit 1
fi
if [[ ! -d $RESIFDD_DATADIR ]]; then
    echo "Variable RESIFDD_DATADIR set to \"$RESIFDD_DATADIR\" must be a directory. Exit 1"
    exit 1
fi

####################
#
# Option ContinueFrom
#
####################
# When RESIFDD_CONTINUE_FROM_FILE names a readable report, work on a private
# copy of it: RECOVERY_FILE is then used to skip entries already marked
# OK or Skipped.
if [[ -r ${RESIFDD_CONTINUE_FROM_FILE} ]]; then
    # Continue from previous report
    echo "Recovery file $RESIFDD_CONTINUE_FROM_FILE exists"
    if cp -- "$RESIFDD_CONTINUE_FROM_FILE" "$RESIFDD_WORKDIR/recovery.$$"; then
        RECOVERY_FILE=$RESIFDD_WORKDIR/recovery.$$
        echo "Now using $RESIFDD_WORKDIR/recovery.$$ as recovery file"
    else
        # Without a usable copy nothing can be skipped; say so instead of
        # silently pointing RECOVERY_FILE at a file that does not exist.
        echo "Could not copy $RESIFDD_CONTINUE_FROM_FILE to $RESIFDD_WORKDIR/recovery.$$. Dumping everything now"
    fi
else
    echo "No recovery file present. Dumping everything now"
fi

# Header for the report :
# The timestamp is taken once so that the local and the distant report always
# carry the same name, even when the minute changes between two assignments.
report_stamp=$(date +%Y%m%d-%H%M)
IRODS_REPORT=reports/${report_stamp}.csv
LOCAL_REPORT=$RESIFDD_WORKDIR/report_${report_stamp}.csv
format_report "Year_Network_Station" "Size(MB)" "Dumpdate" "Duration(s)" "Throughput(MB/s)" "Comment" > "$LOCAL_REPORT"
imkdir -p reports
iput -f "$LOCAL_REPORT" "$IRODS_REPORT"
##################
#
# Dump Metadata
#
##################
KEY="validated-seismic-metadata"
# Set before the branch below: the validated-data section further down globs on
# ${MONTH} too, including when the metadata dump is skipped.
MONTH=$(date +%Y-%m)

if [[ -r $RECOVERY_FILE ]] && grep -E -q -e ".*($KEY ).*( OK | Skipped ).*" "$RECOVERY_FILE" ; then
    format_report "$KEY" "-" "$(date +%Y-%m-%dT%H:%M:%S)" "-" "-" "Skipped" >> "$LOCAL_REPORT"
else
    # Get the snapshot name for this month: glob results are sorted, the last
    # one is kept. Without any match the pattern itself is kept and the
    # directory test below reports it.
    snapshots=( "$RESIFDD_DATADIR"/validated_seismic_metadata/.snapshot/weekly."${MONTH}"* )
    SNAPSHOT_DIR=${snapshots[-1]}
    if [[ ! -d $SNAPSHOT_DIR ]]; then
        echo "Error 000 Snapshot directory $SNAPSHOT_DIR does not exist"
        exit 1
    fi

    echo "[$KEY] Starting dump from ${SNAPSHOT_DIR}"
    # NOTE(review): the directory given to -C is also the member to archive;
    # "-C dir ." may have been intended -- confirm before changing, it would
    # alter the paths stored in the archive.
    if ! tar cf "$RESIFDD_WORKDIR/$KEY.tar" --exclude portalproducts -C "$SNAPSHOT_DIR" "$SNAPSHOT_DIR"; then
        echo "[$KEY] Error 001 while creating tar archive."
        zabbix_err "${KEY}:Error 001"
        exit 1
    fi
    # hex digest -> raw bytes -> base64, to compare with ichksum's sha2 value
    local_sha=$(sha256sum "$RESIFDD_WORKDIR/$KEY.tar" | awk '{print $1}' | xxd -r -p | base64)
    irods_push "$KEY" "$local_sha"
    echo "[$KEY] Dump terminated :"
    ils -l "$KEY"
fi
####################
#
# Start dumping validated data
#
####################

# Do not rely on an earlier section having set MONTH: with an empty MONTH the
# glob below would match the monthly snapshots of any month.
MONTH=${MONTH:-$(date +%Y-%m)}

# Glob results are sorted, the last monthly snapshot of the month is kept.
# Without any match the pattern itself is kept and the test below reports it.
snapshots=( "${RESIFDD_DATADIR}"/validated_seismic_data/.snapshot/monthly."${MONTH}"* )
SNAPSHOT_DIR=${snapshots[-1]}
if [[ ! -d $SNAPSHOT_DIR ]]; then
    echo "Error 006 Snapshot directory $SNAPSHOT_DIR does not exist"
    exit 1
fi

echo "Starting dump of validated data with 4 jobs"
# One pack_and_send call per directory found exactly 3 levels below the
# snapshot; {%} is the parallel job slot number.
find "$SNAPSHOT_DIR" -maxdepth 3 -mindepth 3 -type d | sort | parallel --jobs 4 --max-args 1 pack_and_send {} {%}
echo "Dump of validated data done"

####################
#
# Start dumping cold data
#
####################

# Weekly snapshot dated "last sunday" as computed by date(1). Glob results are
# sorted and the last one is kept; without any match the pattern itself is kept
# and the test below reports it.
last_sunday=$(date +%Y-%m-%d --date 'last sunday')
snapshots=( "${RESIFDD_DATADIR}"/cold_validated_seismic_data/.snapshot/weekly."${last_sunday}"_* )
SNAPSHOT_DIR=${snapshots[-1]}
if [[ ! -d $SNAPSHOT_DIR ]]; then
    echo "Error 006 Snapshot directory $SNAPSHOT_DIR does not exist"
    exit 1
fi

echo "Starting dump of cold data with 4 jobs"
# One pack_and_send call per directory found exactly 3 levels below the
# snapshot; {%} is the parallel job slot number.
find "$SNAPSHOT_DIR" -maxdepth 3 -mindepth 3 -type d | sort | parallel --jobs 4 --max-args 1 pack_and_send {} {%}
echo "Dump of cold validated data done"