From 05d93d7544b9df7556e69f2a886681edf9749696 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Alejandro=20R=2E=20Sede=C3=B1o?=
Date: Thu, 3 Mar 2022 13:55:50 -0500
Subject: [PATCH] fedora and fedora-epel: use quick-fedora-mirror

Initially committing this to use --dry-run. If that looks good, we'll
run the real thing.

Also switching back to mirrors.rit.edu.

NB: quick-fedora-mirror requires zsh
---
 fetch-scripts/fedora             |   45 +-
 fetch-scripts/fedora-epel        |   43 +-
 tools/fedora/quick-fedora-mirror | 1679 ++++++++++++++++++++++++++++++
 3 files changed, 1745 insertions(+), 22 deletions(-)
 create mode 100755 tools/fedora/quick-fedora-mirror

diff --git a/fetch-scripts/fedora b/fetch-scripts/fedora
index 79bfa46..b169283 100644
--- a/fetch-scripts/fedora
+++ b/fetch-scripts/fedora
@@ -1,14 +1,35 @@
 #!/bin/sh
 
-#RSYNCSOURCE=rsync://download.fedora.redhat.com/fedora-enchilada
-#RSYNCSOURCE=rsync://mirrors2.kernel.org/fedora-enchilada
-#RSYNCSOURCE=rsync://fedora-archives.ibiblio.org/fedora-enchilada
-#RSYNCSOURCE=rsync://download.wpi.edu:874/fedora-enchilada
-#RSYNCSOURCE=rsync://mirrors.rit.edu/fedora-enchilada/fedora/
-#RSYNCSOURCE=rsync://download-i2.fedoraproject.org/fedora-enchilada0
-RSYNCSOURCE=rsync://archive.linux.duke.edu/fedora
-BASEDIR=${MIRRORDIR}/fedora
-
-rsync -vaH --numeric-ids --delete --delete-after \
-    --delay-updates \
-    ${RSYNCSOURCE} ${BASEDIR}
+RSYNCSOURCE=rsync://mirrors.rit.edu/
+MODULE="fedora-enchilada"
+MODULEDIR="fedora"
+TIMEFILE="${MIRRORDIR}/.locks/fedora-timefile"
+
+QFM="./tools/fedora/quick-fedora-mirror"
+CONF="qfm.conf"
+
+if ! [ -x "${QFM}" ]; then
+    # It is expected that this script is sourced by fetch-hudson
+    # and that cwd is the workspace where the root of this repository
+    # is checked out.
+    echo "Could not find quick-fedora-mirror."
+    exit 1
+fi
+
+cat >"${CONF}" <<EOF
diff --git a/tools/fedora/quick-fedora-mirror b/tools/fedora/quick-fedora-mirror
new file mode 100755
--- /dev/null
+++ b/tools/fedora/quick-fedora-mirror
@@ -0,0 +1,1679 @@
+#!/bin/zsh
+
+# Written by Jason Tibbitts <tibbs@math.uh.edu> in 2016.
+# Donated to the public domain. If you require a statement of license, please
+# consider this work to be licensed as "CC0 Universal", any version you choose.
+
+# Variables in upper case are user configurables.
+
+# ZSHISM? Turn on empty globs
+set -G
+export LANG=C
+# ZSHISM? newline for IFS.
+IFS=$'\n'
+
+# Declare globals
+typeset -A tcounts # Transfer counts
+
+# Do this very early
+starttime=$(date +%s)
+
+# Debug output;
+# Level 0: nothing except errors.
+# Level 1: lvl0 unless there is a transfer, and then basic info and times.
+#   Output goes to a file which may be spit out at the end of the run.
+# Level >= 2: Always some info, output to the terminal.
+db1 () {
+    if (( VERBOSE >= 2 )); then
+        echo $*
+    elif (( VERBOSE >= 1 )); then
+        echo $* >> $outfile
+    fi
+    # Otherwise output nothing....
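+    # In other words: at VERBOSE=1 messages are buffered in $outfile, which
+    # finish() dumps when the run fails (and at the very end of a completed
+    # run); at VERBOSE>=2 they go straight to the terminal.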
+}
+db1f () { db1 $(printf $*); }
+
+db2 () { (( VERBOSE >= 2 )) && echo $* }
+db2f () { (( VERBOSE >= 2 )) && printf $* }
+db3 () { (( VERBOSE >= 3 )) && echo '>>' $* }
+db4 () { (( VERBOSE >= 4 )) && echo '>>>>' $* }
+sep () { (( VERBOSE >= 2 )) && echo '============================================================' }
+
+logwrite () {
+    # Send logging info to the right place
+    if [[ -n $LOGJOURNAL ]]; then
+        echo $* >&3
+    elif [[ -n $LOGFILE && -w $LOGFILE ]]; then
+        echo $(date '+%b %d %T') $* >> $LOGFILE
+    fi
+}
+
+logit () {
+    # Basic logging function
+    local item=$1
+    shift
+    local err=''
+    [[ $item == 'E' ]] && err='ERR:'
+    [[ $item == 'e' ]] && err='Err:'
+
+    if [[ $LOGITEMS =~ $item || $LOGITEMS =~ '@' ]]; then
+        logwrite $err $*
+    fi
+    if (( VERBOSE >= 3 )); then
+        db3 Log: $err $*
+    fi
+
+    # XXX Consider sending errors to stdout
+    #if [[ -n $err ]]; then
+    #    (>&2 echo $*)
+    #fi
+
+}
+
+retcheck () {
+    local ret=$1
+    local prg=''
+    [[ -n $2 ]] && prg="$2 "
+
+    if [[ $ret -ne 0 ]]; then
+        db1 "${prg}failed at $functrace[1]: with return $ret"
+        logit E "${prg}call failed at $functrace[1]: with return $ret"
+    fi
+}
+
+lock () {
+    eval "exec 9>>$1"
+    flock -n 9 && return 0
+    return 1
+}
+
+save_state () {
+    # Doing an mv here actually undoes the locking. Could use cp instead.
+    # Currently the unlocking is a good thing because it allows the checkin to
+    # proceed without the next run waiting. But this should be audited.
+    if [[ -z $skiptimestamp ]]; then
+        db2 Saving mirror time to $TIMEFILE
+        if [[ -e $TIMEFILE ]]; then
+            mv $TIMEFILE $TIMEFILE.prev
+        fi
+        echo LASTTIME=$starttime > $TIMEFILE
+
+        if (( ? != 0 )); then
+            (>&2 echo Problem saving timestamp file $TIMEFILE)
+            logit E "Failed to update timestamp file"
+            exit 1
+        fi
+    else
+        db2 Skipping timestamp save.
+    fi
+}
+
+append_state () {
+    # Think about how to save extra state in the timestamp file or some
+    # associated file. Should we even do this?
+    # Should this be saved to a separate status file instead?
+
+
+    # Cannot rewrite the file or else the locking breaks. Updating it should
+    # be OK.
+    # Save things in a format that can be sourced (VAR=value).
+    # Repeated uses (VAR=value2) are OK and overwrite the previous value when the file is sourced.
+
+    # What would use this? A separate status program or some other monitor?
+    #
+    # Save data about the current transfer:
+    #   The current point in the process (
+    #   Counts
+    #   The current tempdir
+    #   Important transfer list files
+    #   The current rsync output file (for tailing and counting) since this is random.
+
+}
+
+finish () {
+    # Finish up.
+    #
+    # Takes two optional arguments. The first is the return value; the script
+    # will exit with that value and will dump the output file to stdout if the
+    # value is nonzero. If the second is nonempty, the output will be dumped
+    # regardless of the return value.
+    local ret=$1
+    local out=$2
+    db1 "========================="
+    db1 "Mirror finished: $(date) ($ret)"
+    logit R "Run end; exiting $ret."
+    [[ $ret -gt 0 || -n $out ]] && cat $outfile
+    exit $ret
+}
+
+filter () {
+    # Client-side file list filtering.
+    if [[ -n $FILTEREXP ]]; then
+        db4 filtering $1
+        sed --in-place=-prefilter -r -e "\,$FILTEREXP,d" $1
+    fi
+}
+
+hr_b () {
+    # Produce human-readable byte counts
+    # Yes, this has a bug at 1024EB
+    typeset -F2 out
+
+    if [[ $1 -lt 1024 ]]; then
+        echo ${1}B
+        return
+    fi
+
+    out=$(( $1 / 1024. ))
+    for unit in KB MB GB TB PB EB; do
+        (( $out < 1024 )) && break
+        out=$(( out / 1024. ))
+    done
+
+    echo ${out}${unit}
+}
+
+hr_s () {
+    # Produce human-readable second counts
+    typeset -F2 out=$1
+
+    if [[ $1 -lt 60 ]]; then
+        echo ${1}s
+        return
+    fi
+
+    out=$(( $1 / 60. ))
+    if [[ $out -lt 60 ]]; then
+        echo ${out}m
+        return
+    fi
+
+    out=$(( $out / 60. ))
+    echo ${out}h
+}
+
+parse_rsync_stats () {
+    # Parse some of the statistics that rsync gives us.
+    # Takes an rsync output log (stdout) as an argument.
+    # No return value, but will set several global variables:
+    #   rsfilestransferred
+    #   rsfilesize
+    #   rstotalbytesreceived
+    #   rstotalbytessent
+    #   rsfilelistgentime
+    #   rsfilelisttransfertime
+    #   rstransferspeed
+    #   rsspeedup
+    # These will all be unset if not present in the given log.
+    #
+    # Here's the full block of info that rsync provides:
+    #
+    # rsync[30399] (receiver) heap statistics:
+    #   arena: 311296 (bytes from sbrk)
+    #   ordblks: 2 (chunks not in use)
+    #   smblks: 1
+    #   hblks: 2 (chunks from mmap)
+    #   hblkhd: 532480 (bytes from mmap)
+    #   allmem: 843776 (bytes from sbrk + mmap)
+    #   usmblks: 0
+    #   fsmblks: 48
+    #   uordblks: 178272 (bytes used)
+    #   fordblks: 133024 (bytes free)
+    #   keepcost: 131200 (bytes in releasable chunk)
+    #
+    # rsync[30394] (generator) heap statistics:
+    #   arena: 311296 (bytes from sbrk)
+    #   ordblks: 2 (chunks not in use)
+    #   smblks: 1
+    #   hblks: 2 (chunks from mmap)
+    #   hblkhd: 532480 (bytes from mmap)
+    #   allmem: 843776 (bytes from sbrk + mmap)
+    #   usmblks: 0
+    #   fsmblks: 48
+    #   uordblks: 178208 (bytes used)
+    #   fordblks: 133088 (bytes free)
+    #   keepcost: 131200 (bytes in releasable chunk)
+    #
+    # Number of files: 11,140 (reg: 9,344, dir: 1,796)
+    # Number of created files: 1,329 (reg: 1,327, dir: 2)
+    # Number of deleted files: 0
+    # Number of regular files transferred: 1,182
+    # Total file size: 165,405,056,029 bytes
+    # Total transferred file size: 3,615,178,247 bytes
+    # Literal data: 3,229,943,512 bytes
+    # Matched data: 385,234,735 bytes
+    # File list size: 468,791
+    # File list generation time: 0.217 seconds
+    # File list transfer time: 0.000 seconds
+    # Total bytes sent: 1,249,286
+    # Total bytes received: 3,231,373,895
+    #
+    # sent 1,249,286 bytes received 3,231,373,895 bytes 81,838,561.54 bytes/sec
+    # total size is 165,405,056,029 speedup is 51.17
+
+    local log=$1
+
+    # Number of regular files transferred: 1
+    unset rsfilestransferred
+    rsfilestransferred=$(awk '/^Number of regular files transferred:/ {print $6; exit}' $log)
+
+    # Total file size: 10,174,746 bytes
+    unset rsfilesize
+    rsfilesize=$(awk '/^Total file size: (.*) bytes/ {print $4; exit}' $log | sed -e 's/,//g')
+
+    # Total bytes received: 2,425,728
+    unset rstotalbytesreceived
+    rstotalbytesreceived=$(awk '/^Total bytes received: (.*)/ {print $4; exit}' $log | sed -e 's/,//g')
+
+    # Total bytes sent: 384,602
+    unset rstotalbytessent
+    rstotalbytessent=$(awk '/^Total bytes sent: (.*)/ {print $4; exit}' $log | sed -e 's/,//g')
+
+    # File list generation time: 0.308 seconds
+    unset rsfilelistgentime
+    rsfilelistgentime=$(awk '/^File list generation time: (.*) seconds/ {print $5; exit}' $log)
+
+    # File list transfer time: 0.000 seconds
+    unset rsfilelisttransfertime
+    rsfilelisttransfertime=$(awk '/^File list transfer time: (.*) seconds/ {print $5; exit}' $log)
+
+    # sent 71 bytes received 2,425,728 bytes 156,503.16 bytes/sec
+    unset rstransferspeed
+    rstransferspeed=$(awk '/^sent .* bytes .* received .* bytes (.*) bytes\/sec$/ {print $7; exit}' $log \
+        | sed -e 's/,//g')
+
+    # total size is 10,174,746 speedup is 4.19
+    unset rsspeedup
+    rsspeedup=$(awk '/^total size is .* speedup is (.*)$/ {print $7; exit}' $log)
+}
+
+do_rsync () {
+    # The main function to do a transfer
+    # Accepts four arguments:
+    # 1) The source repository
+    # 2) The destination directory
+    # 3) The list of files
+    # 4) The name of an array containing additional rsync options
+    #
+    # This may sleep and retry when receiving specific errors.
+    # Returns the rsync return code (where 0 indicates full success, but other
+    # values may indicate a finished copy).
+
+    local src=$1 dest=$2 files=$3 opts=$4
+    local runcount=0
+    local log=$(mktemp -p . rsync-out-XXXXXX.log)
+    local errlog=$(mktemp -p . rsync-err-XXXXXX.log)
+    local sleep rr rvbash rvzsh
+    local rsyncto="--timeout=$RSYNCTIMEOUT"
+
+    local -a verboseopts flopts allopts
+
+    # These add to the default rsync verbosity
+    (( VERBOSE >= 7 )) && verboseopts+=(--progress)
+    (( VERBOSE >= 5 )) && verboseopts+=(-v)
+    (( VERBOSE >= 4 )) && verboseopts+=(-v)
+
+    # Usually we won't want to see this.
+    (( VERBOSE <= 3 )) && verboseopts+=(--no-motd)
+
+    flopts=("--files-from=$files")
+    allopts=($rsyncto $RSYNCOPTS $verboseopts $flopts ${(P)opts} $src $dest)
+
+    while true; do
+        runcount=$(( runcount+1 ))
+        # ZSHISM: (P) flag to act on a variable by name. Sadly, bash has
+        # broken array handling. bash 4.3 has local -n for this. Older bash
+        # needs hacks, or eval. More info:
+        # https://stackoverflow.com/questions/1063347/passing-arrays-as-parameters-in-bash
+        # Or just use a freaking global.
+
+        # We have to do this separately because you can't redirect to /dev/stderr when running under sudo.
+        # ZSHISM Teeing both stderr and stdout while keeping the return code is
+        # easy in zsh with multios but seems to be terribly difficult under bash.
+        db3 Calling $RSYNC $allopts
+        logit c calling $RSYNC $allopts
+        if (( VERBOSE >= 5 )); then
+            $RSYNC $allopts 1>&1 2>&2 >> $log 2>> $errlog
+        elif (( VERBOSE >= 2 )); then
+            $RSYNC $allopts >> $log 2>&2 2>> $errlog
+        else
+            $RSYNC $allopts >> $log 2>> $errlog
+        fi
+        rr=$?
+
+        # Check return values
+        if (( rr == 0 )); then
+            logit C rsync call completed successfully with return $rr
+            parse_rsync_stats $log
+            return 0
+
+        elif (( rr == 24 )); then
+            # 24: Partial transfer due to vanished source files
+            logit e "rsync says source files vanished."
+            return $rr
+
+        elif (( rr == 5 || rr == 10 || rr == 23 || rr == 30 || rr == 35 )); then
+            # Most of these are retryable network issues
+            #  5: Error starting client-server protocol
+            # 10: Error in socket I/O
+            # 30: Timeout in data send/receive
+            # 35: Timeout waiting for daemon connection
+            # 23: Partial transfer due to error
+            #     (could be a file list problem)
+            if [[ $rr -eq 23 && -f $errlog ]] ; then
+                # See if we tried to transfer files that don't exist
+                grep -q '^rsync: link_stat .* failed: No such file or directory (2)$' $errlog
+                if (( ? == 0 )); then
+                    logit e "Looks like the file list is outdated."
+                    (>&2 echo "Looks like the file list is outdated.")
+                    [[ -f $errlog ]] && (>&2 cat $errlog)
+                    return $rr
+                fi
+            fi
+
+            # It's not one of those special 23 errors, so we may retry. First
+            # see if we've already tried too many times.
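+            # Retry pacing below is exponential: sleep 2**runcount seconds
+            # (2, 4, 8, ...), so with the default MAXRETRIES=10 the final
+            # wait is about 8.5 minutes and the total added delay about 17.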
+            if (( runcount >= MAXRETRIES )); then
+                logit E rsync from $REMOTE failed
+                (>&2 echo "Could not sync from $REMOTE")
+                [[ -f $errlog ]] && (>&2 cat $errlog)
+                return $rr
+            fi
+
+            # Then sleep for a bit
+            sleep=$(( 2 ** runcount ))
+            logit e "rsync returned $rr (retryable), sleeping for $sleep"
+            db2 rsync failed: sleeping for $sleep
+            sleep $sleep
+            continue
+        fi
+
+        # We only get here if we got a return we didn't expect
+        logit E "rsync returned $rr, which was not expected."
+        (>&2 echo "rsync returned $rr, which was not expected."
+         [[ -f $errlog ]] && cat $errlog
+        )
+        return $rr
+    done
+}
+
+usage () {
+    cat <<EOF
+EOF
+}
+
+parse_args () {
+    while [[ $# > 0 ]]; do
+        opt=$1
+        case $opt in
+            -a | --alwayscheck)
+                alwayscheck=1
+                ;;
+            -c | --config)
+                cfgfile=$2
+                shift
+                if [[ ! -r $cfgfile ]]; then
+                    (>&2 echo Cannot read $cfgfile)
+                    exit 1
+                fi
+                ;;
+            -d) # Debugging
+                verboseopt=$2
+                shift
+                ;;
+            -h | --help)
+                usage
+                exit 1
+                ;;
+            -n | --dry-run)
+                rsyncdryrun=1
+                skipdelete=1
+                skiptimestamp=1
+                ;;
+            -N | --transfer-only)
+                skipdelete=1
+                skiptimestamp=1
+                ;;
+            -t )
+                backdate=$2
+                alwayscheck=1
+                shift
+                ;;
+            -T | --backdate)
+                backdate=$(date -d "$2" +%s)
+                alwayscheck=1
+                shift
+                ;;
+            --checkin-only)
+                skiptransfer=1
+                skipdelete=1
+                skiptimestamp=1
+                forcecheckin=1
+                ;;
+            --dir-times)
+                updatealldirtimes=1
+                alwayscheck=1
+                ;;
+            --refresh)
+                skipdelete=1
+                skiptimestamp=1
+                skipcheckin=1
+                refreshpattern=$2
+                shift
+                ;;
+            --dump-mm-checkin)
+                # Just for the test suite; dump the raw payload to the given
+                # filename with the module name appended.
+                dumpmmcheckin=$2
+                shift
+                ;;
+            --no-paranoia)
+                # Don't backdate the last mirrortime
+                noparanoia=1
+                ;;
+            *)
+                (>&2 echo "Unrecognized argument.")
+                exit 1
+                ;;
+        esac
+        shift
+    done
+}
+
+read_config () {
+    # Load up the configuration file from any of a number of locations
+    local file
+    for file in \
+        $cfgfile \
+        /etc/quick-fedora-mirror.conf \
+        ~/.config/quick-fedora-mirror.conf \
+        $(dirname $0)/quick-fedora-mirror.conf \
+        ./quick-fedora-mirror.conf; \
+    do
+        if [[ -r $file ]]; then
+            source $file
+            cfgfile=$file
+            break
+        fi
+    done
+
+    # Override some settings with previously parsed command-line options
+    [[ -n $verboseopt ]] && VERBOSE=$verboseopt
+
+    # Check that the required parameters were provided
+    if [[ -z $DESTD ]]; then
+        (>&2 echo "You must define DESTD in your configuration file ($cfgfile).")
+    fi
+    if [[ -z $TIMEFILE ]]; then
+        (>&2 echo "You must define TIMEFILE in your configuration file ($cfgfile).")
+    fi
+
+    # Set some other general variables based on the value of provided
+    # configuration settings
+    [[ -z $CHECKIN_SITE ]] && skipcheckin=1
+    [[ -z $MAXCHECKINRETRIES ]] && MAXCHECKINRETRIES=$MAXRETRIES
+}
+
+set_default_vars () {
+    # Set various defaults before the configuration file is loaded.
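+    # For reference, a minimal quick-fedora-mirror.conf might look like this
+    # (illustrative values; DESTD and TIMEFILE are the settings read_config
+    # actually requires):
+    #
+    #   DESTD=/srv/mirror
+    #   TIMEFILE=/srv/mirror/qfm-timefile
+    #   MODULES=(fedora-enchilada fedora-epel)
+    #   VERBOSE=1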
+ + # Mapping from module names to directories under fedora-buffet + # ZSHISM (initialize associative array) + typeset -g -A MODULEMAPPING + typeset -g -A MIRRORMANAGERMAPPING + typeset -g -A MIRRORMANAGERMODULEMAPPING + + MODULEMAPPING=( + fedora-alt alt + fedora-archive archive + fedora-enchilada fedora + fedora-epel epel + fedora-secondary fedora-secondary + ) + + MIRRORMANAGERMAPPING=( + fedora-alt 'fedora other' + fedora-archive 'fedora archive' + fedora-enchilada 'fedora linux' + fedora-epel 'fedora epel' + fedora-secondary 'fedora secondary arches' + ) + + # Mirrormanager has a weird prefix for "fedora-enchilada", so copy the + # existing module mapping and alter it + MIRRORMANAGERMODULEMAPPING=(${(kv)MODULEMAPPING}) + MIRRORMANAGERMODULEMAPPING[fedora-enchilada]="fedora/linux" + + # Default arguments; override in quick-fedora-mirror.conf + VERBOSE=0 + LOGITEMS=aeElrR + + DESTD= + TIMEFILE= + + CHECKIN_HOST=$(hostname) + CURL=/usr/bin/curl + FILELIST='fullfiletimelist-$mdir' + EXTRAFILES=(fullfilelist imagelist-\$mdir) + MIRRORMANAGER=https://admin.fedoraproject.org/mirrormanager/xmlrpc + REMOTE=rsync://dl.fedoraproject.org + RSYNC=/usr/bin/rsync + RSYNCTIMEOUT=$((60 * 10)) + WARNDELAY=$((60 * 60 * 24)) + MAXRETRIES=10 + + rsyncver=$(rsync --version | head -1 | awk '{print $3}') + if [[ $rsyncver == 3.1.3 ]]; then + # 3.1.3 has broken support for --preallocate and -S (--sparse) together + RSYNCOPTS=(-aSH -f 'R .~tmp~' --stats --delay-updates --out-format='@ %i %10l %n%L') + elif [[ $rsyncver == 3.1* ]]; then + RSYNCOPTS=(-aSH -f 'R .~tmp~' --stats --preallocate --delay-updates --out-format='@ %i %10l %n%L') + else + RSYNCOPTS=(-aSH -f 'R .~tmp~' --stats --delay-updates --out-format='@ %i %10l %n%L') + fi + + MASTERMODULE=fedora-buffet + MODULES=(fedora-enchilada fedora-epel) +} + +check_file_list_version () { + # Look at the file list to see if we can handle it + # + # Takes the file list name. + # Returns 0 if we can handle it, 1 if we can't. + local max_fl_version=3 + local fl=$1 + + if [[ ! -f $fl ]]; then + (>&2 echo "Cannot check file list \"$fl\". Exiting.") + exit 1 + fi + + local flversion=$(awk -F '\t' '/^\[Version/ {s=1; next} /^$/ {exit} {if (s) print $0}' < $fl) + if [[ "$flversion" -le $max_fl_version ]]; then + return + fi + + # Either it is too new or we just can't parse it, so quit. + (>&2 echo "File list from the mirror cannot be processed by this script. Exiting.") + exit 1 +} + +clean_all_transfer_temps () { + # Delete temporary transfer files, but not any log files. + # Be sure to add any extra generated temporaries here. + # XXX Is it OK that this doesn't delete the file lists? They will just get + # copied over. + rm -f *.old + for i in ${(v)MODULEMAPPING} alldirs allfiles allfilesizes changedpaths \ + changes checksumfailed checksums deletedirs deletefiles flist \ + localdirs localfiles localfilesizes localfulllist master missingdirs \ + missingfiles newdirs newfiles staletmpdirs staletmpfiles \ + transferlist updatedfiles updatetimestamps; do + rm -f $i-* + done +} + +clean_stale_rsync_temps () { + # Clean up temporaries left over from a previous aborted rsync run. + local mod=$1 + + db2 Possibly aborted rsync run. Cleaning up. + logit a "cleaning up previous aborted run: $(wc -l < staletmpfiles-$mod) file(s)." + + # Move the files in those tmpdirs a level up if a file with the + # same name doesn't exist. We don't update the file lists because + # we want rsync to re-check those files and possibly fix up the + # permissions. 
+    # The dirs will be cleaned up later.
+    # Note that this _may_ leave a few files around which should not be
+    # there. They will of course be cleaned up at the next run.
+    # XXX We could do better by comparing the stale files against the
+    # to-be-transferred list, but it's probably not worth it.
+    for dir in $(cat staletmpdirs-$mod); do
+        pushd $DESTD/$dir
+        for file in *; do
+            if [[ ! -f ../$file ]]; then
+                logit A Saving previous download $file
+                db3 Saving previous download: $file
+                mv $file ..
+            fi
+        done
+        popd
+    done
+}
+
+fetch_file_lists () {
+    # Download the file list for each configured module
+    # Will set the global variable "checksums" containing the checksum of the
+    # file list of each module that exists on the client at the beginning of the transfer.
+
+    local extra flname module rsyncreturn
+
+    sep
+    logit o Remote file list download start
+    db2 Downloading file lists
+    # ZSHISM (declare associative array)
+    typeset -g -A checksums
+    checksums=()
+    for module in $MODULES; do
+        # ZSHISM? (associative array indexing)
+        moduledir=$MODULEMAPPING[$module]
+        mkdir $moduledir
+        flname=${FILELIST/'$mdir'/$moduledir}
+        if [[ -f $DESTD/$moduledir/$flname ]]; then
+            cp -p $DESTD/$moduledir/$flname $moduledir
+            ln $moduledir/$flname $moduledir/$flname.old
+            # ZSHISM (assign assoc. array value)
+            checksums[$module]=$(sha1sum $DESTD/$moduledir/$flname | cut -d' ' -f1)
+        fi
+
+        echo $moduledir/$flname >> filelist-transferlist
+    done
+
+    extra=(--no-dirs --relative --compress)
+    do_rsync $REMOTE/$MASTERMODULE/ . filelist-transferlist extra
+    rsyncreturn=$?
+    if [[ $rsyncreturn -ne 0 ]]; then
+        (>&2 echo "rsync finished with nonzero exit status.\nCould not retrieve file lists.")
+        logit E Aborting due to rsync failure while retrieving file lists
+        finish 1
+    fi
+
+    # Log very basic stats
+    logit s "File list download: $(hr_b $rstotalbytesreceived) received, $(hr_b $rstransferspeed)/s"
+
+    # Check that we can handle the downloaded lists
+    for module in $MODULES; do
+        moduledir=$MODULEMAPPING[$module]
+        flname=${FILELIST/'$mdir'/$moduledir}
+        check_file_list_version $moduledir/$flname
+    done
+
+    # rsync won't transfer those files to the current directory, so move them and
+    # clean up.
+    mv */* .
+    rmdir * 2> /dev/null
+    logit o Remote file list download: end
+}
+
+checkin_build_inner_payload () {
+    # Build the inner json payload
+    # Takes the module name, the name of the output file to use, and the
+    # checkin hostname
+    local module=$1
+    local mm=$2
+    local checkinhost=$3
+
+    local moduledir=$MIRRORMANAGERMODULEMAPPING[$module]
+    local mmcheckin=$MIRRORMANAGERMAPPING[$module]
+
+    cat >$mm <<EOF
+EOF
+
+    for dir in $(cat alldirs-$module); do
+        echo $dir >>$mm
+    done
+
+    # The data sent by report_mirror always includes a blank directory; add it
+    # manually here which conveniently means we don't have to deal with the
+    # trailing comma. And after that, the various parameters mirrormanager
+    # wants.
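+    # (The inner payload imitates what Fedora's report_mirror tool sends: a
+    # per-module directory listing plus identifying parameters, which is why
+    # the loop above feeds each directory into $mm.)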
+    cat >>$mm <<EOF
+EOF
+}
+
+checkin_encode_inner_payload () {
+    # Compress and encode the payload for transport
+    # Takes input and output filenames
+    local in=$1
+    local out=$2
+
+    bzip2 < $in > $in.bz2
+    base64 < $in.bz2 > $in.bz2.b64
+
+    # change '+' to '-' and '/' to '_'
+    tr '+/' '-_' < $in.bz2.b64 > $out
+
+    rm $in.bz2 $in.bz2.b64
+}
+
+checkin_build_outer_payload () {
+    # Wrap the encoded payload in just the right xml
+    # Takes input and output filenames
+
+    local in=$1
+    local out=$2
+
+    cat >>$out <<EOF
+<?xml version="1.0"?>
+<methodCall>
+<methodName>checkin</methodName>
+<params>
+<param>
+EOF
+    echo -n "<value><string>" >>$out
+
+    cat <$in >>$out
+
+    cat >>$out <<EOF
+</string></value>
+</param>
+</params>
+</methodCall>
+EOF
+}
+
+checkin_upload_payload () {
+    # Now actually upload the payload
+    # We have to remove the Expect: header that curl sends but which mirrormanager cannot handle
+    local payload=$1
+    local module=$2
+    local -a curlopts
+    local curlret
+
+    logit M "Making xmlrpc call for $module"
+    curlopts=(--silent)
+    curl --help | grep -q http1\.1
+    (( ? == 0 )) && curlopts+=(--http1.1)
+    (( VERBOSE >= 4 )) && curlopts=(--verbose)
+    db3 "$CURL $curlopts -H \"Expect:\" -H \"Content-Type: text/xml\" --data @$payload $MIRRORMANAGER"
+    $CURL $curlopts -H "Expect:" -H "Content-Type: text/xml" --data @$payload $MIRRORMANAGER > curl.out
+    curlret=$?
+    if [[ $curlret -ne 0 ]]; then
+        logit e "Checkin failure: curl returned $curlret"
+        (>&2 echo "Checkin failure: curl returned $curlret")
+        return 2
+    fi
+
+    # Parse the output to see if we got any useful return
+    # The sed call attempts to strip xml tags. Easily fooled but we don't expect
+    # any complicated return from mirrormanager.
+    sed -e 's/<[^>]*>//g' curl.out > curl.noxml
+    grep -q -i successful curl.noxml
+
+    if [[ $? -ne 0 ]]; then
+        db1 "Mirrormanager checkin for $module did not appear to succeed."
+        logit e "Doesn't look like we got a good return from mirrormanager."
+        logit e $(cat curl.noxml)
+        return 1
+    fi
+    return 0
+}
+
+checkin_module () {
+    # Perform the mirrormanager checkin for a particular module
+    local module=$1
+
+    local mm=mirrormanager-payload-$module
+    local mx=mirrormanager-xmlrpc-$module
+    local moduledir=$MODULEMAPPING[$module]
+
+    if [[ ! -f alldirs-$module ]]; then
+        # We were asked to check in a module that we hadn't previously
+        # processed, which should not happen.
+        logit E "Cannot perform checkin for $module; no directory list exists."
+        return
+    fi
+
+    # Determine the "mirrormanager hostname" to use for this checkin.
+    # Different modules can be set up under different "hosts" in mirrormanager,
+    # even though these might all be on the same machine. This works around
+    # problems mirrormanager has when crawling machines which mirror
+    # everything.
+    # ZSHISM: This uses "(P)"; the equivalent in bash is "!".
+    local checkinhost=$CHECKIN_HOST
+    local hostspecificvar=CHECKIN_HOST_${module//-/_}
+    if [[ -n ${(P)hostspecificvar} ]]; then
+        checkinhost=${(P)hostspecificvar}
+    fi
+
+    db3 "Performing mirrormanager checkin for $module (in $moduledir) as $checkinhost"
+    logit M "Processing $module (in $moduledir) as $checkinhost"
+
+    # Construct the checkin payload
+    checkin_build_inner_payload $module $mm $checkinhost
+    checkin_encode_inner_payload $mm $mm.enc
+    checkin_build_outer_payload $mm.enc $mx
+
+    # For the test suite, just dump the checkin info and bail
+    if [[ -n $dumpmmcheckin ]]; then
+        cat $mx > $dumpmmcheckin-$module
+        return
+    fi
+
+    # Try to check in until we've retried too often.
+    local retries=1
+    while true; do
+        checkin_upload_payload $mx $module
+
+        if [[ $? -eq 0 ]]; then
+            break
+        fi
+
+        if (( retries >= MAXCHECKINRETRIES )); then
+            logit E "Could not complete checkin after $MAXCHECKINRETRIES tries."
+            break
+        fi
+
+        logit e "Checkin attempt $retries failed. Will retry."
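+        # Checkin retries back off linearly (sleep 2*retries seconds: 4, 6,
+        # 8, ...), unlike the exponential backoff used in do_rsync().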
+ retries=$(( retries +1 )) + sleep $(( 2*retries )) + done + + logit M "Processing $module: end" +} + +awk_extract_file_list () { + local inf=$1 + local outf=$inf.flist + [[ -n $2 ]] && outf=$2 + + awk ' \ + /^\[Files/ {s=1;next} + /^$/ {if (s==1) exit} + s {print}' \ + < $inf > $outf + retcheck $? awk +} + +awk_extract_paths_from_file_list_restricted () { + local inf=$1 + local outf=$2 + local mdir=$3 + + # We can just ignore the type and permissions completely + awk -F '\t' "{print \"$mdir/\" \$4}" < $inf > $outf + retcheck $? awk +} + +awk_extract_paths_from_file_list_norestricted () { + local inf=$1 + local outf=$2 + local mdir=$3 + + awk -F '\t' " \ + { if (\$2 == \"d\" || \$2 == \"f\" || \$2 == \"l\") \ + print \"$mdir/\" \$4 \ + }" < $inf > $outf + retcheck $? awk +} + +awk_extract_newer_dirs_restricted () { + local inf=$1 + local outf=$2 + local mdir=$3 + + local last=0 + [[ -n $4 ]] && last=$4 + + awk -F '\t' " \ + /\\[Files/ {s=1;next} + /^\$/ {s=0;next} + { if (s && \$1 >= $last \ + && (\$2 == \"d\" || \$2 == \"d-\" || \$2 == \"d*\")) \ + print \"$mdir/\" \$4 \ + }" \ + < $inf > $outf + retcheck $? awk +} + +awk_extract_newer_dirs_no_restricted () { + local inf=$1 + local outf=$2 + local mdir=$3 + + local last=0 + [[ -n $4 ]] && last=$4 + + awk -F '\t' " \ + /\\[Files/ {s=1;next} \ + /^\$/ {s=0;next} \ + { if (s && \$1 >= $last \ + && (\$2 == \"d\")) \ + print \"$mdir/\" \$4 \ + }" \ + < $inf > $outf + retcheck $? awk +} + +awk_extract_newer_files_restricted () { + local inf=$1 + local outf=$2 + local mdir=$3 + + local last=0 + [[ -n $4 ]] && last=$4 + + awk -F '\t' "/\\[Files/ {s=1;next} \ + /^\$/ {s=0;next} \ + {if (s && \$1 >= $last && \ + (\$2 == \"f\" || \$2 == \"f-\" || \$2 == \"f*\" \ + || \$2 == \"l\" || \$2 == \"l-\" || \$2 == \"l*\" \ + )) \ + print \"$mdir/\" \$4 \"\t\" \$3 \ + } \ + " $inf > $outf + retcheck $? awk +} + +awk_extract_newer_files_no_restricted () { + local inf=$1 + local outf=$2 + local mdir=$3 + + local last=0 + [[ -n $4 ]] && last=$4 + + awk -F '\t' "/\\[Files/ {s=1;next} \ + /^\$/ {s=0;next} \ + {if (s && \$1 >= $last && \ + (\$2 == \"f\" \ + || \$2 == \"l\" \ + )) \ + print \"$mdir/\" \$4 \"\t\" \$3 \ + } \ + " $inf > $outf + retcheck $? awk +} + +process_file_list_diff () { + # Extract and then diff the old and new file lists for a module + # Creates changedfiles-$module file + + local fl=$1 + local mod=$2 + local mdir=$3 + + local oldflist=flist-old-$mod + local newflist=flist-new-$mod + + logit l "Generating database diff start: $mod" + + # Extract the file list part of old and new file lists. + awk_extract_file_list $fl.old flist-old-$mod + awk_extract_file_list $fl flist-new-$mod + + # sort each by path + sort -t$'\t' -k4 $oldflist > $oldflist.sorted + sort -t$'\t' -k4 $newflist > $newflist.sorted + + # compute the changes + diff --changed-group-format='%>' --unchanged-group-format='' $oldflist.sorted $newflist.sorted > changes-$mod + + # Extract path from changes + if [[ -n $PREBITFLIP ]]; then + awk_extract_paths_from_file_list_restricted changes-$mod changedpaths-$mod $mdir + else + awk_extract_paths_from_file_list_norestricted changes-$mod changedpaths-$mod $mdir + fi + + # We must filter here so that files we don't want to transfer won't appear + # to have changed. 
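+    # (FILTEREXP is a plain regex that filter() wraps in a sed delete
+    # command; e.g. FILTEREXP='/(source|debug)/' -- an illustrative value,
+    # not a default -- would keep source and debug trees out of every list.)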
+    filter changedpaths-$mod
+
+    logit l "Generating database diff end: $mod"
+}
+
+compute_file_list_stats () {
+    # Calculate and log counts of the various generated lists
+    local mod=$1
+    local -a stats
+    local -A counts
+    stats=(allfiles alldirs newfiles newdirs changedpaths localfiles \
+        localdirs deletefiles deletedirs missingfiles missingdirs \
+        updatedfiles updatetimestamps checksumfailed)
+
+    for i in $stats; do
+        counts[$i]=0
+        [[ -f $i-$mod ]] && counts[$i]=$(wc -l < $i-$mod)
+    done
+
+    counts[totaltransfer]=$(wc -l < transferlist-$mod)
+
+    # Until the rest of the code is fixed up
+    counts[extrafiles]=$counts[deletefiles]
+    counts[extradirs]=$counts[deletedirs]
+    counts[sizechanged]=$counts[updatedfiles]
+    counts[allserverfiles]=$counts[allfiles]
+    counts[allserverdirs]=$counts[alldirs]
+    counts[newserverfiles]=$counts[newfiles]
+    counts[newserverdirs]=$counts[newdirs]
+
+    # Previously these two were printed before generating the local file lists
+    db2f "Total on server: %7d files, %4d dirs.\n" $counts[allserverfiles] $counts[allserverdirs]
+    db2f "New on server: %7d files, %4d dirs.\n" $counts[newserverfiles] $counts[newserverdirs]
+
+    db2f "Total on client: %7d files, %4d dirs.\n" $counts[localfiles] $counts[localdirs]
+    db2f "Not present on server: %7d files, %4d dirs.\n" $counts[extrafiles] $counts[extradirs]
+    db2f "Missing on client: %7d files, %4d dirs.\n" $counts[missingfiles] $counts[missingdirs]
+    db2f "Size Changed: %7d files.\n" $counts[sizechanged]
+    db2f "Timestamps to restore: %7d files.\n" $counts[updatetimestamps]
+    db2f "Checksum Failed: %7d files.\n" $counts[checksumfailed]
+    db2f "Filelist changes: %7d paths.\n" $counts[changedpaths]
+    db2f "Total to transfer: %7d paths.\n" $counts[totaltransfer]
+
+    logit L "Counts for $mod: Svr:$counts[allserverfiles]/$counts[allserverdirs] Loc:$counts[localfiles]/$counts[localdirs] Diff:$counts[changedpaths] New:$counts[newserverfiles]/$counts[newserverdirs] Xtra:$counts[extrafiles]/$counts[extradirs] Miss:$counts[missingfiles]/$counts[missingdirs] Size:$counts[sizechanged] Csum:$counts[checksumfailed] Dtim:$counts[updatetimestamps]"
+
+}
+
+generate_local_file_list () {
+    # Generate lists of what the client has.
+    local mod=$1
+    local mdir=$2
+
+    db3 Generating local file/dir list
+    logit l "Generating local file list start: $mod"
+
+    # Traverse the filesystem only once
+    pushd $DESTD
+    find $mdir/* -printf '%y\t%p\t%s\n' > $tempd/localfulllist-$mod
+    popd
+
+    # Now extract file and dir lists from that
+    awk -F '\t' '{if ($1 == "d") {print $2}}' < localfulllist-$mod > localdirs-$mod
+    awk -F '\t' '{if ($1 == "f" || $1 == "l") {print $2}}' < localfulllist-$mod > localfiles-$mod
+    awk -F '\t' '{if ($1 == "f" || $1 == "l") {print $2 "\t" $3}}' < localfulllist-$mod > localfilesizes-$mod
+
+    # Look for stray .~tmp~ dirs
+    if [[ -z $NORSYNCRECOVERY ]]; then
+        grep '\.~tmp~' localdirs-$mod > staletmpdirs-$mod
+        grep '\.~tmp~' localfiles-$mod > staletmpfiles-$mod
+    fi
+
+    logit l "Generating local file list end: $mod"
+}
+
+process_local_file_list () {
+    # Compare what the client has to what the server has, and generate more
+    # lists based on that.
+    # Generates the following file lists:
+    #   deletefiles-$module
+    #   deletedirs-$module
+    #   updatetimestamps-$module
+    #   missingfiles-$module
+    #   missingdirs-$module
+    #   updatedfiles-$module
+    #   checksumfailed-$module
+
+    # XXX Don't do any master transferlist manipulation here.
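+    # The set differences below rely on a sort/uniq idiom: a list passed
+    # twice cannot contribute unique lines, so "sort A A B | uniq -u"
+    # prints exactly the lines that appear only in B.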
+ local mod=$1 + local mdir=$2 + + # Find files on the client which don't exist on the server + sort allfiles-$mod allfiles-$mod localfiles-$mod \ + | uniq -u > deletefiles-$mod + remove_filelists_from_file deletefiles-$mod $mdir + + # Find dirs on the client which don't exist on the server + sort alldirs-$mod alldirs-$mod localdirs-$mod \ + | uniq -u > deletedirs-$mod + + # Extract dirnames of every file and dir in the delete lists, and all of their parents. + if [[ -n $updatealldirtimes ]]; then + echo $mdir > updatetimestamps-$mod + cat alldirs-$mod >> updatetimestamps-$mod + else + awk '{dn($0)} function dn(p) { while (sub(/\/[^\/]*\]?$/, "", p)) print p }' \ + deletefiles-$mod deletedirs-$mod \ + | sort -u > updatetimestamps-$mod + fi + + # Find files on the server which are missing on the client + sort localfiles-$mod localfiles-$mod allfiles-$mod \ + | uniq -u > missingfiles-$mod + + # Find dirs on the server which are missing on the client + sort localdirs-$mod localdirs-$mod alldirs-$mod \ + | uniq -u > missingdirs-$mod + + # Find files which have changed size + sort allfilesizes-$mod localfilesizes-$mod \ + | uniq -u | awk -F '\t' '{print $1}' \ + | uniq -d > updatedfiles-$mod + + # Extract and verify checksums + awk -F '\t' "/^\[Checksums/ {s=1; next} /^$/ {s=0; next} {if (s) print \$1 \" $mdir/\" \$2}" $fl > checksums-$mod + pushd $DESTD > /dev/null 2>&1 + sha1sum --check --quiet $tempd/checksums-$mod 2> /dev/null \ + | grep -i 'failed$' \ + | awk -F: '{print $1}' > $tempd/checksumfailed-$mod + popd > /dev/null 2>&1 +} + +process_remote_file_list () { + # Extract various file and directory lists from the master file list + # + # This will also handle ignoring restricted or pre-bitflip content if + # necessary. + # + # Will create the following files: + # allfilesizes-$module + # allfiles-$module + # alldirs-$module + # newdirs-$module + + local fl=$1 + local module=$2 + local moduledir=$3 + + db3 Extracting file and directory lists for $module. + + if [[ -n $PREBITFLIP ]]; then + db4 "Directories (pre-bitflip included)" + awk_extract_newer_dirs_restricted $fl alldirs-$module $moduledir + + db4 "New dirs (pre-bitflip included)" + awk_extract_newer_dirs_restricted $fl newdirs-$module $moduledir $LASTTIME + + db4 "Files (pre-bitflip included)" + awk_extract_newer_files_restricted $fl allfilesizes-$module $moduledir + + db4 "New files (pre-bitflip included)" + awk_extract_newer_files_restricted $fl newfilesizes-$module $moduledir $LASTTIME + else + # All dirs, unrestricted only + db4 "Directories (pre-bitflip excluded)" + awk_extract_newer_dirs_no_restricted $fl alldirs-$module $moduledir + + db4 "New dirs (pre-bitflip excluded)" + awk_extract_newer_dirs_no_restricted $fl newdirs-$module $moduledir $LASTTIME + + db4 "Files (pre-bitflip excluded)" + awk_extract_newer_files_no_restricted $fl allfilesizes-$module $moduledir + + db4 "New files (pre-bitflip excluded)" + awk_extract_newer_files_no_restricted $fl newfilesizes-$module $moduledir $LASTTIME + fi + + # Filter the lists if needed + filter alldirs-$module + filter newdirs-$module + filter allfilesizes-$module + filter newfilesizes-$module + + # Produce the file lists without sizes. + awk -F '\t' '{print $1}' allfilesizes-$module > allfiles-$module; retcheck $? awk + awk -F '\t' '{print $1}' newfilesizes-$module > newfiles-$module; retcheck $? 
awk
+}
+
+update_master_file_lists () {
+    # Simply append various per-module lists to the master lists
+    local module=$1
+    cat deletefiles-$module >> master-deletefiles
+    cat deletedirs-$module >> master-deletedirs
+    cat updatetimestamps-$module >> master-updatetimestamps
+    cat missingfiles-$module >> transferlist-$module
+    cat missingdirs-$module >> transferlist-$module
+    cat updatedfiles-$module >> transferlist-$module
+    cat checksumfailed-$module >> transferlist-$module
+}
+
+remove_filelists_from_file () {
+    # Remove the file from $FILELIST and anything given by $EXTRAFILES.
+    # Takes:
+    #   file to modify
+    #   directory of current module (for substituting $mdir)
+    # Modifies the file directly
+    # Calls egrep -v in a loop. Generally this is called on files of no more
+    # than a few thousand lines, so performance shouldn't be an issue.
+
+    local f=$1
+    local moduledir=$2
+    local tmp=$f.rfff
+    local fl
+
+    for fl in $FILELIST $EXTRAFILES; do
+        fl=${fl/'$mdir'/$moduledir}
+        egrep -v "^[^/]*/$fl" $f > $tmp
+        mv $tmp $f
+    done
+
+    rm -f $tmp
+}
+
+process_module () {
+    # Determine what needs to be transferred and removed from a single module.
+    #
+    # Takes the name of the module to process, returns nothing.
+    #
+    # Sets the following globals:
+    #   changed_modules
+    #
+    # Will leave the following lists in the temporary dir for use by other
+    # functions: (all of them; currently deletes nothing)
+    #
+    # May leave other files, but don't depend on them.
+    #
+    # The various status variables, for logging:
+    #   cntallserverfiles/cntallserverdirs - total files/dirs on server.
+    #   cntnewserverfiles/cntnewserverdirs - new files/dirs on server (since last mirror time)
+    #   cntlocalfiles/cntlocaldirs - total files/dirs on client.
+    #   cntextrafiles/cntextradirs - files/dirs on client but not server.
+    #   cntmissingfiles/cntmissingdirs - files/dirs on server but not client.
+    #   cntsizechanged - files where size differs between server/client.
+    #   cntupdatetimestamps - dir timestamps to restore
+    #   cntchecksumfailed - files where checksum differs between server/client.
+    #   cntchangedpaths - count of all differences between file lists.
+
+    local module=$1
+    # ZSHISM? (associative array indexing)
+    local moduledir=$MODULEMAPPING[$module]
+
+    local fl=${FILELIST/'$mdir'/$moduledir}
+    local cntallserverfiles cntallserverdirs cntnewserverfiles cntnewserverdirs
+    local cntchangedpaths cntlocalfiles cntlocaldirs cntextrafiles cntextradirs
+    local cntmissingfiles cntmissingdirs cntsizechanged cntupdatetimestamps cntchecksumfailed
+    local extra
+
+    if [[ -z $alwayscheck && \
+          -n $checksums[$module] && \
+          $(sha1sum $fl | cut -d' ' -f1) == $checksums[$module] ]]; then
+        logit N No change in file list for $module
+        db2 No change in file list checksum. Skipping $module.
+        return
+    fi
+
+    sep
+    logit P Processing start: $module
+    db2 Processing $module
+    changed_modules+=$module
+
+    # Make sure the list is complete.
+    tail -2 $fl | grep -q '^\[End\]$'
+    if (( ? != 0 )); then
+        logit e "Invalid file list; skipping $module"
+        (>&2 echo "No end marker. Corrupted file list?"
+         echo Skipping $module.)
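+        # A missing [End] marker usually means we caught the master
+        # mid-update; skip the module now and let the next run retry it.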
+ return + fi + + process_remote_file_list $fl $module $moduledir + + cntallserverfiles=$(wc -l < allfiles-$module) + cntallserverdirs=$(wc -l < alldirs-$module) + db2f "Total on server: %7d files, %4d dirs.\n" $cntallserverfiles $cntallserverdirs + + cntnewserverfiles=$(wc -l < newfiles-$module) + cntnewserverdirs=$(wc -l < newdirs-$module) + db2f "New on server: %7d files, %4d dirs.\n" $cntnewserverfiles $cntnewserverdirs + + # Add extra files to the transfer list + echo $moduledir/$fl >> newfiles-$module + for extra in $EXTRAFILES; do + extra=${extra/'$mdir'/$moduledir} + echo $moduledir/$extra >> newfiles-$module + done + cat newfiles-$module >> transferlist-$module + cat newdirs-$module >> transferlist-$module + + if [[ -d $DESTD/$moduledir ]]; then + db3 Finding file list changes since last run + process_file_list_diff $fl $module $moduledir + cat changedpaths-$module >> transferlist-$module + + generate_local_file_list $module $moduledir + + if [[ -s staletmpdirs-$module ]]; then + clean_stale_rsync_temps $module + fi + + # Find files on the client which don't exist on the server + process_local_file_list $module $moduledir + update_master_file_lists $module + + # Count some things we want to use for stats later. + cntchangedpaths=$(wc -l < changedpaths-$module) + cntlocalfiles=$(wc -l < localfiles-$module) + cntlocaldirs=$(wc -l < localdirs-$module) + cntextrafiles=$(wc -l < deletefiles-$module) + cntextradirs=$(wc -l < deletedirs-$module) + cntmissingfiles=$(wc -l < missingfiles-$module) + cntmissingdirs=$(wc -l < missingdirs-$module) + cntsizechanged=$(wc -l < updatedfiles-$module) + cntupdatetimestamps=$(wc -l < updatetimestamps-$module) + cntchecksumfailed=$(wc -l < checksumfailed-$module) + + db2f "Total on client: %7d files, %4d dirs.\n" $cntlocalfiles $cntlocaldirs + db2f "Not present on server: %7d files, %4d dirs.\n" $cntextrafiles $cntextradirs + db2f "Missing on client: %7d files, %4d dirs.\n" $cntmissingfiles $cntmissingdirs + db2f "Size Changed: %7d files.\n" $cntsizechanged + db2f "Timestamps to restore: %7d files.\n" $cntupdatetimestamps + db2f "Checksum Failed: %7d files.\n" $cntchecksumfailed + db2f "Filelist changes: %7d paths.\n" $cntchangedpaths + fi + + sort -u transferlist-$module >> transferlist-sorted-$module + cat transferlist-sorted-$module >> master-transferlist + local cnttotaltransfer=$(wc -l < transferlist-sorted-$module) + db2f "Total to transfer: %7d paths.\n" $cnttotaltransfer + + logit L "Counts for $module: Svr:$cntallserverfiles/$cntallserverdirs Loc:$cntlocalfiles/$cntlocaldirs Diff:$cntchangedpaths New:$cntnewserverfiles/$cntnewserverdirs Xtra:$cntextrafiles/$cntextradirs Miss:$cntmissingfiles/$cntmissingdirs Size:$cntsizechanged Csum:$cntchecksumfailed Dtim:$cntupdatetimestamps" + logit P Processing end: $module + db2 Finished processing $module. + + # Some basic info about the transfer. + db1 Changes in $module: $cnttotaltransfer files/dirs + if (( cnttotaltransfer <= 5 )); then + for i in $(cat transferlist-sorted-$module); do + db1 " $i" + done + fi + + # XXX We should clean some things up at this point, but we also need some + # files for the checkin later. + # Should be able to delete all *-$module, except for the dirlists, to give + # the current mirrormanager versions the things it needs. + #if (( VERBOSE <= 4 )); then + # rm *-$module + #fi +} + + +# Main program execution +# ====================== +parse_args "$@" +set_default_vars +read_config +# XXX check_dependencies + +# Paranoia; give us a few extra seconds. 
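+# Backdating starttime means files that land on the master while this run is
+# in flight get re-examined by the next run instead of being skipped.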
+[[ -z $noparanoia ]] && starttime=$(($starttime-5))
+
+# Find the previous mirror time, and backdate if necessary
+LASTTIME=0
+if [[ -r $TIMEFILE ]]; then
+    source $TIMEFILE
+fi
+if [[ -n $backdate ]]; then
+    LASTTIME=$backdate
+fi
+
+# Make a temp dir and clean it up unless we're doing a lot of debugging
+if [[ -z $TMPDIR ]]; then
+    tempd=$(mktemp -d -t quick-mirror.XXXXXXXXXX)
+else
+    tempd=$(mktemp -d -p $TMPDIR -t quick-mirror.XXXXXXXXXX)
+fi
+
+if [[ $? -ne 0 ]]; then
+    (>&2 echo "Creating temporary directory failed?")
+    exit 1
+fi
+if (( VERBOSE <= 8 )); then
+    trap "rm -rf $tempd" EXIT
+fi
+
+# Set up a FIFO for logging. Calling systemd-cat repeatedly just gives us
+# a different PID every time, which is annoying.
+if [[ -n $LOGJOURNAL ]]; then
+    logfifo=$tempd/journal.fifo
+    mkfifo $logfifo
+    systemd-cat -t quick-fedora-mirror < $logfifo &
+    exec 3>$logfifo
+fi
+
+outfile=$tempd/output
+touch $outfile
+
+cd $tempd
+
+# At this point we can acquire the lock
+lock $TIMEFILE
+if (( ? != 0 )); then
+    db4 Could not acquire lock.
+    logit k lock contention
+    # Maybe we haven't been able to mirror for some time....
+    delay=$(( starttime - LASTTIME ))
+    if [[ -n $backdate || $LASTTIME -eq 0 ]]; then
+        delay=0
+    fi
+
+    if (( delay > WARNDELAY )); then
+        (>&2 echo No completed run since $(date -d @$LASTTIME ).)
+        logit E No completed run since $(date -d @$LASTTIME ).
+    fi
+    exit 1
+fi
+
+db1 "Mirror starting: $(date)"
+logit r Run start: cfg $cfgfile, tmp $tempd
+
+if (( VERBOSE >= 6 )); then
+    echo Times:
+    echo LASTTIME=$LASTTIME
+    echo starttime=$starttime
+    echo TIMEFILE=$TIMEFILE
+    echo Dirs:
+    echo tempd=$tempd
+    echo DESTD=$DESTD
+    echo Rsync:
+    echo REMOTE=$REMOTE
+    echo MASTERMODULE=$MASTERMODULE
+    echo RSYNC=$RSYNC
+    echo RSYNCOPTS=$RSYNCOPTS
+    echo Modules:
+    echo MODULES=$MODULES
+    echo MODULEMAPPING=$MODULEMAPPING
+    echo Misc:
+    echo VERBOSE=$VERBOSE
+fi
+
+(( VERBOSE >= 8 )) && set -x
+
+if [[ -n $MIRRORBUFFET ]]; then
+    # We want to mirror everything, so save the admin from listing the
+    # individual modules.
+    # ZSHISM (get keys from an associative array with (k))
+    MODULES=(${(k)MODULEMAPPING})
+    # BASHEQ MODULES=${!MODULEMAPPING[@]}
+    # bash3 equivalent is terrible
+fi
+
+fetch_file_lists
+
+logit p Processing start
+changed_modules=()
+for module in $MODULES; do
+    process_module $module
+done
+
+if [[ ! -e master-transferlist ]]; then
+    logit n No changes to synchronize
+    db2 No changed files.
+    finish 0
+fi
+
+if [[ -n $MIRRORBUFFET ]]; then
+    echo DIRECTORY_SIZES.txt >> master-transferlist
+fi
+
+# The actual transfer
+# ===================
+sort -u master-transferlist > master-transferlist.sorted
+linecount=$(wc -l < master-transferlist.sorted)
+sep; sep
+db2 Transferring $linecount files.
+# XXX send total count to log as well
+
+# Now we have a list of everything which has changed recently in every module
+# we want, pass that to rsync (non recursive mode!) and it should transfer just
+# the changed files without having to pull the entire huge file list.
+extra=()
+if [[ -n $rsyncdryrun ]]; then
+    extra+=(-n)
+fi
+do_rsync $REMOTE/$MASTERMODULE/ $DESTD master-transferlist.sorted extra
+if (( ? != 0 )); then
+    (>&2 echo "rsync failed; aborting run.\nWill not check in or delete anything.")
+    logit E "Skipping further operations due to rsync failure."
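+    # finish(1) also dumps the buffered $outfile to stdout, so whatever
+    # drives this script (cron, fetch-hudson) gets the failure context.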
+    finish 1
+fi
+
+# Total downloaded file count, bytes received, transfer speed
+logit s "stat: downloaded $rsfilestransferred files"
+logit s "stat: received $(hr_b $rstotalbytesreceived)"
+logit s "stat: transfer speed $(hr_b $rstransferspeed)/s"
+
+# Everything we can extract from rsync
+logit S "stat: sent $(hr_b $rstotalbytessent)"
+logit S "stat: speedup: $rsspeedup"
+logit S "stat: total size of transferred files: $(hr_b $rsfilesize)"
+logit S "stat: file list gen time $(hr_s $rsfilelistgentime)"
+logit S "stat: file list transfer time $(hr_s $rsfilelisttransfertime)"
+
+db1 "========================="
+db1 "Main transfer statistics:"
+db1 "  Downloaded files: $rsfilestransferred"
+db1 "  Total size of those files: $(hr_b $rsfilesize)"
+db1 "  Received: $(hr_b $rstotalbytesreceived)"
+db1 "  Sent: $(hr_b $rstotalbytessent)"
+db1 "  Speedup: $rsspeedup"
+db1 "  Transfer speed: $(hr_b $rstransferspeed)/s"
+db1 "  File list generation time: $(hr_s $rsfilelistgentime)"
+db1 "  File list transfer time: $(hr_s $rsfilelisttransfertime)"
+
+# Local dir/file deletion
+# =======================
+if [[ -s master-deletedirs ]]; then
+    linecount=$(wc -l < master-deletedirs)
+
+    if [[ -n $skipdelete && $VERBOSE -ge 2 ]]; then
+        logit d Directory deletion skipped
+        echo "Not deleting $linecount directories. Delete list is:"
+        cat master-deletedirs
+        echo
+    else
+        logit d Directory deletion start: $linecount directories
+        db2 Removing $linecount stale directories.
+        for nuke in $(cat master-deletedirs); do
+            if [[ -d "$DESTD/$nuke" ]]; then
+                logit D Deleting directory $nuke
+                db4 Removing $nuke
+                rm -rf "$DESTD/$nuke"
+                deletedsomething=1
+            fi
+        done
+        logit d Directory deletion end
+    fi
+else
+    db2 No stale directories to delete.
+fi
+
+if [[ -s master-deletefiles ]]; then
+    linecount=$(wc -l < master-deletefiles)
+
+    if [[ -n $skipdelete ]]; then
+        logit d File deletion skipped
+        echo Not deleting $linecount stale files. Delete list is:
+        cat master-deletefiles
+        echo
+    else
+        logit d File deletion begin: $linecount files
+        db2 Removing $linecount stale files.
+        # xopts=()
+        # (( VERBOSE >= 4 )) && xopts=(-t)
+        tr '\n' '\0' < master-deletefiles \
+            | (pushd $DESTD; xargs $xopts -0 rm -f ; popd)
+        # for nuke in $(cat master-deletefiles); do
+        #     logit D Deleting file $nuke
+        #     rm -f "$DESTD/$nuke"
+        # done
+        deletedsomething=1
+        logit d File deletion end
+    fi
+else
+    db2 No stale files to delete.
+fi
+
+if [[ ( -n $KEEPDIRTIMES || -n $updatealldirtimes ) && -s master-updatetimestamps ]]; then
+    extra=()
+    if [[ -n $rsyncdryrun ]]; then
+        extra+=(-n)
+    fi
+    logit d "Updating timestamps on $(wc -l < master-updatetimestamps) dirs"
+    do_rsync $REMOTE/$MASTERMODULE/ $DESTD master-updatetimestamps extra
+fi
+
+# We've completed a run, so save the timestamp
+save_state
+
+# Mirrormanager Checkin and Callout
+# =================================
+# At this point we know that we had a clean run with no complaints from rsync,
+# and as far as we're concerned the run is now complete and recorded.
+#
+# So for each module we mirrored, the filtered file list is correct. This
+# means that the alldirs-$module file is accurate and we can simply report its
+# contents to mirrormanager.
+if [[ -z $skipcheckin || -n $dumpmmcheckin ]]; then
+    db2 Performing mirrormanager checkin
+    logit m "mirrormanager checkin start"
+
+    # Check in just the changed modules
+    for module in $changed_modules; do
+        checkin_module $module
+    done
+
+    logit m "mirrormanager checkin end"
+fi
+finish 0 yes
-- 
2.45.2