#!/bin/sh
#
# liftcheck - compare reposurgeon lift of a Subversion repo with a checkout
#
# The -v option reveals executed subcommands.
# The -q option makes repotool extra quiet and reports only errors.
# The -p option shows revision numbers when comparing multiple revisions
# The -u option enables context-diffing in the compare operation
# The -i option disables the normal suppression of ignore comparisons
# The -d option disables removal of the generated repositories after the run
# The -a option enables progress messages from the Subversion loader.
# The -s option enables display of common files
# The -r option sets a revision to check rather than trunk, branches and tags of HEAD.
#               The corresponding git commit is found with the legacy map, which means
#               only the branch modified in the revision will be compared, unless the
#               -B option is also used to make a flat repository.
#               You can use the "all" keyword or a range min-max to check multiple
#               revisions in a row. Non-existing revisions are silently ignored.
# The -R option sets a read limit to the dump parsing, for speed
# The -e option sets exclude patterns to ignore uninteresting tags and branches
# The -B option makes liftcheck use --nobranch to compare against a flat repository.
#               It is automatically applied if --nobranch is passed via a
#               "# reposurgeon-read-options:" comment in the stream.
#
# The REPOSURGEON environment variable can be used to substitute in a
# different implementation.
#
# The ENGINE variable can be used to run a different converter. This is tricky,
# read the code before trying to use it.  Presently the only alternative engine
# supported is svn2git, and that only without an -r option.
#
# The TESTOPT variable can be used to pass an early command or option setting
# to reposurgeon.
# Abort on the first unhandled command failure.
set -e

# Force the C locale so tool output and sorting are predictable.
export LANG=C

# Option defaults.  Every variable consulted later is initialised here so
# that nothing is accidentally inherited from the caller's environment.
repotoolopt=""		# extra flags (-v/-q) handed to repotool
debug=no		# yes: keep the generated repositories after the run
compareopt=""		# extra flags handed to "repotool compare[-all]"
svnquiet=-q		# quiet flag for the Subversion loader; cleared by -a
slimit=""		# "-r N" read limit for the Subversion loader
rlimit=""		# "readlimit N" command for reposurgeon
nobranch=""		# "--nobranch" when a flat comparison is requested
progress=no		# yes: report each revision as it is compared
readopt=""		# extra options for reposurgeon's read command
revision=""		# revision, min-max range or "all"; empty means HEAD

# An unknown option makes getopts print its own diagnostic and set opt to
# "?"; no arm below matches that, so parsing simply continues.
while getopts ade:ir:R:suvBpq opt
do
    case $opt in
	d) debug=yes;;
	p) progress=yes;;
	e) compareopt="${compareopt} -e ${OPTARG}";;
	a) svnquiet="";;
	i|s|u) compareopt="${compareopt} -${opt}";;
	R) slimit="-r ${OPTARG}"; rlimit="readlimit ${OPTARG}";;
	r) revision="${OPTARG}"; readopt="--preserve";; # --preserve is to avoid setting commit.Branch
	                                                # to something unrelated, because we rely on it
	                                                # to know which dir to checkout in SVN.
	v|q) repotoolopt="${repotoolopt} -${opt}";;
	B) nobranch="--nobranch";;
    esac
done
shift $((OPTIND - 1))

# At least one dump file (or dump file stem) must be named.
# The complaint is a diagnostic, so it goes to stderr.
if [ -z "$1" ]
then
    echo "liftcheck: an argument file (a Subversion dump) is required." >&2
    exit 1
fi

# Conversion engine to exercise; defaults to reposurgeon when ENGINE is
# unset or empty.
ENGINE=${ENGINE:-reposurgeon}

# Main loop: for every dump named on the command line, load it into a
# scratch Subversion repository, lift it to git with the selected engine,
# and compare the two with repotool.
#
# Settings derived from one dump (flat-repository mode, extra compare
# options, the revision currently being checked) live in per-iteration
# variables, so they never leak into the next dump and the user's -r, -B
# and compare options apply unchanged to every dump.
for what in "$@"
do
    # Accept either the dump file itself or its name without ".svn".
    if [ -f "${what}.svn" ]
    then
	what="${what}.svn"
    elif [ ! -f "${what}" ]
    then
	echo "No Subversion dumpfile matching ${what} found" >&2
	exit 1
    fi

    # Flat mode for this dump: requested with -B, or by a read-options
    # comment inside the dump itself.
    flat="${nobranch}"
    if grep -q '^ *# reposurgeon-read-options:.*--nobranch' "${what}"
    then
	flat=--nobranch
    fi

    stem=liftcheck$$

    # Start from a clean slate (also clears the previous dump's leftovers).
    rm -fr "${stem}"*

    if [ "${debug}" = no ]
    then
	trap "rm -fr ${stem} ${stem}-checkout ${stem}-git ${stem}.gfi ${stem}.info ${stem}*~ ${stem}-tmpdir" 0 1 2 15
    fi

    # Private scratch directory, exported so child processes use it too.
    TMPDIR=$(readlink -f "${stem}-tmpdir")
    export TMPDIR
    mkdir "${TMPDIR}"

    # Notification: dump name followed by its " ## " description line(s)
    { printf '  %s' "${what}"; grep --text '^ ##' "${what}" || echo ' ## (no description)'; } | sed -e '/ ## /s//: /' >&2

    # Make a Subversion repo from the dump
    # (the checkout tested below is presumably produced by -c -- see svn-to-svn).
    # svnquiet and slimit are deliberately unquoted: each is either empty
    # or a set of separate words.
    ./svn-to-svn ${svnquiet} ${slimit} -c <"${what}" "${stem}"

    case "${ENGINE}" in
	 reposurgeon)
	     # Make a git repo from the dump using reposurgeon
	     ${REPOSURGEON:-../reposurgeon} "${TESTOPT}" "${rlimit}" "read ${flat} ${readopt} <${what}" \
					    "prefer git" "rebuild ${stem}-git" "write >${stem}.gfi" >/dev/null
	     ;;
	 svn2git)
	     # svn2git needs to be run in a scratch directory; it clobbers things
	     if [ "${progress}" != no ]
	     then
		 outdev=stdout
	     else
		 outdev=null
	     fi
	     svnrepo="file://${PWD}/${stem}"
	     (mkdir "${stem}-git"; cd "${stem}-git" >/dev/null; svn2git "${svnrepo}" >"/dev/${outdev}";)
	     ;;
	 *)
	     echo "liftcheck: unknown conversion engine ${ENGINE}" >&2; exit 1
	     ;;
    esac

    # The checkout is "structured" if it has any of the standard
    # Subversion top-level directories.
    if [ -e "${stem}-checkout/trunk" ] \
	|| [ -e "${stem}-checkout/branches" ] \
	|| [ -e "${stem}-checkout/tags" ]
    then
	structured=true
    else
	structured=
    fi

    if [ -n "${revision}" ]
    then
	if [ ! -f "${stem}-git/.git/marks" ]
	then
	    echo Missing git marks file, unable to compare. >&2
	    exit 1
	fi

	# Generate a <legacy-id>TAB<mark>TAB<ref> file from the git-fast-import stream
	sed -n '/^commit / h;
		/^#legacy-id / H;
		/^mark / {
			H; s/^.*$//; x;
			t next; :next;
			s/^commit \(.*\)\n#legacy-id \(.*\)\nmark \(:.*\)$/\2\t\3\t\1/;
			t ok; d;
			:ok; p
		    }' <"${stem}.gfi" >"${stem}.info"
	rm -f "${stem}.gfi"

	# Work out the inclusive revision range to check.
	if [ "${revision}" = "all" ]
	then
	    min=1
	    max=$(tail -n 1 "${stem}.info" | cut -f 1 | cut -d. -f 1)
	else
	    min=${revision%%-*}		# text before the first "-"
	    max=${revision##*-}		# text after the last "-"
	fi

	while read -r line # input file at the end of the loop
	do
	    # Keep the per-line legacy ID in its own variable: ${revision}
	    # must retain the user's request for the next dump.
	    legacy=$(printf '%s\n' "${line}" | cut -f 1)
	    intrev=${legacy%%.*}	# integer part of IDs such as "12.3"
	    if [ "${intrev}" -lt "${min}" ] || [ "${intrev}" -gt "${max}" ]
	    then
		continue
	    fi

	    # Find out the git revision to pass.  The trailing space in the
	    # pattern anchors the whole mark, so ":1" cannot match ":10".
	    mark=$(printf '%s\n' "${line}" | cut -f 2)
	    sha=$(grep --max-count=1 "^${mark} " "${stem}-git/.git/marks" | cut -d" " -f 2)
	    if [ -z "${sha}" ]
	    then
		continue
	    fi

	    # Find the corresponding ref to checkout in SVN
	    ref=$(printf '%s\n' "${line}" | cut -f 3 | sed 's!refs/!!')

	    # Skip the comparison if the revision is "out of namespaces"
	    # because the Git side will only contain that outside part
	    # while the SVN side will contain the whole repository
	    if [ "${ref}" = "heads/root" ]
	    then
		continue
	    fi

	    # Use --nobranch if asked to, or if there is no structure and
	    # the asked branch is master
	    if [ -n "${flat}" ] \
		|| { [ "${ref}" = "heads/master" ] && [ -z "${structured}" ]; }
	    then
		refopt=--nobranch
	    else
		refopt=$(printf '%s\n' "${ref}" | sed 's!^deleted/r[^/]*/!!; s!^tags/!-t !; s!^heads/!-b !')
	    fi

	    if [ "${progress}" != no ]
	    then
		echo "Checking lift at r${legacy} (${refopt})"
	    fi
	    # Compare the original with the lift
	    # (option variables are unquoted on purpose: they hold word lists).
	    ../repotool ${repotoolopt} compare ${compareopt} \
		--accept-missing ${refopt} -r "${intrev}:${sha}" \
		"${stem}-checkout" "${stem}-git"
	done < "${stem}.info"
    else
	# Per-dump copy, so additions below do not pile up across dumps.
	allopt="${compareopt}"
	if [ -z "${structured}" ]
	then
	    # No structure at all, use --nobranch to compare
	    # --accept-missing is to use an empty dir if git has no master
	    # (this will only succeed if the SVN repository has no files)
	    allopt="${allopt} --nobranch --accept-missing"
	fi

	# Compare the original with the lift
	../repotool ${repotoolopt} compare-all ${allopt} ${flat} \
					       "${stem}-checkout" "${stem}-git"

    fi

    # In debug mode nothing is cleaned up; list what was left behind.
    if [ "${debug}" != no ]
    then
	ls -d liftcheck[0123456789]*
    fi
done
    
# end
