#!/bin/bash -p

# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# usage: dirdiffer.sh old_dir new_dir patch_dir
#
# dirdiffer creates a patch directory patch_dir that represents the difference
# between old_dir and new_dir. patch_dir can be used with dirpatcher to
# recreate new_dir given old_dir.
#
# dirdiffer operates recursively, properly handling ordinary files, symbolic
# links, and directories, as they are found in new_dir. Symbolic links and
# directories are always replicated as-is in patch_dir. Ordinary files will
# be represented at the appropriate location in patch_dir by one of the
# following:
#
#  - a binary diff prepared by goobsdiff that can transform the file at the
#    same position in old_dir to the version in new_dir, but only when such a
#    file already exists in old_dir and is an ordinary file. These files are
#    given a "$gbs" suffix.
#  - a bzip2-compressed copy of the new file from new_dir; in patch_dir, the
#    new file will have a "$bz2" suffix.
#  - a gzip-compressed copy of the new file from new_dir; in patch_dir, the
#    new file will have a "$gz" suffix.
#  - an xz/lzma2-compressed copy of the new file from new_dir; in patch_dir,
#    the new file will have an "$xz" suffix.
#  - an uncompressed copy of the new file from new_dir; in patch_dir, the
#    new file will have a "$raw" suffix.
#
# The unconventional suffixes are used because they aren't likely to occur in
# filenames.
#
# Of these options, the smallest possible representation is chosen. Note that
# goobsdiff itself will also compress various sections of a binary diff with
# bzip2, gzip, or xz/lzma2, or leave them uncompressed, according to which is
# smallest. The approach of choosing the smallest possible representation is
# time-consuming but given the choices of compressors results in an overall
# size reduction of about 3%-5% relative to using bzip2 as the only
# compressor; bzip2 is generally more effective for these data sets than gzip,
# and xz/lzma2 more effective than bzip2.
#
# For large input files, goobsdiff is also very time-consuming and
# memory-intensive. The overall "wall clock time" spent preparing a patch_dir
# representing the differences between Google Chrome's 6.0.422.0 and 6.0.427.0
# versioned directories from successive weekly dev channel releases on a
# 2.53GHz dual-core 4GB MacBook Pro is 3 minutes. Reconstructing new_dir with
# dirpatcher is much quicker; in the above configuration, only 10 seconds are
# needed for reconstruction.
#
# After creating a full patch_dir structure, but before returning, dirpatcher
# is invoked to attempt to recreate new_dir in a temporary location given
# old_dir and patch_dir. The recreated new_dir is then compared against the
# original new_dir as a verification step. Should verification fail, dirdiffer
# exits with a nonzero status, and patch_dir should not be used.
#
# Environment variables:
# DIRDIFFER_EXCLUDE
#   When an entry in new_dir matches this regular expression, it will not be
#   included in patch_dir. All prospective paths in new_dir will be matched
#   against this regular expression, including directories. If a directory
#   matches this pattern, dirdiffer will also ignore the directory's contents.
# DIRDIFFER_NO_DIFF
#   When an entry in new_dir matches this regular expression, it will not be
#   represented in patch_dir by a $gbs file prepared by goobsdiff. It will only
#   appear as a $bz2, $gz, $xz, or $raw file. Only files in new_dir, not
#   directories, will be matched against this regular expression.
#
# Exit codes:
#  0  OK
#  1  Unknown failure
#  2  Incorrect number of parameters
#  3  Input directories do not exist or are not directories
#  4  Output directory already exists
#  5  Parent of output directory does not exist or is not a directory
#  6  An input or output directory contains another
#  7  Could not create output directory
#  8  File already exists in output directory
#  9  Found an irregular file (non-directory, file, or symbolic link) in input
# 10  Could not create symbolic link
# 11  File copy failed
# 12  bzip2 compression failed
# 13  gzip compression failed
# 14  xz/lzma2 compression failed
# 15  Patch creation failed
# 16  Verification failed
# 17  Could not set mode (permissions)
# 18  Could not set modification time
# 19  Invalid regular expression (irregular expression?)

# Abort on any unhandled command failure (-e) and on any reference to an
# unset variable (-u).
set -eu

# Environment sanitization. Set a known-safe PATH. Clear environment variables
# that might impact the interpreter's operation. The |bash -p| invocation
# on the #! line takes the bite out of BASH_ENV, ENV, and SHELLOPTS (among
# other features), but clearing them here ensures that they won't impact any
# shell scripts used as utility programs. SHELLOPTS is read-only and can't be
# unset, only unexported.
export PATH="/usr/bin:/bin:/usr/sbin:/sbin"
unset BASH_ENV CDPATH ENV GLOBIGNORE IFS POSIXLY_CORRECT
export -n SHELLOPTS

# dotglob makes "*" match names beginning with "."; nullglob makes a glob
# that matches nothing expand to nothing instead of to the literal pattern.
shopt -s dotglob nullglob

# find_tool looks for an executable file named |tool_name|:
#  - in the same directory as this script,
#  - if this script is located in a Chromium source tree, at the expected
#    Release output location in the Mac xcodebuild directory,
#  - as above, but in the Debug output location
# If found in any of the above locations, the script's path is output.
# Otherwise, this function outputs |tool_name| as a fallback, allowing it to
# be found (or not) by an ordinary ${PATH} search.
find_tool() {
  # Arguments: $1 - the bare name of the tool to locate.
  # Outputs:   the path to use for the tool, on stdout.
  local tool_name="${1}"

  local script_dir
  script_dir="$(dirname "${0}")"

  # First choice: a sibling of this script.
  local tool="${script_dir}/${tool_name}"
  if [[ -f "${tool}" ]] && [[ -x "${tool}" ]]; then
    echo "${tool}"
    return
  fi

  # Resolve symbolic links so that the source-tree layout test below is made
  # against the physical location of the script.
  local script_dir_phys
  script_dir_phys="$(cd "${script_dir}" && pwd -P)"
  if [[ "${script_dir_phys}" =~ ^(.*)/src/chrome/installer/mac$ ]]; then
    tool="${BASH_REMATCH[1]}/src/xcodebuild/Release/${tool_name}"
    if [[ -f "${tool}" ]] && [[ -x "${tool}" ]]; then
      echo "${tool}"
      return
    fi

    tool="${BASH_REMATCH[1]}/src/xcodebuild/Debug/${tool_name}"
    if [[ -f "${tool}" ]] && [[ -x "${tool}" ]]; then
      echo "${tool}"
      return
    fi
  fi

  # Fall back to the bare name and let the caller's PATH search resolve it.
  echo "${tool_name}"
}

ME="$(basename "${0}")"
readonly ME
DIRPATCHER="$(dirname "${0}")/dirpatcher.sh"
readonly DIRPATCHER
GOOBSDIFF="$(find_tool goobsdiff)"
readonly GOOBSDIFF
readonly BZIP2="bzip2"
readonly GZIP="gzip"
XZ="$(dirname "${0}")/xz"
readonly XZ
readonly GBS_SUFFIX='$gbs'
readonly BZ2_SUFFIX='$bz2'
readonly GZ_SUFFIX='$gz'
readonly XZ_SUFFIX='$xz'
readonly PLAIN_SUFFIX='$raw'

# Workaround for http://code.google.com/p/chromium/issues/detail?id=83180#c3
# In bash 4.0, "declare VAR" no longer initializes VAR if not already set.
# The expansions are quoted so that an inherited value is never subjected to
# word splitting or pathname expansion.
: "${DIRDIFFER_EXCLUDE:=}"
: "${DIRDIFFER_NO_DIFF:=}"

# err prints |error| to stderr, prefixed with this script's name.
err() {
  local error="${1}"

  echo "${ME}: ${error}" >& 2
}

# g_cleanup holds paths that cleanup must remove. g_verify_exclude holds the
# paths in new_dir that were skipped because they matched DIRDIFFER_EXCLUDE.
# Both are explicitly initialized so that they are set (and empty) under
# |set -u| regardless of the bash version.
declare -a g_cleanup=() g_verify_exclude=()

# cleanup is the EXIT/signal trap handler. It removes everything listed in
# g_cleanup and exits with the status that was current when the trap fired.
cleanup() {
  local status=${?}

  # Don't re-enter: drop the EXIT trap and ignore further signals.
  trap - EXIT
  trap '' HUP INT QUIT TERM

  if [[ ${status} -ge 128 ]]; then
    err "Caught signal $((${status} - 128))"
  fi

  if [[ "${#g_cleanup[@]}" -gt 0 ]]; then
    rm -rf "${g_cleanup[@]}"
  fi

  exit ${status}
}

# copy_mode_and_time applies the permission bits and, for anything other than
# a symbolic link, the modification time of |new_file| to |patch_file|.
# Exits with status 17 if the mode can't be set and 18 if the time can't.
copy_mode_and_time() {
  local new_file="${1}"
  local patch_file="${2}"

  local mode
  mode="$(stat "-f%OMp%OLp" "${new_file}")"
  if ! chmod -h "${mode}" "${patch_file}"; then
    exit 17
  fi

  if ! [[ -L "${patch_file}" ]]; then
    # Symbolic link modification times can't be copied because there's no
    # shell tool that provides direct access to lutimes. Instead, the symbolic
    # link was created with rsync, which already copied the timestamp with
    # lutimes.
    if ! touch -r "${new_file}" "${patch_file}"; then
      exit 18
    fi
  fi
}

# file_size outputs the size of |file| in bytes.
file_size() {
  local file="${1}"

  stat -f %z "${file}"
}

# make_patch_file writes the smallest available representation of |new_file|
# at |patch_file| plus a suffix identifying the representation: an
# uncompressed copy, a bzip2, gzip, or xz compressed copy, or (when |old_file|
# is an ordinary file and |new_file| is not matched by DIRDIFFER_NO_DIFF) a
# goobsdiff binary diff. A candidate replaces the current best only when it is
# strictly smaller. The mode and modification time of |new_file| are copied
# to whichever file is kept.
make_patch_file() {
  local old_file="${1}"
  local new_file="${2}"
  local patch_file="${3}"

  local uncompressed_file="${patch_file}${PLAIN_SUFFIX}"
  # Refuse to overwrite, as is done for every other representation below.
  if [[ -e "${uncompressed_file}" ]]; then
    err "${uncompressed_file} already exists"
    exit 8
  fi
  if ! cp "${new_file}" "${uncompressed_file}"; then
    exit 11
  fi
  local uncompressed_size
  uncompressed_size="$(file_size "${new_file}")"

  # keep_file/keep_size track the best candidate found so far.
  local keep_file="${uncompressed_file}"
  local keep_size="${uncompressed_size}"

  local bz2_file="${patch_file}${BZ2_SUFFIX}"
  if [[ -e "${bz2_file}" ]]; then
    err "${bz2_file} already exists"
    exit 8
  fi
  if ! "${BZIP2}" -9c < "${new_file}" > "${bz2_file}"; then
    err "couldn't compress ${new_file} to ${bz2_file} with ${BZIP2}"
    exit 12
  fi
  local bz2_size
  bz2_size="$(file_size "${bz2_file}")"

  if [[ "${bz2_size}" -ge "${keep_size}" ]]; then
    rm -f "${bz2_file}"
  else
    rm -f "${keep_file}"
    keep_file="${bz2_file}"
    keep_size="${bz2_size}"
  fi

  local gz_file="${patch_file}${GZ_SUFFIX}"
  if [[ -e "${gz_file}" ]]; then
    err "${gz_file} already exists"
    exit 8
  fi
  if ! "${GZIP}" -9cn < "${new_file}" > "${gz_file}"; then
    err "couldn't compress ${new_file} to ${gz_file} with ${GZIP}"
    exit 13
  fi
  local gz_size
  gz_size="$(file_size "${gz_file}")"

  if [[ "${gz_size}" -ge "${keep_size}" ]]; then
    rm -f "${gz_file}"
  else
    rm -f "${keep_file}"
    keep_file="${gz_file}"
    keep_size="${gz_size}"
  fi

  local xz_flags=("-c")

  # If the file looks like a Mach-O file, including a universal/fat file, add
  # the x86 BCJ filter, which results in slightly better compression of x86
  # and x86_64 executables. Mach-O files might contain other architectures,
  # but they aren't currently expected in Chrome.
  local file_output
  file_output="$(file "${new_file}" 2> /dev/null || true)"
  if [[ "${file_output}" =~ Mach-O ]]; then
    xz_flags+=("--x86")
  fi

  # Use an lzma2 encoder. This is equivalent to xz -9 -e, but allows filters
  # to precede the compressor.
  xz_flags+=("--lzma2=preset=9e")

  local xz_file="${patch_file}${XZ_SUFFIX}"
  if [[ -e "${xz_file}" ]]; then
    err "${xz_file} already exists"
    exit 8
  fi
  if ! "${XZ}" "${xz_flags[@]}" < "${new_file}" > "${xz_file}"; then
    err "couldn't compress ${new_file} to ${xz_file} with ${XZ}"
    exit 14
  fi
  local xz_size
  xz_size="$(file_size "${xz_file}")"

  if [[ "${xz_size}" -ge "${keep_size}" ]]; then
    rm -f "${xz_file}"
  else
    rm -f "${keep_file}"
    keep_file="${xz_file}"
    keep_size="${xz_size}"
  fi

  # Only consult DIRDIFFER_NO_DIFF when it is non-empty. Whether an empty
  # regular expression is an error or matches every string depends on the
  # platform's regex library; an unset/empty variable must mean "no
  # restriction" everywhere.
  if [[ -f "${old_file}" ]] && ! [[ -L "${old_file}" ]] &&
     { [[ -z "${DIRDIFFER_NO_DIFF}" ]] ||
       ! [[ "${new_file}" =~ ${DIRDIFFER_NO_DIFF} ]]; }; then
    local gbs_file="${patch_file}${GBS_SUFFIX}"
    if [[ -e "${gbs_file}" ]]; then
      err "${gbs_file} already exists"
      exit 8
    fi
    if ! "${GOOBSDIFF}" "${old_file}" "${new_file}" "${gbs_file}"; then
      err "couldn't create ${gbs_file} by comparing ${old_file} to ${new_file}"
      exit 15
    fi
    local gbs_size
    gbs_size="$(file_size "${gbs_file}")"

    if [[ "${gbs_size}" -ge "${keep_size}" ]]; then
      rm -f "${gbs_file}"
    else
      rm -f "${keep_file}"
      keep_file="${gbs_file}"
      keep_size="${gbs_size}"
    fi
  fi

  copy_mode_and_time "${new_file}" "${keep_file}"
}

# make_patch_symlink replicates the symbolic link |new_file| at |patch_file|.
# Exits with status 10 if the link can't be created.
make_patch_symlink() {
  local new_file="${1}"
  local patch_file="${2}"

  # local target
  # target="$(readlink "${new_file}")"
  # ln -s "${target}" "${patch_file}"

  # Use rsync instead of the above, as it's the only way to preserve the
  # timestamp of a symbolic link using shell tools.
  if ! rsync -lt "${new_file}" "${patch_file}"; then
    exit 10
  fi

  copy_mode_and_time "${new_file}" "${patch_file}"
}

# make_patch_dir creates |patch_dir| and recursively populates it with the
# representation of every entry of |new_dir|, using |old_dir| as the base for
# binary diffs. Entries matching DIRDIFFER_EXCLUDE are skipped and recorded in
# g_verify_exclude so that verification can ignore them as well.
make_patch_dir() {
  local old_dir="${1}"
  local new_dir="${2}"
  local patch_dir="${3}"

  if ! mkdir "${patch_dir}"; then
    exit 7
  fi

  local new_file
  for new_file in "${new_dir}/"*; do
    # Strip "${new_dir}/" to obtain the entry's name.
    local file="${new_file:${#new_dir} + 1}"
    local old_file="${old_dir}/${file}"
    local patch_file="${patch_dir}/${file}"

    # Only consult DIRDIFFER_EXCLUDE when it is non-empty. Where the regex
    # library accepts an empty pattern it matches every string, which would
    # exclude all of new_dir.
    if [[ -n "${DIRDIFFER_EXCLUDE}" ]] &&
       [[ "${new_file}" =~ ${DIRDIFFER_EXCLUDE} ]]; then
      g_verify_exclude+=("${new_file}")
      continue
    fi

    if [[ -e "${patch_file}" ]]; then
      err "${patch_file} already exists"
      exit 8
    fi

    # The symbolic link test must come first: -d and -f follow links.
    if [[ -L "${new_file}" ]]; then
      make_patch_symlink "${new_file}" "${patch_file}"
    elif [[ -d "${new_file}" ]]; then
      make_patch_dir "${old_file}" "${new_file}" "${patch_file}"
    elif [[ ! -f "${new_file}" ]]; then
      err "can't handle irregular file ${new_file}"
      exit 9
    else
      make_patch_file "${old_file}" "${new_file}" "${patch_file}"
    fi
  done

  # Done last, after the directory's contents have been written.
  copy_mode_and_time "${new_dir}" "${patch_dir}"
}

# verify_patch_dir applies |patch_dir| to |old_dir| with dirpatcher in a
# temporary location and compares the result against |new_dir| using rsync.
# Exits with status 16 if patching fails, rsync fails, or rsync reports any
# difference.
verify_patch_dir() {
  local old_dir="${1}"
  local new_dir="${2}"
  local patch_dir="${3}"

  local verify_temp_dir verify_dir
  verify_temp_dir="$(mktemp -d -t "${ME}")"
  g_cleanup+=("${verify_temp_dir}")
  verify_dir="${verify_temp_dir}/patched"

  if ! "${DIRPATCHER}" "${old_dir}" "${patch_dir}" "${verify_dir}"; then
    err "patch application for verification failed"
    exit 16
  fi

  # rsync will print a line for any file, directory, or symbolic link that
  # differs or exists only in one directory. As used here, it correctly
  # considers link targets, file contents, permissions, and timestamps.
  local rsync_command=(rsync -clprt --delete --out-format=%n \
                       "${new_dir}/" "${verify_dir}")
  if [[ ${#g_verify_exclude[@]} -gt 0 ]]; then
    local exclude
    for exclude in "${g_verify_exclude[@]}"; do
      # ${g_verify_exclude[@]} contains paths in ${new_dir}. Strip off
      # ${new_dir} from the beginning of each, but leave a leading "/" so that
      # rsync treats them as being at the root of the "transfer."
      rsync_command+=("--exclude" "${exclude:${#new_dir}}")
    done
  fi

  local rsync_output
  if ! rsync_output="$("${rsync_command[@]}")"; then
    err "rsync for verification failed"
    exit 16
  fi

  rm -rf "${verify_temp_dir}"
  # Drop verify_temp_dir, the most recently added entry. The last element is
  # at index (count - 1), and the word is quoted so that it can't be treated
  # as a glob pattern.
  unset "g_cleanup[$(( ${#g_cleanup[@]} - 1 ))]"

  if [[ -n "${rsync_output}" ]]; then
    err "verification failed"
    exit 16
  fi
}

# shell_safe_path ensures that |path| is safe to pass to tools as a
# command-line argument. If the first character in |path| is "-", "./" is
# prepended to it. The possibly-modified |path| is output.
shell_safe_path() {
  local path="${1}"
  if [[ "${path:0:1}" = "-" ]]; then
    echo "./${path}"
  else
    echo "${path}"
  fi
}

# dirs_contained returns 0 if either of the two directory paths given is a
# path prefix of the other (which includes the two being equal), and 1
# otherwise. A trailing "/" is appended to both before comparing so that
# "/a/bc" is not taken to be inside "/a/b".
dirs_contained() {
  local dir1="${1}/"
  local dir2="${2}/"

  if [[ "${dir1:0:${#dir2}}" = "${dir2}" ]] ||
     [[ "${dir2:0:${#dir1}}" = "${dir1}" ]]; then
    return 0
  fi

  return 1
}

# usage prints a one-line usage summary to stderr.
usage() {
  echo "usage: ${ME} old_dir new_dir patch_dir" >& 2
}

# main validates the three directory arguments and the regular expressions
# from the environment, builds patch_dir, and verifies it. patch_dir is
# registered for removal by cleanup until verification has succeeded.
main() {
  local old_dir new_dir patch_dir
  old_dir="$(shell_safe_path "${1}")"
  new_dir="$(shell_safe_path "${2}")"
  patch_dir="$(shell_safe_path "${3}")"

  trap cleanup EXIT HUP INT QUIT TERM

  if ! [[ -d "${old_dir}" ]] || ! [[ -d "${new_dir}" ]]; then
    err "old_dir and new_dir must exist and be directories"
    usage
    exit 3
  fi

  if [[ -e "${patch_dir}" ]]; then
    err "patch_dir must not exist"
    usage
    exit 4
  fi

  local patch_dir_parent
  patch_dir_parent="$(dirname "${patch_dir}")"
  if ! [[ -d "${patch_dir_parent}" ]]; then
    err "patch_dir parent directory must exist and be a directory"
    usage
    exit 5
  fi

  # The weird conditional structure is because the status of the RE comparison
  # needs to be available in ${?} without conflating it with other conditions
  # or negating it. Only a status of 2 from the =~ operator indicates an
  # invalid regular expression.

  if [[ -n "${DIRDIFFER_EXCLUDE}" ]]; then
    if [[ "" =~ ${DIRDIFFER_EXCLUDE} ]]; then
      true
    elif [[ ${?} -eq 2 ]]; then
      err "DIRDIFFER_EXCLUDE contains an invalid regular expression"
      exit 19
    fi
  fi

  if [[ -n "${DIRDIFFER_NO_DIFF}" ]]; then
    if [[ "" =~ ${DIRDIFFER_NO_DIFF} ]]; then
      true
    elif [[ ${?} -eq 2 ]]; then
      err "DIRDIFFER_NO_DIFF contains an invalid regular expression"
      exit 19
    fi
  fi

  # Compare physical paths so that symbolic links can't hide containment.
  # patch_dir doesn't exist yet, so its physical path is derived from its
  # parent's.
  local old_dir_phys new_dir_phys patch_dir_parent_phys patch_dir_phys
  old_dir_phys="$(cd "${old_dir}" && pwd -P)"
  new_dir_phys="$(cd "${new_dir}" && pwd -P)"
  patch_dir_parent_phys="$(cd "${patch_dir_parent}" && pwd -P)"
  patch_dir_phys="${patch_dir_parent_phys}/$(basename "${patch_dir}")"

  if dirs_contained "${old_dir_phys}" "${new_dir_phys}" ||
     dirs_contained "${old_dir_phys}" "${patch_dir_phys}" ||
     dirs_contained "${new_dir_phys}" "${patch_dir_phys}"; then
    err "directories must not contain one another"
    usage
    exit 6
  fi

  # From here until verification succeeds, any exit removes patch_dir.
  g_cleanup+=("${patch_dir}")

  make_patch_dir "${old_dir}" "${new_dir}" "${patch_dir}"

  verify_patch_dir "${old_dir}" "${new_dir}" "${patch_dir}"

  # patch_dir is good: take it off the cleanup list (it is the last entry, at
  # index count - 1) before disarming the EXIT trap.
  unset "g_cleanup[$(( ${#g_cleanup[@]} - 1 ))]"
  trap - EXIT
}

if [[ ${#} -ne 3 ]]; then
  usage
  exit 2
fi

main "${@}"
exit ${?}