[Advanced Diff] Superpotente y Recargado

d
Autor: Desconocido
Descripción: Compara dos ficheros comprimidos de manera recursiva. También investiga ficheros comprimidos dentro de los originales


  • Soporta compresión zip, gzip y bz2
  • Detecta si el fichero está empaquetado además de comprimido (solo probado con tar.gz y tar.bz2)


Para saber más sobre Diff, haz clic aquí

#!/bin/bash
#################################################
# Advanced diff v0.12 - 24/10/2007
# It compares archives (zip, jar, tar.gz, tar.bz2) recursively
# so when a compressed file is found, it is uncompressed
# and files inside compared as well
# Diff output is: diff_[file1]_vs_[file2]_[timestamp].txt
#################################################
# Scratch directory where archives are unpacked; remove_temp deletes it.
TEMP="${HOME}/borrar"
# File, kept inside the scratch directory, listing the pairs of binary files
# that loop() found and that still have to be compared.
BIN_FILES="${TEMP}/bin_files"
# Upper bound for num_loop; the main program stops once it is exceeded.
MAX_LOOPS=5
# Counter advanced by the main program before each nested comparison.
num_loop=0
# Checks that a file exists and that its format is supported.
# Arguments:
#   $1 - path of the file to check (may contain spaces)
# Outputs:
#   Writes a diagnostic to stdout when the check fails.
# Returns:
#   0   the file exists and `file` reports it as zip, bzip2 or gzip
#   255 the file does not exist or is not a regular file (historical "-1")
#   254 the format is not supported (historical "-2")
function check_file {
   local description format

   if [[ ! -f "$1" ]]; then
      echo "Not able to found file $1, please check."
      # Explicit 255: `return -1` is rejected by older bash versions
      return 255
   fi

   # `file` prints "<name>: <description>". Strip the name literally instead of
   # splitting on whitespace, so that paths containing spaces still work.
   description=$(file -- "$1")
   description=${description#"$1":}
   # First word of the description identifies the format
   read -r format _ <<< "$description"
   format=$(printf '%s' "$format" | tr '[:upper:]' '[:lower:]')

   case "$format" in
      zip|bzip2|gzip)
         return 0
      ;;
   esac

   echo "No valid file format encountered (${1##*/}): $format"
   return 254
}
# Extracts content from compressed file ($1) to temporary location ($2).
# Arguments:
#   $1 - archive to unpack (zip, or a tar package compressed with bzip2/gzip)
#   $2 - destination directory; created if missing
# Returns:
#   0 when the destination already exists (nothing is done), or when the
#     format is not one of the three handled ones (only the empty
#     destination directory is created);
#   otherwise the status of the extraction command, or 1 if the destination
#   cannot be created.
# Notes:
#   No support for more than 1 tar package inside compressed file.
function extract {
   local description format

   # An existing destination is left untouched
   if [[ -d "$2" ]]; then
      return 0
   fi
   mkdir -p -- "$2" || return 1

   # `file` prints "<name>: <description>"; strip the name literally so that
   # paths containing spaces are handled, then keep the first word.
   description=$(file -- "$1")
   description=${description#"$1":}
   read -r format _ <<< "$description"
   format=$(printf '%s' "$format" | tr '[:upper:]' '[:lower:]')

   case "$format" in
      zip)
         unzip -q "$1" -d "$2"
      ;;
      bzip2)
         # Pipe into tar instead of `tar -xjf` for non GNU tar compatibility.
         # `cd ... &&` keeps tar from unpacking into the current directory
         # when the destination cannot be entered.
         bzip2 -d -c -- "$1" | (cd "$2" && tar xf -)
      ;;
      gzip)
         # Same approach as above instead of `tar -xzf`
         gzip -d -c -- "$1" | (cd "$2" && tar xf -)
      ;;
   esac
}
# Unpacks two archives, diffs the resulting trees and records which binary
# file pairs differ so they can be compared in a later pass.
# Globals:
#   TEMP      (read)    root of the temporary extraction area
#   num_loop  (read)    when lower than 1 the arguments are the user supplied
#                       files and are unpacked below $TEMP; otherwise they are
#                       unpacked next to where they live
#   BIN_FILES (read)    path of the pair list; the file is rewritten whenever
#                       the diff is not empty, one "path1 path2" per line
#   OUTPUT    (written) name of the report created in the current directory:
#                       diff_[file1]_vs_[file2]_[timestamp].txt
# Arguments:
#   $1 - first archive
#   $2 - second archive
# Returns 1 if there are more files to compare, 0 otherwise
# Limitation: pairs whose paths contain whitespace are not recorded, because
# the pair list is whitespace separated.
# ToDo: optimize
function loop {
   local src rel
   local -a ext_dirs=()

   echo -n "Uncompressing content... "
   # Derive each extraction directory from the archive path by turning dots
   # into underscores. Only the part that is not $TEMP is rewritten, so a dot
   # inside $TEMP itself cannot move the extraction outside the temp area.
   for src in "$1" "$2"; do
      if [ "$num_loop" -lt 1 ]; then
         ext_dirs+=("$TEMP/${src//./_}")
      elif [[ "$src" == "$TEMP"/* ]]; then
         rel=${src#"$TEMP"/}
         ext_dirs+=("$TEMP/${rel//./_}")
      else
         ext_dirs+=("${src//./_}")
      fi
   done
   extract "$1" "${ext_dirs[0]}"
   extract "$2" "${ext_dirs[1]}"
   echo "OK"

   OUTPUT="diff_${1##*/}_vs_${2##*/}_$(date +%H%M%S_%d%m%y).txt"
   # LC_ALL=C keeps diff's "Binary files ... differ" message untranslated,
   # which the parsing below depends on.
   LC_ALL=C diff -br -U0 -- "${ext_dirs[0]}" "${ext_dirs[1]}" > "$OUTPUT"

   if [ ! -s "$OUTPUT" ]; then
      echo "No differences found"
      return 0
   fi

   # Search diff's output for binary files that need to be analyzed.
   # Match the whole "Binary files A and B differ" shape so that ordinary
   # diff lines that merely contain the word "binary" are not taken as pairs.
   awk 'NF == 6 && tolower($1) == "binary" && $2 == "files" && $4 == "and" && $6 == "differ" { print $3 " " $5 }' \
      "$OUTPUT" > "$BIN_FILES"

   if [ -s "$BIN_FILES" ]; then
      # Word count = number of individual files (two per pair)
      echo "Found $(( $(wc -w < "$BIN_FILES") )) more binary files to compare"
      return 1
   fi
   return 0
}
# Deletes the binary pair list and the whole temporary extraction directory.
# Globals:
#   BIN_FILES (read) - file to delete
#   TEMP      (read) - directory to delete recursively
# Outputs:
#   Progress message on stdout.
function remove_temp {
   echo -n "Removing temporal files... "
   if [[ -n "$BIN_FILES" ]]; then
      rm -f -- "$BIN_FILES"
   fi
   # Quoted and checked for emptiness: an unquoted or empty path handed to
   # `rm -rf` could delete something other than the temp directory.
   if [[ -n "$TEMP" && -d "$TEMP" ]]; then
      rm -rf -- "$TEMP"
   fi
   echo "OK"
}
###############################
# Here start the main program #
###############################
# Flow: validate the two arguments, compare the archives with loop(), then
# compare every pair of nested archives that loop() reports, and finally
# tidy up the generated reports and the temporary directory.
# Exit status: 0 on completion, 255 on usage/validation errors or when
# MAX_LOOPS is exceeded.

# Prints "<ancestor>/<last component>" of path $1, where the ancestor is the
# component $2 positions before the last one.
function short_path {
   printf '%s\n' "$1" | awk -F/ -v num="$2" '{ i = NF - num; if (i < 0) i = 0; print $i "/" $NF }'
}

# Appends every non-empty line of $BIN_FILES to the pending queue.
function enqueue_pending {
   local entry
   while IFS= read -r entry; do
      if [ -n "$entry" ]; then
         pending+=("$entry")
      fi
   done < "$BIN_FILES"
}

if [ $# -lt 2 ]; then
   echo "Usage: $(basename "$0") file1 file2"
   exit 255
fi

# Any check_file failure is fatal for the user supplied files: continuing with
# an unsupported format would diff two empty directories and wrongly report
# "No differences found".
for candidate in "$1" "$2"; do
   if ! check_file "$candidate"; then
      echo "Exiting"
      exit 255
   fi
done

# Same path, or two names for the same file
if [[ "$1" == "$2" || "$1" -ef "$2" ]]; then
   echo "Files are the same, no need to diff them"
   exit 0
fi

declare -a pending=() reports=()
status=0

# Here starts the original comparison
loop "$1" "$2"
more=$?
reports+=("$OUTPUT")

# Goes for more files (see supported formats) if founded inside given ones.
# The pair list is copied into an in-memory queue first, because every call to
# loop() may rewrite $BIN_FILES; reading the file while it is being rewritten
# skips or corrupts entries.
if [ "$more" -gt 0 ]; then
   enqueue_pending
fi

next=0
while [ "$next" -lt "${#pending[@]}" ]; do
   line=${pending[$next]}
   next=$((next + 1))

   # num_loop is advanced once per processed pair and rolled back for pairs
   # that turn out not to be processable.
   num_loop=$((num_loop + 1))
   if [ "$num_loop" -gt "$MAX_LOOPS" ]; then
      echo "[INFO] $MAX_LOOPS loops reached. Maybe you enter into an infinite loop, increase MAX_LOOPS variable otherwise. Exiting"
      status=255
      break
   fi

   read -r TEMP1 TEMP2 _ <<< "$line"
   #ToDo: formatting - field start counting (same number of tabs as number of loops) from last loop baseline, not from the end!
   echo "   $(short_path "$TEMP1" "$num_loop") <=> $(short_path "$TEMP2" "$num_loop")"

   if check_file "$TEMP1" && check_file "$TEMP2"; then
      loop "$TEMP1" "$TEMP2"
      more=$?
      reports+=("$OUTPUT")
      if [ "$more" -gt 0 ]; then
         enqueue_pending
      fi
   else
      # False positive (not able to process)
      num_loop=$((num_loop - 1))
   fi
done

# Removing useless paths from every output file that is not empty.
# $TEMP is escaped so that it is matched literally by sed.
temp_pattern=$(printf '%s' "$TEMP/" | sed 's/[][\.*^$|]/\\&/g')
for report in "${reports[@]}"; do
   if [ -z "$report" ]; then
      continue
   fi
   if [ -s "$report" ]; then
      # Scratch file is named after the report so no unrelated file in the
      # current directory can be overwritten.
      sed "s|${temp_pattern}||g" "$report" > "$report.tmp" && mv -- "$report.tmp" "$report"
   else
      rm -f -- "$report"
   fi
done

remove_temp
exit "$status"


0 comentarios: