#!/bin/sh
#
# Optimizes StarOffice/OpenOffice file sizes by re-compressing images.
#
# Each document is unpacked into a working directory, every picture in its
# "Pictures" directory is re-encoded with ImageMagick (and kept only if the
# result is smaller), the XML files are patched when a picture changes its
# file name, and the document is packed again as <name>.new<ext>.
#
# Written for POSIX sh: uses $(...), $((...)) and ${var#pattern}, but no
# bash-only features.
#

myversion="1.2"

#
# Help text
#
# The lines between the two marker lines below are what "-h" prints: the
# script reads its own source, drops the first two characters ("# ") of each
# line and replaces MY_NAME with the name it was invoked as. The markers must
# stay on lines of their own, ahead of any other mention of them in this file.
#
# BEGIN_HELP_TEXT
# MY_NAME - Shrink Star/OpenOffice documents by optimizing image compression
#
# Takes a Star/OpenOffice document and optimizes size by resizing and
# re-compressing its pictures. Works best with presentations
#
# Usage: MY_NAME -[s|v|n] [-b] [-h] [presentation.od[ptc] ...]
#
# The resulting, optimized presentations will end in .new.od[ptc]
#
# Command line switches:
#
# -s  Silent mode: Don't say anything, except in case of errors.
#
# -v  Verbose mode: Tell me everything.
#
# -n  Normal mode: Just tell me the basic steps you are taking. (Default)
#
# -b  Be brave: Replace original with optimized version (See Warning).
#
# -h  Help: Print out this help.
#
# This tool depends heavily on the ImageMagick set of graphics tools, in
# particular "convert" and "identify". They are expected to be installed in
# the path.
#
# Warning: This tool recompresses and sometimes resizes the images in the
# presentation. This process is lossy (although this tool tries to make
# them as unnoticeable as possible) and cannot be reversed. Always check that
# the optimized presentation still looks good enough before deleting the
# original. If you use the brave option (-b) then be so. You are warned.
#
# Disclaimer: Neither the author nor anybody else is responsible for any losses
# as a result of using this tool or any bugs therein. Always keep a copy of
# your original presentation or absolutely make sure that you are happy with
# the optimized version of the presentation before deleting any originals.
#
# Please send your comments, suggestions and bugreports to:
#
# Constantin Gonzalez,
#
# END_HELP_TEXT

#
# Configuration parameters
#

# Maximum allowed width of pictures, 1280 is a good ballpark figure.
maxwidth=1280

# Maximum allowed height of pictures, 1024 is recommended.
maxheight=1024

# JPEG quality setting passed to "convert -quality". Lower values compress
# more at the cost of more visible loss; the default used here is 75.
jpegquality=75

# PNG compression level, 9 is highest, recommended.
pngcomplevel=9

# ImageMagick reads "-quality" for PNG output as two digits: the tens digit is
# the zlib compression level, the ones digit the filter type (5 = adaptive).
# Passing the bare level (9) would therefore select compression level 0.
pngquality=$((pngcomplevel * 10 + 5))

# PNG and GIF pictures with this number of colors or more will be converted to
# JPEG. 128 is probably a good value, you can use smaller values if you like.
maxcolors=128

# A literal tab character, used when encoding/decoding picture path names.
tab=$(printf '\t')

#
# Useful routines
#

# Dosable echo.
# Globals:   verbosity (read)
# Arguments: $1 - minimum verbosity level at which to print
#            $2 - text to print (an empty line if omitted)
myecho() {
  if [ "${1}" -le "${verbosity}" ]; then
    echo "${2}"
  fi
}

# Check if the necessary tools are installed.
# Globals:   required_tools (read) - comma separated list of command names
# Outputs:   1 if every tool is found in PATH, 0 otherwise
required_tools="convert, identify"

checktools() {
  checktools_result=1
  for checktools_tool in $(echo "${required_tools}" | sed -e "s/,//g"); do
    # "command -v" is the portable lookup; the output format of "which"
    # differs between systems.
    if ! command -v "${checktools_tool}" >/dev/null 2>&1; then
      checktools_result=0
    fi
  done
  echo "${checktools_result}"
}

# Get the size of a given file in bytes.
# Arguments: $1 - file name
# Outputs:   the byte count (nothing if the file cannot be read)
filesize() {
  wc -c < "${1}" | tr -d ' '
}

# Report how many colors "identify -verbose" lists for a picture.
# Arguments: $1 - picture file
# Outputs:   the number on the first "Colors:" line, or 0 if there is none
colorcount() {
  colorcount_n=$(identify -verbose "${1}" 2>/dev/null \
    | grep "Colors:" | head -1 | sed -e 's/[^0-9]//g')
  echo "${colorcount_n:-0}"
}

# Substitute a string ($2) for another ($3) in a file ($1), in place.
# $2 is used as a sed regular expression and $3 as a sed replacement, with
# "%" as the delimiter, so neither may contain "%".
# Outputs:   1 on success, 0 if the patched file failed the size check
#
# NOTE: for files of two lines or less the result must have exactly the same
# size as the input, so a replacement whose length differs from the matched
# text is reported as a failure.
patch() {
  # Work next to the target file rather than under a predictable /tmp name.
  patch_tmp="${1}.$$.tmp"

  patch_size=$(filesize "${1}")
  if [ "${patch_size:-0}" -eq 0 ]; then
    # Nothing to substitute in an empty (or unreadable) file.
    echo 1
    return
  fi

  # The file may be tricky: StarOffice XML files have very long lines even
  # though they are XML files :(. So we need to do some black magick here...
  # We assume that a file is evil if it has only 2 lines or less.
  patch_lines=$(wc -l < "${1}" | tr -d ' ')

  if [ "${patch_lines}" -le 2 ]; then
    # Make sure there's a space at the end of the file. If we add one, it won't
    # affect our XML file but it ensures that it will be correctly dealt with
    # by sed (after the tr below, the last "line" then ends in a newline).
    patch_should=${patch_size}
    if [ "$(tail -c 1 "${1}")" != " " ]; then
      printf " " >> "${1}"
      patch_should=$((patch_should + 1))
      patch_trim=1
    else
      patch_trim=0
    fi

    # Turn every space into a line break (and every line break into a CR) so
    # that sed only sees short lines, then undo the mapping.
    # The strings to look for may contain "/", so we better use "%" as a
    # delimiter.
    tr '\n ' '\r\n' < "${1}" \
      | sed -e "s%${2}%${3}%g" \
      | tr '\r\n' '\n ' > "${patch_tmp}"

    # Check that the patching has gone well
    if [ "$(filesize "${patch_tmp}")" -ne "${patch_should}" ]; then
      # something went wrong, better leave the original untouched
      patch_success=0
      rm -f "${patch_tmp}"
    else
      # Remove the extra character at the end if needed: the first piece of
      # the split holds the original number of bytes, the second the space.
      if [ "${patch_trim}" = 1 ]; then
        split -b "${patch_size}" "${patch_tmp}" "${patch_tmp}.part."
        mv "${patch_tmp}.part.aa" "${patch_tmp}"
        rm -f "${patch_tmp}.part.ab"
      fi
      mv "${patch_tmp}" "${1}"
      patch_success=1
    fi
  else
    sed -e "s%${2}%${3}%g" "${1}" > "${patch_tmp}"
    mv "${patch_tmp}" "${1}"
    patch_success=1
  fi

  echo "${patch_success}"
}

# Patch a file of the unpacked document and abort the whole run on failure.
# Globals:   file, workdir (read)
# Arguments: $1 - file to patch, $2 - old string, $3 - new string
patch_or_die() {
  patch_or_die_ok=$(patch "${1}" "${2}" "${3}")
  if [ "${patch_or_die_ok}" -ne 1 ]; then
    echo "Error while patching ${1} in ${file}."
    echo "Please report this to constantin.gonzalez@sun.com as a bug."
    myecho 1 "Cleaning up..."
    /bin/rm -rf "${workdir}"
    exit 1
  fi
}

# Decide whether the current picture should become a JPEG or a PNG.
# Globals:   picture, maxcolors, jpegquality, pngquality (read)
#            colors, newformat, options (written)
# Arguments: $1 - non-zero if the picture uses transparency (JPEG can't)
jpeg_or_png() {
  colors=$(colorcount "${picture}")
  if [ "${colors}" -gt "${maxcolors}" ] && [ "${1}" -eq 0 ]; then
    # yes, we'll go for JPEG
    newformat=jpg
    options="${options} -quality ${jpegquality}"
    myecho 2 "- This image has ${colors} colors, we better convert it into JPEG."
  else
    # If we don't go for JPEG, PNG is still more efficient.
    newformat=png
    options="${options} -quality ${pngquality}"
    myecho 2 "- We will convert this image to PNG, which is probably more efficient."
  fi
}

#
# Evaluate command line
#
# Switches may appear anywhere; every other non-empty argument is a document.
# The documents are not copied into a list: the conversion loop further down
# walks "$@" again and skips the switches, which keeps file names with spaces
# intact.
#

have_files=""
help=""
verbosity=1
brave=0

if [ -z "${*}" ]; then
  help="1"
else
  # myswitch is not assigned anywhere in this script; if it is present in the
  # environment, its words are treated as additional arguments.
  # shellcheck disable=SC2086
  set -- "$@" ${myswitch}
  for arg in "$@"; do
    case "${arg}" in
      "")
        ;;
      -h*)
        help=1
        ;;
      -s*)
        verbosity=0
        ;;
      -n*)
        verbosity=1
        ;;
      -v*)
        verbosity=2
        ;;
      -b*)
        brave=1
        ;;
      -*)
        echo "Sorry, I can't recognize command line option \"${arg}\"." >&2
        exit 1
        ;;
      *)
        # It's a file name!
        have_files=1
        ;;
    esac
  done
fi

myname=$(basename "${0}")

if [ -n "${help}" ]; then
  helpfirst=$(grep -n BEGIN_HELP_TEXT "${0}" | head -1 | cut -f 1 -d :)
  helplast=$(grep -n END_HELP_TEXT "${0}" | head -1 | cut -f 1 -d :)
  if [ -n "${helpfirst}" ] && [ -n "${helplast}" ]; then
    # Print the lines strictly between the two markers.
    sed -n "$((helpfirst + 1)),$((helplast - 1))p" "${0}" \
      | cut -c 3- \
      | sed -e "s%MY_NAME%${myname}%g" \
      | more
  fi
fi

if [ -z "${have_files}" ]; then
  myecho 1 "No files to convert."
  exit 0
fi

#
# Make sure the tools we need are available
#

# put some directories into some paths where we suspect our needed tools
toolspath=/opt/gnome-1.4/bin:/opt/sfw/bin:/usr/local/bin
toolslibs=$(echo "${toolspath}" | sed -e "s%/bin%/lib%g")

PATH=${toolspath}:${PATH}
export PATH

if [ -z "${LD_LIBRARY_PATH}" ]; then
  LD_LIBRARY_PATH=${toolslibs}
else
  LD_LIBRARY_PATH=${toolslibs}:${LD_LIBRARY_PATH}
fi
export LD_LIBRARY_PATH

# See if our tools are there
if [ "$(checktools)" -eq 0 ]; then
  echo "Error: Can't find required tools \"${required_tools}\" in path!"
  exit 1
fi
myecho 2 "Required tools \"${required_tools}\" found."

#
# Say Hello if necessary.
#

if [ -z "${help}" ]; then
  myecho 1 "ooshrink ${myversion}"
  myecho 1 "Check out \"${myname} -h\" for help information, warnings and disclaimers."
  myecho 1
fi

#
# Convert all files one by one
#

for file in "$@"; do

  # Switches were already handled above.
  case "${file}" in
    -* | "") continue ;;
  esac

  # Check if it's a valid file
  if [ -f "${file}" ]; then
    fileext=$(printf '%s\n' "${file}" \
      | sed -e 's/^.*\(\.[so][xd][cdiptw]\)$/\1/')
    # sed leaves the name untouched when it has no document extension.
    if [ "${fileext}" = "${file}" ]; then
      echo "Error: \"${file}\" does not seem to be a document file!"
      exit 1
    fi
  else
    echo "Error: Can't find file \"${file}\"!"
    exit 1
  fi

  docbase=$(basename "${file}" "${fileext}")

  #
  # Make working directory and unpack file
  #

  workdir="${docbase}.$$.work"
  myecho 1 "Creating working directory ${workdir}..."
  if ! mkdir "${workdir}"; then
    echo "Error: Can't create working directory \"${workdir}\"!"
    exit 1
  fi

  myecho 1 "Unpacking ${file}..."
  /usr/bin/unzip "${file}" -d "${workdir}" >/dev/null

  # Does the file have a Pictures directory?
  if [ ! -d "${workdir}/Pictures" ]; then
    myecho 1 "This file does not have a \"Pictures\" directory."
    /bin/rm -rf "${workdir}"
    continue
  fi

  #
  # Go through all the pictures and optimize them. Since there are also
  # subdirectories with spaces in their names, we need to encode space
  # characters (and tabs, and the "%" used for the encoding itself) so that
  # the list can be split into words by the for loop below.
  #

  pictures=$(find "${workdir}/Pictures" -type f -print \
    | sed -e "s/%/%p/g" -e "s/ /%s/g" -e "s/${tab}/%t/g")

  if [ -z "${pictures}" ]; then
    myecho 1 "There are no pictures in this file."
    /bin/rm -rf "${workdir}"
    # Move on to the next document, as in the "no Pictures directory" case.
    continue
  fi

  for encoded in ${pictures}; do

    # decode spaces, etc.
    picture=$(printf '%s\n' "${encoded}" \
      | sed -e "s/%t/${tab}/g" -e "s/%s/ /g" -e "s/%p/%/g")
    # Report the path without the leading working directory.
    myecho 1 "Optimizing ${picture#*/}."

    # Analyze the current picture. The unquoted echo joins everything
    # identify printed into one space-separated line for the sed patterns.
    analysis=$(identify "${picture}" 2>/dev/null)
    # shellcheck disable=SC2086
    flat=$(echo ${analysis})
    size=$(echo "${flat}" \
      | sed -e 's/^.* \([0-9][0-9]*x[0-9][0-9]*\)[+0-9]* .*$/\1/')
    width=$(echo "${size}" | cut -f 1 -d x)
    height=$(echo "${size}" | cut -f 2 -d x)
    format=$(echo "${flat}" \
      | sed -e 's/^.* \([GIFBMPJENT]\{3,4\}\) .*$/\1/')

    # When a sed pattern does not match, the whole identify line comes back,
    # which shows up as an over-long format or a non-numeric width/height.
    known=1
    if [ -z "${format}" ] || [ "${#format}" -gt 4 ]; then
      known=0
    fi
    case "${width}" in
      "" | *[!0-9]*) known=0 ;;
    esac
    case "${height}" in
      "" | *[!0-9]*) known=0 ;;
    esac
    if [ "${known}" -eq 0 ]; then
      # We don't know this format.
      myecho 2 "$analysis"
      myecho 2 "Don't know this format. Skipping picture."
      continue
    fi

    # Split the file name at its last dot.
    picname=$(basename "${picture}")
    case "${picname}" in
      *.*)
        ext=${picname##*.}
        picstem=${picname%.*}
        ;;
      *)
        ext=""
        picstem=${picname}
        ;;
    esac

    myecho 2 "- This is a ${width} pixels wide and ${height} pixels high ${format} file."

    #
    # Try out different optimizations, according to image type.
    #

    options=""
    newformat=""

    #
    # Format independent optimizations
    #

    # rescale the image if it is too large
    if [ "${width}" -gt "${maxwidth}" ] || [ "${height}" -gt "${maxheight}" ]; then
      options="${options} -geometry ${maxwidth}x${maxheight}"
      myecho 2 "- This image is too large, we'll resize it to ${maxwidth}x${maxheight}."
    fi

    #
    # Format dependent optimizations
    #

    case "${format}" in
      JPEG)
        # Apply new quality setting for JPEG images
        options="${options} -quality ${jpegquality}"
        myecho 2 "- We will try re-encoding this image with JPEG quality setting of ${jpegquality}%."
        ;;
      BMP | TIFF)
        # Find out if it's better to use JPEG for this image
        jpeg_or_png 0
        ;;
      GIF)
        animated=$(identify -verbose "${picture}" 2>/dev/null | grep -c Scene:)
        if [ "${animated}" -gt 0 ]; then
          myecho 2 "- This is an animated GIF. Better leave it alone."
          options=""
        else
          # Transparent pictures can't be converted to JPEG. But if the picture
          # does not have transparency and uses many colors, it's good to
          # convert to JPEG.
          transparency=$(identify -verbose "${picture}" 2>/dev/null \
            | grep -c ransparency)
          if [ "${transparency}" -gt 0 ]; then
            myecho 2 "- This image is transparent. Can't convert to JPEG."
          fi
          jpeg_or_png "${transparency}"
        fi
        ;;
      PNG)
        # Find out if it's better to use JPEG for this image
        colors=$(colorcount "${picture}")
        transparency=$(identify -verbose "${picture}" 2>/dev/null \
          | grep -c ransparency)
        if [ "${transparency}" -ge 1 ]; then
          myecho 2 "- This image is transparent. Can't convert to JPEG."
        fi
        if [ "${colors}" -gt "${maxcolors}" ] && [ "${transparency}" -eq 0 ]; then
          # yes, we'll go for JPEG
          newformat=jpg
          options="${options} -quality ${jpegquality}"
          myecho 2 "- This picture has ${colors} colors, so JPEG is a better choice."
        else
          # Use highest compression level for PNG images
          options="${options} -quality ${pngquality}"
          myecho 2 "- We will try re-encoding this image with PNG compression level ${pngcomplevel}."
        fi
        ;;
      *)
        continue
        ;;
    esac

    # Do the conversion if necessary and check if it helped.
    if [ -z "${options}" ] && [ -z "${newformat}" ]; then
      continue
    fi

    # Do we need to change the format?
    if [ -n "${newformat}" ]; then
      newname="${picstem}.${newformat}"
      newpicture="$(dirname "${picture}")/${newname}"
      testfile="test.$$.${newformat}"
    else
      testfile="test.$$${ext:+.${ext}}"
    fi

    rm -f "${testfile}"
    # ${options} is deliberately unquoted: it holds several separate words.
    # shellcheck disable=SC2086
    convert "${picture}" ${options} "${testfile}" >/dev/null 2>&1

    # Check if conversion was successful
    if [ ! -f "${testfile}" ]; then
      myecho 2 "Error while converting ${picture}. Will keep original"
      continue
    fi

    size1=$(filesize "${picture}")
    size2=$(filesize "${testfile}")

    # An empty result is not a usable picture, however small it is.
    if [ "${size2}" -le 0 ] || [ "${size1}" -le "${size2}" ]; then
      myecho 2 "- Failure: Old: ${size1}, New: ${size2}. We better keep the original."
      rm -f "${testfile}"
      continue
    fi

    # Percentage saved, rounded to the nearest whole percent.
    saving=$((100 - (size2 * 1000 / size1 + 5) / 10))
    myecho 2 "- Success: Old: ${size1}, New: ${size2} (-${saving}%). We'll use the new picture."
    rm "${picture}"

    # Be careful if the format (and the filename) has changed
    if [ -z "${newformat}" ]; then
      mv "${testfile}" "${picture}"
      continue
    fi

    mv "${testfile}" "${newpicture}"

    # Patch the relevant XML documents to reflect the name change. The
    # directory holding this "Pictures" directory is where the matching
    # content.xml and styles.xml live.
    docroot=${picture%%/Pictures/*}

    # The main document:
    content="${docroot}/content.xml"
    if [ -f "${content}" ]; then
      myecho 2 "Patching ${content#*/} with new image file name."
      patch_or_die "${content}" "${picname}" "${newname}"
    fi

    # The styles document:
    styles="${docroot}/styles.xml"
    if [ -f "${styles}" ]; then
      myecho 2 "Patching styles.xml with new image file name."
      patch_or_die "${styles}" "${picname}" "${newname}"
    fi

    # The manifest document requires some more magick, too: the media type
    # in front of the file name has to follow the new format.
    manifest="${workdir}/META-INF/manifest.xml"
    if [ -f "${manifest}" ]; then
      myecho 2 "Patching manifest.xml with new image file name."
      patch_or_die "${manifest}" \
        "image/[a-z]*\(.*\)${picname}" \
        "image/${newformat}\1${newname}"
    fi

  done

  myecho 1 "All images optimized."

  #
  # Re-pack the StarOffice file
  #

  newfile="${docbase}.new${fileext}"
  myecho 1 "Re-packing..."

  # Start from scratch: zip would otherwise add to an existing archive.
  rm -f "${newfile}"
  (
    cd "${workdir}" || exit 1
    if [ -f mimetype ]; then
      # Keep "mimetype" as the first entry and store it uncompressed, as the
      # OpenDocument package format expects, then add everything else.
      /usr/bin/zip -X0 "../${newfile}" mimetype \
        && /usr/bin/zip -Xr9 "../${newfile}" * -x mimetype
    else
      /usr/bin/zip -r9 "../${newfile}" *
    fi
  ) >/dev/null 2>&1

  if [ ! -f "${newfile}" ]; then
    echo "Error: Re-packing \"${file}\" failed!"
    myecho 1 "Cleaning up..."
    /bin/rm -rf "${workdir}"
    exit 1
  fi

  size1=$(filesize "${file}")
  size2=$(filesize "${newfile}")

  if [ "${size1}" -gt "${size2}" ]; then
    # Size of the new file as a percentage of the old one, rounded.
    saving=$(((size2 * 1000 / size1 + 5) / 10))
    myecho 1 "Success: The new file is only ${saving}% as big as the original!"
    if [ "${brave}" -eq 1 ]; then
      # If we are brave, replace original with optimized version.
      myecho 2 "Replacing original with optimized version..."
      /bin/rm -f "${file}"
      mv "${newfile}" "${file}"
    fi
  else
    myecho 1 "Sorry, we couldn't optimize this presentation's size."
    rm -f "${newfile}"
  fi

  myecho 1 "Cleaning up..."
  /bin/rm -rf "${workdir}"

  myecho 1 "Done."
  myecho 1

done