#!/usr/bin/env bash
##
## Collate many single-page PDFs into a single PDF document. Useful for
## book scanning.
##
## Run this from the directory that contains the pdf/ directory. All the
## pictures must already have been converted and deposited into pdf/ with
## a "pdf/0-XX.pdf" filename. To kick that off, run from the picture
## directory:
##
##   \ls | sort -n | parallel -j12 --eta \
##     convert {} -monochrome -compress lzw ../pdf/0-{/.}.pdf
##
## Files are merged in passes: pass N reads every "<N-1>-*" file, merges
## them in batches of BATCHSIZE and writes each batch to
## "<N>-<name of the batch's first file, minus its pass prefix>". Passes
## repeat until at most one file of the newest pass exists.

PDFLOC="pdf"   # directory holding the page PDFs and the intermediate PDFs
BATCHSIZE=10   # number of PDFs merged by one gs invocation
PROCSIZE=12    # value handed to "parallel -j" (concurrent gs jobs)
passcount=0    # number of the pass whose output files are being written

#######################################
# Turn the pending batch into one gs merge command.
# Does nothing when the batch is empty, so a flush after the last full
# batch never emits a gs command without input files.
# Globals:   batch (read, then emptied), commands (appended),
#            passcount (read), PDFLOC (read, default output directory)
# Arguments: $1 - directory the merged PDF is written to (optional)
#######################################
cut_batch() {
  local outdir=${1:-$PDFLOC}
  local base first cmd

  if (( ${#batch[@]} == 0 )); then
    return 0
  fi

  # "dir/0-12.pdf" -> "12.pdf": drop the directory, then the pass prefix.
  base=${batch[0]##*/}
  first=${base#*-}

  # The command is later fed to parallel as a single text line, so every
  # word is shell-quoted (%q) to keep paths with spaces intact.
  printf -v cmd '%q ' gs -dBATCH -dNOPAUSE -q -sDEVICE=pdfwrite \
    "-sOutputFile=$outdir/$passcount-$first" "${batch[@]}"
  commands+=("${cmd% }")
  batch=()
}

#######################################
# Merge the PDFs of the current pass found under a directory, repeating
# pass after pass until at most one PDF of the newest pass remains.
# Globals:   passcount (incremented once per pass), BATCHSIZE (read),
#            PROCSIZE (read), PDFLOC (read, default directory)
# Arguments: $1 - directory to pull the file list from
# Outputs:   the pass number and "### repeating process" on stdout;
#            diagnostics on stderr
# Returns:   0 on success; 1 if the directory is missing, BATCHSIZE is
#            unusable, no input files exist, or parallel reports failure
#######################################
process_files_at_location() {
  local dir=${1:-$PDFLOC}
  local -a filelist batch commands
  local path base remaining

  if [[ ! -d "$dir" ]]; then
    printf 'error: no such directory: %s\n' "$dir" >&2
    return 1
  fi

  # A batch of one file produces as many outputs as inputs, so the passes
  # would never converge on a single document.
  if ! [[ "$BATCHSIZE" =~ ^[0-9]+$ ]] || (( 10#$BATCHSIZE < 2 )); then
    printf 'error: BATCHSIZE must be an integer >= 2 (got %s)\n' \
      "$BATCHSIZE" >&2
    return 1
  fi

  while :; do
    # Collect this pass's inputs ordered by the number that follows the
    # pass prefix. The sort key is taken from the basename so that a "-"
    # elsewhere in the path cannot disturb the ordering.
    filelist=()
    while IFS= read -r path; do
      filelist+=("$path")
    done < <(
      find "$dir" -type f -name "$passcount-*" \
        | while IFS= read -r path; do
            base=${path##*/}
            printf '%s\t%s\n' "${base#*-}" "$path"
          done \
        | LC_ALL=C sort -t $'\t' -k1,1n \
        | cut -f2-
    )

    if (( ${#filelist[@]} == 0 )); then
      printf 'error: no "%s-*" files found in %s\n' "$passcount" "$dir" >&2
      return 1
    fi

    passcount=$((passcount + 1))
    echo "$passcount"

    batch=()
    commands=()
    for path in "${filelist[@]}"; do
      batch+=("$path")
      if (( ${#batch[@]} >= 10#$BATCHSIZE )); then
        cut_batch "$dir"
      fi
    done
    # Flush the final, possibly partial, batch.
    cut_batch "$dir"

    # Run commands: one gs command line per batch, piped to parallel.
    if ! printf '%s\n' "${commands[@]}" | parallel -j"$PROCSIZE"; then
      printf 'error: merging failed during pass %s\n' "$passcount" >&2
      return 1
    fi

    # Repeat as necessary, until one pdf remains.
    remaining=$(find "$dir" -type f -name "$passcount-*" | wc -l)
    if (( remaining <= 1 )); then
      break
    fi
    echo "### repeating process"
  done
}

process_files_at_location "$PDFLOC"