#!/usr/bin/env bash
#
# Batch-OCR scanned PDFs.
#
# Every '*.pdf' found directly in SCR_DIR is piped through the command in
# OCRMYPDF_CMD. The resulting PDF is written to DST_DIR and the command's
# stderr to LOG_DIR. On success the source file is moved to ARCHIVE_DIR,
# on failure to FAILED_DIR. A non-blocking flock on LOCKFILE ensures that
# only one instance runs at a time.
#
# Exit status: 0 when the run completed (even if individual files failed),
#              1 when another instance holds the lock,
#              23 when the lock file could not be opened.

set -o errexit -o nounset

# as suggested in https://github.com/koalaman/shellcheck/wiki/SC2044
shopt -s globstar nullglob

# Kept as an array so the command is expanded word-by-word without relying
# on unquoted word splitting.
OCRMYPDF_CMD=(docker run --rm -i ocrmypdf)

SCR_DIR=/mnt/docscans
DST_DIR=/mnt/documents
LOCKFILE=$DST_DIR/.lock
ARCHIVE_DIR=$DST_DIR/archive
FAILED_DIR=$DST_DIR/failed
LOG_DIR=$DST_DIR/logs

PROCESSED_COUNT=0
FAILED_COUNT=0

#######################################
# Run OCR on a single file.
# Globals:   OCRMYPDF_CMD, DST_DIR, LOG_DIR (read)
# Arguments: $1 - path of the source PDF
# Outputs:   writes $DST_DIR/<name>.pdf and $LOG_DIR/<name>.log, where
#            <name> is the source base name without its last extension
# Returns:   the exit status of the OCR command
#######################################
ocrFile() {
  local src=$1
  local file_name dst_pdf dst_log
  local status=0

  file_name=$(basename -- "$src")
  dst_pdf=$DST_DIR/${file_name%.*}.pdf
  dst_log=$LOG_DIR/${file_name%.*}.log

  # '- -' makes ocrmypdf read the input from stdin and write the result to
  # stdout, so no volume has to be mounted into the container.
  "${OCRMYPDF_CMD[@]}" \
    -l deu \
    --clean \
    --rotate-pages \
    --deskew \
    - - \
    < "$src" \
    > "$dst_pdf" 2> "$dst_log" || status=$?

  if (( status != 0 )); then
    # Do not leave a truncated or empty PDF behind in the destination.
    rm -f -- "$dst_pdf"
  fi
  return "$status"
}

# The directories must exist before the lock file (which lives inside
# DST_DIR) can be opened.
mkdir -p "$DST_DIR" "$ARCHIVE_DIR" "$FAILED_DIR" "$LOG_DIR"

exec 100>"$LOCKFILE" || exit 23
flock -n 100 || { echo "$LOCKFILE" is locked; exit 1; }

while IFS= read -r -d '' FILE
do
  echo "processing $FILE ..."
  if ocrFile "$FILE"
  then
    echo processed "$FILE"
    mv -- "$FILE" "$ARCHIVE_DIR"
    echo moved "$FILE" to "$ARCHIVE_DIR"
    # Plain assignment: '(( x++ ))' returns 1 when x was 0, which would
    # abort the script under errexit.
    PROCESSED_COUNT=$(( PROCESSED_COUNT + 1 ))
  else
    echo failed to process "$FILE"
    mv -- "$FILE" "$FAILED_DIR"
    echo moved "$FILE" to "$FAILED_DIR"
    FAILED_COUNT=$(( FAILED_COUNT + 1 ))
  fi
done < <(find "$SCR_DIR" -maxdepth 1 -name '*.pdf' -print0)

echo Done
echo processed "$PROCESSED_COUNT" PDFs
# 'if' instead of '[ ... ] && echo' so that a run without failures does not
# end with a non-zero exit status.
if (( FAILED_COUNT > 0 )); then
  echo failed on "$FAILED_COUNT" PDFs
fi