#!/usr/bin/env bash set -o errexit -o nounset OCRMYPDF_CMD='docker run --rm -i ocrmypdf' SRC_DIR=/mnt/docscans DST_DIR=/mnt/documents LOCKFILE=$SRC_DIR/.lock ARCHIVE_DIR=$SRC_DIR/archive FAILED_DIR=$SRC_DIR/failed LOG_DIR=$SRC_DIR/logs PROCESSED_COUNT=0 FAILED_FILES= function ocrFile() { SRC=$1 FILE_NAME=$(basename -- "$SRC") DST_PDF=$DST_DIR/${FILE_NAME%.*}.pdf DST_LOG=$LOG_DIR/${FILE_NAME%.*}.log echo "Started at $(date -Isec)" >> "$DST_LOG" $OCRMYPDF_CMD \ -l deu \ --clean \ --rotate-pages \ --deskew \ - - \ < "$SRC" \ > "$DST_PDF" 2>> "$DST_LOG" } exec 100>"$LOCKFILE" || exit 23 flock -n 100 || { echo "$LOCKFILE" is locked; exit 1; } mkdir -p "$DST_DIR" "$ARCHIVE_DIR" "$FAILED_DIR" "$LOG_DIR" while IFS= read -r -d '' FILE do echo "processing $FILE ..." if ocrFile "$FILE" then echo processed "$FILE" mv "$FILE" "$ARCHIVE_DIR" echo moved "$FILE" to "$ARCHIVE_DIR" (( PROCESSED_COUNT+=1 )) else echo failed to process "$FILE" mv "$FILE" "$FAILED_DIR" echo moved "$FILE" to "$FAILED_DIR" FAILED_FILES+="${FILE}\n" fi echo done < <(find "$SRC_DIR" -maxdepth 1 -name '*.pdf' -print0) echo Done echo echo processed "$PROCESSED_COUNT" PDFs if [ -n "$FAILED_FILES" ] then echo failed on PDFs echo "$FAILED_FILES" sendmail -t thomasruoff@gmail.com <