#!/usr/bin/env bash
#
# OCR every PDF found directly inside SRC_DIR using ocrmypdf (run through
# docker) and write the result to DST_DIR under the same base name.
# Successfully processed sources are moved to ARCHIVE_DIR, failed ones to
# FAILED_DIR; the stderr output of each OCR run is stored in LOG_DIR.
# A lock file in SRC_DIR prevents two instances from running concurrently.
#
# Exit codes: 23 if the lock file cannot be opened, 1 if another instance
# holds the lock, 0 otherwise (per-file OCR failures are only reported).

set -o errexit -o nounset

# Command prefix that runs ocrmypdf. Stored as an array so every word is
# passed as its own argument without relying on word-splitting.
OCRMYPDF_CMD=(docker run --rm -i ocrmypdf)

# Directory scanned for incoming PDFs, and directory receiving OCR results.
SRC_DIR=/mnt/docscans
DST_DIR=/mnt/documents

LOCKFILE=$SRC_DIR/.lock
ARCHIVE_DIR=$SRC_DIR/archive
FAILED_DIR=$SRC_DIR/failed
LOG_DIR=$SRC_DIR/logs

PROCESSED_COUNT=0
FAILED_COUNT=0

#######################################
# Run ocrmypdf on one PDF, feeding it through stdin/stdout.
# Globals:   OCRMYPDF_CMD, DST_DIR, LOG_DIR (all read)
# Arguments: $1 - path of the source PDF
# Outputs:   $DST_DIR/<name>.pdf on success; $LOG_DIR/<name>.log receives
#            the command's stderr in either case
# Returns:   0 on success, otherwise the non-zero status of the failed step
#######################################
function ocrFile() {
  local src=$1
  local file_name dst_pdf dst_log tmp_pdf
  local status=0

  file_name=$(basename -- "$src")
  dst_pdf=$DST_DIR/${file_name%.*}.pdf
  dst_log=$LOG_DIR/${file_name%.*}.log
  # Write to a temporary name first so a failed run never leaves a truncated
  # PDF behind and never clobbers an existing good result.
  tmp_pdf=$dst_pdf.part

  "${OCRMYPDF_CMD[@]}" \
    -l deu \
    --clean \
    --rotate-pages \
    --deskew \
    - - \
    < "$src" \
    > "$tmp_pdf" 2> "$dst_log" || status=$?

  if (( status != 0 )); then
    rm -f -- "$tmp_pdf"
    return "$status"
  fi

  mv -f -- "$tmp_pdf" "$dst_pdf"
}

# Hold an exclusive, non-blocking lock on fd 100 for the lifetime of the
# script; it is released automatically when the process exits.
exec 100>"$LOCKFILE" || exit 23
flock -n 100 || {
  echo "$LOCKFILE is locked" >&2
  exit 1
}

mkdir -p "$DST_DIR" "$ARCHIVE_DIR" "$FAILED_DIR" "$LOG_DIR"

# NUL-delimited iteration keeps file names with spaces or newlines intact.
# Process substitution (not a pipe) keeps the counters in the current shell.
while IFS= read -r -d '' FILE; do
  echo "processing $FILE ..."

  if ocrFile "$FILE"; then
    echo processed "$FILE"
    mv -- "$FILE" "$ARCHIVE_DIR"
    echo moved "$FILE" to "$ARCHIVE_DIR"
    PROCESSED_COUNT=$((PROCESSED_COUNT + 1))
  else
    echo failed to process "$FILE"
    mv -- "$FILE" "$FAILED_DIR"
    echo moved "$FILE" to "$FAILED_DIR"
    FAILED_COUNT=$((FAILED_COUNT + 1))
  fi

  echo
done < <(find "$SRC_DIR" -maxdepth 1 -name '*.pdf' -print0)

echo Done
echo
echo processed "$PROCESSED_COUNT" PDFs

if (( FAILED_COUNT > 0 )); then
  echo failed on "$FAILED_COUNT" PDFs
fi