diff --git a/run.sh b/run.sh
index 3a761aa..1d6c929 100755
--- a/run.sh
+++ b/run.sh
@@ -1,24 +1,28 @@
 #!/usr/bin/env bash
 
-set -eu
+set -o errexit -o nounset
 
+# as suggested in https://github.com/koalaman/shellcheck/wiki/SC2044
 shopt -s globstar nullglob
 
 OCRMYPDF_CMD='docker run --rm -i ocrmypdf'
 
-SRC_DIR=./test
-DST_DIR=./dst
+# Directory searched for incoming PDFs, and root directory for all outputs.
+SRC_DIR=/mnt/docscans
+DST_DIR=/mnt/documents
 
+# Lock file used to refuse concurrent runs; it lives under DST_DIR.
+LOCKFILE=$DST_DIR/.lock
+ARCHIVE_DIR=$DST_DIR/archive
 FAILED_DIR=$DST_DIR/failed
 LOG_DIR=$DST_DIR/logs
-ARCHIVE_DIR=$DST_DIR/archive
 
-mkdir -p "$DST_DIR"
-mkdir -p "$ARCHIVE_DIR"
+# Counters reported in the summary printed at the end of the run.
+PROCESSED_COUNT=0
+FAILED_COUNT=0
 
 function ocrFile() {
     SRC=$1
-    DST_DIR=$2
 
     FILE_NAME=$(basename -- "$SRC")
     DST_PDF=$DST_DIR/${FILE_NAME%.*}.pdf
@@ -34,13 +38,42 @@ function ocrFile() {
         > "$DST_PDF" 2> "$DST_LOG"
 }
 
-for FILE in "$SRC_DIR"/*.pdf
-do
-    echo "processing $FILE..."
-    ocrFile "$FILE" "$DST_DIR"
-    # TODO: move to FAILED_DIR and error out
-    echo processed "$FILE"
-    mv "$FILE" "$ARCHIVE_DIR"
-    echo moved "$FILE" to archive
-done
+# Create the output tree first: the lock file lives inside DST_DIR, so the
+# directory has to exist before the lock file can be opened.
+mkdir -p "$DST_DIR" "$ARCHIVE_DIR" "$FAILED_DIR" "$LOG_DIR"
+
+# Keep the lock file open on fd 100 for the rest of the run; a second
+# instance fails the non-blocking flock below and exits instead of running.
+exec 100>"$LOCKFILE" || exit 23
+
+flock -n 100 || { echo "$LOCKFILE" is locked; exit 1; }
+
+# NUL-delimited find output keeps file names with spaces/newlines intact.
+while IFS= read -r -d '' FILE
+do
+    echo "processing $FILE ..."
+
+    if ocrFile "$FILE"
+    then
+        echo processed "$FILE"
+        mv "$FILE" "$ARCHIVE_DIR"
+        echo moved "$FILE" to "$ARCHIVE_DIR"
+        # Plain assignment: (( n++ )) returns status 1 when n is 0, which
+        # would abort the script under errexit.
+        PROCESSED_COUNT=$(( PROCESSED_COUNT + 1 ))
+    else
+        echo failed to process "$FILE"
+        mv "$FILE" "$FAILED_DIR"
+        echo moved "$FILE" to "$FAILED_DIR"
+        FAILED_COUNT=$(( FAILED_COUNT + 1 ))
+    fi
+done < <(find "$SRC_DIR" -maxdepth 1 -name '*.pdf' -print0)
+
+echo Done
+echo processed "$PROCESSED_COUNT" PDFs
+# An if (rather than `[ ... ] && echo`) keeps the exit status 0 when nothing failed.
+if [ "$FAILED_COUNT" -gt 0 ]
+then
+    echo failed on "$FAILED_COUNT" PDFs
+fi
 