Files
docscan/run.sh
2021-02-04 23:17:24 +01:00

83 lines
1.6 KiB
Bash
Executable File

#!/usr/bin/env bash
# Settings and run-state for the PDF OCR batch job.

# Abort on any unhandled command failure and on use of an unset variable.
set -eu

# OCR command (program plus fixed arguments) kept as a single string.
OCRMYPDF_CMD="docker run --rm -i ocrmypdf"

# Scans are picked up from SRC_DIR; OCR results are written to DST_DIR.
SRC_DIR="/mnt/docscans"
DST_DIR="/mnt/documents"

# Working files and folders, all located below the scan directory.
LOCKFILE="${SRC_DIR}/.lock"
ARCHIVE_DIR="${SRC_DIR}/archive"
FAILED_DIR="${SRC_DIR}/failed"
LOG_DIR="${SRC_DIR}/logs"

# Run-state: number of successfully processed PDFs and the list of failures.
PROCESSED_COUNT=0
FAILED_FILES=""
#######################################
# Run the OCR command on a single file, streaming it through stdin/stdout.
# Globals:
#   OCRMYPDF_CMD (read) - OCR command line, split into words by the shell
#   DST_DIR      (read) - directory receiving the resulting PDF
#   LOG_DIR      (read) - directory receiving the per-file log
# Arguments:
#   $1 - path of the source file
# Outputs:
#   $DST_DIR/<name>.pdf  - OCR result (removed again if the command fails)
#   $LOG_DIR/<name>.log  - start timestamp plus the command's stderr, appended
# Returns:
#   Exit status of the OCR command (0 on success).
#######################################
function ocrFile() {
  local src=$1
  local file_name dst_pdf dst_log
  local status=0

  file_name=$(basename -- "$src")
  # Strip the last extension so "scan.pdf" maps to "scan.pdf" / "scan.log".
  dst_pdf=$DST_DIR/${file_name%.*}.pdf
  dst_log=$LOG_DIR/${file_name%.*}.log

  echo "Started at $(date -Iseconds)" >> "$dst_log"

  # OCRMYPDF_CMD carries a program and its arguments in one string, so it is
  # expanded unquoted on purpose. The trailing "- -" are the input and output
  # arguments; the actual data travels over the redirected stdin/stdout.
  # shellcheck disable=SC2086
  $OCRMYPDF_CMD \
    -l deu \
    --clean \
    --rotate-pages \
    --deskew \
    - - \
    < "$src" \
    > "$dst_pdf" 2>> "$dst_log" || status=$?

  # The output redirection creates the target before the command runs; do not
  # leave an empty or truncated PDF behind when OCR failed.
  if (( status != 0 )); then
    rm -f -- "$dst_pdf"
  fi

  return "$status"
}
# Main flow: lock, OCR every PDF directly inside SRC_DIR, sort the originals
# into archive/failed folders, then report (and mail) any failures.

# Hold a non-blocking lock on file descriptor 100 for the lifetime of the
# script so that two runs never process the same scans concurrently.
exec 100>"$LOCKFILE" || exit 23
flock -n 100 || { echo "$LOCKFILE" is locked; exit 1; }

mkdir -p "$DST_DIR" "$ARCHIVE_DIR" "$FAILED_DIR" "$LOG_DIR"

# NUL-delimited read keeps file names with spaces or other odd characters
# intact. Process substitution (instead of a pipe) keeps the loop in the
# current shell so the counters below survive it.
while IFS= read -r -d '' FILE; do
  echo "processing $FILE ..."
  if ocrFile "$FILE"; then
    echo processed "$FILE"
    mv -- "$FILE" "$ARCHIVE_DIR"
    echo moved "$FILE" to "$ARCHIVE_DIR"
    # Plain assignment: unlike (( ... )), its exit status never depends on the
    # resulting value, so it cannot trip errexit.
    PROCESSED_COUNT=$(( PROCESSED_COUNT + 1 ))
  else
    echo failed to process "$FILE"
    mv -- "$FILE" "$FAILED_DIR"
    echo moved "$FILE" to "$FAILED_DIR"
    # $'\n' is a real newline; "\n" inside double quotes would stay a literal
    # backslash-n in both the console output and the mail body.
    FAILED_FILES+="${FILE}"$'\n'
  fi
  echo
done < <(find "$SRC_DIR" -maxdepth 1 -name '*.pdf' -print0)

echo Done
echo
echo processed "$PROCESSED_COUNT" PDFs

if [ -n "$FAILED_FILES" ]; then
  echo failed on PDFs
  printf '%s' "$FAILED_FILES"
  # With -t sendmail takes the recipients from the message headers, so the
  # address goes into a To: header. The empty line ends the header section.
  sendmail -t <<MAILTEXT
To: thomasruoff@gmail.com
Subject: [$(hostname)] OCR on PDFs failed

Hey,
following PDFs failed to be OCR'd:
${FAILED_FILES}
Cheers
MAILTEXT
fi