#!/usr/bin/env bash
#
# run.sh - batch-OCR every PDF found directly inside SRC_DIR.
#
# Each PDF is streamed through OCRmyPDF (run via docker); the result is written
# to DST_DIR under the same base name. Successfully processed sources are moved
# to ARCHIVE_DIR. If OCR fails for a file, that source is moved to FAILED_DIR
# and the script stops with a non-zero exit status. The stderr output of every
# OCR run is kept in LOG_DIR/<name>.log.

set -euo pipefail

# nullglob: a pattern without matches expands to nothing, so the loop below is
# simply skipped when SRC_DIR contains no PDFs.
shopt -s globstar nullglob

# Command prefix used to invoke OCRmyPDF, kept as an array so each word stays a
# separate argument without relying on word-splitting. `-i` keeps stdin open so
# the PDF can be piped into the container.
# NOTE(review): assumes a docker image tagged `ocrmypdf` exists locally.
OCRMYPDF_CMD=(docker run --rm -i ocrmypdf)

SRC_DIR=./test
DST_DIR=./dst

FAILED_DIR=$DST_DIR/failed
LOG_DIR=$DST_DIR/logs
ARCHIVE_DIR=$DST_DIR/archive

#######################################
# OCR a single PDF.
# Globals:
#   OCRMYPDF_CMD (read) - command prefix used to start OCRmyPDF
#   LOG_DIR      (read) - directory receiving <name>.log (must exist)
# Arguments:
#   $1 - path of the source PDF
#   $2 - directory receiving the OCRed <name>.pdf (must exist)
# Outputs:
#   Writes $2/<name>.pdf and $LOG_DIR/<name>.log.
# Returns:
#   0 on success, 1 if the OCR command failed (the partial output PDF is
#   removed in that case; the log file is kept for diagnosis).
#######################################
ocrFile() {
  local src=$1
  local dst_dir=$2
  local file_name stem dst_pdf dst_log

  file_name=$(basename -- "$src")
  stem=${file_name%.*}
  dst_pdf=$dst_dir/$stem.pdf
  dst_log=$LOG_DIR/$stem.log

  # `- -` makes OCRmyPDF read the PDF from stdin and write it to stdout.
  # Read from the function's own argument, not from a caller's loop variable.
  if ! "${OCRMYPDF_CMD[@]}" \
    -l deu \
    --clean \
    --rotate-pages \
    --deskew \
    - - \
    < "$src" \
    > "$dst_pdf" 2> "$dst_log"; then
    # The redirect has already created/truncated the output; do not leave a
    # broken PDF behind.
    rm -f -- "$dst_pdf"
    return 1
  fi
}

#######################################
# Process every *.pdf in SRC_DIR; stop at the first failure.
# Globals:
#   SRC_DIR, DST_DIR, ARCHIVE_DIR, LOG_DIR, FAILED_DIR (read)
# Returns:
#   0 if all files were processed, 1 after the first failed file.
#######################################
main() {
  local src_file

  # LOG_DIR must exist before ocrFile redirects stderr into it.
  mkdir -p -- "$DST_DIR" "$ARCHIVE_DIR" "$LOG_DIR" "$FAILED_DIR"

  for src_file in "$SRC_DIR"/*.pdf; do
    printf 'processing %s...\n' "$src_file"

    if ! ocrFile "$src_file" "$DST_DIR"; then
      mv -- "$src_file" "$FAILED_DIR/"
      printf 'failed to process %s, moved to %s (log in %s)\n' \
        "$src_file" "$FAILED_DIR" "$LOG_DIR" >&2
      return 1
    fi
    printf 'processed %s\n' "$src_file"

    mv -- "$src_file" "$ARCHIVE_DIR/"
    printf 'moved %s to archive\n' "$src_file"
  done
}

main "$@"