various and big improvements

This commit is contained in:
Thomas Ruoff
2021-01-27 23:40:37 +01:00
parent 85162c511b
commit 871514a530

43
run.sh
View File

@@ -1,24 +1,25 @@
#!/usr/bin/env bash #!/usr/bin/env bash
set -eu set -o errexit -o nounset
# as suggested in https://github.com/koalaman/shellcheck/wiki/SC2044
shopt -s globstar nullglob shopt -s globstar nullglob
OCRMYPDF_CMD='docker run --rm -i ocrmypdf' OCRMYPDF_CMD='docker run --rm -i ocrmypdf'
SRC_DIR=./test SCR_DIR=/mnt/docscans
DST_DIR=./dst DST_DIR=/mnt/documents
LOCKFILE=$DST_DIR/.lock
ARCHIVE_DIR=$DST_DIR/archive
FAILED_DIR=$DST_DIR/failed FAILED_DIR=$DST_DIR/failed
LOG_DIR=$DST_DIR/logs LOG_DIR=$DST_DIR/logs
ARCHIVE_DIR=$DST_DIR/archive
mkdir -p "$DST_DIR" PROCESSED_COUNT=0
mkdir -p "$ARCHIVE_DIR" FAILED_COUNT=0
function ocrFile() { function ocrFile() {
SRC=$1 SRC=$1
DST_DIR=$2
FILE_NAME=$(basename -- "$SRC") FILE_NAME=$(basename -- "$SRC")
DST_PDF=$DST_DIR/${FILE_NAME%.*}.pdf DST_PDF=$DST_DIR/${FILE_NAME%.*}.pdf
@@ -34,13 +35,31 @@ function ocrFile() {
> "$DST_PDF" 2> "$DST_LOG" > "$DST_PDF" 2> "$DST_LOG"
} }
# Main section: take an exclusive lock, then OCR every PDF found directly
# inside $SCR_DIR. Files that ocrFile handles successfully are moved to
# $ARCHIVE_DIR, the others to $FAILED_DIR; a summary is printed at the end.

# Create the output directories first: $LOCKFILE lives inside $DST_DIR, so
# the lock file cannot be opened until that directory exists.
mkdir -p "$DST_DIR" "$ARCHIVE_DIR" "$FAILED_DIR" "$LOG_DIR"

# Keep fd 100 open on the lock file for the rest of the script and try to
# lock it without blocking, so a second concurrent run bails out immediately.
exec 100>"$LOCKFILE" || exit 23
flock -n 100 || { echo "$LOCKFILE is locked"; exit 1; }

# NUL-delimited read so file names with spaces or newlines survive intact.
# The loop variable must be FILE: the body refers to "$FILE", and nounset
# would abort on an unbound name.
while IFS= read -r -d '' FILE
do
  echo "processing $FILE ..."
  if ocrFile "$FILE" "$DST_DIR"
  then
    echo processed "$FILE"
    mv -- "$FILE" "$ARCHIVE_DIR"
    echo moved "$FILE" to "$ARCHIVE_DIR"
    # Not (( PROCESSED_COUNT++ )): that returns status 1 when the old value
    # is 0, which would terminate the script under errexit.
    PROCESSED_COUNT=$(( PROCESSED_COUNT + 1 ))
  else
    echo failed to process "$FILE"
    mv -- "$FILE" "$FAILED_DIR"
    echo moved "$FILE" to "$FAILED_DIR"
    FAILED_COUNT=$(( FAILED_COUNT + 1 ))
  fi
done < <(find "$SCR_DIR" -maxdepth 1 -name '*.pdf' -print0)

echo Done
echo processed "$PROCESSED_COUNT" PDFs
# Use a full if-statement so that "no failures" does not become the
# script's (non-zero) exit status.
if (( FAILED_COUNT > 0 ))
then
  echo failed on "$FAILED_COUNT" PDFs
fi