mirror of
https://github.com/tomru/docscan.git
synced 2026-03-03 06:27:21 +01:00
various and big improvements
This commit is contained in:
51
run.sh
51
run.sh
@@ -1,24 +1,25 @@
|
|||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
set -eu
|
set -o errexit -o nounset
|
||||||
|
|
||||||
|
# as suggested in https://github.com/koalaman/shellcheck/wiki/SC2044
|
||||||
shopt -s globstar nullglob
|
shopt -s globstar nullglob
|
||||||
|
|
||||||
OCRMYPDF_CMD='docker run --rm -i ocrmypdf'
|
OCRMYPDF_CMD='docker run --rm -i ocrmypdf'
|
||||||
|
|
||||||
SRC_DIR=./test
|
SCR_DIR=/mnt/docscans
|
||||||
DST_DIR=./dst
|
DST_DIR=/mnt/documents
|
||||||
|
|
||||||
|
LOCKFILE=$DST_DIR/.lock
|
||||||
|
ARCHIVE_DIR=$DST_DIR/archive
|
||||||
FAILED_DIR=$DST_DIR/failed
|
FAILED_DIR=$DST_DIR/failed
|
||||||
LOG_DIR=$DST_DIR/logs
|
LOG_DIR=$DST_DIR/logs
|
||||||
ARCHIVE_DIR=$DST_DIR/archive
|
|
||||||
|
|
||||||
mkdir -p "$DST_DIR"
|
PROCESSED_COUNT=0
|
||||||
mkdir -p "$ARCHIVE_DIR"
|
FAILED_COUNT=0
|
||||||
|
|
||||||
function ocrFile() {
|
function ocrFile() {
|
||||||
SRC=$1
|
SRC=$1
|
||||||
DST_DIR=$2
|
|
||||||
FILE_NAME=$(basename -- "$SRC")
|
FILE_NAME=$(basename -- "$SRC")
|
||||||
|
|
||||||
DST_PDF=$DST_DIR/${FILE_NAME%.*}.pdf
|
DST_PDF=$DST_DIR/${FILE_NAME%.*}.pdf
|
||||||
@@ -34,13 +35,31 @@ function ocrFile() {
|
|||||||
> "$DST_PDF" 2> "$DST_LOG"
|
> "$DST_PDF" 2> "$DST_LOG"
|
||||||
}
|
}
|
||||||
|
|
||||||
for FILE in "$SRC_DIR"/*.pdf
|
exec 100>$LOCKFILE || exit 23
|
||||||
do
|
|
||||||
echo "processing $FILE..."
|
flock -n 100 || { echo $LOCKFILE is locked; exit 1; }
|
||||||
ocrFile "$FILE" "$DST_DIR"
|
|
||||||
# TODO: move to FAILED_DIR and error out
|
mkdir -p "$DST_DIR" "$ARCHIVE_DIR" "$FAILED_DIR" "$LOG_DIR"
|
||||||
echo processed "$FILE"
|
|
||||||
mv "$FILE" "$ARCHIVE_DIR"
|
while IFS= read -r -d '' file
|
||||||
echo moved "$FILE" to archive
|
do
|
||||||
done
|
echo "processing $FILE ..."
|
||||||
|
|
||||||
|
if ocrFile "$FILE" "$DST_DIR"
|
||||||
|
then
|
||||||
|
echo processed "$FILE"
|
||||||
|
mv "$FILE" "$ARCHIVE_DIR"
|
||||||
|
echo moved "$FILE" to "$ARCHIVE_DIR"
|
||||||
|
(( PROCESSED_COUNT++ ))
|
||||||
|
else
|
||||||
|
echo failed to process "$FILE"
|
||||||
|
mv "$FILE" "$FAILED_DIR"
|
||||||
|
echo moved "$FILE" to "$FAILED_DIR"
|
||||||
|
(( FAILED_COUNT++ ))
|
||||||
|
fi
|
||||||
|
done < <(find $SCR_DIR -maxdepth 1 -name '*.pdf' -print0)
|
||||||
|
|
||||||
|
echo Done
|
||||||
|
echo processed $PROCESSED_COUNT PDFs
|
||||||
|
[ $FAILED_COUNT -gt 0 ] && echo failed on $FAILED_COUNT PDFs
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user