#!/usr/bin/env bash
#
# ocr.bash - OCR one or more PDFs using ocrmypdf.
#
# Usage: ocr.bash input.pdf [more.pdf ...]
#
# Each argument whose name ends in ".pdf" (any letter case) is passed to
# `ocrmypdf --redo-ocr`; the result is written next to the input as
# "<name>-ocr.pdf". Arguments with any other extension are skipped.
#
# Exit status:
#   0    every PDF was processed (skipped non-PDF arguments are not failures)
#   1    no arguments were given, or ocrmypdf failed for at least one input
#   127  ocrmypdf is not installed / not on PATH

set -u

#######################################
# Print the usage line to stderr.
#######################################
usage() {
  echo "Usage: $(basename -- "$0") input.pdf [more.pdf ...]" >&2
}

#######################################
# Derive the output path for an input PDF.
# Arguments: $1 - path to the input PDF
# Outputs:   "<dir>/<name without extension>-ocr.pdf" on stdout
#######################################
ocr_output_path() {
  local src=$1
  local dir name stem

  # `--` keeps paths that start with '-' from being parsed as options.
  dir=$(dirname -- "$src")
  name=$(basename -- "$src")

  # Strip the final extension regardless of its letter case (.pdf, .PDF, ...).
  stem=${name%.*}
  # A file literally named ".pdf" has no stem; keep the full name in that case.
  [[ -n "$stem" ]] || stem=$name

  printf '%s\n' "${dir}/${stem}-ocr.pdf"
}

#######################################
# Process every argument, continuing past individual failures.
# Arguments: "$@" - input file paths
# Returns:   0 if all PDFs were OCR'd, 1 if any ocrmypdf run failed
#######################################
main() {
  if (( $# == 0 )); then
    usage
    exit 1
  fi

  # Fail fast with one clear message instead of one error per input file.
  if ! command -v ocrmypdf >/dev/null 2>&1; then
    echo "Error: ocrmypdf not found in PATH" >&2
    exit 127
  fi

  local f out
  local failed=0

  for f in "$@"; do
    # Make sure it's a PDF (extension check only, case-insensitive).
    if [[ "$f" != *.[Pp][Dd][Ff] ]]; then
      echo "Skipping non-PDF file: $f"
      continue
    fi

    out=$(ocr_output_path "$f")

    echo "Processing $f -> $out"
    # Only report success when ocrmypdf actually succeeded; remember failures
    # so the script's exit status reflects them, but keep processing the rest.
    if ocrmypdf --redo-ocr -- "$f" "$out"; then
      echo "Created: $out"
    else
      echo "Error: ocrmypdf failed for $f" >&2
      failed=1
    fi
  done

  return "$failed"
}

main "$@"