pdftotxt
TRANSCRIPT
-
7/26/2019 pdftotxt
1/1
#!/bin/sh
# pdftotxt: render every page of a scanned PDF to a PNG image and run OCR on it.
#
# For each page N in 1..PAGES the script
#   1. renders that page of SOURCE to pageN.png with ImageMagick `convert`, and
#   2. runs `tesseract` on pageN.png, passing "${OUTPUT}N" as the output name.
# All files are created in the current working directory.

PAGES=22          # set to the number of pages in the PDF
SOURCE=input.pdf  # set to the file name of the PDF
OUTPUT=output     # prefix of the per-page OCR output (page N uses "${OUTPUT}N")
RESOLUTION=300    # set to the resolution the scanner used (the higher, the better)

# Possible way to obtain the page count automatically (kept disabled):
#xpdf-pdfinfo pamphlet-low.pdf | grep Pages: | awk '{print $2}' | tail -n 1

#touch $OUTPUT

# Plain POSIX counter loop; avoids depending on the non-POSIX `seq` utility.
i=1
while [ "$i" -le "$PAGES" ]; do
  # The "[index]" suffix asks convert for a single page of the PDF; pages are
  # numbered from 1 here while the index handed to convert is i - 1.
  if convert -density "$RESOLUTION" -depth 8 "${SOURCE}[$((i - 1))]" "page${i}.png"; then
    # Earlier variant that appended everything to a single file (kept disabled):
    # tesseract page$i.tif >> $OUTPUT
    tesseract "page${i}.png" "${OUTPUT}${i}"
  else
    # Without a rendered image there is nothing to OCR; report it and move on.
    printf 'pdftotxt: could not render page %s of %s\n' "$i" "$SOURCE" >&2
  fi
  i=$((i + 1))
done
##########################