fruitcombo

go back

OCR snipping tool with imagemagick and tesseract

Inspired by NormCap, which I like very much but which seems slow to launch on my PC. This uses slop to select a region of the screen, ImageMagick to capture it and process it into a blackish-text-on-whiteish-background image, tesseract to convert that image to text, xclip to load the text to the clipboard, and notify-send to preview the output.

There are two versions: one that saves the screenshot and a text file of the OCR output and also loads the text to the clipboard, and another that only loads the text to the clipboard.

#!/bin/bash
#
# OCR snipping tool -- saving version.
#
# Lets the user select a screen region with slop, captures it with
# ImageMagick, runs tesseract on a grayscale (and, for dark images, inverted)
# copy, then:
#   - keeps the screenshot as   ~/Pictures/screenshots/<timestamp>.png
#   - keeps the OCR text as     ~/Pictures/screenshots/<timestamp>.txt
#   - loads the text (newlines replaced by spaces) into the X clipboard
#   - shows a desktop notification previewing the clipboard content
#
# Requires: slop, ImageMagick (import, convert, identify, mogrify),
#           tesseract, xclip, notify-send.
# Exits with status 1 if the selection is cancelled or any capture/OCR step
# fails.

set -uo pipefail

# setup
timestamp=$(date +%Y-%m-%d--%H-%M-%S)
out_dir="$HOME/Pictures/screenshots"
shot="$out_dir/$timestamp.png"
work="$out_dir/$timestamp-temp.png"

# The capture and OCR steps write into this directory, so make sure it exists.
mkdir -p -- "$out_dir" || exit 1

# Remove the intermediate image on every exit path, not only on success.
trap 'rm -f -- "$work"' EXIT

# slop exits non-zero when the selection is cancelled.
selection=$(slop -f "%x %y %w %h %g %i") || exit 1
# Only the position and size are used; the remaining fields are discarded.
read -r X Y W H _ <<< "$selection"
# Refuse an empty or malformed selection instead of passing a broken
# geometry string to import.
[[ $W =~ ^[1-9][0-9]*$ && $H =~ ^[1-9][0-9]*$ ]] || exit 1

# take and process pics
import -display :0 -window root -crop "${W}x${H}+${X}+${Y}" "$shot" || exit 1
convert "$shot" -colorspace gray "$work" || exit 1

# invert pic if dark so tesseract can read it
# NOTE(review): the 19000 threshold presumably assumes a 16-bit (0-65535)
# ImageMagick build -- confirm before using on a Q8 build.
mean=$(identify -format "%[mean]" "$work") || exit 1
mean=${mean%%.*} # drop the fractional part; the comparison is integer-only
if [[ $mean =~ ^[0-9]+$ ]]; then
  # 10# forces base 10 so a leading zero is never read as octal.
  if (( 10#$mean < 19000 )); then
    mogrify -channel RGB -negate "$work" || exit 1
  fi
else
  printf 'warning: could not parse image mean "%s"; skipping inversion\n' \
    "$mean" >&2
fi

# ocr
# tesseract appends ".txt" to the output base name it is given.
tesseract -l eng "$work" "$out_dir/$timestamp" || exit 1
tr '\n' ' ' < "$out_dir/$timestamp.txt" \
  | xclip -i -selection clipboard || exit 1
notify-send "📋 OCR processed:" "$(xclip -selection clipboard -o | head -3)"


#!/bin/bash
#
# OCR snipping tool -- clipboard-only version.
#
# Lets the user select a screen region with slop, captures it with
# ImageMagick into a private temporary file, converts it to grayscale (and
# inverts it if it is dark), runs tesseract on it, loads the recognised text
# (newlines replaced by spaces) into the X clipboard and shows a desktop
# notification previewing the clipboard content. Nothing is left on disk.
#
# Requires: slop, ImageMagick (import, identify, mogrify), tesseract, xclip,
#           notify-send.
# Exits with status 1 if the selection is cancelled or any capture/OCR step
# fails.

set -uo pipefail

# setup
# Work inside a private temp directory rather than a predictable /tmp path,
# and delete it on every exit path.
tmp_dir=$(mktemp -d) || exit 1
trap 'rm -rf -- "$tmp_dir"' EXIT
shot="$tmp_dir/snip.png"

# slop exits non-zero when the selection is cancelled.
selection=$(slop -f "%x %y %w %h %g %i") || exit 1
# This read must be its own statement: when it shares a line with
# "|| exit 1" it becomes arguments to exit and the geometry is never set.
# Only the position and size are used; the remaining fields are discarded.
read -r X Y W H _ <<< "$selection"
# Refuse an empty or malformed selection instead of passing a broken
# geometry string to import.
[[ $W =~ ^[1-9][0-9]*$ && $H =~ ^[1-9][0-9]*$ ]] || exit 1

# take and process pics
import -display :0 -window root -crop "${W}x${H}+${X}+${Y}" "$shot" || exit 1
mogrify -colorspace gray "$shot" || exit 1

# invert pic if dark so tesseract can read it
# NOTE(review): the 19000 threshold presumably assumes a 16-bit (0-65535)
# ImageMagick build -- confirm before using on a Q8 build.
mean=$(identify -format "%[mean]" "$shot") || exit 1
mean=${mean%%.*} # drop the fractional part; the comparison is integer-only
if [[ $mean =~ ^[0-9]+$ ]]; then
  # 10# forces base 10 so a leading zero is never read as octal.
  if (( 10#$mean < 19000 )); then
    mogrify -channel RGB -negate "$shot" || exit 1
  fi
else
  printf 'warning: could not parse image mean "%s"; skipping inversion\n' \
    "$mean" >&2
fi

# ocr
# pipefail (set above) makes a tesseract failure abort here instead of
# silently loading empty text into the clipboard.
tesseract -l eng "$shot" stdout \
  | tr '\n' ' ' \
  | xclip -i -selection clipboard || exit 1
notify-send "📋 OCR processed:" "$(xclip -selection clipboard -o | head -3)"