#!/bin/bash
# http://tex.stackexchange.com/a/18776
# The "holy imagemagick batman" answer :-)
#
# Usage: findlines.sh <file>
#   <file> is the path of the PDF WITHOUT the ".pdf" extension.
#
# Step 1: crop every page of <file>.pdf into 50%-wide, full-height tiles and
#         write them as PNG data to files named <file>-0, <file>-1, ...
#         (the loop below relies on this "<file>-*" naming).
# Step 2: squash each tile to a 1x1000 pixel column, threshold and skeletonise
#         it, dump the pixels as text and write <file>-N.txt containing one
#         number per line: the row index of every pixel whose text line does
#         not mention "black".
#         NOTE(review): this assumes ImageMagick's "txt:" enumeration format
#         "x,y: (r,g,b) ..." with a one-line header; confirm for the
#         installed ImageMagick version.
#
# Works for two-column pages, not so well for single-column.
#
# Exit status: 0 on success, 1 if any ImageMagick/sed step failed,
#              2 on a usage error.

set -u
# Let a failing "convert" fail the whole "convert | sed" pipeline.
set -o pipefail

if [[ $# -lt 1 || -z "$1" ]]; then
  printf 'Usage: %s <file>   (PDF path without the .pdf extension)\n' \
    "${0##*/}" >&2
  exit 2
fi

base=$1

# Split the pages into tiles; without them there is nothing to analyse.
if ! convert "${base}.pdf" -crop '50x100%' "png:${base}"; then
  printf '%s: failed to split %s.pdf\n' "${0##*/}" "$base" >&2
  exit 1
fi

status=0
for f in "${base}"-*; do
  # An unmatched glob stays literal; do not hand that to convert.
  [[ -f "$f" ]] || continue

  # "<file>-N.txt" results from an earlier run match the same glob; they are
  # outputs of this script, not images to analyse.
  if [[ "$f" == *.txt ]]; then
    continue
  fi

  # sed: drop the header line and the black pixels, strip the leading "x,",
  # then cut everything from the ':' on, which leaves only the row index.
  if ! convert "$f" -flatten -resize '1X1000!' \
      -black-threshold '99%' -white-threshold '10%' -negate \
      -morphology Erode Diamond -morphology Thinning:-1 Skeleton \
      -black-threshold '50%' txt:- \
    | sed -e '1d' -e '/black/d' -e 's/^[^,]*,//' -e 's/[(]//g' \
        -e 's/:.*//' -e 's/,/ /g' > "${f}.txt"; then
    printf '%s: failed to process %s\n' "${0##*/}" "$f" >&2
    status=1
  fi
done

exit "$status"