class PSPage

a PostScript page (lines with position information)

Attributes

lines[RW]

Public Class Methods

new(str=nil) click to toggle source
# File misc/pdfparse.rb, line 443
# Builds a page, optionally parsing +str+ right away.
#
# str:: PostScript source handed to #parse, or nil (the default) for an
#       empty page.
#
# @lines always starts as an empty array, so clip_lines and to_s (which both
# read @lines) work on a page that was created without a source string
# instead of failing on nil.
def initialize(str=nil)
        @lines = []
        parse(str) if str
end

Public Instance Methods

clip_lines(ymin, ymax) click to toggle source

Removes the lines whose y position is not between ymin and ymax (inclusive), drops paragraphs left empty, and returns self.

# File misc/pdfparse.rb, line 448
# Keeps only the lines whose y position lies between ymin and ymax
# (inclusive); the two bounds may be passed in either order.
# Paragraphs that end up empty are dropped. Mutates @lines in place and
# returns self.
def clip_lines(ymin, ymax)
        lo, hi = ymin > ymax ? [ymax, ymin] : [ymin, ymax]
        @lines.each do |para|
                para.reject! { |line| line.y < lo || line.y > hi }
        end
        @lines.reject! { |para| para.empty? }
        self
end
parse(str) click to toggle source

Parses a PostScript string into an array of paragraphs (each itself an array of lines). Handles text strings and basic cursor position updates.

# File misc/pdfparse.rb, line 457
        # Parses a PostScript/PDF text stream into @lines, an array of
        # paragraphs, each paragraph being an array of Line objects.
        #
        # str:: the source, tokenized through ps2tok.
        #
        # A paragraph is opened by every BT operator. Numbers and strings are
        # pushed on an operand stack (the syntax is postfix) and consumed by the
        # operators that follow them. Handles text showing (Tj/TJ), cursor moves
        # (Td/TD/T*), character and word spacing (Tc/Tw) and the text matrix (Tm);
        # every other operator is ignored.
        #
        # When $VERBOSE is set, the tokens are echoed to stdout.
        #
        # Returns whatever ps2tok returns.
        def parse(str)
                @lines = []
                curx = cury = 0
                fontx = fonty = 12
                charspc = wordspc = 0
                stack = []
                linelead = -12
                ps2tok(str) do |t|
                        if $VERBOSE
                                case t
                                when Float, String; print "#{t} "
                                else puts t
                                end
                        end

                        case t
                        when Float, String   # operand: be postfix !
                                stack << t
                        when :BT     # begin text: open a new paragraph
                                @lines << []
                        when :ET     # end text: nothing to record
                        when :Tj, :TJ        # print line
                                # text shown before any BT would otherwise hit nil:
                                # open an implicit paragraph for it
                                @lines << [] if @lines.empty?
                                @lines.last << Line.new(stack.pop, curx, cury, fontx, fonty, charspc, wordspc)
                        when :Td, :TD        # move cursor
                                # TD additionally records the vertical offset as the
                                # step later applied by T*
                                linelead = stack.last*fonty if t == :TD
                                # operands were pushed as x then y, so y is popped first
                                cury += stack.pop*fonty
                                curx += stack.pop*fontx
                        when :'T*'   # new line
                                cury += linelead
                        when :Tc     # character spacing
                                # sample input where the spacing is significant:
                                #3.17731 Tc 9 0 0 9 343.41 653.84998 Tm
                                #[(3T)3202(O)729(R)3179(A)-3689(S)3178(I)]TJ
                                # => 3    TO     RA             SI
                                charspc = stack.pop
                        when :Tw     # word spacing
                                wordspc = stack.pop
                        when :Tm     # set transform matrix (scale, rotate, translate)
                                # six operands, restored to the order they were written in
                                params = Array.new(6) { stack.pop }.reverse
                                next if params[0] == 0.0    # rotated text
                                fontx, _, _, fonty, curx, cury = params
                        end
                end
        end
ps2tok(str) { |tok| ... } click to toggle source

yields PS tokens: floats, commands, and strings

# File misc/pdfparse.rb, line 498
# Splits +str+ into PostScript tokens and yields them one at a time:
#
# * numbers are yielded as Float; an optional sign, a leading dot (".5")
#   and a trailing dot ("4.") are accepted
# * "(...)" strings are yielded as String, parentheses included; a backslash
#   escapes the next character, so "\)" does not terminate the string
# * "[...]" arrays are yielded as one String, brackets included
# * words made of letters, digits, "_" and "*" are yielded as Symbol
#
# Whitespace and any other run of non-space characters are skipped silently.
# Unescaped nested parentheses inside a string are not supported.
#
# Returns nil.
def ps2tok(str)
        until str.empty?
                tok = nil
                case str
                when /\A[+-]?(?:\d+(?:\.\d*)?|\.\d+)/
                        tok = $&.to_f
                when /\A\((?:\\[\s\S]|[^\\)])*\)/
                        tok = $&
                when /\A\[(?:[^\](]*\((?:\\[\s\S]|[^\\)])*\))*[^\]]*\]/
                        tok = $&
                when /\A[a-zA-Z0-9_*]+/
                        tok = $&.to_sym
                when /\A\S+/, /\A\s+/
                        # unknown token or whitespace: consumed below, not yielded
                end
                # $& is the text matched by whichever pattern above succeeded
                str = str[$&.length..-1]
                yield tok if tok
        end
end
to_s() click to toggle source

Renders the lines as plain text, approximately following the page layout.

# File misc/pdfparse.rb, line 514
# Renders @lines as plain text that approximates the page layout.
#
# Paragraphs are emitted by decreasing topmost y position. Within a paragraph
# the lines are ordered by decreasing y, then increasing x. A vertical gap
# between paragraphs becomes one empty output row per 12 units. The
# horizontal position (relative to the leftmost line) is turned into a column
# through Line::CHARWIDTH and a base font size of 9.
#
# Returns the rows joined with "\n".
def to_s
        left = @lines.flatten.map(&:x).min
        prev_y = nil
        rows = ['']
        ordered = @lines.sort_by { |para| -para.map(&:y).max.to_i }
        ordered.each do |para|
                y = para.map(&:y).max
                # vertical gap to the previous paragraph -> empty rows
                rows.concat(['']*((prev_y-y)/12)) if prev_y && prev_y > y
                para.sort_by { |ln| [-ln.y, ln.x] }.each do |ln|
                        # 9 == base font size
                        # start a new row when this line sits clearly below the
                        # current one, or when the row already extends past its column
                        lower = y > ln.y+ln.fonty*0.9
                        if lower || rows.last.length*1000/Line::CHARWIDTH/9 > ln.x-left
                                rows << ''
                        end
                        column = (ln.x-left)*1000/Line::CHARWIDTH/9-1
                        rows[-1] = rows.last.ljust(column) << ' ' << ln.str
                        y = ln.y
                end
                prev_y = y if !prev_y || prev_y > y
        end
        rows.join("\n")
end