require 'strscan'

# Parsers for a small markup in which "<<tag" opens an element and ">>"
# closes it.
#
# Vooly itself is a simple callback (push) parser; Vooly::Pull is a pull
# parser that also supports longer bracket runs ("<<<", "<<<<", ...);
# Vooly::Slurp builds a nested array tree from a Pull parser; Vooly::Line
# converts between the markup and a line-oriented event format.
class Vooly
  # io:: the markup to parse.  It is handed straight to StringScanner.new,
  #      so it has to be a String.
  def initialize(io)
    @io = io
  end

  # Scans the input and calls +block+ for every event:
  #
  #   block.call :text, string   # text before a tag, or the trailing rest
  #   block.call :open, name     # "<<name"
  #   block.call :open           # bare "<<"
  #   block.call :close          # ">>"
  #
  # When +strip+ is true, text is stripped and empty text events are
  # suppressed.  The tag that follows an empty text is still reported.
  def parse(strip=false, &block)
    scanner = StringScanner.new(@io)

    until scanner.eos?
      # Text up to the next tag, or everything that is left if there is
      # no further tag (group 2 is nil in that case).
      scanner.scan(/(.*?)(>>|<<\w*)/m) || scanner.scan(/(.*)/m)

      text = scanner[1]
      text.strip!  if strip
      # Only the empty text event is dropped; skipping the whole iteration
      # here would also lose the tag matched together with it.
      block.call(:text, text)  unless strip && text.empty?

      case scanner[2]
      when /<<(\w+)/
        block.call :open, $1
      when "<<"
        block.call :open
      when ">>"
        block.call :close
      end
    end
  end

  # Raised by Pull#next when tags are not properly nested.
  class ParseError < StandardError; end

  # Pull parser: every call to #next advances to the following token and
  # exposes it through #state (:open, :close or :text) and #text.
  #
  # A tag may be opened with two or more "<".  Inside a tag opened with n
  # brackets only runs of at least n "<" open a tag and only n ">" close
  # one, as encoded by the regexps below.
  class Pull
    attr_reader :state
    attr_reader :text

    # Regexp caches, keyed by the opening bracket run (only its size is
    # used) and filled on first access.
    OPENING_REGEXP = Hash.new { |hash, key|
      hash[key] = /(<{#{key.size},})(\w*)/
    }
    CLOSING_REGEXP = Hash.new { |hash, key|
      hash[key] = />{#{key.size}}/
    }
    # Group 1 is only set when the text is followed by a closing run.
    TEXT_REGEXP = Hash.new { |hash, key|
      hash[key] = /.+?(?=<{#{key.size}}|(>{#{key.size}}))/m
    }

    # Prefetch
    ["<<", "<<<", "<<<<"].each { |brackets|
      OPENING_REGEXP[brackets]
      CLOSING_REGEXP[brackets]
      TEXT_REGEXP[brackets]
    }

    # string::           the markup to parse.
    # strip_whitespace:: true strips all text; false (the default) strips
    #                    text that is whitespace only and drops one
    #                    whitespace character after an opening tag and
    #                    before a closing tag; nil leaves text untouched.
    def initialize(string, strip_whitespace=false)
      @scanner = StringScanner.new(string)
      @strip = strip_whitespace
      @state = @text = nil
      @openings = []                        # offsets of the open tags
      # One regexp per nesting level; the first entry is the top level.
      @openrx = [OPENING_REGEXP["<<"]]
      @closerx = [CLOSING_REGEXP["<<"]]
      @textrx = [TEXT_REGEXP["<<"]]
    end

    # Applies the whitespace policy chosen at construction to the current
    # text, in place.
    def strip!
      case @strip
      when true                 # Strip everything
        @text.strip!
      when false                # Strip whitespace if only whitespace
        @text.strip!  unless @text =~ /\S/
      when nil                  # Strip nothing
        #
      end
    end

    # Advances to the next token.
    #
    # Returns true if a token was read (see #state and #text) and false
    # once the input is exhausted, in which case both are nil.
    #
    # Raises ParseError if a close tag has no matching open tag, or if the
    # input ends while tags are still open.
    def next
      @state = @text = nil  if @scanner.eos?
      found = false

      until @scanner.eos?
        if @openrx.empty? || @closerx.empty? || @textrx.empty?
          raise ParseError,
            "Invalid nesting, no open tags [offset #{@scanner.pos}]"
        end

        if @scanner.scan(@openrx.last)
          brackets = @scanner[1]
          @openings << @scanner.pos - @scanner.matched_size
          @openrx << OPENING_REGEXP[brackets]
          @closerx << CLOSING_REGEXP[brackets]
          @textrx << TEXT_REGEXP[brackets]
          @state = :open
          @text = @scanner[2]
          @text = nil  if @text.empty?

          @scanner.scan(/\s/)  if @strip == false   # Read one whitespace
        elsif @scanner.scan(@closerx.last)
          @state = :close
          @text = nil
          @closerx.pop
          @textrx.pop
          @openrx.pop
          @openings.pop
        elsif @scanner.scan(@textrx.last)
          @state = :text
          @text = @scanner.matched
          # Drop one whitespace before a closing tag.  \z rather than $,
          # which would also match in front of an interior newline.
          @text.sub!(/\s\z/, '')  if @strip == false && @scanner[1]
          strip!
          # Nothing left of this text: look at the tag that follows.  The
          # lookahead of the text regexp guarantees there is more input.
          next  if @text.empty?
        else
          # No tag ahead: the rest of the input is one last text token.
          @state = :text
          @text = @scanner.rest
          @scanner.terminate
          strip!
          # Unclosed tags are reported by the following call.
          return true  unless @text.empty?
          @state = @text = nil
          break
        end

        found = true
        break
      end

      if @scanner.eos? && @closerx.size != 1
        detail =
          if @closerx.size < 1
            "(trailing garbage)"
          else
            "(wanted: #{@closerx.last.source} " +
              "from offset #{@openings.last} " +
              "here at offset #{@scanner.pos})"
          end
        raise ParseError, "Invalid nesting " + detail
      end

      # Also true for a token that ends the input, e.g. the final close
      # tag, so "while parser.next" loops get to see it.
      found
    end
  end

  # An Array that carries the tag it was created for.
  class TaggedArray < Array
    attr_accessor :tag

    # tag::  the tag; the remaining arguments become the elements.
    def initialize(tag, *data)
      super data
      self.tag = tag
    end

    def inspect
      "#<#{self.class.name} #{tag.inspect} #{super}>"
    end

    # Replaces, in place and depth first, every nested TaggedArray whose
    # tag is a key of +mapping+ by <tt>mapping[tag].new(*elements)</tt>.
    #
    # mapping:: a Hash from tag to class, or an Array of classes, which are
    #           then looked up by their #name.
    #
    # Returns self.  The receiver itself is never replaced.
    def with_mapping!(mapping)
      if mapping.is_a? Array
        mapping = mapping.each_with_object({}) { |klass, table|
          table[klass.name] = klass
        }
      end

      map! { |element|
        next element  unless element.kind_of? TaggedArray

        element.with_mapping! mapping
        klass = mapping[element.tag]
        klass ? klass.new(*element) : element
      }

      self
    end
  end

  # Reads a whole document into a tree of nested arrays.
  class Slurp
    # Takes the same arguments as Pull.new.
    def initialize(string, skip_whitespace=false)
      @parser = Pull.new(string, skip_whitespace)
    end

    # Returns a TaggedArray tagged :root.  Named tags become nested
    # TaggedArrays, tags without a name become plain Arrays, and text
    # becomes String elements.
    def slurp
      stack = [TaggedArray.new(:root)]

      while @parser.next
        case @parser.state
        when :open
          name = @parser.text
          child = name ? TaggedArray.new(name) : []
          stack.last << child
          stack << child
        when :close
          stack.pop
        when :text
          stack.last << @parser.text
        end
      end

      stack.first
    end
  end

  # Line-oriented event format: "+name" opens a tag, "-" closes one, and
  # lines starting with a space carry text.
  module Line
    # Converts markup into the line format.
    class Writer
      # Takes the same arguments as Pull.new.
      def initialize(string, skip_whitespace=false)
        @parser = Pull.new(string, skip_whitespace)
      end

      # Appends one line per token to +io+ using <<.  Text spanning
      # several lines is written as several lines, each prefixed by a
      # space.
      def write(io=STDOUT)
        while @parser.next
          case @parser.state
          when :open
            io << "+#{@parser.text}\n"
          when :close
            io << "-\n"
          when :text
            # String#each is gone since Ruby 1.9; each_line is its
            # replacement.
            @parser.text.each_line { |line| io << " #{line}" }
            io << "\n"
          end
        end
      end
    end

    # Reads the line format back, one token per call to #next.
    class Reader
      attr_reader :text, :state

      # io:: the source; it has to respond to gets, getc and ungetc.
      def initialize(io)
        @io = io
        @text = @state = nil
      end

      # Reads the next token into #state and #text.
      #
      # Returns false at the end of the input and true otherwise.  A line
      # that starts with neither "+", "-" nor a space leaves #state and
      # #text as they were.
      def next
        line = @io.gets
        return false  if line.nil?

        # The marker is the first character of the line; without the
        # anchor, text containing "+" or "-" would be taken for a tag.
        case line
        when /\A\+(.*)/
          @text = $1.empty? ? nil : $1
          @state = :open
        when /\A-/
          @text = false
          @state = :close
        when /\A (.*)/
          @text = $1
          # Following lines that start with a space continue this text.
          while (h = @io.getc) == ?\s
            @text << "\n" << @io.gets.to_s.chomp
          end
          @io.ungetc h  if h
          @state = :text
        end

        true
      end
    end
  end
end