Base
Used for parsing a document in kramdown format.
If you want to extend the functionality of the parser, you need to do the following:
Create a new subclass
add the needed parser methods
modify the @block_parsers and @span_parsers variables and add the names of your parser methods
Here is a small example for an extended parser class that parses ERB style tags as raw text if they are used as span-level elements (an equivalent block-level parser should probably also be made to handle the block case):
require 'kramdown/parser/kramdown'

# Example parser subclass that turns ERB style tags (<% ... %>) into :raw elements when they
# are encountered by the span-level parser.
class Kramdown::Parser::ERBKramdown < Kramdown::Parser::Kramdown

  def initialize(source, options)
    super
    # Prepend the new parser name to the list of span parsers.
    @span_parsers.unshift(:erb_tags)
  end

  # Start regexp of the parser; it matches a complete ERB tag non-greedily.
  ERB_TAGS_START = /<%.*?%>/

  # Advance the scanner past the matched tag and add the matched text as a :raw element.
  def parse_erb_tags
    @src.pos += @src.matched_size
    @tree.children << Element.new(:raw, @src.matched)
  end
  # Register the parser method under the name :erb_tags with '<%' as span start string.
  define_parser(:erb_tags, ERB_TAGS_START, '<%')

end
The new parser can be used like this:
require 'kramdown/document' # require the file with the above parser class Kramdown::Document.new(input_text, :input => 'ERBKramdown').to_html
Struct class holding all the needed data for one block/span-level parser method.
Mapping of markdown attribute value to content model. I.e. :raw when "0", :default when "1" (use default content model for the HTML element), :span when "span", :block when "block" and for everything else nil is returned.
Regexp for matching indentation (one tab or four spaces)
Regexp for matching the optional space (zero or up to three spaces)
Add a parser method
with the given name,
using start_re as start regexp
and, for span parsers, span_start as a String that can be used in a regexp and which identifies the starting character(s)
to the registry. The method name is automatically derived from the name or can explicitly be set by using the meth_name parameter.
# File lib/kramdown/parser/kramdown.rb, line 277
# Register a parser method in the class-level parser registry.
#
# name::       key under which the parser is stored
# start_re::   start regexp of the parser
# span_start:: string identifying the starting character(s); only given for span parsers
# meth_name::  name of the parser method, "parse_<name>" unless set explicitly
#
# Raises a RuntimeError if a parser with the same name is already registered.
def self.define_parser(name, start_re, span_start = nil, meth_name = "parse_#{name}")
  if @@parsers.has_key?(name)
    raise "A parser with the name #{name} already exists!"
  end
  @@parsers[name] = Data.new(name, start_re, span_start, meth_name)
end
Return true if there is a parser called name.
# File lib/kramdown/parser/kramdown.rb, line 288 def self.has_parser?(name) @@parsers.has_key?(name) end
Return the Data structure for the parser name.
# File lib/kramdown/parser/kramdown.rb, line 283 def self.parser(name = nil) @@parsers[name] end
This helper method adds the appropriate attributes to the element el of type a or img and adds the element itself to the @tree.
# File lib/kramdown/parser/kramdown/link.rb, line 34
# Finish a link or image element and append it to the current tree.
#
# An element of type :a gets an 'href' attribute. Any other element gets 'src' and 'alt'
# attributes and loses its children. A 'title' attribute is only set when +title+ is given.
def add_link(el, href, title, alt_text = nil)
  is_anchor = (el.type == :a)
  if is_anchor
    el.attr['href'] = href
  else
    el.attr['src'] = href
    el.attr['alt'] = alt_text
    el.children.clear
  end
  if title
    el.attr['title'] = title
  end
  @tree.children << el
end
Return true if we are after a block boundary.
# File lib/kramdown/parser/kramdown/block_boundary.rb, line 20
# Truthy when the current tree has no children yet, when its last child is a :blank
# element or an :eob element without a value, or when a pending block IAL is set.
def after_block_boundary?
  last_child = @tree.children.last
  !last_child ||
    last_child.type == :blank ||
    (last_child.type == :eob && last_child.value.nil?) ||
    @block_ial
end
Return true if we are before a block boundary.
# File lib/kramdown/parser/kramdown/block_boundary.rb, line 26 def before_block_boundary? @src.check(self.class::BLOCK_BOUNDARY) end
# File lib/kramdown/parser/kramdown/extensions.rb, line 93
# Handle the extension +name+ with the parsed options +opts+ and the (optional) +body+.
#
# name:: extension name; 'comment', 'nomarkdown' and 'options' are handled
# opts:: hash of attributes given on the extension tag
# body:: extension body as String, or nil if the extension had no body
# type:: :block or :span, stored as category on created elements
#
# Returns true if the extension name is known, false otherwise.
def handle_extension(name, opts, body, type)
  case name
  when 'comment'
    @tree.children << Element.new(:comment, body, nil, :category => type) if body.kind_of?(String)
    true
  when 'nomarkdown'
    @tree.children << Element.new(:raw, body, nil, :category => type, :type => opts['type'].to_s.split(/\s+/)) if body.kind_of?(String)
    true
  when 'options'
    # Apply every known option; collect the entries whose name is not a kramdown option.
    unknown = opts.select do |k, v|
      key = k.to_sym
      if Kramdown::Options.defined?(key)
        begin
          val = Kramdown::Options.parse(key, v)
          @options[key] = val
          (@root.options[:options] ||= {})[key] = val
        rescue StandardError
          # An unparsable value must not abort parsing, but it should not vanish silently.
          warning("Invalid value for kramdown option '#{k}' - ignoring it")
        end
        false
      else
        true
      end
    end
    unknown.each do |k, _|
      warning("Unknown kramdown option '#{k}'")
    end
    @tree.children << Element.new(:eob, :extension) if type == :block
    true
  else
    false
  end
end
# File lib/kramdown/parser/kramdown/html.rb, line 24 def handle_kramdown_html_tag(el, closed, handle_body) el.options[:ial] = @block_ial if @block_ial content_model = if @tree.type != :html_element || @tree.options[:content_model] != :raw (@options[:parse_block_html] ? HTML_CONTENT_MODEL[el.value] : :raw) else :raw end if val = HTML_MARKDOWN_ATTR_MAP[el.attr.delete('markdown')] content_model = (val == :default ? HTML_CONTENT_MODEL[el.value] : val) end @src.scan(TRAILING_WHITESPACE) if content_model == :block el.options[:content_model] = content_model el.options[:is_closed] = closed if !closed && handle_body if content_model == :block if !parse_blocks(el) warning("Found no end tag for '#{el.value}' - auto-closing it") end elsif content_model == :span curpos = @src.pos if @src.scan_until(/(?=<\/#{el.value}\s*>)/i) add_text(extract_string(curpos...@src.pos, @src), el) @src.scan(HTML_TAG_CLOSE_RE) else add_text(@src.rest, el) @src.terminate warning("Found no end tag for '#{el.value}' - auto-closing it") end else parse_raw_html(el, &method(:handle_kramdown_html_tag)) end @src.scan(TRAILING_WHITESPACE) unless (@tree.type == :html_element && @tree.options[:content_model] == :raw) end end
Normalize the link identifier.
# File lib/kramdown/parser/kramdown/link.rb, line 14
# Return a copy of +id+ in which every run of whitespace (including line breaks) is
# collapsed into a single space and all characters are downcased.
def normalize_link_id(id)
  collapsed = id.gsub(/\s+/, ' ')
  collapsed.downcase
end
The source string provided on initialization is parsed into the @root element.
# File lib/kramdown/parser/kramdown.rb, line 86
# Run the complete parse: configure the parser, parse the adapted source into @root,
# post-process the tree, replace abbreviations and finally post-process the content of
# every footnote that has a marker.
def parse
  configure_parser
  parse_blocks(@root, adapt_source(source))
  update_tree(@root)
  replace_abbreviations(@root)
  @footnotes.each do |_name, data|
    next unless data[:marker]
    update_tree(data[:marker].value)
  end
end
Parse the abbreviation definition at the current location.
# File lib/kramdown/parser/kramdown/abbreviation.rb, line 16
# Consume the matched abbreviation definition and store its stripped text under the
# abbreviation ID in the :abbrev_defs option of the root element. A warning is issued when
# an existing definition is overwritten. Always returns true.
def parse_abbrev_definition
  @src.pos += @src.matched_size
  abbrev_id = @src[1]
  abbrev_text = @src[2].strip
  definitions = @root.options[:abbrev_defs]
  if definitions[abbrev_id]
    warning("Duplicate abbreviation ID '#{abbrev_id}' - overwriting")
  end
  definitions[abbrev_id] = abbrev_text
  @tree.children << Element.new(:eob, :abbrev_def)
  true
end
Parse the string str and extract all attributes and add all found attributes to the hash opts.
# File lib/kramdown/parser/kramdown/extensions.rb, line 17
# Scan +str+ for attribute definitions and merge them into the hash +opts+:
#
# * references are collected in the array opts[:refs]
# * class names are appended (space separated) to opts[IAL_CLASS_ATTR]
# * an ID is stored under 'id'
# * key/value pairs are stored under their key, with escaped closing braces and escaped
#   quote characters unescaped
#
# Nothing happens for a blank +str+; a warning is issued if no attribute could be found.
def parse_attribute_list(str, opts)
  return if str.strip.empty?

  attrs = str.scan(ALD_TYPE_ANY)
  attrs.each do |key, sep, val, ref, id_and_or_class, _, _|
    if ref
      opts[:refs] ||= []
      opts[:refs] << ref
      next
    end

    unless id_and_or_class
      val.gsub!(/\\(\}|#{sep})/, "\\1")
      opts[key] = val
      next
    end

    id_and_or_class.scan(ALD_TYPE_ID_OR_CLASS).each do |id_attr, class_attr|
      if class_attr
        classes = (opts[IAL_CLASS_ATTR] || '') << " #{class_attr}"
        classes.lstrip!
        opts[IAL_CLASS_ATTR] = classes
      else
        opts['id'] = id_attr
      end
    end
  end
  warning("No or invalid attributes found in IAL/ALD content: #{str}") if attrs.empty?
end
Parse the Atx header at the current location.
# File lib/kramdown/parser/kramdown/header.rb, line 38
# Try to parse an atx style header. Returns false (without consuming input) when we are
# not after a block boundary or when the header text is empty; otherwise the header
# element is appended to the tree and true is returned.
def parse_atx_header
  return false unless after_block_boundary?

  @src.check(ATX_HEADER_MATCH)
  level = @src[1]
  text = @src[2].to_s.strip
  id = @src[3]
  return false if text.empty?

  @src.pos += @src.matched_size
  header = new_block_el(:header, nil, nil, :level => level.length, :raw_text => text)
  add_text(text, header)
  header.attr['id'] = id if id
  @tree.children << header
  true
end
Parse the autolink at the current location.
# File lib/kramdown/parser/kramdown/autolink.rb, line 24
# Consume the matched autolink and append an :a element for it. When the second capture
# group is not set, the link target gets a "mailto:" prefix; the link text never shows a
# leading "mailto:".
def parse_autolink
  @src.pos += @src.matched_size
  href = if @src[2].nil?
    "mailto:#{@src[1]}"
  else
    @src[1]
  end
  link = Element.new(:a, nil, {'href' => href})
  link_text = @src[1].sub(/^mailto:/, '')
  add_text(link_text, link)
  @tree.children << link
end
Parse the blank line at the current position.
# File lib/kramdown/parser/kramdown/blank_line.rb, line 16
# Consume the matched blank line(s). The text is appended to a directly preceding :blank
# element if there is one, otherwise a new :blank element is added. Always returns true.
def parse_blank_line
  @src.pos += @src.matched_size
  previous = @tree.children.last
  if previous && previous.type == :blank
    previous.value << @src.matched
  else
    @tree.children << new_block_el(:blank, @src.matched)
  end
  true
end
Parse one of the block extensions (ALD, block IAL or generic extension) at the current location.
# File lib/kramdown/parser/kramdown/extensions.rb, line 150
# Try the three block-level extension forms in order: an ALD, a generic extension tag and
# a block IAL. Returns false if none of them starts at the current position.
def parse_block_extensions
  # Attribute list definition: merge the attributes into the ALD stored under its name.
  if @src.scan(ALD_START)
    ald = (@alds[@src[1]] ||= Utils::OrderedHash.new)
    parse_attribute_list(@src[2], ald)
    @tree.children << Element.new(:eob, :ald)
    return true
  end

  # Generic extension: delegated completely, including the return value.
  return parse_extension_start_tag(:block) if @src.check(EXT_BLOCK_START)

  return false unless @src.scan(IAL_BLOCK_START)

  # Block IAL: attach to the preceding element unless that is a :blank or :eob element, in
  # which case the IAL is remembered in @block_ial.
  target = @tree.children.last
  if target && target.type != :blank && target.type != :eob
    parse_attribute_list(@src[1], target.options[:ial] ||= Utils::OrderedHash.new)
    @tree.children << Element.new(:eob, :ial) unless @src.check(IAL_BLOCK_START)
  else
    parse_attribute_list(@src[1], @block_ial = Utils::OrderedHash.new)
  end
  true
end
Parse the HTML at the current position as block-level HTML.
# File lib/kramdown/parser/kramdown/html.rb, line 66 def parse_block_html if result = @src.scan(HTML_COMMENT_RE) @tree.children << Element.new(:xml_comment, result, nil, :category => :block) @src.scan(TRAILING_WHITESPACE) true elsif result = @src.scan(HTML_INSTRUCTION_RE) @tree.children << Element.new(:xml_pi, result, nil, :category => :block) @src.scan(TRAILING_WHITESPACE) true else if result = @src.check(/^#{OPT_SPACE}#{HTML_TAG_RE}/) && !HTML_SPAN_ELEMENTS.include?(@src[1].downcase) @src.pos += @src.matched_size handle_html_start_tag(&method(:handle_kramdown_html_tag)) Kramdown::Parser::Html::ElementConverter.convert(@root, @tree.children.last) if @options[:html_to_native] true elsif result = @src.check(/^#{OPT_SPACE}#{HTML_TAG_CLOSE_RE}/) && !HTML_SPAN_ELEMENTS.include?(@src[1].downcase) name = @src[1].downcase if @tree.type == :html_element && @tree.value == name @src.pos += @src.matched_size throw :stop_block_parsing, :found else false end else false end end end
Parse the math block at the current location.
# File lib/kramdown/parser/kramdown/math.rb, line 18
# Try to parse a math block. Returns false when we are not after a block boundary, when
# the first capture group is set, or when the block is not followed by a block boundary
# (the scanner position is restored in that case). On success a :math element of category
# :block is appended and true is returned.
def parse_block_math
  return false unless after_block_boundary?

  if @src[1]
    @src.scan(/^#{OPT_SPACE}\\/) if @src[3]
    return false
  end

  start_pos = @src.pos
  @src.pos += @src.matched_size
  # Must be read before the boundary check, which performs a new match on the scanner.
  data = @src[2]
  unless before_block_boundary?
    @src.pos = start_pos
    return false
  end

  @tree.children << new_block_el(:math, data, nil, :category => :block)
  true
end
Parse the blockquote at the current location.
# File lib/kramdown/parser/kramdown/blockquote.rb, line 20
# Collect all lines belonging to the blockquote (until LAZY_END of the parser class
# matches), remove the blockquote markers and parse the remaining text as the block-level
# content of a new :blockquote element. Always returns true.
def parse_blockquote
  quoted = @src.scan(PARAGRAPH_MATCH)
  until @src.match?(self.class::LAZY_END)
    quoted << @src.scan(PARAGRAPH_MATCH)
  end
  quoted.gsub!(BLOCKQUOTE_START, '')

  blockquote = new_block_el(:blockquote)
  @tree.children << blockquote
  parse_blocks(blockquote, quoted)
  true
end
Parse the indented codeblock at the current location.
# File lib/kramdown/parser/kramdown/codeblock.rb, line 22
# Consume the indented code block, join lines that start with at most three spaces onto
# the preceding line, strip the indentation and append a :codeblock element.
# Always returns true.
def parse_codeblock
  raw = @src.scan(self.class::CODEBLOCK_MATCH)
  code = raw.gsub(/\n( {0,3}\S)/, ' \1').gsub(INDENT, '')
  @tree.children << new_block_el(:codeblock, code)
  true
end
Parse the fenced codeblock at the current location.
# File lib/kramdown/parser/kramdown/codeblock.rb, line 36
# Try to parse a fenced code block. Returns false if FENCED_CODEBLOCK_MATCH does not match
# at the current position. Otherwise a :codeblock element is appended (with a
# "language-<lang>" class if a language was specified) and true is returned.
def parse_codeblock_fenced
  return false unless @src.check(FENCED_CODEBLOCK_MATCH)

  @src.pos += @src.matched_size
  codeblock = new_block_el(:codeblock, @src[3])
  lang = @src[2].to_s.strip
  unless lang.empty?
    codeblock.attr['class'] = "language-#{lang}"
  end
  @tree.children << codeblock
  true
end
Parse the codespan at the current scanner location.
# File lib/kramdown/parser/kramdown/codespan.rb, line 16
# Parse a code span that is delimited by one or more backticks. If no closing delimiter is
# found, or a single backtick is surrounded by whitespace, the delimiter is added as text.
def parse_codespan
  delimiter = @src.scan(CODESPAN_DELIMITER)
  single_tick = (delimiter.length == 1)
  restart_pos = @src.pos

  # A lone backtick with whitespace on both sides is not a code span.
  if single_tick && @src.pre_match =~ /\s\Z/ && @src.match?(/\s/)
    add_text(delimiter)
    return
  end

  code = @src.scan_until(/#{delimiter}/)
  if code
    code.sub!(/#{delimiter}\Z/, '')
    # With multi-backtick delimiters one space of padding is removed on each side.
    unless single_tick
      code = code[1..-1] if code[0..0] == ' '
      code = code[0..-2] if code[-1..-1] == ' '
    end
    @tree.children << Element.new(:codespan, code)
  else
    @src.pos = restart_pos
    add_text(delimiter)
  end
end
Parse the definition list at the current location.
# File lib/kramdown/parser/kramdown/list.rb, line 134
# Try to parse a definition list. The terms are taken from the paragraph that directly
# precedes the current position (optionally separated from it by one blank line).
# Returns false if there is no such paragraph, true otherwise.
def parse_definition_list
  children = @tree.children
  # A definition needs a preceding paragraph, or a paragraph followed by a single "\n"
  # blank element.
  if !children.last || (children.length == 1 && children.last.type != :p ) ||
      (children.length >= 2 && children[-1].type != :p && (children[-1].type != :blank || children[-1].value != "\n" || children[-2].type != :p))
    return false
  end

  first_as_para = false
  deflist = new_block_el(:dl)
  # Remove the paragraph (and a separating blank element) from the tree; each of its lines
  # becomes one :dt element.
  para = @tree.children.pop
  if para.type == :blank
    para = @tree.children.pop
    first_as_para = true
  end
  para.children.first.value.split(/\n/).each do |term|
    el = Element.new(:dt)
    el.children << Element.new(:raw_text, term)
    deflist.children << el
  end
  deflist.options[:ial] = para.options[:ial]

  item = nil
  content_re, lazy_re, indent_re = nil
  def_start_re = DEFINITION_LIST_START
  last_is_blank = false
  # Collect the raw text of every definition in item.value.
  while !@src.eos?
    if @src.scan(def_start_re)
      item = Element.new(:dd)
      item.options[:first_as_para] = first_as_para
      item.value, indentation, content_re, lazy_re, indent_re = parse_first_list_line(@src[1].length, @src[2])
      deflist.children << item

      # Extract an IAL at the start of the definition and remove it from the text.
      item.value.sub!(self.class::LIST_ITEM_IAL) do |match|
        parse_attribute_list($1, item.options[:ial] ||= {})
        ''
      end

      # Following definition markers must not be indented more than this one.
      def_start_re = /^( {0,#{[3, indentation - 1].min}}:)([\t| ].*?\n)/
      first_as_para = false
      last_is_blank = false
    elsif @src.check(EOB_MARKER)
      break
    elsif (result = @src.scan(content_re)) || (!last_is_blank && (result = @src.scan(lazy_re)))
      # Leading tabs are expanded to four spaces each before the indentation is removed.
      result.sub!(/^(\t+)/) { " "*($1 ? 4*$1.length : 0) }
      result.sub!(indent_re, '')
      item.value << result
      first_as_para = false
      last_is_blank = false
    elsif result = @src.scan(BLANK_LINE)
      first_as_para = true
      item.value << result
      last_is_blank = true
    else
      break
    end
  end

  # Parse the collected text of every :dd element as block-level content.
  last = nil
  deflist.children.each do |it|
    next if it.type == :dt

    parse_blocks(it, it.value)
    it.value = nil
    next if it.children.size == 0

    # Remember a trailing blank element of the item; only the one of the last item is used.
    if it.children.last.type == :blank
      last = it.children.pop
    else
      last = nil
    end

    # Mark the first paragraph as transparent unless the item was flagged :first_as_para.
    if it.children.first && it.children.first.type == :p && !it.options.delete(:first_as_para)
      it.children.first.children.first.value << "\n" if it.children.size > 1
      it.children.first.options[:transparent] = true
    end
  end

  # Merge with a directly preceding definition list (possibly separated by a blank element).
  if @tree.children.length >= 1 && @tree.children.last.type == :dl
    @tree.children[-1].children.concat(deflist.children)
  elsif @tree.children.length >= 2 && @tree.children[-1].type == :blank && @tree.children[-2].type == :dl
    @tree.children.pop
    @tree.children[-1].children.concat(deflist.children)
  else
    @tree.children << deflist
  end

  @tree.children << last if !last.nil?

  true
end
Parse the emphasis at the current location.
# File lib/kramdown/parser/kramdown/emphasis.rb, line 16 def parse_emphasis result = @src.scan(EMPHASIS_START) element = (result.length == 2 ? :strong : :em) type = result[0..0] reset_pos = @src.pos if (type == '_' && @src.pre_match =~ /[[:alpha:]]\z/ && @src.check(/[[:alpha:]]/)) || @src.check(/\s/) || @tree.type == element || @stack.any? {|el, _| el.type == element} add_text(result) return end sub_parse = lambda do |delim, elem| el = Element.new(elem) stop_re = /#{Regexp.escape(delim)}/ found = parse_spans(el, stop_re) do (@src.pre_match[-1, 1] !~ /\s/) && (elem != :em || !@src.match?(/#{Regexp.escape(delim*2)}(?!#{Regexp.escape(delim)})/)) && (type != '_' || !@src.match?(/#{Regexp.escape(delim)}[[:alpha:]]/)) && el.children.size > 0 end [found, el, stop_re] end found, el, stop_re = sub_parse.call(result, element) if !found && element == :strong && @tree.type != :em @src.pos = reset_pos - 1 found, el, stop_re = sub_parse.call(type, :em) end if found @src.scan(stop_re) @tree.children << el else @src.pos = reset_pos add_text(result) end end
Parse the EOB marker at the current location.
# File lib/kramdown/parser/kramdown/eob.rb, line 16
# Consume the matched end-of-block marker and append an :eob element. Always returns true.
def parse_eob_marker
  @src.pos += @src.matched_size
  marker = new_block_el(:eob)
  @tree.children << marker
  true
end
Parse the backslash-escaped character at the current location.
# File lib/kramdown/parser/kramdown/escaped_chars.rb, line 16
# Consume the matched escape sequence and add the content of the first capture group as
# text.
def parse_escaped_chars
  @src.pos += @src.matched_size
  escaped = @src[1]
  add_text(escaped)
end
Parse the generic extension at the current point. The parameter type can either be :block or :span depending whether we parse a block or span extension tag.
# File lib/kramdown/parser/kramdown/extensions.rb, line 55 def parse_extension_start_tag(type) orig_pos = @src.pos @src.pos += @src.matched_size error_block = lambda do |msg| warning(msg) @src.pos = orig_pos add_text(@src.getch) if type == :span false end if @src[4] || @src.matched == '{:/}' name = (@src[4] ? "for '#{@src[4]}' " : '') return error_block.call("Invalid extension stop tag #{name}found - ignoring it") end ext = @src[1] opts = {} body = nil parse_attribute_list(@src[2] || '', opts) if !@src[3] stop_re = (type == :block ? /#{EXT_BLOCK_STOP_STR % ext}/ : /#{EXT_STOP_STR % ext}/) if result = @src.scan_until(stop_re) body = result.sub!(stop_re, '') body.chomp! if type == :block else return error_block.call("No stop tag for extension '#{ext}' found - ignoring it") end end if !handle_extension(ext, opts, body, type) error_block.call("Invalid extension with name '#{ext}' specified - ignoring it") else true end end
Used for parsing the first line of a list item or a definition, i.e. the line with list item marker or the definition marker.
# File lib/kramdown/parser/kramdown/list.rb, line 23 def parse_first_list_line(indentation, content) if content =~ self.class::LIST_ITEM_IAL_CHECK indentation = 4 else while content =~ /^ *\t/ temp = content.scan(/^ */).first.length + indentation content.sub!(/^( *)(\t+)/) {$1 << " "*(4 - (temp % 4) + ($2.length - 1)*4)} end indentation += content.scan(/^ */).first.length end content.sub!(/^\s*/, '') indent_re = /^ {#{indentation}}/ content_re = /^(?:(?:\t| {4}){#{indentation / 4}} {#{indentation % 4}}|(?:\t| {4}){#{indentation / 4 + 1}}).*\S.*\n/ lazy_re = /(?!^ {0,#{[indentation, 3].min}}(?:#{IAL_BLOCK}|#{LAZY_END_HTML_STOP}|#{LAZY_END_HTML_START})).*\S.*\n/ [content, indentation, content_re, lazy_re, indent_re] end
Parse the footnote definition at the current location.
# File lib/kramdown/parser/kramdown/footnote.rb, line 20
# Consume the matched footnote definition, parse its de-indented body as block-level
# content of a :footnote_def element and register it under the footnote name. A warning
# is issued when an existing definition is overwritten. Always returns true.
def parse_footnote_definition
  @src.pos += @src.matched_size

  footnote = Element.new(:footnote_def)
  parse_blocks(footnote, @src[2].gsub(INDENT, ''))
  name = @src[1]
  if @footnotes[name]
    warning("Duplicate footnote name '#{name}' - overwriting")
  end
  @footnotes[name] = {:content => footnote}
  @tree.children << Element.new(:eob, :footnote_def)
  true
end
Parse the footnote marker at the current location.
# File lib/kramdown/parser/kramdown/footnote.rb, line 36 def parse_footnote_marker @src.pos += @src.matched_size fn_def = @footnotes[@src[1]] if fn_def valid = fn_def[:marker] && fn_def[:stack][0..-2].zip(fn_def[:stack][1..-1]).all? do |par, child| par.children.include?(child) end if !fn_def[:marker] || !valid fn_def[:marker] = Element.new(:footnote, fn_def[:content], nil, :name => @src[1]) fn_def[:stack] = [@stack.map {|s| s.first}, @tree, fn_def[:marker]].flatten.compact @tree.children << fn_def[:marker] else warning("Footnote marker '#{@src[1]}' already appeared in document, ignoring newly found marker") add_text(@src.matched) end else warning("Footnote definition for '#{@src[1]}' not found") add_text(@src.matched) end end
Parse the horizontal rule at the current location.
# File lib/kramdown/parser/kramdown/horizontal_rule.rb, line 16
# Consume the matched horizontal rule and append an :hr element. Always returns true.
def parse_horizontal_rule
  @src.pos += @src.matched_size
  rule = new_block_el(:hr)
  @tree.children << rule
  true
end
Parse the HTML entity at the current location.
# File lib/kramdown/parser/kramdown/html_entity.rb, line 16
# Consume the matched HTML entity and append an :entity element for it. The entity is
# looked up by the first capture group, by the second one converted with to_i, or by the
# third one converted with hex. If the lookup raises a Kramdown::Error, an 'amp' entity
# is added followed by the matched text without its first character.
def parse_html_entity
  @src.pos += @src.matched_size
  begin
    name_or_code = @src[1] || (@src[2] && @src[2].to_i) || @src[3].hex
    entity = ::Kramdown::Utils::Entities.entity(name_or_code)
    @tree.children << Element.new(:entity, entity, nil, :original => @src.matched)
  rescue ::Kramdown::Error
    ampersand = ::Kramdown::Utils::Entities.entity('amp')
    @tree.children << Element.new(:entity, ampersand)
    add_text(@src.matched[1..-1])
  end
end
Parse the inline math at the current location.
# File lib/kramdown/parser/kramdown/math.rb, line 43
# Consume the matched inline math and append a :math element of category :span whose value
# is the first capture group.
def parse_inline_math
  @src.pos += @src.matched_size
  math = Element.new(:math, @src[1], nil, :category => :span)
  @tree.children << math
end
Parse the line break at the current location.
# File lib/kramdown/parser/kramdown/line_break.rb, line 16
# Consume the matched line break and append a :br element.
def parse_line_break
  @src.pos += @src.matched_size
  line_break = Element.new(:br)
  @tree.children << line_break
end
Parse the link at the current scanner position. This method is used to parse normal links as well as image links.
# File lib/kramdown/parser/kramdown/link.rb, line 54 def parse_link result = @src.scan(LINK_START) reset_pos = @src.pos link_type = (result =~ /^!/ ? :img : :a) # no nested links allowed if link_type == :a && (@tree.type == :img || @tree.type == :a || @stack.any? {|t,s| t && (t.type == :img || t.type == :a)}) add_text(result) return end el = Element.new(link_type) count = 1 found = parse_spans(el, LINK_BRACKET_STOP_RE) do count = count + (@src[1] ? -1 : 1) count - el.children.select {|c| c.type == :img}.size == 0 end if !found || (link_type == :a && el.children.empty?) @src.pos = reset_pos add_text(result) return end alt_text = extract_string(reset_pos...@src.pos, @src) @src.scan(LINK_BRACKET_STOP_RE) # reference style link or no link url if @src.scan(LINK_INLINE_ID_RE) || !@src.check(/\(/) link_id = normalize_link_id(@src[1] || alt_text) if @link_defs.has_key?(link_id) add_link(el, @link_defs[link_id].first, @link_defs[link_id].last, alt_text) else warning("No link definition for link ID '#{link_id}' found") @src.pos = reset_pos add_text(result) end return end # link url in parentheses if @src.scan(/\(<(.*?)>/) link_url = @src[1] if @src.scan(/\)/) add_link(el, link_url, nil, alt_text) return end else link_url = '' nr_of_brackets = 0 while temp = @src.scan_until(LINK_PAREN_STOP_RE) link_url << temp if @src[2] nr_of_brackets -= 1 break if nr_of_brackets == 0 elsif @src[1] nr_of_brackets += 1 else break end end link_url = link_url[1..-2] link_url.strip! if nr_of_brackets == 0 add_link(el, link_url, nil, alt_text) return end end if @src.scan(LINK_INLINE_TITLE_RE) add_link(el, link_url, @src[2], alt_text) else @src.pos = reset_pos add_text(result) end end
Parse the link definition at the current location.
# File lib/kramdown/parser/kramdown/link.rb, line 21
# Consume the matched link definition and store [url, title] under the normalized link ID.
# A warning is issued when an existing definition is overwritten. Always returns true.
def parse_link_definition
  @src.pos += @src.matched_size
  link_id = normalize_link_id(@src[1])
  link_url = @src[2] || @src[3]
  link_title = @src[5]
  if @link_defs[link_id]
    warning("Duplicate link ID '#{link_id}' - overwriting")
  end
  @link_defs[link_id] = [link_url, link_title]
  @tree.children << Element.new(:eob, :link_def)
  true
end
Parse the ordered or unordered list at the current location.
# File lib/kramdown/parser/kramdown/list.rb, line 47
# Parse an unordered (:ul) or ordered (:ol) list. First the raw text of every list item is
# collected, then each item text is parsed as block-level content. Always returns true.
def parse_list
  type, list_start_re = (@src.check(LIST_START_UL) ? [:ul, LIST_START_UL] : [:ol, LIST_START_OL])
  list = new_block_el(type)

  item = nil
  content_re, lazy_re, indent_re = nil
  eob_found = false
  nested_list_found = false
  last_is_blank = false
  while !@src.eos?
    if last_is_blank && @src.check(HR_START)
      # A horizontal rule after a blank line ends the list.
      break
    elsif @src.scan(EOB_MARKER)
      eob_found = true
      break
    elsif @src.scan(list_start_re)
      item = Element.new(:li)
      item.value, indentation, content_re, lazy_re, indent_re = parse_first_list_line(@src[1].length, @src[2])
      list.children << item

      # Extract an IAL at the start of the item and remove it from the text.
      item.value.sub!(self.class::LIST_ITEM_IAL) do |match|
        parse_attribute_list($1, item.options[:ial] ||= {})
        ''
      end

      # Following item markers must be of the same list type and not indented more.
      list_start_re = (type == :ul ? /^( {0,#{[3, indentation - 1].min}}[+*-])([\t| ].*?\n)/ :
                       /^( {0,#{[3, indentation - 1].min}}\d+\.)([\t| ].*?\n)/)
      nested_list_found = (item.value =~ LIST_START)
      last_is_blank = false
    elsif (result = @src.scan(content_re)) || (!last_is_blank && (result = @src.scan(lazy_re)))
      # Leading tabs are expanded to four spaces each before the indentation is removed.
      result.sub!(/^(\t+)/) { " "*($1 ? 4*$1.length : 0) }
      result.sub!(indent_re, '')
      # Insert an EOB marker line ("^") before the first nested list start.
      if !nested_list_found && result =~ LIST_START
        item.value << "^\n"
        nested_list_found = true
      end
      item.value << result
      last_is_blank = false
    elsif result = @src.scan(BLANK_LINE)
      nested_list_found = true
      last_is_blank = true
      item.value << result
    else
      break
    end
  end

  @tree.children << list

  last = nil
  list.children.each do |it|
    # Parse into a temporary element and move the resulting children to the item.
    temp = Element.new(:temp)
    parse_blocks(temp, it.value)
    it.children = temp.children
    it.value = nil
    next if it.children.size == 0

    # Handle the case where an EOB marker is inserted by a block IAL for the first paragraph
    it.children.delete_at(1) if it.children.first.type == :p &&
      it.children.length >= 2 && it.children[1].type == :eob && it.children.first.options[:ial]

    # Decide whether the first paragraph of the item is marked as transparent.
    if it.children.first.type == :p &&
        (it.children.length < 2 || it.children[1].type != :blank ||
         (it == list.children.last && it.children.length == 2 && !eob_found)) &&
        (list.children.last != it || list.children.size == 1 ||
         list.children[0..-2].any? {|cit| !cit.children.first || cit.children.first.type != :p || cit.children.first.options[:transparent]})
      it.children.first.children.first.value << "\n" if it.children.size > 1 && it.children[1].type != :blank
      it.children.first.options[:transparent] = true
    end

    # Remember a trailing blank element of the item; only the one of the last item is used.
    if it.children.last.type == :blank
      last = it.children.pop
    else
      last = nil
    end
  end

  @tree.children << last if !last.nil? && !eob_found

  true
end
Parse the paragraph at the current location.
# File lib/kramdown/parser/kramdown/paragraph.rb, line 30
# Collect all lines of the paragraph (until PARAGRAPH_END of the parser class matches).
# The text is appended to a directly preceding :p element if there is one; otherwise a new
# :p element with a text child of type @text_type is added. Always returns true.
def parse_paragraph
  text = @src.scan(PARAGRAPH_MATCH)
  until @src.match?(self.class::PARAGRAPH_END)
    text << @src.scan(PARAGRAPH_MATCH)
  end
  text.chomp!

  previous = @tree.children.last
  if previous && previous.type == :p
    previous.children.first.value << "\n" << text
  else
    paragraph = new_block_el(:p)
    @tree.children << paragraph
    text.lstrip!
    paragraph.children << Element.new(@text_type, text)
  end
  true
end
Parse the Setext header at the current location.
# File lib/kramdown/parser/kramdown/header.rb, line 19
# Try to parse a Setext style header. Returns false (without consuming input) when we are
# not after a block boundary. Otherwise a :header element is appended, with level 2 for a
# '-' underline and level 1 for anything else, and true is returned.
def parse_setext_header
  return false unless after_block_boundary?

  @src.pos += @src.matched_size
  text = @src[1].strip
  id = @src[2]
  underline = @src[3]
  header_level = (underline == '-' ? 2 : 1)
  header = new_block_el(:header, nil, nil, :level => header_level, :raw_text => text)
  add_text(text, header)
  header.attr['id'] = id if id
  @tree.children << header
  true
end
Parse the smart quotes at the current location.
# File lib/kramdown/parser/kramdown/smart_quotes.rb, line 156
# Find the first smart quote rule whose regexp matches at the current position (consuming
# the match) and apply its substitutions: an Integer adds the capture group with that
# index as text, anything else adds a :smart_quote element.
def parse_smart_quotes
  rule = SQ_RULES.find {|reg, _subst| @src.scan(reg)}
  rule[1].each do |subst|
    if subst.kind_of?(Integer)
      add_text(@src[subst])
    else
      # The last character of the substitution name selects the capture group that is
      # used for the lookup in SQ_SUBSTS.
      group_index = subst.to_s[-1,1].to_i
      val = SQ_SUBSTS[[subst, @src[group_index]]] || subst
      @tree.children << Element.new(:smart_quote, val)
    end
  end
end
Parse the extension span at the current location.
# File lib/kramdown/parser/kramdown/extensions.rb, line 177
# Parse a span-level extension or span IAL. A span IAL is applied to the preceding element
# unless there is none or it is a :text element. If nothing valid is found, one character
# is consumed and added as text.
def parse_span_extensions
  return parse_extension_start_tag(:span) if @src.check(EXT_SPAN_START)
  return add_text(@src.getch) unless @src.check(IAL_SPAN_START)

  target = @tree.children.last
  if target && target.type != :text
    @src.pos += @src.matched_size
    attr = Utils::OrderedHash.new
    parse_attribute_list(@src[1], attr)
    update_ial_with_ial(target.options[:ial] ||= Utils::OrderedHash.new, attr)
    update_attr_with_ial(target.attr, attr)
  else
    warning("Found span IAL after text - ignoring it")
    add_text(@src.getch)
  end
end
Parse the HTML at the current position as span-level HTML.
# File lib/kramdown/parser/kramdown/html.rb, line 101 def parse_span_html if result = @src.scan(HTML_COMMENT_RE) @tree.children << Element.new(:xml_comment, result, nil, :category => :span) elsif result = @src.scan(HTML_INSTRUCTION_RE) @tree.children << Element.new(:xml_pi, result, nil, :category => :span) elsif result = @src.scan(HTML_TAG_CLOSE_RE) warning("Found invalidly used HTML closing tag for '#{@src[1]}'") add_text(result) elsif result = @src.scan(HTML_TAG_RE) tag_name = @src[1].downcase if HTML_BLOCK_ELEMENTS.include?(tag_name) warning("Found block HTML tag '#{tag_name}' in span-level text") add_text(result) return end attrs = Utils::OrderedHash.new @src[2].scan(HTML_ATTRIBUTE_RE).each {|name,sep,val| attrs[name.downcase] = (val || '').gsub(/\n+/, ' ')} do_parsing = (HTML_CONTENT_MODEL[tag_name] == :raw || @tree.options[:content_model] == :raw ? false : @options[:parse_span_html]) if val = HTML_MARKDOWN_ATTR_MAP[attrs.delete('markdown')] if val == :block warning("Cannot use block-level parsing in span-level HTML tag - using default mode") elsif val == :span do_parsing = true elsif val == :default do_parsing = HTML_CONTENT_MODEL[tag_name] != :raw elsif val == :raw do_parsing = false end end el = Element.new(:html_element, tag_name, attrs, :category => :span, :content_model => (do_parsing ? :span : :raw), :is_closed => !!@src[4]) @tree.children << el stop_re = /<\/#{Regexp.escape(tag_name)}\s*>/ if !@src[4] && HTML_ELEMENTS_WITHOUT_BODY.include?(el.value) warning("The HTML tag '#{el.value}' cannot have any content - auto-closing it") elsif !@src[4] if parse_spans(el, stop_re, (do_parsing ? nil : [:span_html])) @src.scan(stop_re) else warning("Found no end tag for '#{el.value}' - auto-closing it") add_text(@src.rest, el) @src.terminate end end Kramdown::Parser::Html::ElementConverter.convert(@root, el) if @options[:html_to_native] else add_text(@src.getch) end end
Parse the table at the current location.
# File lib/kramdown/parser/kramdown/table.rb, line 24
# Try to parse a table. Returns false (with the scanner position restored) if we are not
# at a block boundary before and after the table, if a line lacks a pipe character outside
# of code spans, or if the table has no body. Otherwise the :table element is appended and
# true is returned.
def parse_table
  return false if !after_block_boundary?

  orig_pos = @src.pos
  table = new_block_el(:table, nil, nil, :alignment => [])
  leading_pipe = (@src.check(TABLE_LINE) =~ /^\s*\|/)
  # Skip a separator line at the start of the table.
  @src.scan(TABLE_SEP_LINE)

  rows = []
  has_footer = false
  columns = 0

  # Moves the rows collected so far into a new container element of the given type
  # (:thead, :tbody or :tfoot). After a footer separator was seen, a :tbody container is
  # only created when forced.
  add_container = lambda do |type, force|
    if !has_footer || type != :tbody || force
      cont = Element.new(type)
      cont.children, rows = rows, []
      table.children << cont
    end
  end

  while !@src.eos?
    break if !@src.check(TABLE_LINE)
    if @src.scan(TABLE_SEP_LINE) && !rows.empty?
      if table.options[:alignment].empty? && !has_footer
        # The first separator line after some rows ends the header and defines the
        # column alignments.
        add_container.call(:thead, false)
        table.options[:alignment] = @src[1].scan(TABLE_HSEP_ALIGN).map do |left, right|
          (left.empty? && right.empty? && :default) || (right.empty? && :left) || (left.empty? && :right) || :center
        end
      else # treat as normal separator line
        add_container.call(:tbody, false)
      end
    elsif @src.scan(TABLE_FSEP_LINE)
      add_container.call(:tbody, true) if !rows.empty?
      has_footer = true
    elsif @src.scan(TABLE_ROW_LINE)
      trow = Element.new(:tr)

      # parse possible code spans on the line and correctly split the line into cells
      env = save_env
      cells = []
      # Splitting on <code> elements puts them at the odd indices; they are appended
      # unchanged to the current cell.
      @src[1].split(/(<code.*?>.*?<\/code>)/).each_with_index do |str, i|
        if i % 2 == 1
          (cells.empty? ? cells : cells.last) << str
        else
          reset_env(:src => StringScanner.new(str))
          root = Element.new(:root)
          parse_spans(root, nil, [:codespan])

          root.children.each do |c|
            if c.type == :raw_text
              # Only on Ruby 1.9: f, *l = c.value.split(/(?<!\\)\|/).map {|t| t.gsub(/\\\|/, '|')}
              # Split on unescaped pipes; escaped pipes are turned into literal pipes.
              f, *l = c.value.split(/\\\|/, -1).map {|t| t.split(/\|/, -1)}.inject([]) do |memo, t|
                memo.last << "|#{t.shift}" if memo.size > 0
                memo.concat(t)
              end
              (cells.empty? ? cells : cells.last) << f
              cells.concat(l)
            else
              # Rebuild the source form of the code span with a delimiter that is one
              # backtick longer than the longest backtick run in its value.
              delim = (c.value.scan(/`+/).max || '') + '`'
              tmp = "#{delim}#{' ' if delim.size > 1}#{c.value}#{' ' if delim.size > 1}#{delim}"
              (cells.empty? ? cells : cells.last) << tmp
            end
          end
        end
      end
      restore_env(env)

      # Drop the empty cells produced by a leading and a trailing pipe.
      cells.shift if leading_pipe && cells.first.strip.empty?
      cells.pop if cells.last.strip.empty?
      cells.each do |cell_text|
        tcell = Element.new(:td)
        tcell.children << Element.new(:raw_text, cell_text.strip)
        trow.children << tcell
      end
      columns = [columns, cells.length].max
      rows << trow
    else
      break
    end
  end

  if !before_block_boundary?
    @src.pos = orig_pos
    return false
  end

  # Parse all lines of the table with the code span parser
  env = save_env
  reset_env(:src => StringScanner.new(extract_string(orig_pos...(@src.pos-1), @src)))
  root = Element.new(:root)
  parse_spans(root, nil, [:codespan])
  restore_env(env)

  # Check if each line has at least one unescaped pipe that is not inside a code span
  # (NOTE(review): relies on TABLE_PIPE_CHECK, which is defined elsewhere)
  pipe_on_line = false
  while (c = root.children.shift)
    lines = c.value.split(/\n/)
    if c.type == :codespan
      if lines.size > 2 || (lines.size == 2 && !pipe_on_line)
        break
      elsif lines.size == 2 && pipe_on_line
        pipe_on_line = false
      end
    else
      break if lines.size > 1 && !pipe_on_line && lines.first !~ /^#{TABLE_PIPE_CHECK}/
      pipe_on_line = (lines.size > 1 ? false : pipe_on_line) || (lines.last =~ /^#{TABLE_PIPE_CHECK}/)
    end
  end
  # The assignment always yields a truthy Integer, so 'and return false' is always run.
  @src.pos = orig_pos and return false if !pipe_on_line

  # The remaining rows form the footer if a footer separator was seen, else the body.
  add_container.call(has_footer ? :tfoot : :tbody, false) if !rows.empty?

  if !table.children.any? {|el| el.type == :tbody}
    warning("Found table without body - ignoring it")
    @src.pos = orig_pos
    return false
  end

  # adjust all table rows to have equal number of columns, same for alignment defs
  table.children.each do |kind|
    kind.children.each do |row|
      (columns - row.children.length).times do
        row.children << Element.new(:td)
      end
    end
  end
  if table.options[:alignment].length > columns
    table.options[:alignment] = table.options[:alignment][0...columns]
  else
    table.options[:alignment] += [:default] * (columns - table.options[:alignment].length)
  end

  @tree.children << table

  true
end
Parse the typographic symbols at the current location.
# File lib/kramdown/parser/kramdown/typographic_symbol.rb, line 21
#
# Consume the text last matched on @src and append its replacement to
# @tree.children.
#
# A Symbol found in TYPOGRAPHIC_SYMS_SUBST becomes one :typographic_sym
# element. Otherwise two :entity elements are appended: 'lt' entities when the
# matched text is a backslash followed by "<<", 'gt' entities for anything else.
def parse_typographic_syms
  @src.pos += @src.matched_size
  matched = @src.matched
  subst = TYPOGRAPHIC_SYMS_SUBST[matched]
  return @tree.children << Element.new(:typographic_sym, subst) if subst.kind_of?(Symbol)

  entity_name = (matched == '\<<' ? 'lt' : 'gt')
  2.times do
    @tree.children << Element.new(:entity, ::Kramdown::Utils::Entities.entity(entity_name))
  end
  @tree.children
end
Replace the abbreviation text with elements.
# File lib/kramdown/parser/kramdown/abbreviation.rb, line 28
#
# Walk the children of +el+ recursively and split every :text child that
# contains a known abbreviation into alternating :text and :abbreviation
# elements. Does nothing when @root.options[:abbrev_defs] is empty.
#
# +regexps+ is a two element array [abbreviation regexp, lookahead regexp]; it
# is built on the first call and handed down to the recursive calls.
def replace_abbreviations(el, regexps = nil)
  return if @root.options[:abbrev_defs].empty?
  unless regexps
    # Longest abbreviations first so that they win in the alternation.
    longest_first = @root.options[:abbrev_defs].keys.sort {|a,b| b.length <=> a.length}
    regexps = [Regexp.union(*longest_first.map {|k| /#{Regexp.escape(k)}/})]
    regexps << /(?=(?:\W|^)#{regexps.first}(?!\w))/ # regexp should only match on word boundaries
  end
  abbrev_re, lookahead_re = regexps.first, regexps.last

  el.children.map! do |child|
    if child.type != :text
      replace_abbreviations(child, regexps)
      next child
    end
    next child unless child.value =~ abbrev_re

    pieces = []
    scanner = StringScanner.new(child.value)
    while (leading = scanner.scan_until(lookahead_re))
      abbr = scanner.scan(abbrev_re)
      if abbr.nil?
        # The lookahead matched one non-word character before the abbreviation;
        # that character still belongs to the leading text.
        leading << scanner.scan(/\W|^/)
        abbr = scanner.scan(abbrev_re)
      end
      pieces << Element.new(:text, leading) << Element.new(:abbreviation, abbr)
    end
    pieces << Element.new(:text, scanner.rest)
  end.flatten!
end
Update the ial with the information from the inline attribute list opts.
# File lib/kramdown/parser/kramdown/extensions.rb, line 41
#
# Merge the inline attribute list +opts+ into +ial+ (both hashes), modifying
# +ial+ in place.
#
# * The references under opts[:refs] are appended to ial[:refs], which is
#   created if missing and always stays a flat list.
# * The value stored under IAL_CLASS_ATTR is appended to the existing value,
#   separated by a single space.
# * Every other String key overwrites the value in +ial+; non-String keys
#   (apart from :refs) are ignored.
#
# Returns +opts+.
def update_ial_with_ial(ial, opts)
  # Use concat rather than <<: pushing the array itself nested it inside
  # ial[:refs] (and pushed nil when opts had no :refs), so lookups of the
  # individual reference names could never succeed.
  (ial[:refs] ||= []).concat(Array(opts[:refs]))
  opts.each do |k, v|
    if k == IAL_CLASS_ATTR
      # Build a new string instead of mutating the stored one in place.
      ial[k] = "#{ial[k]} #{v}".lstrip
    elsif k.kind_of?(String)
      ial[k] = v
    end
  end
end
Adapt the object to allow parsing like specified in the options.
# File lib/kramdown/parser/kramdown.rb, line 99
#
# Build the @parsers lookup table for all names listed in @block_parsers and
# @span_parsers and compute @span_start / @span_start_re from
# span_parser_regexps.
#
# Raises Kramdown::Error for a name the parser class does not know.
def configure_parser
  @parsers = {}
  (@block_parsers + @span_parsers).each do |name|
    raise Kramdown::Error, "Unknown parser: #{name}" unless self.class.has_parser?(name)
    @parsers[name] = self.class.parser(name)
  end
  @span_start, @span_start_re = span_parser_regexps
end
Create a new block-level element, taking care of applying a preceding block IAL if it exists. This method should always be used for creating a block-level element!
# File lib/kramdown/parser/kramdown.rb, line 257
#
# Create a new Element from +args+ and, if a block IAL is pending in
# @block_ial, store it in the element's options under :ial. Elements of type
# :blank and :eob never receive the IAL.
#
# Returns the new element.
def new_block_el(*args)
  Element.new(*args).tap do |block_el|
    takes_ial = @block_ial && ![:blank, :eob].include?(block_el.type)
    block_el.options[:ial] = @block_ial if takes_ial
  end
end
Parse all block-level elements in text into the element el.
# File lib/kramdown/parser/kramdown.rb, line 118
#
# Run the block-level parsers until the source is exhausted, with +el+ as the
# current tree. When +text+ is given it is scanned with a fresh StringScanner,
# otherwise the current @src is used.
#
# The previous @tree, @src and @block_ial are pushed on @stack and restored
# before returning. Returns the value of the catch(:stop_block_parsing) block:
# the thrown value if a parser threw :stop_block_parsing, nil otherwise.
def parse_blocks(el, text = nil)
  @stack.push([@tree, @src, @block_ial])
  scanner = text.nil? ? @src : StringScanner.new(text)
  @tree, @src, @block_ial = el, scanner, nil

  status = catch(:stop_block_parsing) do
    until @src.eos?
      pending_ial = @block_ial
      # The first parser whose start regexp matches and whose method returns a
      # truthy value handles the current position.
      handled = @block_parsers.any? do |name|
        parser = @parsers[name]
        @src.check(parser.start_re) ? send(parser.method) : false
      end
      unless handled
        warning('Warning: this should not occur - no block parser handled the line')
        add_text(@src.scan(/.*\n/))
      end
      # An IAL that was already pending before this round has now been used up.
      @block_ial = nil if pending_ial
    end
  end

  @tree, @src, @block_ial = *@stack.pop
  status
end
Parse all span-level elements in the source string of @src into el.
If the parameter stop_re (a regexp) is given, parsing stops as soon as the regexp matches and either no block is given or the given block returns true.
The parameter parsers can be used to specify the (span-level) parsing methods that should be used for parsing.
The parameter text_type specifies the type which should be used for created text nodes.
# File lib/kramdown/parser/kramdown.rb, line 181
#
# Parse the span-level elements found in @src into +el+.
#
# stop_re::   optional regexp; when it matches at a candidate position parsing
#             stops, provided no block is given or the given block returns a
#             truthy value.
# parsers::   optional list of span parser names to use instead of
#             @span_parsers.
# text_type:: type to use for created text nodes (stored in @text_type while
#             parsing).
#
# The previous @tree/@text_type are pushed on @stack (unless @tree is nil) and
# restored from @stack on exit. Returns whether +stop_re+ ended the parsing.
def parse_spans(el, stop_re = nil, parsers = nil, text_type = @text_type)
  @stack.push([@tree, @text_type]) unless @tree.nil?
  @tree, @text_type = el, text_type

  if parsers
    span_start, span_start_re = span_parser_regexps(parsers)
  else
    span_start, span_start_re = @span_start, @span_start_re
    parsers = @span_parsers
  end
  used_re = stop_re.nil? ? span_start_re : /(?=#{Regexp.union(stop_re, span_start)})/

  stop_re_found = false
  until @src.eos? || stop_re_found
    plain = @src.scan_until(used_re)
    unless plain
      # Nothing of interest is left. Without a stop regexp the remainder is
      # plain text; with one it is left unconsumed.
      unless stop_re
        add_text(@src.rest)
        @src.terminate
      end
      break
    end

    add_text(plain)
    stop_re_found = (block_given? ? yield : true) if stop_re && @src.check(stop_re)
    break if stop_re_found

    handler = parsers.find {|name| @src.check(@parsers[name].start_re)}
    if handler
      send(@parsers[handler].method)
    else
      # A start character without a matching parser is ordinary text.
      add_text(@src.getch)
    end
  end

  @tree, @text_type = @stack.pop
  stop_re_found
end
Reset the current parsing environment. The parameter opts can be used to set initial values for one or more environment variables.
# File lib/kramdown/parser/kramdown.rb, line 220
#
# Reset the parsing environment (@src, @tree, @block_ial, @stack and
# @text_type) from the hash +opts+. Keys that are not given default to nil,
# except :text_type (defaults to :raw_text) and :stack (defaults to a new
# empty array).
def reset_env(opts = {})
  env = {:text_type => :raw_text, :stack => []}.merge(opts)
  @src, @tree, @block_ial, @stack = env.values_at(:src, :tree, :block_ial, :stack)
  @text_type = env[:text_type]
end
Restore the current parsing environment.
# File lib/kramdown/parser/kramdown.rb, line 235
#
# Restore the parsing environment from +env+, an array in the order
# [@src, @tree, @block_ial, @stack, @text_type].
def restore_env(env)
  saved = [*env]
  @src, @tree, @block_ial, @stack, @text_type = saved
end
Return the current parsing environment.
# File lib/kramdown/parser/kramdown.rb, line 230
#
# Return a snapshot of the parsing environment as an array in the order
# [@src, @tree, @block_ial, @stack, @text_type]. The objects themselves are
# not copied.
def save_env
  [
    @src,
    @tree,
    @block_ial,
    @stack,
    @text_type
  ]
end
Create the needed span parser regexps.
# File lib/kramdown/parser/kramdown.rb, line 112
#
# Build the regexps used to locate span-level elements for the given parser
# names (default: @span_parsers).
#
# Returns a two element array: a regexp that is the alternation of the
# span_start strings of all given parsers, and a zero-width lookahead regexp
# wrapping it.
def span_parser_regexps(parsers = @span_parsers)
  alternatives = parsers.map {|name| @parsers[name].span_start}
  span_start = Regexp.new(alternatives.join('|'))
  [span_start, Regexp.new("(?=#{span_start})")]
end
Update the given attributes hash attr with the information from the inline attribute list ial and all referenced ALDs.
# File lib/kramdown/parser/kramdown.rb, line 241
#
# Update the attributes hash +attr+ in place with the information from the
# inline attribute list +ial+ and all ALDs it references.
#
# The ALDs named in ial[:refs] are looked up in @alds and applied first
# (recursively, unknown names are skipped), then the String keys of +ial+
# itself: the value under IAL_CLASS_ATTR is appended to the existing value
# separated by a space, every other String key overwrites.
#
# +active_refs+ is for internal use only: it holds the names of the ALDs that
# are currently being resolved so that ALDs referencing each other (or
# themselves) do not recurse without end.
#
# Returns +ial+.
def update_attr_with_ial(attr, ial, active_refs = [])
  if ial[:refs]
    ial[:refs].each do |ref|
      ald = @alds[ref]
      # Skip unknown references and references already on the current
      # resolution path (cyclic ALDs); the latter used to overflow the stack.
      next if !ald || active_refs.include?(ref)
      active_refs.push(ref)
      begin
        update_attr_with_ial(attr, ald, active_refs)
      ensure
        # Only the current path is tracked, so an ALD reachable via two
        # different references is still applied once per reference.
        active_refs.pop
      end
    end
  end
  ial.each do |k, v|
    if k == IAL_CLASS_ATTR
      attr[k] = "#{attr[k]} #{v}".lstrip
    elsif k.kind_of?(String)
      attr[k] = v
    end
  end
end
Update the tree by parsing all :raw_text elements with the span-level parser (resets the environment) and by updating the attributes from the IALs.
# File lib/kramdown/parser/kramdown.rb, line 145
#
# Post-process the children of +element+ in place:
#
# * :raw_text children are run through the span parser (after resetting the
#   parsing environment) and replaced by the resulting children,
# * :eob children are removed,
# * consecutive :blank children are merged into the first one (an :eob in
#   between does not interrupt the run),
# * every other child is processed recursively and gets its attributes updated
#   from options[:ial] if that is set.
def update_tree(element)
  last_blank = nil
  element.children.map! do |child|
    case child.type
    when :raw_text
      last_blank = nil
      reset_env(:src => StringScanner.new(child.value), :text_type => :text)
      parse_spans(child)
      child.children
    when :eob
      []
    when :blank
      if last_blank
        # Fold this blank into the previous one and drop it from the tree.
        last_blank.value << child.value
        []
      else
        last_blank = child
        child
      end
    else
      last_blank = nil
      update_tree(child)
      ial = child.options[:ial]
      update_attr_with_ial(child.attr, ial) if ial
      child
    end
  end.flatten!
end
Generated with the Darkfish Rdoc Generator 2.