blob: 4044aa2ea344052dec54c4234fea8c0dbce7ec84 [file] [log] [blame]
# -*- coding: utf-8 -*-
#
#--
# Copyright (C) 2009-2016 Thomas Leitner <t_leitner@gmx.at>
#
# This file is part of kramdown which is licensed under the MIT.
#++
#
require 'kramdown/utils'
require 'kramdown/parser'
module Kramdown
module Parser
# == \Base class for parsers
#
# This class serves as base class for parsers. It provides common methods that can/should be
# used by all parsers, especially by those using StringScanner(Kramdown) for parsing.
#
# A parser object is used as a throw-away object, i.e. it is only used for storing the needed
# state information during parsing. Therefore one can't instantiate a parser object directly but
# only use the Base::parse method.
#
# == Implementing a parser
#
# Implementing a new parser is rather easy: just derive a new class from this class and put it
# in the Kramdown::Parser module -- the latter is needed so that the auto-detection of the new
# parser works correctly. Then you need to implement the +#parse+ method which has to contain
# the parsing code.
#
# Have a look at the Base::parse, Base::new and Base#parse methods for additional information!
class Base

  # The hash with the parsing options.
  attr_reader :options

  # The array with the parser warnings.
  attr_reader :warnings

  # The original source string.
  attr_reader :source

  # The root element of the element tree that is created from the source string.
  attr_reader :root

  # Initialize the parser object with the +source+ string and the parsing +options+.
  #
  # The +options+ are merged via Kramdown::Options.merge. The @root element, the @warnings array
  # and @text_type (specifies the default type for newly created text nodes) are automatically
  # initialized.
  def initialize(source, options)
    @source = source
    @options = Kramdown::Options.merge(options)
    # Sources without encoding support (e.g. strings under Ruby 1.8) get a +nil+ encoding. The
    # explicit capability check avoids a rescue modifier that would hide unrelated errors.
    encoding = (source.respond_to?(:encoding) ? source.encoding : nil)
    @root = Element.new(:root, nil, nil, :encoding => encoding, :location => 1,
                        :options => {}, :abbrev_defs => {}, :abbrev_attr => {})
    @warnings = []
    @text_type = :text
  end
  private_class_method(:new, :allocate)

  # Parse the +source+ string into an element tree, possibly using the parsing +options+, and
  # return the root element of the element tree and an array with warning messages.
  #
  # Initializes a new instance of the calling class and then calls the +#parse+ method that must
  # be implemented by each subclass.
  def self.parse(source, options = {})
    parser = new(source, options)
    parser.parse
    [parser.root, parser.warnings]
  end

  # Parse the source string into an element tree.
  #
  # The parsing code should parse the source provided in @source and build an element tree the
  # root of which should be @root.
  #
  # This is the only method that has to be implemented by sub-classes! The implementation in this
  # class just raises NotImplementedError.
  def parse
    raise NotImplementedError
  end

  # Add the given warning +text+ to the warning array.
  def warning(text)
    @warnings << text
    #TODO: add position information
  end

  # Modify the string +source+ to be usable by the parser (unifies line ending characters to
  # +\n+ and makes sure +source+ ends with a new line character).
  #
  # If +source+ supports encodings, it is additionally converted to UTF-8; a RuntimeError is
  # raised if it contains byte sequences that are invalid for its current encoding.
  def adapt_source(source)
    if source.respond_to?(:encode)
      unless source.valid_encoding?
        raise "The source text contains invalid characters for the used encoding #{source.encoding}"
      end
      source = source.encode('UTF-8')
    end
    # "\r\n" and lone "\r" both become "\n"; chomp + "\n" guarantees a trailing newline
    source.gsub(/\r\n?/, "\n").chomp + "\n"
  end

  # This helper method adds the given +text+ either to the last element in the +tree+ if it is a
  # +type+ element or creates a new text element with the given +type+.
  #
  # No new element is created for an empty +text+. A newly created element takes its location
  # from the previous sibling if that one has a location, otherwise from +tree+.
  def add_text(text, tree = @tree, type = @text_type)
    last = tree.children.last
    if last && last.type == type
      last.value << text
    elsif !text.empty?
      location = (last && last.options[:location]) || tree.options[:location]
      tree.children << Element.new(type, text, nil, :location => location)
    end
  end

  # Extract the part of the StringScanner +strscan+ backed string specified by the +range+, where
  # +range+ is given in bytes. The result has the same encoding as the scanner's string.
  #
  # When String#byteslice is available, the scanner's string is never modified, so this also works
  # for scanners backed by a frozen string; in that case +nil+ is returned if +range+ lies outside
  # of the string. The other branches keep older Ruby versions working.
  def extract_string(range, strscan)
    string = strscan.string
    if string.respond_to?(:byteslice)
      string.byteslice(range)
    elsif string.respond_to?(:encoding)
      # No byteslice: temporarily treat the string as binary so that +range+ addresses bytes, and
      # always restore the original encoding afterwards.
      enc = string.encoding
      begin
        string.force_encoding('ASCII-8BIT')
        string[range].force_encoding(enc)
      ensure
        string.force_encoding(enc)
      end
    else
      # Strings without encoding support are already indexed by bytes.
      string[range]
    end
  end

end
end
end