pytorch/caffe2/python/docs/parser.py

# Copyright (c) 2016-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

## @package parser
# Module caffe2.python.docs.parser
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import re


class Parser(object):
    # List of tuples (regex_str, lambda(regex_match, formatter))
    # If a lambda returns True it will be called repeatedly with replacement
    # otherwise it will only be called on text that hasn't been parsed yet.
    regexes = [
        # Code blocks of various formats
        ('````(.+?)````',
         lambda m, f: f.addCode(m.group(1))
         ),
        ('```(.+?)```',
         lambda m, f: f.addCode(m.group(1))
         ),
        ('((( {2})+)(\S.*)(\n\s*\n|\n))+',
         lambda m, f: f.addCode(m.group(0))
         ),
        ('([^\.])\n',
         lambda m, f: f.addRaw('{c} '.format(c=m.group(1))) or True
         ),
        ('`(.+?)`',
         lambda m, f: f.addCode(m.group(1), True)
         ),
        # Make links clickable
        ('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'
         '|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
         lambda m, f: f.addLink(m.group(0), m.group(0))
         ),
        ('\*\*(.+?)\*\*',
         lambda m, f: f.addEmphasis(m.group(1), 2)
         ),
        ('\*(.+?)\*',
         lambda m, f: f.addEmphasis(m.group(1), 1)
         ),
    ]

    def __init__(self, text, formatter):
        self.text = text
        self.lines = []
        self.formatter = formatter

    def parseText(self):
        UNPARSED = 0
        PARSED = 1
        parsed_block = [(UNPARSED, self.text)]
        for regex, func in self.regexes:
            index = 0
            while index < len(parsed_block):
                label, text = parsed_block[index]

                # Already been parsed
                if (label == PARSED):
                    index += 1
                    continue

                match = re.search(regex, text)
                if match:
                    parsed_block.pop(index)
                    start = match.start(0)
                    end = match.end(0)

                    f = self.formatter.clone()
                    merge = func(match, f)

                    if merge:
                        merged = text[:start] + f.dump() + text[end:]
                        parsed_block.insert(index, (UNPARSED, merged))
                    else:
                        if text[:start]:
                            parsed_block.insert(index,
                                                (UNPARSED, text[:start]))

                        index += 1
                        parsed_block.insert(index, (PARSED, f.dump()))

                        index += 1
                        if text[end:]:
                            parsed_block.insert(index,
                                                (UNPARSED, text[end:]))

                else:
                    index += 1

        self.lines += [i for _, i in parsed_block]
        self.text = ' '.join(self.lines)

    def parse(self):
        self.parseText()
        return self.text