# Recovered from the patch that created trac2md.py:
# commit be858bc28af94a498b613fa6541ed3e730fd9473, Rob Austein, 2021-06-02,
# "First cut at project-independent version of these scripts".
#
# This started out as https://www.snip2code.com/Snippet/1704331/Convert-trac-markup-to-Markdown/
# which in turn said "This code mostly taken from patches to pagure_importer by mreynolds".
# Has mutated considerably since then.

import re
from urllib.parse import quote


class Trac2Markdown:
    """Convert Trac wiki markup to Markdown, one page at a time.

    An instance is constructed once with the base URL used for ``source:`` /
    ``browser:`` links and is then called like a function:
    ``markdown = converter(trac_text, slug)``.  ``slug`` names the page being
    converted and is used to build ``{attach}`` paths for attachments and
    images.
    """

    # Trac spells a forced line break either "[[br]]" or "\\".
    content_linebreak_pattern = re.compile(r"\[\[br\]\]|\\\\", re.I)

    # A CamelCase word standing on its own (bounded by whitespace or line ends).
    camelcase_pattern = re.compile(r"(?:^|(?<=\s))([A-Z][a-z]+[A-Z][a-z][A-Za-z]*)(?:$|(?=\s))")

    # One (level, pattern) pair per heading level: "= h1 =" through "====== h6 ======".
    wikiheading_patterns = tuple(
        (level, re.compile("^{} (.*)[ \t]*=*$".format("=" * level)))
        for level in range(1, 7)
    )

    # Indentation emitted per nesting level of a bullet list.
    # NOTE(review): the recovered source showed a single space here, but runs
    # of spaces were collapsed in that text and one space does not nest a list
    # item in Markdown; four spaces is the conventional value -- confirm.
    list_indent = "    "

    @staticmethod
    def _strip_quotes(value):
        """Return ``value`` without one pair of matching surrounding quotes."""
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            return value[1:-1]
        return value

    def convert_headers(self, line):
        """Return ``line`` as a Markdown heading if it is a Trac heading.

        Lines that match none of the heading patterns are returned unchanged.
        """
        for level_count, header in self.wikiheading_patterns:
            found = header.search(line)
            if found is None:
                continue  # Try the next heading level
            title = found.group(1)
            if title:
                # The greedy group swallows the closing "=" run, so trim it here.
                return "{} {}".format("#" * level_count, title.rstrip("= \r\t"))
        return line

    def convert_to_creole(self, m):
        """Regex callback: rewrite a Trac ``[target label]`` link as Creole.

        Convert Trac's native link form to Creole's, so that rest of the code
        only has to deal with one format.  Creole's is easier to parse and
        harder to confuse with partially converted Markdown.  Bracketed text
        that does not look like a link is returned untouched.
        """
        text = m.group(1).strip()
        if " " in text:
            return "[[{0[0]}|{0[1]}]]".format(text.split(" ", 1))
        elif ":" in text or self.camelcase_pattern.match(text):
            return "[[{}]]".format(text)
        else:
            return m.group(0)

    # Probably most of the non-wiki scheme tests should become a table in an
    # extended JSON config file which maps
    #
    # { "source:fee/fie/foe/fum": "https://git.cryptech.is/blarg/blee/blue" }

    def convert_wikilinks(self, m):
        """Regex callback: turn a Creole link into a Markdown link.

        ``m`` must provide three groups: optional scheme (``wiki:``,
        ``attachment:``, ``source:``, ``browser:``), link target, and optional
        link text.  Reads ``self.slug`` and ``self.source_url``.
        """
        scheme, link, text = [p.strip() if p else p for p in m.groups()]
        if text is None:
            text = link
        link = self._strip_quotes(link)
        text = self._strip_quotes(text)
        if text == link and link.startswith("http") and "://" in link:
            # Bare URL: Markdown autolink.
            return "<{}>".format(link)
        elif scheme == "attachment:":
            return "[{}]({{attach}}{}/{})".format(text, self.slug, link)
        elif scheme in ("source:", "browser:"):
            return "[{}]({}/{})".format(text, self.source_url.rstrip("/"), link.lstrip("/"))
        elif scheme == "wiki:" or (scheme is None and self.camelcase_pattern.match(link)):
            return "[{}]({{filename}}{}.md)".format(text, link)
        else:
            return "[{}]({})".format(text, link)

    def convert_image(self, m):
        """Regex callback: convert the argument of a Trac ``[[Image(...)]]`` macro.

        Only the first comma-separated argument (the image name or URL) is
        used; any further macro arguments are ignored.
        """
        text = m.group(1).split(",")[0].strip()
        if "://" in text:
            # NOTE(review): the recovered source had an empty format string
            # here, so remote images were silently dropped; an HTML <img> tag
            # appears to have been stripped from it -- confirm the markup.
            return "<img src=\"{}\">".format(text)
        else:
            return "![{}]({{attach}}{}/{})".format(text, self.slug, quote(text, ""))

    def __init__(self, source_url):
        """Create a converter.

        ``source_url`` is the base URL that ``source:`` and ``browser:`` links
        are resolved against.
        """
        self.source_url = source_url

        # Ordered (pattern, replacement) pairs applied to every line outside
        # a code block.
        #
        # NOTE(review): the middle of this table was lost in the recovered
        # source (everything between the start of the first pattern and a
        # "\1" replacement).  The entries marked "reconstructed" are inferred
        # from the callbacks they must feed and should be checked against the
        # real file; the original may have contained further entries.
        self.pattern_actions = (

            # Convert TracLinks to WikiCreole syntax to simplify remaining processing
            # (reconstructed: single-bracket links only, "[[...]]" is left alone)
            (re.compile(r"(?<!\[)\[([^][]+)\]"), self.convert_to_creole),

            # Convert free-standing CamelCase words to explicit links (reconstructed)
            (self.camelcase_pattern, r"[[\1]]"),

            # Convert images (reconstructed)
            (re.compile(r"\[\[Image\((.*)\)\]\]"), self.convert_image),

            # Convert wiki links (reconstructed): groups are scheme, link, text
            (re.compile(r"\[\[(wiki:|attachment:|source:|browser:)?([^]|[]+)(?:[|]([^][]+))?\]\]"),
             self.convert_wikilinks),

            # Convert struck-through text (reconstructed; only "\1" survived)
            (re.compile(r"~~([^~]+)~~"), r"<s>\1</s>"),

            # Convert line breaks -- Markdown spec says linebreak is
            # <SPACE><SPACE><RETURN>, who am I to argue?
            (re.compile(r"\\\\$"), r"  "),

            # Convert bold and italic text (do this last)
            (re.compile(r"'''"), r"**"),
            (re.compile(r"''"), r"*"),
        )

    def __call__(self, content, slug):
        """Convert the Trac markup in ``content`` and return Markdown text.

        ``slug`` identifies the page and is exposed to the link and image
        callbacks as ``self.slug`` for the duration of the call only.
        """
        self.slug = slug
        try:
            # Normalise both line-break spellings to "\\" + newline so the
            # per-line "\\$" rule can turn them into Markdown hard breaks.
            lines = self.content_linebreak_pattern.sub("\\\\\\\\\n", content).splitlines()
            new_content = []

            code_block = False
            in_list = False
            in_table = False
            indent_stack = []   # source indents of the currently open list levels
            text_indent = 0     # column where the latest list item's text starts

            for line in lines:
                line = line.rstrip()
                tail = ["\n"]

                # Trac "{{{" / "}}}" become Markdown code fences.
                if "{{{" in line:
                    code_block = True
                    line = line.replace("{{{", "```")
                if "}}}" in line:
                    code_block = False
                    line = line.replace("}}}", "```")

                if not code_block:

                    # Convert tables.  References:
                    # https://github.github.com/gfm/#tables-extension-
                    # https://permatrac.noc.ietf.org/wiki/WikiFormatting#Tables
                    # Table start: line containing "||"; table end: blank line?
                    #
                    # Figuring out whether there's a real header line is fun, trac doesn't
                    # require one, markdown does.  Guess we can add a dummy header if no
                    # better idea.  Markdown requires delimiter line, which we add
                    # immediately after the header, both appear to be mandatory.  Trac can
                    # have label cells anywhere, not just in header, might need to add "*"
                    # to those or just ignore the issue.  Justification we can sort of
                    # figure out from the header, if the rows do anything different, ouch,
                    # because markdown specifies in delimiter line.
                    #
                    # Might do something clever with the "=" markers and alignment, start
                    # with just getting the basic table structure to something markdown
                    # will believe.

                    if line.strip().startswith("||"):
                        line = line.replace("=|", "|").replace("|=", "|")
                        line = line.replace("||", "|")
                        if not in_table:
                            # First row is treated as the header; follow it with
                            # the mandatory delimiter row.
                            tail.append("|---" * (line.count("|") - 1) + "|\n")
                            in_table = True
                    elif in_table:
                        new_content.append("\n")
                        in_table = False

                    #
                    # Convert bullet lists.  The start and end of a list needs an empty line.
                    #
                    nested_line = line.lstrip(" ")
                    indent = len(line) - len(nested_line)
                    if nested_line.startswith(("- ", "* ")):
                        if not in_list:
                            new_content.append("\n")
                            indent_stack = []
                            in_list = True
                        text_indent = len(line) - len(nested_line[1:].lstrip())
                        # Nesting depth is the number of open levels whose
                        # source indent is smaller than this item's.  This keeps
                        # the first item at depth 0 whatever its indent, and lets
                        # a dedent close several levels at once.
                        while indent_stack and indent < indent_stack[-1]:
                            indent_stack.pop()
                        if not indent_stack or indent > indent_stack[-1]:
                            indent_stack.append(indent)
                        line = self.list_indent * (len(indent_stack) - 1) + nested_line
                    elif in_list and indent < text_indent:
                        # A non-item line indented less than the item text ends the list.
                        new_content.append("\n")
                        in_list = False
                        indent_stack = []
                        text_indent = 0

                    # Convert headers
                    line = self.convert_headers(line)

                    # Rest is regexp-driven conversions
                    for pattern, action in self.pattern_actions:
                        line = pattern.sub(action, line)

                new_content.append(line)
                new_content.extend(tail)

            return "".join(new_content)
        finally:
            # The slug is per-call state; never leave it behind, even on error.
            del self.slug