# This started out as https://www.snip2code.com/Snippet/1704331/Convert-trac-markup-to-Markdown/
# which in turn said "This code mostly taken from patches to pagure_importer by mreynolds".
# Has mutated considerably since then.

import re
from urllib.parse import quote


class Trac2Markdown:
    """Convert Trac wiki markup to Markdown, one page at a time.

    Construct with the base URL that ``source:`` / ``browser:`` links should
    point at, then call the instance with ``(content, slug)``; it returns the
    converted page as a single string.  ``slug`` is used to build attachment
    paths of the form ``{attach}/<slug>/<name>``.
    """

    # Trac spells a forced line break either "[[br]]" or "\\".
    content_linebreak_pattern = re.compile(r"\[\[br\]\]|\\\\", re.I)

    # A CamelCase word standing on its own (bounded by whitespace or the line ends).
    camelcase_pattern = re.compile(r"(?:^|(?<=\s))([A-Z][a-z]+[A-Z][a-z][A-Za-z]*)(?:$|(?=\s))")

    # (level, pattern) for "= h1 =" through "====== h6 ======".
    wikiheading_patterns = tuple(
        (level, re.compile("^{} (.*)[ \t]*=*$".format("=" * level)))
        for level in range(1, 7)
    )

    # Indentation emitted per nesting level of a bullet list.  Markdown needs
    # the child marker to sit at least under the parent's text, so one space
    # per level is not enough; four is accepted by both classic Markdown and
    # CommonMark.
    list_indent = "    "

    def convert_headers(self, line: str) -> str:
        """Return ``line`` rewritten as an ATX (``#``) heading if it is a Trac heading.

        Lines that are not headings are returned unchanged.
        """
        for level_count, header in self.wikiheading_patterns:
            match = header.search(line)
            if match is None:
                continue            # Try the next heading level
            title = match.group(1)
            if title:
                # The greedy group swallows the closing "=" run; trim it here.
                return "{} {}".format("#" * level_count, title.rstrip("= \r\t"))
        return line

    def convert_to_creole(self, m: "re.Match[str]") -> str:
        """Rewrite a single-bracket Trac link as a double-bracket Creole link.

        Convert Trac's native link form to Creole's, so that rest of the code
        only has to deal with one format.  Creole's is easier to parse and
        harder to confuse with partially converted Markdown.  Bracketed text
        that does not look like a link is returned untouched.
        """
        text = m.group(1).strip()
        if " " in text:
            target, _, label = text.partition(" ")
            return "[[{}|{}]]".format(target, label)
        if ":" in text or self.camelcase_pattern.match(text):
            return "[[{}]]".format(text)
        return m.group(0)

    # Probably most of the non-wiki scheme tests should become a table in an
    # extended JSON config file which maps
    #
    # { "source:fee/fie/foe/fum": "https://git.cryptech.is/blarg/blee/blue" }

    @staticmethod
    def _unquote(value: str) -> str:
        """Strip one pair of matching single or double quotes surrounding ``value``."""
        if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
            return value[1:-1]
        return value

    def convert_wikilinks(self, m: "re.Match[str]") -> str:
        """Turn a Creole link match ``(scheme, link, text)`` into a Markdown link.

        Reads ``self.slug`` (for attachments) and ``self.source_url`` (for
        ``source:`` / ``browser:`` links).
        """
        scheme, link, text = [p.strip() if p else p for p in m.groups()]
        if text is None:
            text = link
        link = self._unquote(link)
        text = self._unquote(text)

        if text == link and link.startswith("http") and "://" in link:
            # Bare URL with no separate label: use an autolink.
            return "<{}>".format(link)
        if scheme == "attachment:":
            return "[{}]({{attach}}/{}/{})".format(text, self.slug, link)
        if scheme in ("source:", "browser:"):
            return "[{}]({}/{})".format(text, self.source_url.rstrip("/"), link.lstrip("/"))
        if scheme == "wiki:" or (scheme is None and self.camelcase_pattern.match(link)):
            return "[{}]({{filename}}{}.md)".format(text, link)
        return "[{}]({})".format(text, link)

    def convert_image(self, m: "re.Match[str]") -> str:
        """Convert the argument list of an ``[[Image(...)]]`` macro.

        Only the first comma-separated argument (the image name or URL) is
        used.  Remote images become an ``<img>`` tag; anything else is treated
        as an attachment of the current page (``self.slug``).
        """
        text = m.group(1).split(",")[0].strip()
        if "://" in text:
            # The format string here had been lost, which made every remote
            # image vanish from the output.
            return "<img src=\"{}\">".format(text)
        return "![{}]({{attach}}/{}/{})".format(text, self.slug, quote(text, ""))

    def __init__(self, source_url: str) -> None:
        """Remember ``source_url`` and build the ordered regexp conversion table."""
        self.source_url = source_url

        # NOTE(review): the middle of this table was unreadable in the file as
        # found (it did not even compile); the entries between the first and
        # the line-break rule were restored from the handlers defined on this
        # class and the groups they consume.  Confirm against project history.
        # Order matters: each rule sees the output of the ones before it.
        self.pattern_actions = (

            # Convert TracLinks to WikiCreole syntax to simplify remaining processing
            (re.compile(r"(?<!\[)\[([^\[\]]+)\]"), self.convert_to_creole),

            # Convert bare CamelCase words to explicit links
            (self.camelcase_pattern, r"[[\1]]"),

            # Convert !x quoted text (Trac's escape for "not a link")
            (re.compile(r"!(\w+)"), r"\1"),

            # Convert (limited subset of) macros
            (re.compile(r"\[\[PageOutline.*\]\]", re.I), r""),
            (re.compile(r"\[\[Image\((.*)\)\]\]", re.I), self.convert_image),

            # Convert wiki links: groups are (scheme, link, text)
            (re.compile(r"\[\[(wiki:|attachment:|source:|browser:)?([^\]\[|]+)(?:\|([^\]\[]+))?\]\]"),
             self.convert_wikilinks),

            # Convert struck-through text
            (re.compile(r"~~([^~]+)~~"), r"<s>\1</s>"),

            # Convert line breaks -- Markdown spec says linebreak is
            # <SPACE><SPACE><RETURN>, who am I to argue?
            (re.compile(r"\\\\$"), r"  "),

            # Convert bold and italic text (do this last)
            (re.compile(r"'''"), r"**"),
            (re.compile(r"''"), r"*"),
        )

    def __call__(self, content: str, slug: str) -> str:
        """Convert one page of Trac markup (``content``) and return Markdown.

        ``slug`` is exposed to the link handlers as ``self.slug`` for the
        duration of the call only.
        """
        self.slug = slug
        try:
            return self._convert(content)
        finally:
            # Never leave a stale slug behind, even if conversion raised.
            del self.slug

    def _convert(self, content: str) -> str:
        """Line-by-line conversion driver; expects ``self.slug`` to be set."""
        # Normalise both break spellings to "\\" at end of line so the
        # per-line rule in pattern_actions can finish the job.
        source_lines = self.content_linebreak_pattern.sub("\\\\\\\\\n", content).splitlines()

        new_content = []
        code_block = False
        in_list = False
        in_table = False
        nested_level = 0
        prev_indent = 0
        text_indent = 0

        for raw_line in source_lines:
            line = raw_line.rstrip()
            tail = ["\n"]

            # Trac {{{ ... }}} blocks become fenced code blocks.  A line that
            # both opens and closes leaves us outside the block.
            if "{{{" in line:
                code_block = True
                line = line.replace("{{{", "```")
            if "}}}" in line:
                code_block = False
                line = line.replace("}}}", "```")

            if not code_block:

                # Convert tables.  References:
                #   https://github.github.com/gfm/#tables-extension-
                #   https://permatrac.noc.ietf.org/wiki/WikiFormatting#Tables
                # Table start: line containing "||"; table end: blank line?
                #
                # Figuring out whether there's a real header line is fun, trac doesn't
                # require one, markdown does.  Guess we can add a dummy header if no
                # better idea.  Markdown requires delimiter line, which we add
                # immediately after the header, both appear to be mandatory.  Trac can
                # have label cells anywhere, not just in header, might need to add "*"
                # to those or just ignore the issue.  Justification we can sort of
                # figure out from the header, if the rows do anything different, ouch,
                # because markdown specifies in delimiter line.
                #
                # Might do something clever with the "=" markers and alignment, start
                # with just getting the basic table structure to something markdown
                # will believe.

                if line.strip().startswith("||"):
                    line = line.replace("=|", "|").replace("|=", "|")
                    line = line.replace("||", "|")
                    if not in_table:
                        # First row doubles as the header; follow it with the delimiter row.
                        tail.append("|---" * (line.count("|") - 1) + "|\n")
                        in_table = True
                elif in_table:
                    new_content.append("\n")
                    in_table = False

                #
                # Convert bullet lists.  The start and end of a list needs an empty line.
                #
                nested_line = line.lstrip(' ')
                if nested_line.startswith(('- ', '* ')):
                    indent = len(line) - len(nested_line)
                    if not in_list:
                        new_content.append("\n")
                        nested_level = 0
                        prev_indent = indent
                        in_list = True
                        # Column where the first item's text starts; lines indented
                        # at least this far are continuations of the list.
                        # NOTE(review): assumed to be set only when a list opens;
                        # the original indentation was not recoverable -- confirm.
                        text_indent = len(line) - len(nested_line[1:].lstrip())
                    if indent > prev_indent:
                        nested_level += 1
                    elif indent < prev_indent:
                        nested_level -= 1
                    prev_indent = indent
                    line = self.list_indent * nested_level + nested_line
                elif in_list and len(line) < len(nested_line) + text_indent:
                    new_content.append("\n")
                    in_list = False
                    nested_level = 0
                    prev_indent = 0
                    text_indent = 0

                # Convert headers
                line = self.convert_headers(line)

                # Rest is regexp-driven conversions
                for pattern, action in self.pattern_actions:
                    line = pattern.sub(action, line)

            new_content.append(line)
            new_content.extend(tail)

        return "".join(new_content)