|
@@ -0,0 +1,193 @@
|
|
|
|
+# This started out as https://www.snip2code.com/Snippet/1704331/Convert-trac-markup-to-Markdown/
|
|
|
|
+# which in turn said "This code mostly taken from patches to pagure_importer by mreynolds".
|
|
|
|
+# Has mutated considerably since then.
|
|
|
|
+
|
|
|
|
+import re
|
|
|
|
+from urllib.parse import quote
|
|
|
|
+
|
|
|
|
class Trac2Markdown:
    """Convert Trac wiki markup into Markdown.

    An instance is configured once with the base URL used for ``source:`` /
    ``browser:`` links and is then called like a function, once per page::

        converter = Trac2Markdown("https://example.org/browser")
        markdown = converter(trac_text, "PageSlug")

    The conversion is line-oriented: block structure (code blocks, tables,
    bullet lists, headings) is handled by a small state machine in
    ``__call__``, and everything else is a sequence of regular-expression
    substitutions listed in ``self.pattern_actions``.

    Attachment and wiki-page links are emitted with ``{attach}`` and
    ``{filename}`` prefixes (presumably for a Pelican-style site generator;
    confirm against the consumer of the output).
    """

    # Trac line breaks: "[[br]]" (any case) or a double backslash.
    content_linebreak_pattern = re.compile(r"\[\[br\]\]|\\\\", re.I)

    # A whitespace-delimited CamelCase word, which Trac treats as a wiki link.
    camelcase_pattern = re.compile(r"(?:^|(?<=\s))([A-Z][a-z]+[A-Z][a-z][A-Za-z]*)(?:$|(?=\s))")

    # (level, pattern) pairs for "= Heading =" through "====== Heading ======".
    wikiheading_patterns = tuple(
        (level, re.compile("^{} (.*)[ \t]*=*$".format("=" * level)))
        for level in range(1, 7)
    )

    def convert_headers(self, line):
        """Return ``line`` converted to a Markdown heading if it is a Trac heading.

        A Trac heading is one to six ``=`` characters followed by a space and
        the title, optionally followed by trailing ``=`` characters.  Lines
        that are not headings (or whose title is empty) are returned unchanged.
        """
        for level_count, header in self.wikiheading_patterns:
            found = header.search(line)
            if found is None:
                continue        # Not this level, try the next one
            title = found.group(1)
            if title:
                # The greedy title group swallows the closing "=" run, so trim it here.
                return "{} {}".format("#" * level_count, title.rstrip("= \r\t"))
        return line

    def convert_to_creole(self, m):
        """Rewrite a Trac ``[target label]`` link match as a Creole ``[[target|label]]`` link.

        Converting Trac's native link form to Creole's first means the rest of
        the code only has to deal with one format.  Creole's is easier to parse
        and harder to confuse with partially converted Markdown.

        Bracketed text that has no label, no ``:`` and is not CamelCase is not
        treated as a link and is returned exactly as matched.
        """
        text = m.group(1).strip()
        if " " in text:
            target, _, label = text.partition(" ")
            return "[[{}|{}]]".format(target, label)
        if ":" in text or self.camelcase_pattern.match(text):
            return "[[{}]]".format(text)
        return m.group(0)

    # Probably most of the non-wiki scheme tests should become a table in an
    # extended JSON config file which maps
    #
    # { "source:fee/fie/foe/fum": "https://git.cryptech.is/blarg/blee/blue" }

    def convert_wikilinks(self, m):
        """Turn a Creole ``[[scheme:link|text]]`` match into a Markdown link.

        The match groups are (scheme, link, text); scheme and text may be
        ``None``.  When no text is given the link doubles as the text, and a
        matching pair of surrounding quotes is stripped from both.

        Reads ``self.slug`` (for ``attachment:`` links) and ``self.source_url``
        (for ``source:`` / ``browser:`` links).
        """
        scheme, link, text = [p.strip() if p else p for p in m.groups()]
        if text is None:
            text = link
        if any(link.startswith(q) and link.endswith(q) for q in ('"', "'")):
            link = link[1:-1]
        if any(text.startswith(q) and text.endswith(q) for q in ('"', "'")):
            text = text[1:-1]

        if text == link and link.startswith("http") and "://" in link:
            # Bare URL with no separate label: use Markdown autolink syntax.
            return "<{}>".format(link)
        if scheme == "attachment:":
            return "[{}]({{attach}}{}/{})".format(text, self.slug, link)
        if scheme in ("source:", "browser:"):
            return "[{}]({}/{})".format(text, self.source_url.rstrip("/"), link.lstrip("/"))
        if scheme == "wiki:" or (scheme is None and self.camelcase_pattern.match(link)):
            return "[{}]({{filename}}{}.md)".format(text, link)
        return "[{}]({})".format(text, link)

    def convert_image(self, m):
        """Turn a ``[[Image(name, options...)]]`` match into an image reference.

        Only the first comma-separated argument is used; any further macro
        arguments are dropped.  A name containing ``://`` becomes an HTML
        ``<img>`` tag, anything else becomes a Markdown image pointing at an
        attachment of the current page (``self.slug``), with the file name
        fully percent-encoded.
        """
        text = m.group(1).split(",")[0].strip()
        if "://" in text:
            return "<img src=\"{}\">".format(text)
        return "![{}]({{attach}}{}/{})".format(text, self.slug, quote(text, ""))

    def __init__(self, source_url):
        """Set up the converter.

        ``source_url`` is the base URL prepended to the path of ``source:``
        and ``browser:`` links.
        """
        self.source_url = source_url

        # Ordered (compiled pattern, replacement) pairs applied to every line
        # outside a code block.  Order matters: later rules rely on the output
        # of earlier ones.
        self.pattern_actions = (

            # Convert TracLinks to WikiCreole syntax to simplify remaining processing
            (re.compile(r"(?<!\[)\[([^][]+)\]"), self.convert_to_creole),

            # Convert CamelCase links to explicit links
            (self.camelcase_pattern, r"[[\1]]"),

            # Convert !x quoting
            (re.compile(r"!((?:\w|[#])+)"), r"\1"),

            # Convert (limited subset of) spans
            (re.compile(r"\[\[span\((?:[^][]*,)*([^(),]+)\)\]\]"), r"\1"),

            # Convert images
            (re.compile(r"\[\[Image\((.*)\)\]\]"), self.convert_image),

            # Delete Trac macros that have no useful counterpart
            (re.compile(r"\[\[PageOutline\]\]", re.I), r""),

            # Convert wiki links
            (re.compile(r"\[\[(wiki:|attachment:|source:|browser:)?([^]|[]+)(?:[|]([^][]+))?\]\]"), self.convert_wikilinks),

            # Convert striked through text
            (re.compile(r"~~([^~]+)~~"), r"<s>\1</s>"),

            # Convert line breaks -- Markdown spec says linebreak is <SPACE><SPACE><RETURN>, who am I to argue?
            # The replacement must therefore be exactly two spaces; a single
            # trailing space is not a hard line break in Markdown.
            (re.compile(r"\\\\$"), r"  "),

            # Convert bold and italic text (do this last)
            (re.compile(r"'''"), r"**"),
            (re.compile(r"''"), r"*"),
        )

    def __call__(self, content, slug):
        """Convert one page of Trac markup to Markdown and return it as a string.

        ``content`` is the Trac wiki text; ``slug`` identifies the page and is
        used when building attachment links.  ``self.slug`` only exists for
        the duration of the call and is removed again even if conversion fails.
        """
        self.slug = slug
        try:
            return self._convert(content)
        finally:
            del self.slug

    def _convert(self, content):
        """Run the line-by-line conversion; expects ``self.slug`` to be set."""
        # Put every Trac line break at the end of its own line, normalised to
        # a double backslash, so the "\\\\$" rule in pattern_actions can turn
        # it into a Markdown line break later.
        old_content = self.content_linebreak_pattern.sub("\\\\\\\\\n", content).splitlines()
        new_content = []

        code_block = False
        in_list = False
        in_table = False
        nested_level = 0
        prev_indent = 0
        text_indent = 0

        for raw_line in old_content:
            line = raw_line.rstrip()
            tail = ["\n"]

            # Trac code block delimiters become Markdown fences.  A line that
            # holds both an opener and a closer leaves us outside a code block.
            if "{{{" in line:
                code_block = True
                line = line.replace("{{{", "```")
            if "}}}" in line:
                code_block = False
                line = line.replace("}}}", "```")

            if not code_block:

                # Convert tables.  References:
                # https://github.github.com/gfm/#tables-extension-
                # https://permatrac.noc.ietf.org/wiki/WikiFormatting#Tables
                # Table start: line containing "||"; table end: blank line?
                #
                # Figuring out whether there's a real header line is fun, trac doesn't require one, markdown does.  Guess we can
                # add a dummy header if no better idea.  Markdown requires delimiter line, which we add immediately after the
                # header, both appear to be mandatory.  Trac can have label cells anywhere, not just in header, might need to
                # add "*" to those or just ignore the issue.  Justification we can sort of figure out from the header,
                # if the rows do anything different, ouch, because markdown specifies in delimiter line.
                #
                # Might do something clever with the "=" markers and alignment, start with just getting the basic table
                # structure to something markdown will believe.

                if line.strip().startswith("||"):
                    line = line.replace("=|", "|").replace("|=", "|")
                    line = line.replace("||", "|")
                    if not in_table:
                        # First row is treated as the header: follow it with a delimiter row.
                        tail.append("|---" * (line.count("|") - 1) + "|\n")
                        in_table = True
                elif in_table:
                    new_content.append("\n")
                    in_table = False

                #
                # Convert bullet lists.  The start and end of a list needs an empty line.
                #
                nested_line = line.lstrip(' ')
                if nested_line.startswith(('- ', '* ')):
                    if not in_list:
                        new_content.append("\n")
                        nested_level = 0
                        prev_indent = 0
                        in_list = True
                    indent = len(line) - len(nested_line)
                    # Column at which the item's text starts; less-indented
                    # following lines end the list.
                    text_indent = len(line) - len(nested_line[1:].lstrip())
                    if indent > prev_indent:
                        nested_level += 1
                    elif indent < prev_indent:
                        nested_level -= 1
                    prev_indent = indent
                    # NOTE(review): one space per nesting level is kept as found; Markdown
                    # renderers usually need more to nest a list -- confirm the intended unit.
                    line = ' ' * nested_level + nested_line
                elif in_list and len(line) < len(nested_line) + text_indent:
                    new_content.append("\n")
                    in_list = False
                    nested_level = 0
                    prev_indent = 0
                    text_indent = 0

                # Convert headers
                line = self.convert_headers(line)

                # Rest is regexp-driven conversions
                for pattern, action in self.pattern_actions:
                    line = pattern.sub(action, line)

            new_content.append(line)
            new_content.extend(tail)

        return "".join(new_content)
|