trac2md.py 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193
# This started out as https://www.snip2code.com/Snippet/1704331/Convert-trac-markup-to-Markdown/
# which in turn said "This code mostly taken from patches to pagure_importer by mreynolds".
# Has mutated considerably since then.
  4. import re
  5. from urllib.parse import quote
  6. class Trac2Markdown:
  7. content_linebreak_pattern = re.compile(r"\[\[br\]\]|\\\\", re.I)
  8. camelcase_pattern = re.compile(r"(?:^|(?<=\s))([A-Z][a-z]+[A-Z][a-z][A-Za-z]*)(?:$|(?=\s))")
  9. wikiheading_patterns = tuple(
  10. (level, re.compile("^{} (.*)[ \t]*=*$".format("=" * level)))
  11. for level in range(1, 7)
  12. )
  13. def convert_headers(self, line):
  14. for level_count, header in self.wikiheading_patterns:
  15. try:
  16. level = header.search(line).group(1)
  17. if level:
  18. line = "{} {}".format('#' * level_count, level.rstrip("= \r\t"))
  19. break # No need to check other heading levels
  20. except:
  21. pass # Try the next heading level
  22. return line
  23. def convert_to_creole(self, m):
  24. # Convert Trac's native link form to Creole's, so that rest of the code only has to deal with one format.
  25. # Creole's is easier to parse and harder to confuse with partially converted Markdown.
  26. text = m.group(1).strip()
  27. if " " in text:
  28. return "[[{0[0]}|{0[1]}]]".format(text.split(" ", 1))
  29. elif ":" in text or self.camelcase_pattern.match(text):
  30. return "[[{}]]".format(text)
  31. else:
  32. return m.group(0)
  33. # Probably most of the non-wiki scheme tests should become a table in an
  34. # extended JSON config file which maps
  35. #
  36. # { "source:fee/fie/foe/fum": "https://git.cryptech.is/blarg/blee/blue" }
  37. def convert_wikilinks(self, m):
  38. scheme, link, text = [p.strip() if p else p for p in m.groups()]
  39. if text is None:
  40. text = link
  41. if any(link.startswith(q) and link.endswith(q) for q in ('"', "'")):
  42. link = link[1:-1]
  43. if any(text.startswith(q) and text.endswith(q) for q in ('"', "'")):
  44. text = text[1:-1]
  45. if text == link and link.startswith("http") and "://" in link:
  46. return "<{}>".format(link)
  47. elif scheme == "attachment:":
  48. return "[{}]({{attach}}/{}/{})".format(text, self.slug, link)
  49. elif scheme in ("source:", "browser:"):
  50. return "[{}]({}/{})".format(text, self.source_url.rstrip("/"), link.lstrip("/"))
  51. elif scheme == "wiki:" or (scheme is None and self.camelcase_pattern.match(link)):
  52. return "[{}]({{filename}}{}.md)".format(text, link)
  53. else:
  54. return "[{}]({})".format(text, link)
  55. def convert_image(self, m):
  56. text = m.group(1).split(",")[0].strip()
  57. if "://" in text:
  58. return "<img src=\"{}\">".format(text)
  59. else:
  60. return "![{}]({{attach}}/{}/{})".format(text, self.slug, quote(text, ""))
  61. def __init__(self, source_url):
  62. self.source_url = source_url
  63. self.pattern_actions = (
  64. # Convert TracLinks to WikiCreole syntax to simplify remaining processing
  65. (re.compile(r"(?<!\[)\[([^][]+)\]"), self.convert_to_creole),
  66. # Convert CamelCase links to explicit links
  67. (self.camelcase_pattern, r"[[\1]]"),
  68. # Convert !x quoting
  69. (re.compile(r"!((?:\w|[#])+)"), r"\1"),
  70. # Convert (limited subset of) spans
  71. (re.compile(r"\[\[span\((?:[^][]*,)*([^(),]+)\)\]\]"), r"\1"),
  72. # Convert images
  73. (re.compile(r"\[\[Image\((.*)\)\]\]"), self.convert_image),
  74. # Delete Trac macros that have no useful counterpart
  75. (re.compile(r"\[\[PageOutline\]\]", re.I), r""),
  76. # Convert wiki links
  77. (re.compile(r"\[\[(wiki:|attachment:|source:|browser:)?([^]|[]+)(?:[|]([^][]+))?\]\]"), self.convert_wikilinks),
  78. # Convert striked through text
  79. (re.compile(r"~~([^~]+)~~"), r"<s>\1</s>"),
  80. # Convert line breaks -- Markdown spec says linebreak is <SPACE><SPACE><RETURN>, who am I to argue?
  81. (re.compile(r"\\\\$"), r" "),
  82. # Convert bold and italic text (do this last)
  83. (re.compile(r"'''"), r"**"),
  84. (re.compile(r"''"), r"*"),
  85. )
  86. def __call__(self, content, slug):
  87. self.slug = slug
  88. old_content = self.content_linebreak_pattern.sub("<br/>", content).splitlines()
  89. new_content = []
  90. code_block = False
  91. in_list = False
  92. in_table = False
  93. nested_level = 0
  94. prev_indent = 0
  95. while old_content:
  96. line = old_content.pop(0).rstrip()
  97. tail = ["\n"]
  98. while "{{{" in line or "}}}" in line:
  99. if "{{{" in line:
  100. code_block = True
  101. line = line.replace("{{{", "```")
  102. if "}}}" in line:
  103. code_block = False
  104. line = line.replace("}}}", "```")
  105. if not code_block:
  106. # Convert tables. References:
  107. # https://github.github.com/gfm/#tables-extension-
  108. # https://permatrac.noc.ietf.org/wiki/WikiFormatting#Tables
  109. # Table start: line containing "||"; table end: blank line?
  110. #
  111. # Figuring out whether there's a real header line is fun, trac doesn't require one, markdown does. Guess we can
  112. # add a dummy header if no better idea. Markdown requires delimiter line, which we add immediately after the
  113. # header, both appear to be mandatory. Trac can have label cells anywhere, not just in header, might need to
  114. # add "*" to those or just ignore the issue. Justification we can sort of figure out from the header,
  115. # if the rows do anything different, ouch, because markdown specifies in delimiter line.
  116. #
  117. # Might do something clever with the "=" markers and alignment, start with just getting the basic table
  118. # structure to something markdown will believe.
  119. if line.strip().startswith("||"):
  120. line = line.replace("=|", "|").replace("|=", "|")
  121. line = line.replace("||", "|")
  122. if not in_table:
  123. tail.append("|---" * (line.count("|") - 1) + "|\n")
  124. in_table = True
  125. elif in_table and not line.strip().startswith("||"):
  126. new_content.append("\n")
  127. in_table = False
  128. #
  129. # Convert bullet lists. The start and end of a list needs an empty line.
  130. #
  131. nested_line = line.lstrip(' ')
  132. if nested_line.startswith('- ') or nested_line.startswith('* '):
  133. indent = len(line) - len(nested_line)
  134. if not in_list:
  135. new_content.append("\n")
  136. nested_level = 0
  137. prev_indent = indent
  138. in_list = True
  139. text_indent = len(line) - len(nested_line[1:].lstrip())
  140. if indent > prev_indent:
  141. nested_level += 1
  142. elif indent < prev_indent:
  143. nested_level -= 1
  144. prev_indent = indent
  145. line = ' ' * nested_level + nested_line
  146. elif in_list and len(line) < len(nested_line) + text_indent:
  147. new_content.append("\n")
  148. in_list = False
  149. nested_level = 0
  150. prev_indent = 0
  151. text_indent = 0
  152. # Convert headers
  153. line = self.convert_headers(line)
  154. # Rest is regexp-driven conversions
  155. for pattern, action in self.pattern_actions:
  156. line = pattern.sub(action, line)
  157. new_content.append(line)
  158. new_content.extend(tail)
  159. del self.slug
  160. return "".join(new_content)