summaryrefslogtreecommitdiff
path: root/extract.py
diff options
context:
space:
mode:
Diffstat (limited to 'extract.py')
-rwxr-xr-xextract.py128
1 files changed, 128 insertions, 0 deletions
diff --git a/extract.py b/extract.py
new file mode 100755
index 0000000..368211c
--- /dev/null
+++ b/extract.py
@@ -0,0 +1,128 @@
+#!/usr/bin/env python3
+
+import fnmatch
+import hashlib
+import json
+import os
+import shutil
+import sqlite3
+import sys
+import time
+import urllib.parse
+import argparse
+
+import trac2md
+
# Selects every stored revision of every wiki page, sorted by page name and
# then by ascending version number.  The stored timestamp is divided by
# 1000000 before use; the result is later fed to time.gmtime(), so the column
# presumably holds microseconds since the epoch -- confirm against the schema.
wiki_query = '''
    SELECT
        name,
        author,
        version,
        time / 1000000 AS time,
        text
    FROM wiki
    ORDER BY
        name, version
'''
+
# Selects the metadata of every attachment whose type is 'wiki', sorted by
# file name and then by timestamp.  The timestamp is scaled by 1/1000000 and
# exposed as "createdtime" (presumably microseconds -> seconds; confirm
# against the schema).
attachment_query = '''
    SELECT
        id,
        filename,
        size,
        author,
        description,
        ipnr,
        time / 1000000 AS createdtime
    FROM
        attachment
    WHERE
        type = 'wiki'
    ORDER BY
        filename, time
'''
+
def attachment_link(row):
    """Return the ``(source, destination)`` paths for one wiki attachment.

    ``row`` is any mapping-style record (a ``sqlite3.Row``, a dict, ...)
    whose ``"id"`` and ``"filename"`` items are strings.

    Returns a 2-tuple of relative paths:

    * source:
      ``attachments/wiki/<sha1(id)[:3]>/<sha1(id)>/<sha1(filename)><ext>``,
      where ``<ext>`` is the extension of the original file name.
      NOTE(review): presumably this mirrors the on-disk layout of the Trac
      attachment store -- confirm against the installation being exported.
    * destination: ``pelican/content/<quoted id>/<filename>``, where the id
      is percent-quoted with no "safe" characters, so "/" is escaped too.
    """
    def sha1_hex(text):
        # Hex SHA-1 digest of the UTF-8 encoding of ``text``.
        return hashlib.sha1(text.encode("utf-8")).hexdigest()

    # Item access only, so the function does not depend on the attribute
    # sugar provided by a particular row class.
    owner_id = row["id"]
    filename = row["filename"]

    id_digest = sha1_hex(owner_id)
    extension = os.path.splitext(filename)[1]

    source = os.path.join(
        "attachments", "wiki", id_digest[:3], id_digest,
        sha1_hex(filename) + extension)
    destination = os.path.join(
        "pelican", "content", urllib.parse.quote(owner_id, ""), filename)
    return source, destination
+
class Filter:
    """Callable that decides whether a name should be kept.

    The rules are read from a JSON file containing a list of
    ``[action, pattern]`` pairs.  ``action`` is ``"+"`` (keep) or ``"-"``
    (drop) and ``pattern`` is an ``fnmatch``-style glob.  Rules are tried in
    file order and the first matching pattern wins; a name that matches no
    rule is kept.
    """

    def __init__(self, filename="filter.json"):
        """Load and validate the rules stored in ``filename``.

        Exits the program (``SystemExit``) with a message naming the
        offending action if any rule uses something other than "+" or "-".
        """
        with open(filename, encoding="utf-8") as f:
            rules = json.load(f)
        for action, pattern in rules:
            # Compare against the two whole tokens: a substring test such as
            # `action in "-+"` would also accept "" and "-+".
            if action not in ("-", "+"):
                sys.exit("Bad action \"{}\" in filter".format(action))
        # Stored as (keep?, pattern) pairs so __call__ can return the flag.
        self.filter = tuple((action == "+", pattern) for action, pattern in rules)

    def __call__(self, name):
        """Return True if ``name`` should be kept, False if it is dropped."""
        for keep, pattern in self.filter:
            if fnmatch.fnmatch(name, pattern):
                return keep
        # No rule matched: keep by default.
        return True
+
class Row(sqlite3.Row):
    """``sqlite3.Row`` whose columns can also be read as attributes.

    ``row.name`` is equivalent to ``row["name"]``.  Intended to be installed
    as a connection's ``row_factory``.
    """

    def __getattr__(self, name):
        """Return the column called ``name``.

        Raises ``AttributeError`` (not the ``IndexError`` that item lookup
        produces) when there is no such column, so that ``hasattr()`` and
        ``getattr(row, name, default)`` behave normally.
        """
        try:
            return self[name]
        except IndexError:
            raise AttributeError(name) from None

    @property
    def isotime(self):
        """The row's ``time`` column formatted as ``YYYY-MM-DD HH:MM`` in UTC.

        ``time`` is interpreted as seconds since the epoch.
        """
        return time.strftime("%Y-%m-%d %H:%M", time.gmtime(self.time))
+
def markdown_header(row, first_published):
    """Build the metadata header placed at the top of a generated page.

    ``row`` must provide ``name``, ``author`` and ``isotime`` attributes.
    ``first_published`` maps page name to the ``isotime`` of the first row
    seen for that page; it is updated in place the first time a name is
    encountered.

    The header always has Title, Author and Date lines, where Date is the
    recorded first-seen time.  For any later row with the same name a
    Modified line carrying that row's own time is added.  The header ends
    with a blank line.
    """
    page = row.name
    if page not in first_published:
        # First row seen for this page: remember its date, no Modified line.
        first_published[page] = row.isotime
        modified = ""
    else:
        modified = "Modified: {}\n".format(row.isotime)
    return "Title: {}\nAuthor: {}\nDate: {}\n{}\n".format(page, row.author, first_published[page], modified)
+
def main():
    """Export the wiki stored in ``trac.db`` into ``wiki/`` and ``pelican/``.

    Steps, all relative to the current directory:

    1. Remove any existing ``wiki`` and ``pelican`` trees and recreate
       ``wiki`` and ``pelican/content``.
    2. Hard-link ``pelicanconf.py`` into ``pelican/``.
    3. For every wiki row accepted by the ``filter.json`` rules, write the
       raw text to ``wiki/<slug>.trac`` and the converted Markdown (with a
       metadata header) to ``pelican/content/<slug>.md``.
    4. Hard-link every wiki attachment below ``pelican/content``.

    The only command-line option is ``--source-url``, which is handed to the
    ``trac2md.Trac2Markdown`` converter.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--source-url")
    args = ap.parse_args()

    # Start from a clean slate.  On a first run the output directories do
    # not exist yet, so only remove what is actually there.
    for dn in ("wiki", "pelican"):
        if os.path.isdir(dn):
            shutil.rmtree(dn)

    for dn in ("wiki", "pelican/content"):
        os.makedirs(dn)

    os.link("pelicanconf.py", "pelican/pelicanconf.py")

    wiki_to_markdown = trac2md.Trac2Markdown(args.source_url)

    keep = Filter()

    # Page name -> date of the first row seen; filled in by markdown_header.
    first_published = {}

    db = sqlite3.connect("trac.db")
    try:
        db.row_factory = Row

        # Rows arrive ordered by name and version, and every version of a
        # page is written to the same two files, so the files end up holding
        # the last (highest) version while markdown_header still sees every
        # version in order.
        for row in db.execute(wiki_query):
            if not keep(row.name):
                continue
            slug = urllib.parse.quote(row.name, "")
            # Explicit UTF-8 so the output does not depend on the locale.
            with open("wiki/{}.trac".format(slug), "w", encoding="utf-8") as f:
                f.write(row.text)
            md = markdown_header(row, first_published) + wiki_to_markdown(row.text, slug)
            with open("pelican/content/{}.md".format(slug), "w", encoding="utf-8") as f:
                f.write(md)

        # NOTE(review): attachments are not passed through `keep`, so files
        # attached to filtered-out pages are still linked -- confirm that
        # this is intended.
        for row in db.execute(attachment_query):
            src, dst = attachment_link(row)
            os.makedirs(os.path.dirname(dst), exist_ok=True)
            os.link(src, dst)
    finally:
        # Release the database even if a conversion or link step fails.
        db.close()


if __name__ == "__main__":
    main()