diff options
Diffstat (limited to 'extract.py')
-rwxr-xr-x | extract.py | 128 |
1 files changed, 128 insertions, 0 deletions
#!/usr/bin/env python3
# Reconstructed from the diff that adds extract.py
# (new file, mode 100755, blob 368211c).

"""Export Trac wiki pages and attachments into a Pelican content tree.

Inputs, all looked up relative to the current working directory:

* ``trac.db``         -- SQLite database holding the ``wiki`` and
                         ``attachment`` tables queried below
* ``filter.json``     -- list of ``[action, pattern]`` pairs, see ``Filter``
* ``pelicanconf.py``  -- hard-linked into ``pelican/``
* ``attachments/``    -- attachment files, see ``attachment_link``

Outputs (both directories are deleted and recreated on every run):

* ``wiki/<slug>.trac``            -- wiki text as stored in the database
* ``pelican/content/<slug>.md``   -- metadata header plus converted text
* ``pelican/content/<id>/<file>`` -- hard links to the attachment files
"""

import fnmatch
import hashlib
import json
import os
import shutil
import sqlite3
import sys
import time
import urllib.parse
import argparse

# Every version of every wiki page, oldest version of each page first.
# "time" is divided by 1000000 so that it can be fed to time.gmtime().
wiki_query = '''
  SELECT
    name,
    author,
    version,
    time / 1000000 AS time,
    text
  FROM wiki
  ORDER BY
    name, version
'''

# All attachments whose type is 'wiki'.
attachment_query = '''
  SELECT
    id,
    filename,
    size,
    author,
    description,
    ipnr,
    time / 1000000 AS createdtime
  FROM
    attachment
  WHERE
    type = 'wiki'
  ORDER BY
    filename, time
'''


def attachment_link(row):
    """Return ``(src, dst)`` paths for one row of ``attachment_query``.

    ``src`` is ``attachments/wiki/<h1[:3]>/<h1>/<h2><ext>`` where ``h1`` and
    ``h2`` are the SHA-1 hex digests of ``row.id`` and ``row.filename`` and
    ``<ext>`` is the extension of ``row.filename``.

    ``dst`` is ``pelican/content/<quoted id>/<filename>``; the id is
    percent-quoted with no "safe" characters, so a "/" in it cannot create
    nested directories.
    """
    def sha1_hex(text):
        return hashlib.sha1(text.encode()).hexdigest()

    id_hash = sha1_hex(row.id)
    name_hash = sha1_hex(row.filename)
    extension = os.path.splitext(row.filename)[1]
    src = os.path.join("attachments", "wiki", id_hash[:3], id_hash,
                       name_hash + extension)
    dst = os.path.join("pelican", "content",
                       urllib.parse.quote(row.id, ""), row.filename)
    return src, dst


class Filter:
    """Decide which wiki pages to export, based on an ordered rule list.

    The rule file is a JSON list of ``[action, pattern]`` pairs.  ``action``
    is ``"+"`` (keep) or ``"-"`` (drop) and ``pattern`` is an ``fnmatch``
    glob.  The first rule whose pattern matches a name decides; names that
    match no rule are kept.
    """

    def __init__(self, filename = "filter.json"):
        """Load and validate the rules in ``filename``.

        Exits the program (``SystemExit``) if any action is not exactly
        ``"+"`` or ``"-"``.
        """
        with open(filename, encoding = "utf-8") as f:
            rules = json.load(f)
        for action, pattern in rules:
            # Compare against the two legal values rather than testing
            # ``action in "-+"``: the substring test also accepts "" and
            # "-+".  Checking inside the loop keeps the offending action
            # in scope for the error message.
            if action not in ("-", "+"):
                sys.exit("Bad action \"{}\" in filter".format(action))
        self.filter = tuple((action == "+", pattern) for action, pattern in rules)

    def __call__(self, name):
        """Return True if ``name`` should be kept, False otherwise."""
        for keep, pattern in self.filter:
            if fnmatch.fnmatch(name, pattern):
                return keep
        return True


class Row(sqlite3.Row):
    """``sqlite3.Row`` whose columns can also be read as attributes."""

    def __getattr__(self, name):
        # Only called when normal attribute lookup fails.  Unknown names
        # must surface as AttributeError (not the IndexError that
        # sqlite3.Row raises) so that hasattr() and getattr() with a
        # default keep working.
        try:
            return self[name]
        except IndexError:
            raise AttributeError(name) from None

    @property
    def isotime(self):
        """The row's ``time`` column formatted as ``YYYY-MM-DD HH:MM`` (UTC)."""
        return time.strftime("%Y-%m-%d %H:%M", time.gmtime(self.time))


def markdown_header(row, first_published):
    """Return the metadata header for one version of a wiki page.

    ``first_published`` maps page name to the ``isotime`` of the first
    version seen, and is updated in place.  The first version of a page gets
    Title/Author/Date; later versions keep that Date and add a ``Modified``
    line carrying their own time.  The header ends with a blank line.
    """
    if row.name in first_published:
        modtime = "Modified: {}\n".format(row.isotime)
    else:
        modtime = ""
        first_published[row.name] = row.isotime
    return "Title: {}\nAuthor: {}\nDate: {}\n{}\n".format(
        row.name, row.author, first_published[row.name], modtime)


def _reset_output_tree():
    """Delete and recreate the ``wiki`` and ``pelican/content`` directories."""
    for dn in ("wiki", "pelican"):
        try:
            shutil.rmtree(dn)
        except FileNotFoundError:
            # Nothing to delete on a first run.
            pass
    for dn in ("wiki", "pelican/content"):
        os.makedirs(dn)


def _export_wiki_pages(db, keep, wiki_to_markdown):
    """Write the .trac and .md file for every page version accepted by ``keep``.

    Rows arrive ordered by (name, version) and every version writes to the
    same two paths, so the files end up holding the last version of each
    page while ``markdown_header`` remembers the first version's date.
    """
    first_published = {}
    for row in db.execute(wiki_query):
        if not keep(row.name):
            continue
        slug = urllib.parse.quote(row.name, "")
        # Written as UTF-8 explicitly so that the output does not depend on
        # the locale the script happens to run under.
        with open("wiki/{}.trac".format(slug), "w", encoding = "utf-8") as f:
            f.write(row.text)
        md = markdown_header(row, first_published) + wiki_to_markdown(row.text, slug)
        with open("pelican/content/{}.md".format(slug), "w", encoding = "utf-8") as f:
            f.write(md)


def _link_attachments(db):
    """Hard-link every wiki attachment into the Pelican content tree."""
    for row in db.execute(attachment_query):
        src, dst = attachment_link(row)
        os.makedirs(os.path.dirname(dst), exist_ok = True)
        os.link(src, dst)


def main():
    """Parse the command line and regenerate ``wiki/`` and ``pelican/``."""
    ap = argparse.ArgumentParser()
    ap.add_argument("--source-url")
    args = ap.parse_args()

    # Imported here rather than at module level: only main() needs the
    # converter, and deferring it keeps the helpers above importable without
    # it.  It still happens before anything on disk is touched.
    import trac2md

    _reset_output_tree()
    os.link("pelicanconf.py", "pelican/pelicanconf.py")

    wiki_to_markdown = trac2md.Trac2Markdown(args.source_url)
    keep = Filter()

    db = sqlite3.connect("trac.db")
    try:
        db.row_factory = Row
        _export_wiki_pages(db, keep, wiki_to_markdown)
        _link_attachments(db)
    finally:
        # Close the connection even if an export step raises.
        db.close()


if __name__ == "__main__":
    main()