summary refs log tree commit diff
path: root/extract.py
blob: ff1862a5af9c8d9fe53b26994edb497e12358020 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/usr/bin/env python3

import fnmatch
import hashlib
import json
import os
import shutil
import sqlite3
import sys
import time
import urllib.parse
import argparse

import trac2md

# Selects every stored revision of every wiki page, ordered by page name and
# then by version number, so all revisions of one page arrive together in
# ascending version order.  The stored timestamp is divided by 1000000 and
# exposed as `time`; Row.isotime passes that value to time.gmtime(), i.e. it
# is treated as seconds (presumably the database stores microseconds --
# confirm against the Trac schema).
wiki_query = '''
  SELECT
    name, 
    author,
    version, 
    time / 1000000 AS time, 
    text 
  FROM wiki
  ORDER BY
    name, version
'''

# Selects the metadata of every row in the attachment table whose type is
# 'wiki', ordered by filename and then by time.  The stored timestamp is
# divided by 1000000 and exposed as `createdtime`.  Of these columns, the
# code in this file only reads `id` and `filename` (see attachment_link).
attachment_query = '''
  SELECT
    id,
    filename,
    size,
    author,
    description,
    ipnr,
    time / 1000000 AS createdtime
  FROM
    attachment
  WHERE
    type = 'wiki'
  ORDER BY
    filename, time
'''

def attachment_link(row):
    """Compute the source and destination paths for one attachment row.

    ``row`` must provide ``id`` and ``filename`` both as attributes and via
    ``row["filename"]`` item access.

    Returns a ``(source, destination)`` tuple of relative paths:

    * source: ``attachments/wiki/<first 3 hex digits of sha1(id)>/<sha1(id)>/
      <sha1(filename)><original extension>``
    * destination: ``pelican/content/<id>/<filename>``
    """
    def sha1_hex(text):
        # Hex SHA-1 digest of the text's default (UTF-8) encoding.
        return hashlib.sha1(text.encode()).hexdigest()

    parent_digest = sha1_hex(row.id)
    name_digest = sha1_hex(row.filename)

    # Only the last extension of the original filename is kept on the source.
    _, extension = os.path.splitext(row["filename"])

    source = os.path.join(
        "attachments", "wiki", parent_digest[:3], parent_digest,
        name_digest + extension)
    destination = os.path.join("pelican", "content", row.id, row.filename)
    return source, destination

class Filter:
    """Decide which names to keep, using an ordered list of glob rules.

    The rules come from a JSON file holding a list of ``[action, pattern]``
    pairs.  ``action`` is ``"+"`` (keep) or ``"-"`` (drop) and ``pattern`` is
    an ``fnmatch``-style glob.  The first rule whose pattern matches a name
    decides the result; a name that matches no rule is kept.
    """

    def __init__(self, filename = "tools/filter.json"):
        """Load and validate the rules stored in ``filename``.

        Exits the program (``SystemExit``) with a message naming the
        offending action if any rule's action is not exactly ``"+"`` or
        ``"-"``.
        """
        with open(filename) as f:
            rules = json.load(f)
        for action, pattern in rules:
            # Compare against the two legal values explicitly: a substring
            # test against "-+" would also accept "" and "-+".
            if action not in ("-", "+"):
                sys.exit("Bad action \"{}\" in filter".format(action))
        # Stored as (keep?, pattern) pairs in file order.
        self.filter = tuple((action == "+", pattern) for action, pattern in rules)

    def __call__(self, name):
        """Return True if ``name`` should be kept, False if it is filtered out."""
        for action, pattern in self.filter:
            if fnmatch.fnmatch(name, pattern):
                return action
        # No rule matched: keep by default.
        return True

class Row(sqlite3.Row):
    """``sqlite3.Row`` whose columns can also be read as attributes.

    ``row.name`` is equivalent to ``row["name"]``.  Intended to be installed
    as a connection's ``row_factory``.
    """

    def __getattr__(self, name):
        """Return the column called ``name``.

        Raises ``AttributeError`` (not the ``IndexError`` that item lookup
        produces) when there is no such column, so that ``hasattr()`` and
        ``getattr(row, name, default)`` behave normally.
        """
        try:
            return self[name]
        except (IndexError, KeyError):
            raise AttributeError(name) from None

    @property
    def isotime(self):
        """The row's ``time`` column, taken as seconds since the epoch,
        formatted as ``YYYY-MM-DD HH:MM`` in UTC."""
        return time.strftime("%Y-%m-%d %H:%M", time.gmtime(self.time))

def markdown_header(row, first_published):
    """Build the metadata header that precedes a page's Markdown body.

    ``row`` supplies ``name``, ``author`` and ``isotime``.  ``first_published``
    maps page names to the timestamp of the first revision seen; it is
    updated in place the first time a page name is encountered.

    The first revision of a page gets only a ``Date:`` line; later revisions
    keep that original date and additionally get a ``Modified:`` line with
    their own timestamp.  The header always ends with a blank line.
    """
    page = row.name
    stamp = row.isotime
    if page not in first_published:
        # First revision of this page: remember its publication date.
        first_published[page] = stamp
        modified_line = ""
    else:
        modified_line = "Modified: {}\n".format(stamp)
    return "Title: {}\nAuthor: {}\nDate: {}\n{}\n".format(page, row.author, first_published[page], modified_line)

def main():
    """Export wiki pages and attachments from ``trac.db`` into a Pelican tree.

    Working in the current directory, this:

    * deletes and recreates ``wiki/`` and ``pelican/`` (with
      ``pelican/content/``), and hard-links ``pelicanconf.py`` into
      ``pelican/``;
    * for every wiki revision accepted by the filter, writes the raw text to
      ``wiki/<url-quoted name>.trac`` and the converted Markdown, with a
      metadata header, to ``pelican/content/<name>.md``.  Revisions arrive
      in ascending version order and each write truncates the file, so the
      highest version is what remains on disk;
    * hard-links every wiki attachment from its path under ``attachments/``
      to ``pelican/content/<page id>/<filename>``.

    The optional ``--source-url`` command-line argument is handed to the
    ``trac2md.Trac2Markdown`` converter.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--source-url")
    args = ap.parse_args()

    # Start from a clean slate so stale output from an earlier run cannot
    # survive into this one.
    for dn in ("wiki", "pelican"):
        if os.path.exists(dn):
            shutil.rmtree(dn)

    for dn in ("wiki", "pelican/content"):
        os.makedirs(dn)

    os.link("pelicanconf.py", "pelican/pelicanconf.py")

    wiki_to_markdown = trac2md.Trac2Markdown(args.source_url)

    keep = Filter()

    # Page name -> timestamp of the first revision seen (see markdown_header).
    first_published = {}

    db = sqlite3.connect("trac.db")
    try:
        db.row_factory = Row

        for row in db.execute(wiki_query):
            if not keep(row.name):
                continue
            # Quote everything, including "/", so the raw dump stays flat.
            # Write UTF-8 explicitly so the output does not depend on the
            # locale's default encoding.
            with open("wiki/{}.trac".format(urllib.parse.quote(row.name, "")), "w", encoding="utf-8") as f:
                f.write(row.text)
            md = markdown_header(row, first_published) + wiki_to_markdown(row.text, row.name)
            # Page names may contain "/", which becomes a subdirectory here.
            fn = "pelican/content/{}.md".format(row.name)
            os.makedirs(os.path.dirname(fn), exist_ok=True)
            with open(fn, "w", encoding="utf-8") as f:
                f.write(md)

        for row in db.execute(attachment_query):
            src, dst = attachment_link(row)
            os.makedirs(os.path.dirname(dst), exist_ok=True)
            os.link(src, dst)
    finally:
        # Release the database even if the export fails part-way through.
        db.close()

# Run the export only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()