diff options
author | Rob Austein <sra@hactrn.net> | 2016-08-04 12:27:06 -0400 |
---|---|---|
committer | Rob Austein <sra@hactrn.net> | 2016-08-04 12:27:06 -0400 |
commit | c897c7cecf4134f20354e3dbba9438cbab706eaf (patch) | |
tree | 7066ad608aa285a2b80589e224d06a96e7421120 /doc/wiki-dump/convert-and-slurp-attachments.sh | |
parent | 949e9c8358b5259656c02e4a1ada7912d943afd2 (diff) |
Wiki->HTML->Markdown on all dumped pages, zip attachments.
Diffstat (limited to 'doc/wiki-dump/convert-and-slurp-attachments.sh')
-rwxr-xr-x | doc/wiki-dump/convert-and-slurp-attachments.sh | 18 |
1 files changed, 18 insertions, 0 deletions
#!/bin/sh -
#
# doc/wiki-dump/convert-and-slurp-attachments.sh
#
# For every dumped Wiki page in the current directory (every entry whose
# name contains no "."), fetch the rendered page from the Trac site,
# convert it Wiki->HTML->Markdown into "<page>.md", and download the
# page's attachments into "<page>.zip".
#
# Uses: curl, xsltproc (reads extract-wiki-content.xsl relative to the
# current directory), html2markdown, zipinfo.

# Loop-invariant, so assign it once instead of on every iteration.
base="https://trac.rpki.net"

# Iterate with a glob rather than parsing `ls` output: page names are
# passed through untouched (no `read` backslash or whitespace mangling).
for page in *
do
  # Dumped pages have no dot in their name; anything with one (including
  # the .md/.zip files this script writes) is not a page.
  case "$page" in
    *.*) continue ;;
  esac

  # In an empty directory the glob stays as a literal "*"; skip it.
  [ -e "$page" ] || [ -L "$page" ] || continue

  # Dump file names encode "/" as "%2F"; decode it to rebuild the Wiki path.
  path="/wiki/$(printf '%s\n' "$page" | sed 's=%2F=/=g')"

  # Fetch the Wiki page, extract the useful portion of the HTML, convert
  # that into Markdown.  --fail keeps an HTTP error body from being
  # converted and saved as if it were page content.
  curl --fail "${base}${path}" |
    xsltproc --html extract-wiki-content.xsl - |
    html2markdown --no-skip-internal-links --reference-links >"$page.md"

  # Clean up if the conversion produced nothing at all.
  [ -s "$page.md" ] || rm -f -- "$page.md"

  # Fetch a ZIP file containing any attachments, clean up if result is
  # empty or broken.
  curl --fail "${base}/zip-attachment${path}/" >"$page.zip"
  zipinfo "$page.zip" >/dev/null 2>&1 || rm -f -- "$page.zip"
done