From 85d531d8f26cdb6db4be902f0a6580bd94309862 Mon Sep 17 00:00:00 2001 From: Rob Austein Date: Wed, 13 Jun 2012 00:27:28 +0000 Subject: htmldoc doesn't understand SVG, so generate PNG, scaled to a size that fits on a PDF page as understood by htmldoc. Resulting image quality leaves something to be desired, but not obvious how to do better. svn path=/trunk/; revision=4535 --- buildtools/pull-doc-from-wiki.py | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) (limited to 'buildtools/pull-doc-from-wiki.py') diff --git a/buildtools/pull-doc-from-wiki.py b/buildtools/pull-doc-from-wiki.py index 946b01ae..e75a4e91 100644 --- a/buildtools/pull-doc-from-wiki.py +++ b/buildtools/pull-doc-from-wiki.py @@ -37,6 +37,7 @@ import lxml.etree import urllib import urlparse import subprocess +import tempfile # Main program, up front so it doesn't get lost under all the XSL @@ -52,7 +53,7 @@ def main(): def usage(msg = 0): sys.stderr.write("Usage: %s %s\n" % ( - sys.argv[0], " ".join("[%s value]" % o[:-1] if o.endswith("=") else "[%s]" % o + sys.argv[0], " ".join("[--%s value]" % o[:-1] if o.endswith("=") else "[--%s]" % o for o in options))) sys.stderr.write(__doc__) sys.exit(msg) @@ -88,12 +89,29 @@ def main(): lxml.etree.ElementTree(xml_title).write(htmldoc.stdin) + png_fns = [] + for url in urls: path = urlparse.urlparse(url).path page = xsl_get_page(lxml.etree.parse(urllib.urlopen(url)).getroot(), basename = repr(base), path = repr(path)) + for img in page.xpath("//img | //object | //embed"): + attr = "data" if img.tag == "object" else "src" + img_url = img.get(attr) + if img_url.endswith(".svg"): + #sys.stderr.write("Converting %s to PNG\n" % img_url) + svg = tempfile.NamedTemporaryFile(suffix = ".svg") + svg.write(urllib.urlopen(img_url).read()) + svg.flush() + png_fd, png_fn = tempfile.mkstemp(suffix = ".png") + subprocess.check_call(("convert", "-resize", "800x800>", svg.name, png_fn)) + svg.close() + os.close(png_fd) + img.set(attr, png_fn) + png_fns.append(png_fn) + page.write(htmldoc.stdin) html2text = subprocess.Popen(("html2text", "-rcfile", h2trc, "-nobs", "-ascii"), @@ -125,6 +143,8 @@ def main(): htmldoc.wait() sys.stderr.write("Wrote %s\n" % pdf) + for png_fn in png_fns: + os.unlink(png_fn) # HTMLDOC title page. At some point we might want to generate this # dynamically as an ElementTree, but static content will do for the @@ -248,6 +268,22 @@ xsl_get_page = lxml.etree.XSLT(lxml.etree.XML('''\ + + + + + + + + + + + + + + -- cgit v1.2.3