# $Id$
#
# Copyright (C) 2010 Internet Systems Consortium ("ISC")
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
# AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.
#
# Clean up html2text's output.  html2text is pretty good at the
# impossible job of converting HTML to readable flat text, but has its
# own peculiarities.  In particular, it tends to generate either too
# few or too many blank lines, depending on how it's been configured.
# Simplest fix is to configure it to err on the side of too many
# blank lines, then suppress the extraneous ones.
#
# We might add other fixups here later, but this will suffice for now.
#
# Net effect of the rules below:
#   - blank lines before the first line of text are dropped;
#   - any run of blank lines between two text lines becomes exactly one
#     empty line;
#   - blank lines after the last line of text are dropped.
# A line counts as "blank" when it has no fields (NF == 0), so lines
# holding only whitespace are treated the same as empty lines.

# Blank line: never printed directly.  Once some text has been emitted,
# note that a separator is owed; it is only written if more text follows.
!NF {
    if (started)
        gap_pending = 1
    next
}

# Text line: pay off a pending separator (as a truly empty line), then
# print the text unchanged.
{
    if (gap_pending)
        print ""
    gap_pending = 0
    started = 1
    print
}