diff options
author | Rob Austein <sra@hactrn.net> | 2017-05-21 22:13:00 -0400 |
---|---|---|
committer | Rob Austein <sra@hactrn.net> | 2017-05-21 22:13:00 -0400 |
commit | 54dc2f126d4921985211b1732d34feaaa5dcb1f8 (patch) | |
tree | 760ba1e97191804f0b3c63efeaf076224df479ea |
First public version.
-rw-r--r-- | README.md | 387 | ||||
-rwxr-xr-x | git-remote-only | 19 | ||||
-rwxr-xr-x | zc | 576 |
3 files changed, 982 insertions, 0 deletions
diff --git a/README.md b/README.md new file mode 100644 index 0000000..fa05aba --- /dev/null +++ b/README.md @@ -0,0 +1,387 @@ +zc zone compiler +================ + +## Overview ## + +This is a small tool for generating DNS zones from relatively +simple text files, with some automation to handle complex or +repetitive tasks and to automate generation of reverse zone data. + +`zc` ("zone compiler") is a Python script which uses an external +package (Bob Halley's excellent [dnspython][] toolkit) to do a lot of +the heavy lifting. + +`zc` can be used either as a straightforward command line tool or as a +pair of `pre-receive` and `post-receive` hooks in a bare git +repository. In the latter mode, `zc` pulls its input data and +configuration directly from commits pushed to the master branch in the +git repository, using another external library ([GitPython][]). + +Upshot of all this is that, once the git repository has been set up, +you just clone a copy of the repository, edit flat text files with +your favorite editor, commit and push, and you're done. Compilation +will happen automatically when you push, any serious errors will abort +the push so you can fix them and try again, and output will be +installed automatically if there were no serious errors. + + +## Command line use ## + +If you just want to use `zc` as a command line tool, it's simple. +Usage as of this writing (subject to change, run `zc --help` for +current syntax): + + usage: zc [-h] [-o OUTPUT_DIRECTORY] [-l {debug,info,warning,error}] + input [input ...] + + Generate zone files from a simpl(er) flat text file. + + General intent here is to let users specify normal hosts in a simple + and compact format, with a few utilities we provide to automate + complex or repetitive stuff, including automatic generation of AAAA + RRs based on a mapping scheme from A RRs. 
+ + After generating the text of the forward zone, we run it through + dnspython's zone parser, then generate reverse zones by translating + the A and AAAA RRs in the forward zone into the corresponding PTR RRs. + + positional arguments: + input input file + + optional arguments: + -h, --help show this help message and exit + -o OUTPUT_DIRECTORY, --output-directory OUTPUT_DIRECTORY + directory for output files (default: .) + -l {debug,info,warning,error}, --log-level {debug,info,warning,error} + how loudly to bark (default: warning) + +You can supply more than one input file, `zc` will process them all +together before writing out any of the zone files. + + +## Use as git hooks ## + +When used as git hooks, configuration can't come from the command +line, so it comes from two places: + +1. A file called `config.json` in the repository, and + +2. Variables set in the configuration of the bare git repository where + the `pre-receive` and `post-receive` hooks are installed. + +The general idea is that stuff which should be under control of the +data owner is controlled by `config.json`, while stuff that should be +controlled by the server operator is controlled by configuration +variables in the server git repository. These are described below. + +The general idea is that we want to make sure that the zones compile +correctly *before* allowing the `git push` operation to complete, +then, assuming everything's OK, we want to install the zones *after* +the commit completes. + +All the real work happens in the `pre-receive` hook, the +`post-receive` hook's job is just to trigger final installation of the +output zone files after `git-receive-pack` finishes accepting the +push. If you don't understand git well enough to know what that +means, don't worry about it. If you want to learn more, see +[githooks][], but the learning curve necessary for the documentation +to make any sense is a bit steep, so bring a bag lunch. 
+ +`zc` determines whether it's running as one of the git hooks or not by +examining the name by which it was invoked: if the name ends with +`/pre-receive` or `/post-receive`, it's a hook, otherwise you get the +command line behavior. In practice, this means that you can just +install `zc`, symlink the correct hook names to wherever you +installed the `zc` script, and the right thing should happen. + + +### `config.json` settings ### + +The list of input files and the verbosity are set in the JSON file, +while the output directory is set in the git configuration on the +server where the bare git repository lives. + + { + "zones": ["foo.zone", "bar.zone"], + "log-level": "info" + } + +The `zones` parameter is mandatory, and specifies the names of the +input files within the git repository (at the moment we only look at +the top-level directory -- we could change this given a reason). + +The `log-level` parameter is optional, and defaults to `warning`. + + +### git server repository settings ### + +All of the following settings have defaults except for `zc.output-directory`. + + git --git-dir /where/ever.git config zc.output-directory /my/output/directory + +The `zc.output-directory` parameter in the git repository's +configuration file specifies the location of the directory to which +`zc` should write its final output. `zc` also uses this directory to +stash a FIFO which it uses to coordinate actions between the +`pre-receive` and `post-receive` hooks. + +There is no default for `zc.output-directory`; you must set it. + + git --git-dir /where/ever.git config zc.hook-timeout 15 + +`zc.hook-timeout` controls how many seconds the `pre-receive` hook +should wait for confirmation from the `post-receive` hook before +giving up. The default value of 15 seconds should be fine unless your +server is really slow. 
+ + git --git-dir /where/ever.git config zc.post-command 'rndc reload' + +`zc.post-command`, if set, specifies a command to run after all +generated files have been installed. The default is not to run any +such command. + + git --git-dir /where/ever.git config zc.log-file /var/log/zc/zc.log + git --git-dir /where/ever.git config zc.log-level warning + git --git-dir /where/ever.git config zc.log-file-hours 24 + git --git-dir /where/ever.git config zc.log-file-count 7 + +When running in git hook mode, `zc` can log to both `stderr` (which +git passes back to the user executing the push) and to a log file. +The `zc.log-*` parameters control the log file. + +`zc.log-file` is the name of the log file; if not set, `zc` will not +log to a file. + +`zc.log-level` is optional, and defaults to `warning`. + +`zc.log-file-hours` controls how many hours should elapse before `zc` +rotates its log file. The default is 24 hours. + +`zc.log-file-count` controls how many old log files `zc` should keep. +The default is 7. + + +## Zone generation ## + +Other than `config.json`, the input files to `zc` look the same +regardless of whether you're running `zc` on the command line or via +git hooks. + +While `zc` generates both forward and reverse zones, the underlying +mechanisms are (deliberately) very different, so it's simplest to +consider them separately. Forward zones are driven from human-edited +files, while reverse zones are generated completely automatically from +the corresponding forward zones. + + +### Forward zone generation ### + +Forward zone generation starts with a flat text file which is parsed +line-by-line to produce a forward zone file. There are three basic kinds +of lines in this file: + +1. Stuff passed unchanged through `zc`: blank lines, comments, raw DNS + RRs (for things other than addresses), and standard control + operations like $TTL and $ORIGIN. + +2. Name-address pairs, processed to generate A and AAAA RRs. + +3. 
Control operations, all of which have names starting with "$". + +`zc` requires that the text file start with a `$ORIGIN` control to +specify the name of the zone itself. + +Other than the above, the one bit of processing `zc` performs is +replacement of the string "@SERIAL@" in an SOA RR with a +seconds-since-epoch integer timestamp. + + +#### Name-address pairs #### + +"Name-address pairs" are exactly what they sound like: something that +a DNS zone file parser would consider a valid owner name, and an +IP address (IPv4 or IPv6). + +Processing of one name-address pair produces either one or two RRs, +depending on whether automatic generation of IPv6 addresses from IPv4 +addresses is enabled when `zc` processes this name-address pair (see +the `$MAP` control operation, below). + +Example: + + $MAP yes + + ; A couple of dual-stack hosts, with IPv6 addresses generated + ; algorithmically from the IPv4 addresses. + + tweedledee 10.0.0.1 + tweedledum 10.0.0.2 + + $MAP no + + ; Three single-stack hosts, addresses are what you see, RR type + ; inferred from the address family + + larry 10.0.0.3 + moe 2002:a00::4 + curly 10.0.0.5 + + +#### `$MAP` and `$MAP_RULE` #### + +The `$MAP` control operation enables or disables automatic generation +of IPv6 addresses from IPv4 addresses, according to zone-specific +mappings specified by the `$MAP_RULE` operation. + +`$MAP` is simple: it takes one argument, `yes` or `no` (`on`, +`off`, `true`, and `false` are allowed as aliases). + +`$MAP_RULE` takes two arguments: a prefix and a format string. You +can specify `$MAP_RULE` more than once to build up an ordered set of +mapping rules. When mapping is enabled, a given address will be +checked against the prefix of each rule in turn: the format string +from the first matching rule (if any) will be used to format the +mapped address. 
+ +Format strings are in the syntax used by Python's `str.format()` +operator; the `.format()` operator will be called with one argument, +the input address converted to a tuple of integers, one per byte in +the binary representation of the input address. So an input address +of `10.0.0.44` would yield the tuple `(10, 0, 0, 44)`, and so forth. + + $MAP_RULE 10.1.3.0/24 2002:a00:0000:f{0[2]}::{0[3]} + $MAP_RULE 10.1.0.0/16 2002:a00:0000:{0[2]}::{0[3]} + + $MAP on + + larry 10.1.2.3 + moe 10.1.3.2 + + $MAP off + + curly 10.1.4.1 + +This mechanism is intended primarily for mapping IPv4 addresses to +IPv6 addresses. The mechanism itself is address-family-agnostic: in +principle, it should work equally well in the other direction if you +can specify a useful set of rules, but the author has not tested this. + + +#### `$RANGE` #### + +The `$RANGE` control operation is a variation on the same general idea +as the (BIND9-specific) `$GENERATE` control operation, but is, in the +author's opinion, a bit easier both to use and to read. For all but +the most esoteric uses, it takes three or four arguments: + +1. A format string to generate the name field of the resulting RRs. + +2. A start address (IPv4 or IPv6). + +3. A stop address. + +4. An optional numeric `offset`. + +The basic idea here is to generate a sequence of A or AAAA RRs (type +selected automatically to fit the addresses provided) for every +address in the specified range, inclusive, with names generated +according to a format string containing a numeric field. + +An `offset` value of zero would start with the name generated by +applying the format string to the number zero; an `offset` value of +one would start with the name generated by applying the format string +to the number 1, and so forth. If the `offset` field isn't specified +at all, it defaults to the numeric value of the least significant +octet of the start address. + +Examples: + + ; Access points using $RANGE. 
This is equivalent to: + ; + ; ap-101 10.0.1.101 + ; ap-102 10.0.1.102 + ; ... + ; ap-200 10.0.1.200 + + $RANGE ap-{:d} 10.0.1.101 10.0.1.200 + + ; Switches, also using $RANGE, but with numbering explicitly + ; specified rather than inferred from the IPv4 addressing, + ; equivalent to: + ; + ; sw-1 10.0.3.17 + ; sw-2 10.0.3.18 + ; ... + ; sw-26 10.0.3.42 + + $RANGE sw-{:d} 10.0.3.17 10.0.3.42 1 + + ; Finally, a whole lot of DHCP client addresses, for IPv4 + ; addresses ranging from 10.1.0.50 to 10.2.255.254, names left as + ; an exercise for the reader. + + $RANGE dhcp-f{:03x} 10.1.0.50 10.2.255.254 50 + + +#### `$INCLUDE` and `$GENERATE` #### + +The `$INCLUDE` and `$GENERATE` control operators are not currently implemented. + +`$INCLUDE` is a standard control operator, but we appear to have no +current need for it. + +`$GENERATE` is a BIND-specific control operator. We could implement +it if there were a real need, but the `$RANGE` operator covers the +things for which we have been using `$GENERATE` in the forward zone. + +Our current use of `$GENERATE` in reverse zones is a source of +consistency problems, and is therefore unsupported with prejudice. + + +#### `$REVERSE_ZONE` #### + +The `$REVERSE_ZONE` control operation has no effect on the forward +zone. Rather, it's a mechanism for specifying the list of reverse +zones which should be generated from this forward zone. We include +this in the input source for the forward zone in order to keep all the +data describing the zone in one place. + +If you don't use the `$REVERSE_ZONE` control, `zc` will not generate +any reverse data for this forward zone. + +Sample: + + $REVERSE_ZONE 0.10.in-addr.arpa + $REVERSE_ZONE 1.10.in-addr.arpa + $REVERSE_ZONE 2.10.in-addr.arpa + $REVERSE_ZONE 0.0.0.0.0.0.a.0.2.0.0.2.ip6.arpa + + +### Reverse zone generation ### + +As noted above, reverse zones are generated entirely from data +extracted from the forward zone. 
This is deliberate: we are trying to +make sure that the reverse data corresponds to the forward data, and +giving the user an opportunity to get creative here is just asking for +trouble. + +The basic strategy is: + +* Create a reverse zone object for every name listed in the forward + zone's source via `$REVERSE_ZONE` operators. + +* For each `A` and `AAAA` RR in the forward zone, generate the + corresponding `PTR` RR and find the reverse zone in which that + RR belongs; whine for each PTR RR that doesn't fit into any + specified reverse zone. + +* Populate the zone apex data (SOA and apex NS RRsets) of each reverse + zone by copying the corresponding rdata from the forward zone. Yes, + this assumes that the forward and reverse zones are served by the + same servers; we could "fix" that given a need, but as of this + writing no such need exists, and this keeps it simple. + + +[dnspython]: http://www.dnspython.org +[GitPython]: https://github.com/gitpython-developers/GitPython +[githooks]: https://git-scm.com/docs/githooks diff --git a/git-remote-only b/git-remote-only new file mode 100755 index 0000000..27aab6d --- /dev/null +++ b/git-remote-only @@ -0,0 +1,19 @@ +#!/usr/bin/env python +# +# Restrict an ssh authorized_keys entry to be used only for git push +# and git fetch. Use thusly: +# +# command="git-remote-only /path/to/repository.git alice@example.org" ssh-rsa ABCDEF....== alice@example.org dedicated git key +# +# You might also want options like no-port-forwarding,no-X11-forwarding,no-agent-forwarding. + +import os, sys, shlex + +os.environ.update(GIT_REMOTE_ONLY_COMMAND = " ".join(sys.argv)) + +cmd = shlex.split(os.getenv("SSH_ORIGINAL_COMMAND", "")) + +if len(cmd) == 2 and cmd[0] in ("git-upload-pack", "git-receive-pack") and cmd[1] == sys.argv[1]: + os.execv("/usr/bin/" + cmd[0], cmd) + +sys.exit("Not authorized: {}".format(" ".join(cmd))) @@ -0,0 +1,576 @@ +#!/usr/bin/env python + +""" +Generate zone files from a simpl(er) flat text file. 
+ +General intent here is to let users specify normal hosts in a simple +and compact format, with a few utilities we provide to automate +complex or repetitive stuff, including automatic generation of AAAA +RRs based on a mapping scheme from A RRs. + +After generating the text of the forward zone, we run it through +dnspython's zone parser, then generate reverse zones by translating +the A and AAAA RRs in the forward zone into the corresponding PTR RRs. +""" + +from dns.rdatatype import A, AAAA, SOA, NS, PTR +from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, RawDescriptionHelpFormatter, FileType +from socket import inet_ntop, inet_pton, AF_INET, AF_INET6 +from collections import OrderedDict + +import dns.reversename +import dns.rdataclass +import dns.rdatatype +import dns.rdata +import dns.name +import dns.zone + +import logging.handlers +import subprocess +import logging +import atexit +import signal +import select +import fcntl +import stat +import time +import sys +import os + + +logger = logging.getLogger("zc") + +log_levels = OrderedDict((logging.getLevelName(i).lower(), i) + for i in (logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR)) + + +class Address(long): + """ + Addresses are integers with some extra code to handle conversion + to and from text strings. 
+ """ + + def __new__(cls, x): + if cls is Address and issubclass(x.__class__, Address): + cls = x.__class__ + if isinstance(x, (str, unicode)): + if cls is Address: + cls = V6 if ":" in x else V4 + x = int(inet_pton(cls.af, str(x)).encode("hex"), 16) + return long.__new__(cls, x) + + @property + def _bytestring(self): + if self < 0: + raise ValueError("value out of range") + return "{0:0{1}x}".format(self, self.bits / 4).decode("hex") + + def __str__(self): + return inet_ntop(self.af, self._bytestring) + + @property + def bytes(self): + return tuple(ord(b) for b in self._bytestring) + + @property + def mask(self): + return (1 << self.bits) - 1 + + @classmethod + def is_instance(cls, obj): + return isinstance(obj, cls) + +class V4(Address): + af = AF_INET + bits = 32 + rrtype = "A" + +class V6(Address): + af = AF_INET6 + bits = 128 + rrtype = "AAAA" + + +class Prefix(object): + """ + Prefixes are an address and a length. + """ + + def __init__(self, x, y = None): + if isinstance(x, (str, unicode)) and y is None: + x, y = x.split("/") + self.net = Address(x) + self.len = int(y) + if self.len < 0 or self.len > self.net.bits: + raise ValueError("Prefix length {0.len!s} is out of range for prefix {0.net!s}".format(self)) + + def __cmp__(self, other): + return cmp(self.net, other.net) or cmp(self.len, other.len) + + def __hash__(self): + return hash(self.net) ^ hash(self.len) + + def __int__(self): + return self.net + + def __long__(self): + return self.net + + def __str__(self): + return "{0.net!s}/{0.len!s}".format(self) + + @property + def subnet_mask(self): + return (self.net.mask >> self.len) ^ self.net.mask + + @property + def host_mask(self): + return ~self.subnet_mask & self.net.mask + + def matches(self, addr): + return self.net.__class__ is addr.__class__ and (self.net ^ addr) & self.subnet_mask == 0 + + +class ZoneGen(object): + """ + Parse input file, line-by-line. 
Lines can be: + + * Host-address pairs (generate A or AAAA RRs) + * DNS RRs (unchanged) + * Comments, blank lines (unchanged) + * Control operations: + + $ORIGIN <dns-name> + + $TTL <ttl-value> + + $MAP_RULE <prefix> <format> + + $MAP <boolean> + + $RANGE <start-addr> <stop-addr> [<offset> [<multiplier> [<mapaddr>]]] + + $REVERSE_ZONE <zone-name> [<zone-name> ...] + + At present $INCLUDE and $GENERATE are not supported: we don't really need the former, + and $RANGE is (intended as) a replacement for the latter. + """ + + def __init__(self, input, filename, now, reverse): + self.input = input + self.filename = filename + self.now = now + self.lines = [] + self.origin = None + self.cur_origin = None + self.map = OrderedDict() + self.reverse = [] + logger.info("Compiling zone %s", filename) + try: + for self.lineno, self.line in enumerate(input, 1): + self.line = self.line.rstrip() + part = self.line.partition(";") + token = part[0].split() + if token and token[0].startswith("$"): + handler = getattr(self, "handle_" + token[0][1:], None) + if handler is None: + raise ValueError("Unrecognized control operation") + handler(*token[1:]) + elif len(token) != 2: + if len(token) >= 9 and "SOA" in token: + self.line = self.line.replace("@SERIAL@", str(now)) + token[token.index("@SERIAL@")] = str(now) + if len(token) > 0: + self.check_dns(token) + self.lines.append(self.line) + else: + comment = " ;" + part[2] if part[2] else "" + name, addr = token[0], Address(token[1]) + self.rr(name, addr, comment) + if self.map_enable: + self.map_rr(name, addr, comment) + except Exception as e: + logger.error("{self.filename}:{self.lineno}: {e!s}: {self.line}\n".format(self = self, e = e)) + sys.exit(1) + fn = self.origin.to_text(omit_final_dot = True) + logger.debug("Generated zone file %s:", fn) + for i, line in enumerate(self.lines, 1): + logger.debug("[%5d] %s", i, line) + logger.debug("End of generated zone file %s", fn) + self.text = "\n".join(self.lines) + "\n" + self.zone = 
dns.zone.from_text(self.text, relativize = False, filename = fn) + self.build_reverse(reverse) + + def check_dns(self, token): + try: + dns.name.from_text(token.pop(0)) + if token[0].isdigit(): + del token[0] + if token[0].upper() == "IN": + del token[0] + rdtype = dns.rdatatype.from_text(token.pop(0)) + dns.rdata.from_text(dns.rdataclass.IN, rdtype, " ".join(token), self.cur_origin) + except: + raise ValueError("Syntax error") + + def rr(self, name, addr, comment = ""): + self.lines.append("{name:<23s} {addr.rrtype:<7s} {addr!s}{comment}".format( + name = name, addr = addr, comment = comment)) + + def map_rr(self, name, addr, comment = ""): + for prefix, format in self.map.iteritems(): + if prefix.matches(addr): + self.rr(name, Address(format.format(addr.bytes)), comment) + break + + def to_file(self, f, relativize = None): + f.write(self.text) # "relativize" ignored, present only for dnspython API compatability + + def handle_ORIGIN(self, origin): + self.cur_origin = dns.name.from_text(origin) + if self.origin is None: + self.origin = self.cur_origin + self.lines.append("$ORIGIN {}".format(self.cur_origin.to_text())) + + def handle_TTL(self, ttl): + self.lines.append(self.line) + + def handle_MAP_RULE(self, prefix, format): + self.map[Prefix(prefix)] = format + + _bool_names = dict(yes = True, no = False, on = True, off = False, true = True, false = False) + + def get_mapping_state(self, token): + try: + return self._bool_names[token.lower()] + except: + raise ValueError("Unrecognized mapping state") + + def handle_MAP(self, cmd): + self.map_enable = self.get_mapping_state(cmd) + + def handle_INCLUDE(self, name): + raise NotImplementedError("Not implemented") + + def handle_GENERATE(self, name, *args): + raise NotImplementedError("Not implemented (try $RANGE)") + + def handle_RANGE(self, fmt, start, stop, offset = None, multiplier = None, mapaddr = None): + start = Address(start) + stop = Address(stop) + offset = start.bytes[-1] if offset is None else int(offset, 
0) + multiplier = 1 if multiplier is None else int(multiplier, 0) + method = self.rr if mapaddr is None or not self.get_mapping_state(mapaddr) else self.map_rr + for i in xrange(stop - start + 1): + method(fmt.format(offset + i), start.__class__(start + i * multiplier)) + + def handle_REVERSE_ZONE(self, *names): + self.reverse.extend(dns.name.from_text(name) for name in names) + + def build_reverse(self, reverse): + + zones = [] + + for name in self.reverse: + if name not in reverse: + reverse[name] = dns.zone.Zone(name, relativize = False) + reverse[name].find_rdataset(rdtype = SOA, name = name, create = True).update( + self.zone.find_rdataset(rdtype = SOA, name = self.zone.origin)) + reverse[name].find_rdataset(rdtype = NS, name = name, create = True).update( + self.zone.find_rdataset(rdtype = NS, name = self.zone.origin)) + reverse[name].check_origin() + zones.append(reverse[name]) + + if not zones: + return + + for qtype in (A, AAAA): + for name, ttl, addr in self.zone.iterate_rdatas(qtype): + rname = dns.reversename.from_address(addr.to_text()) + rdata = name.to_wire() + rdata = dns.rdata.from_wire(self.zone.rdclass, PTR, rdata, 0, len(rdata)) + for z in zones: + if rname.is_subdomain(z.origin): + z.find_rdataset(rname, PTR, create = True).add(rdata, ttl) + break + else: + logger.warn("%29s (%-16s %s) does not match any given reverse zone", rname, addr, name) + + +class ZoneHerd(object): + """ + Collection of zones to be generated and written. 
This is a class + rather than a function to simplify doing all the real work up + front while deferring final installation until we've gone through + a confirmation dance when running as git {pre,post}-receive hooks + """ + + def __init__(self, inputs, outdir, tempword = "RENMWO"): + self.names = OrderedDict() + atexit.register(self.cleanup) + + now = int(time.time()) + reverse = OrderedDict() + forward = [ZoneGen(lines, name, now, reverse) for lines, name in inputs] + + header = ";; Generated by zc at {time}, do not edit by hand\n\n".format( + time = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(now))) + + os.chdir(outdir) + + pid = os.getpid() + + for z in reverse.values() + forward: + fn = z.origin.to_text(omit_final_dot = True) + tfn = ".~{}~{}~{}".format(pid, tempword, fn) + self.names[tfn] = fn + with open(tfn, "w") as f: + f.write(header) + z.to_file(f, relativize = False) + logger.info("Wrote %s", fn) + + def finish(self): + while self.names: + tfn, fn = self.names.popitem() + os.rename(tfn, fn) + logger.info("Installed %s", fn) + + def cleanup(self): + for tfn in self.names: + try: + os.unlink(tfn) + logger.debug("Unlinked %s", tfn) + except: + pass + + +class GitView(object): + """ + Wrapper around git code common to both hooks. 
+ """ + + all_zeros = "0" * 40 + + def __init__(self): + import git, json + self.repo = git.Repo() + self.gcfg = self.repo.config_reader() + self.configure_logging() + self.outdir = self.gcfg.get_value("zc", "output-directory") + self.timeout = self.gcfg.get_value("zc", "hook-timeout", 15) + self.postcmd = self.gcfg.get_value("zc", "post-command", "").split() + self.commit = None + for line in sys.stdin: + oldsha, newsha, refname = line.split() + if refname == "refs/heads/master" and newsha != self.all_zeros: + self.commit = newsha + break + if self.commit is not None: + tree = self.repo.commit(self.commit).tree + self.jcfg = json.loads(tree["config.json"].data_stream.read()) + log_level = self.jcfg.get("log-level", "warning").strip() + self.stderr_logger.setLevel(log_levels[log_level]) + self.zone_blobs = [tree[name] for name in self.jcfg["zones"]] + self.log_user_hook_commit() + + def configure_logging(self): + self.stderr_logger = logging.StreamHandler() + self.stderr_logger.setLevel(logging.WARNING) + self.stderr_logger.setFormatter(logging.Formatter("%(name)s %(levelname)s %(message)s")) + logging.getLogger().addHandler(self.stderr_logger) + logging.getLogger().setLevel(logging.DEBUG) + log_level = self.gcfg.get_value("zc", "log-level", "warning") + log_file = self.gcfg.get_value("zc", "log-file", "/var/log/zc/zc.log") + log_hours = self.gcfg.get_value("zc", "log-file-hours", 24) + log_count = self.gcfg.get_value("zc", "log-file-count", 7) + if log_file: + self.file_logger = logging.handlers.TimedRotatingFileHandler( + filename = log_file, + interval = log_hours, + backupCount = log_count, + when = "H", + utc = True) + self.file_logger.setFormatter(logging.Formatter( + "%(asctime)-15s %(name)s [%(process)s] %(levelname)s %(message)s")) + self.file_logger.setLevel(log_levels[log_level]) + logging.getLogger().addHandler(self.file_logger) + else: + self.file_logger = None + + def log_user_hook_commit(self): + logger.debug("Original SSH command: %s", 
os.getenv("SSH_ORIGINAL_COMMAND")) + logger.debug("authorized_keys command: %s", os.getenv("GIT_REMOTE_ONLY_COMMAND")) + user = os.getenv("GIT_REMOTE_ONLY_COMMAND", "").split() + user = user[2] if len(user) > 2 else "unknown" + logger.info("User %s running %s processing commit %s", user, sys.argv[0], self.commit) + + @property + def fifo_name(self): + return os.path.join(self.outdir, ".zc.fifo") + + +def daemonize(): + """ + Detach from parent process, in this case git, so that can report + success to git when running as a pre-receive hook while sticking + around to handle final installation of our generated zone files. + + Not sure how much of the following ritual is necessary, but some + of it definitely is (git push hangs if we just fork() and _exit()). + Sacrifice the rubber chicken and move on. + """ + + sys.stdout.flush() + sys.stderr.flush() + old_action = signal.signal(signal.SIGHUP, signal.SIG_IGN) + if os.fork() > 0: + os._exit(0) + os.setsid() + fd = os.open(os.devnull, os.O_RDWR) + os.dup2(fd, 0) + os.dup2(fd, 1) + os.dup2(fd, 2) + if fd > 2: + os.close(fd) + signal.signal(signal.SIGHUP, old_action) + + +def cli_main(): + + """ + Entry point for command line use. + """ + + parser = ArgumentParser(formatter_class = type("HF", (ArgumentDefaultsHelpFormatter, + RawDescriptionHelpFormatter), {}), + description = __doc__) + + parser.add_argument("-o", "--output-directory", + default = ".", + help = "directory for output files") + + parser.add_argument("-l", "--log-level", + choices = tuple(log_levels), + default = "warning", + help = "how loudly to bark") + + parser.add_argument("input", + nargs = "+", + type = FileType("r"), + help = "input file") + + args = parser.parse_args() + + logging.basicConfig(format = "%(message)s", level = log_levels[args.log_level]) + + herd = ZoneHerd(((input, input.name) for input in args.input), args.output_directory) + herd.finish() + + +def pre_receive_main(): + """ + Entry point for git pre-receive hook. 
+ + Do all the zone generation and write the files to disk under + temporary names, but defer final installation until we get + confirmation from the post-receive hook that git is done accepting + the push. Since git won't do this until after the pre-receive + hook exits, this hook has to daemonize itself after doing all the + real work, so that git can get on with the rest. + + This may be excessively paranoid, but git makes few promises about + what will happen if more than one push is active at the same time. + In theory, the lock on our FIFO is enough to force serialization, + but that can fail if, eg, somebody deletes the FIFO itself. So + our wakeup signal is receiving the commit hash through the FIFO + from the post-receive hook. + + If we don't get the right wakeup signal before a (configurable) + timeout expires, we clean up our output files and exit. + """ + + try: + gv = GitView() + if gv.commit is None: + logger.info("No commits on master branch, nothing to do") + sys.exit() + + if not os.path.exists(gv.fifo_name): + os.mkfifo(gv.fifo_name) + + fifo = os.open(gv.fifo_name, os.O_RDONLY | os.O_NONBLOCK) + + fcntl.flock(fifo, fcntl.LOCK_EX) + + if not stat.S_ISFIFO(os.fstat(fifo).st_mode): + raise RuntimeError("{} is not a FIFO!".format(gv.fifo_name)) + + herd = ZoneHerd(((blob.data_stream.read().splitlines(), blob.name) for blob in gv.zone_blobs), + gv.outdir, + gv.commit) + + logging.getLogger().removeHandler(gv.stderr_logger) + + daemonize() + + logger.info("Awaiting confirmation of commit %s before installing files", gv.commit) + + remaining = gv.timeout + confirmation = "" + + while remaining > 0: + t = time.time() + if not select.select([fifo], [], [], remaining)[0]: + break # Timeout + chunk = os.read(fifo, 1024) + if chunk == "": + break # EOF + confirmation += chunk + if gv.commit in confirmation.splitlines(): + logger.info("Commit %s confirmed", gv.commit) + herd.finish() # Success + if gv.postcmd: + logger.info("Running post-command %r", 
gv.postcmd) + proc = subprocess.Popen(gv.postcmd, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) + for line in proc.stdout.read().splitlines(): + logger.info(">> %s", line) + proc.stdout.close() + proc.wait() + break + remaining -= time.time() - t + + except Exception as e: + logger.error("%s", e) + +def post_receive_main(): + """ + Entry point for git post-receive hook. + + Zone files have already been generated and written, daemonized + pre-receive hook process is just waiting for us to confirm that + git has finished accepting push of this commit, which we do by + sending our commit hash to the pre-receive daemon. + """ + + try: + gv = GitView() + if gv.commit is not None: + with open(gv.fifo_name, "w") as f: + f.write(gv.commit + "\n") + except Exception as e: + logger.error("%s", e) + + +def main(): + """ + Entry point, just dispatch based on how we were invoked. + """ + + jane = os.path.basename(sys.argv[0]) + + if jane == "pre-receive": + pre_receive_main() + + elif jane == "post-receive": + post_receive_main() + + else: + cli_main() + + +if __name__ == "__main__": + main() |