author    Rob Austein <sra@hactrn.net>    2006-08-29 02:53:23 +0000
committer Rob Austein <sra@hactrn.net>    2006-08-29 02:53:23 +0000
commit    2652f84d9b22a3b84abd8dfbbd6985b5e1bde14b (patch)
tree      5218fec5e92b09fe9d5441e05398e12cc3bc60e3
parent    1acfd11031c55a029562891e6607b4b458518e6d (diff)
Checkpoint
svn path=/scripts/rcynic-prototype.pl; revision=238
-rw-r--r--  scripts/rcynic-prototype.pl  301
1 file changed, 301 insertions, 0 deletions
diff --git a/scripts/rcynic-prototype.pl b/scripts/rcynic-prototype.pl
new file mode 100644
index 00000000..43690d02
--- /dev/null
+++ b/scripts/rcynic-prototype.pl
@@ -0,0 +1,301 @@
+# $Id$
+
+# This is a PROTOTYPE of rcynic, just to see whether I have the
+# general algorithms and data flow right.
+
+use strict;
+
+my $root = "rcynic-data";
+my $trust_anchor_tree = "$root/trust-anchors";
+my $preaggregated_tree = "$root/preaggregated";
+my $unauthenticated_tree = "$root/unauthenticated";
+my $authenticated_tree = "$root/authenticated";
+my $temporary_tree = "$root/temporary";
+
+# Read config
+
+my @anchors;
+my @preaggregated;
+
+while (<>) {
+ chomp;
+ next if (/^\s*$/ || /^\s*[;\#]/);
+ my @argv = split;
+ if ($argv[0] eq "anchor") {
+ push(@anchors, $argv[1]);
+ } elsif ($argv[0] eq "preaggregated") {
+ push(@preaggregated, $argv[1]);
+ } else {
+ die("Could not parse: $_\n");
+ }
+}
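+
+# For reference, a config file for this prototype is just lines of the
+# form below (values here are hypothetical; see the notes at the end of
+# this file about how trust anchors should really be named):
+#
+#   anchor         repository.example.net/ta.cer
+#   preaggregated  rsync://mirror.example.org/rpki/
+#
+# Blank lines and lines starting with ";" or "#" are ignored.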
+
+sub mkdir_maybe {
+ my $dir = shift;
+ $dir =~ s=[^/]+$==;
+ !system("mkdir", "-p", $dir)
+ or die("Couldn't make $dir\n")
+ unless (-d $dir);
+}
+
+sub uri_to_filename {
+ local $_ = shift;
+ if ($_) {
+ die("Not an rsync URI: $_\n")
+ unless (m=^rsync://=);
+ s=^rsync://==;
+ die("Evil character sequences in URI: $_\n")
+ if (m=^/= || m=^\.\.$= || m=^\.\./= || m=/\.\./= || m=/\.\.$= || m=//=);
+ }
+ return $_;
+}
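+
+# For example (hypothetical URI), uri_to_filename() maps
+#
+#   rsync://repository.example.net/rpki/ca.cer
+#
+# to "repository.example.net/rpki/ca.cer", which can then be used
+# relative to any of the local trees above.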
+
+sub extract_cert_uris {
+ my $uri = shift;
+ my $dir = shift;
+ my $file = uri_to_filename($uri);
+ my %res = (file => $file, uri => $uri);
+ my ($a, $s, $c);
+ local $_;
+ open(F, "-|", qw(openssl x509 -noout -inform DER -text -in), "$dir/$file")
+ or die("Couldn't run openssl x509 on $file: $!\n");
+ while (<F>) {
+ chomp;
+ s{^.+URI:rsync://}{};
+ $a = $. + 1
+ if (/Authority Information Access:/);
+ $s = $. + 1
+ if (/Subject Information Access:/);
+ $c = $. + 1
+ if (/X509v3 CRL Distribution Points:/);
+ $res{aia} = $_
+ if ($a && $. == $a);
+ $res{sia} = $_
+ if ($s && $. == $s);
+ $res{cdp} = $_
+ if ($c && $. == $c);
+ }
+ close(F);
+ if ($res{sia} && $res{sia} !~ m=/$=) {
+ warn("Badly formatted AIA URI, compensating: $res{sia}\n");
+ $res{sia} .= "/";
+ }
+ return \%res;
+}
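+
+# extract_cert_uris() depends on "openssl x509 -text" printing the
+# interesting URI on the line *after* each extension header, roughly as
+# follows (exact formatting varies between OpenSSL versions):
+#
+#   Authority Information Access:
+#       CA Issuers - URI:rsync://host/path/parent.cer
+#   Subject Information Access:
+#       CA Repository - URI:rsync://host/path/
+#   X509v3 CRL Distribution Points:
+#       URI:rsync://host/path/ca.crl
+#
+# The substitution above strips everything up through "URI:rsync://",
+# so the recorded aia/sia/cdp values are already in local filename form.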
+
+sub copy_cert {
+ my $name = shift;
+ my $indir = shift || $unauthenticated_tree;
+ my $outdir = shift || $temporary_tree;
+ mkdir_maybe("$outdir/$name");
+ !system("openssl", "x509", "-inform", "DER", "-in", "$indir/$name", "-outform", "PEM", "-out", "$outdir/$name")
+ or die("Couldn't copy $indir/$name to $outdir/$name\n");
+}
+
+sub copy_crl {
+ my $name = shift;
+ my $indir = shift || $unauthenticated_tree;
+ my $outdir = shift || $authenticated_tree;
+ mkdir_maybe("$outdir/$name");
+ !system("openssl", "crl", "-inform", "DER", "-in", "$indir/$name", "-outform", "PEM", "-out", "$outdir/$name")
+ or die("Couldn't copy $indir/$name to $outdir/$name\n");
+}
+
+# Create any missing directories.  mkdir_maybe() strips the final path
+# component, hence the trailing slash in the argument below.
+
+mkdir_maybe("$_/")
+ foreach (($trust_anchor_tree, $preaggregated_tree, $unauthenticated_tree, $authenticated_tree, $temporary_tree));
+
+# Pull over any pre-aggregated data. We'll still have to check
+# signatures in all of this, it's just a convenience to get us
+# started.
+
+for my $uri (@preaggregated) {
+ my $dir = uri_to_filename($uri);
+ !system("rsync", "-ai", $uri, "$preaggregated_tree/$dir")
+ or die("Couldn't rsync from $uri\n");
+}
+
+# Update our unauthenticated tree from the pre-aggregated data. Will
+# need to pay attention to rsync parameters here to make sure we don't
+# overwrite newer stuff.
+
+!system("rsync", "-ai", "$preaggregated_tree/", "$unauthenticated_tree/")
+ or die("Couldn't rsync $preaggregated_tree/ to $unauthenticated_tree/\n");
+
+# Local trust anchors always win over anything else, so seed our
+# authenticated tree with them.
+
+copy_cert($_, $trust_anchor_tree, $authenticated_tree)
+ foreach (@anchors);
+
+my %certs;
+
+sub check_crl {
+ die "NIY";
+}
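+
+# A sketch of what check_crl() might grow into, under some assumptions:
+# the issuer cert has already been blessed into the authenticated tree
+# in PEM form, and "openssl crl ... -CAfile" reports "verify OK" on
+# success.  Going through the shell to capture stderr is crude, but
+# this is a prototype.
+
+sub check_crl_sketch {
+  my $name = shift;		# CRL name, DER, in unauthenticated tree
+  my $issuer = shift;		# issuer cert name, PEM, in authenticated tree
+  my $result = `openssl crl -inform DER -noout -in $unauthenticated_tree/$name -CAfile $authenticated_tree/$issuer 2>&1`;
+  return 0 unless ($result =~ /verify OK/);
+  copy_crl($name);		# blessed: convert to PEM in authenticated tree
+  return 1;
+}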
+
+# Start with a -verified- cert here (because we start with trust
+# anchors): pull uris, rsync the cdp and check the resulting crl,
+# rsync the sia, then walk through the resulting certs, one at a time,
+# verifying each one and calling this function recursively.
+#
+# $1: cert we're examining
+# &rest: ancestor certs (er, and crls?)
+
+sub check_cert {
+ die "NIY";
+}
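+
+# A sketch of the walk check_cert() is meant to perform, per the
+# comment above.  Everything here is provisional: there is no real
+# path validation (just a single-issuer "openssl verify -CAfile"), no
+# 3779 checking, no limit yet on how many uris we follow, and it
+# assumes a DER copy of the cert is present in the unauthenticated
+# tree (true for everything except, perhaps, the trust anchors).
+
+sub check_cert_sketch {
+  my ($uri, @ancestors) = @_;
+  my $file = uri_to_filename($uri);
+  my $uris = extract_cert_uris($uri, $unauthenticated_tree);
+  if ($uris->{cdp}) {
+    mkdir_maybe("$unauthenticated_tree/$uris->{cdp}");
+    system("rsync", "-ai", "rsync://$uris->{cdp}", "$unauthenticated_tree/$uris->{cdp}");
+    check_crl_sketch($uris->{cdp}, $file);
+  }
+  my $sia = $uris->{sia};
+  return unless ($sia);
+  mkdir_maybe("$unauthenticated_tree/$sia");
+  system("rsync", "-ai", "--recursive", "rsync://$sia", "$unauthenticated_tree/$sia");
+  for my $child (glob("$unauthenticated_tree/$sia*.cer")) {
+    (my $name = $child) =~ s=^\Q$unauthenticated_tree\E/==;
+    copy_cert($name);		# DER -> PEM, into temporary tree
+    my $ok = `openssl verify -CAfile $authenticated_tree/$file $temporary_tree/$name`;
+    next unless ($ok =~ /: OK$/m);
+    copy_cert($name, $unauthenticated_tree, $authenticated_tree);
+    check_cert_sketch("rsync://$name", $uri, @ancestors);
+  }
+}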
+
+# Now start walking the tree, starting with our trust anchors.
+
+for my $anchor (@anchors) {
+ check_cert($anchor);
+}
+
+die "NIY";
+
+# for now we will need to fix up sia uris, as they are missing trailing
+# slashes; have asked about this on rescert.
+
+# walk tree starting from trust anchors, do the validate/fetch cycle
+#
+# still probably easiest to build the chains using the aia uris.
+
+# hmm, may need to have config file tell us the uris associated with
+# our trust anchors, otherwise (a) how do we name them in uri space
+# and (b) how do we check that their children have the right sia uri?
+# taking the children's word for what the parent's uri should be seems
+# wrong. maybe we just insist that our trust anchors have filenames
+# that match our mapping of uris to filenames....
+
+
+################################################################
+#
+# Date: Sat, 19 Aug 2006 02:53:25 -0400
+# From: Rob Austein <sra@hactrn.net>
+# Subject: rcynic design
+# Message-Id: <20060819065325.B4C525C53@thrintun.hactrn.net>
+#
+# overall tasks: collect certificates from publication points, assemble
+# them into a local certificate store, perform validation checks on all
+# of them, discarding the ones that don't pass. output is a valid
+# repository containing a snapshot of all the (valid, accessible)
+# certificates in the rpki system. also want to provide ability for
+# others to synchronize from this repository, so mustn't do anything
+# that precludes serving results via rsync. code should also support
+# building a validated repository purely from locally maintained data.
+#
+# inputs to the process:
+#
+# - a (small) set of trust anchors
+#
+# - zero or more rsync uris for pre-aggregated object collections
+#
+# - a configuration file containing or pointing to the above inputs and
+# whatever other parameters we turn out to need.
+#
+# i was initially arguing for a collection phase followed by a
+# validation phase after fetching all the data. randy convinced me that
+# we don't want to follow uris that didn't come from local config or a
+# cert we've already checked. most paranoid version of this would
+# involve pulling one directory at a time via rsync, but that's wasteful
+# of tcp connections and process forks, so we compromised on allowing
+# rsync of everything under a given uri once we've validated it.
+#
+# so we end up with a two phase model that looks like this:
+#
+# 1) fetch pre-aggregated stuff from zero or more uris specified in
+# config file. listing a uri in this part of the config file is
+# construed as willingness to rsync data from it without further
+# checks. we will validate all of this later, we just don't have to
+# validate it while we're fetching it.
+#
+# 2) walk the tree starting with the trust anchors, checking stuff, and
+# examining uris. optionally follow rsync sia uris from validated
+# certs, fetching more stuff that's missing or stale in our store,
+# applying this process recursively until we run out of new uris to
+# follow or decide that we've followed too many uris ("too many" is a
+# configurable parameter with a relatively high default).
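+#
+# in code, the "too many" guard in (2) could be as simple as (names
+# hypothetical):
+#
+#   my $max_uris = 10000;	# configurable, relatively high default
+#   die("too many uris\n") if (++$uris_followed > $max_uris);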
+#
+# if we don't fetch anything in either phase, this is just a check of a
+# pre-existing tree, which is an operation we want to have anyway.
+#
+# we need to maintain two separate collections:
+#
+# a) everything we got via rsync from whichever parties we were willing
+# to ask, and
+#
+# b) only the stuff we've blessed.
+#
+# there may be transient states in which we have both old and new
+# versions of each of these, although probably not of both at once.
+#
+# we need to perform certain sanity checks on any uris we use
+# (principally checking for "/../" sequences and any other pathnames
+# which are potentially dangerous and which we don't think there's any sane
+# reason for us ever to see), and if possible we want to run rsync
+# inside a chroot jail with restricted permissions and a paranoid set of
+# client options (in particular, we don't want to receive symlinks).
+# the chroot code should be written in such a way that it is easy for a
+# paranoid administrator to verify, and so that it can be omitted if the
+# administrator's paranoia trusts rsync more than they trust our chroot
+# code (which, by definition, has to run as root).
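+#
+# as a starting point for the paranoid client options: simply not
+# passing -l (and not using -a, which implies it) makes rsync skip
+# symlinks entirely, eg (exact flag set is a guess):
+#
+#   system("rsync", "-rti", $uri, "$unauthenticated_tree/" . uri_to_filename($uri));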
+#
+# output of the collection stage is a local disk mirror of all the
+# candidate certificates and crls we could fetch. some may not have
+# been accessible, in which case we may have to fall back to previously
+# fetched data from an earlier pass, if we have any and if it's still
+# valid. if a validation pass finds that we're broken badly enough, we
+# may need to disable distribution of our results to others (ie, disable
+# rsync server), but we may not have a lot of choice about using some of
+# the new data, as clocks will be ticking and old stuff will time out.
+#
+# unless i think of a better way to do it, local store will be organized
+# in approximately the way that wget would organize such a collection: a
+# top level directory, each first level subdirectory of which is named
+# for the hostname portion of the publication uri, second (and lower)
+# level subdirectories track the directory structure at each of the
+# publication points.
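+#
+# eg (hostname hypothetical):
+#
+#   rcynic-data/unauthenticated/
+#     repository.example.net/
+#       rpki/
+#         ca.cer
+#         ca.crl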
+#
+# when validating our candidate set of certificates and crls, we need to
+# walk through them, probably top down, checking each one (signature,
+# revocation, path validation including well-formed 3779 extensions).
+# we build a parallel tree (same directory structure) containing only
+# objects that pass our checks. if we have not already pruned out all
+# non-file, non-directory objects at an earlier stage, we check for this
+# (posix stat() call) before we open any object file.
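+#
+# in perl that last check is cheap, since lstat() doesn't follow
+# symlinks and the "_" filehandle reuses its results:
+#
+#   my @st = lstat($object);
+#   next unless (@st && (-f _ || -d _));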
+#
+# rsync efficiency issue: any changes we make to our local copy to
+# correct a remote problem will be overwritten by the same remote
+# problem the next time we run rsync unless the problem has been
+# corrected. it'd be nice to avoid continually fetching the same
+# mistakes. so we don't want to delete stuff from our raw unvalidated
+# mirror, we just don't copy it to our validated mirror. there may be
+# other ways to deal with this, eg, having three local trees: one
+# maintained by rsync, a second which is a copy of the first with
+# symlinks etc cleaned out, and a third which we've validated.
+#
+# failure mode: can't get new copies of stuff we already had. recovery:
+# reuse old stuff if still valid. we want to use our old unvalidated
+# copies (a) for this, since some time skew problems may have fixed
+# themselves by now and there might be now-valid stuff in our old
+# unvalidated store that didn't pass validation last time.
+#
+# failure mode: pulled new broken copies of stuff for which we had old
+# valid copies. recovery: reuse the old valid copies (b), unless we got
+# to a three step model just to preserve old unvalidated stuff for this
+# case too (probably unnecessary).
+#
+# additional check we should perform: do we get the same answer if we
+# follow the aia uris upwards within our local store as we get when we
+# follow the sia uris downwards? not clear how we should handle this if
+# the answer is "no": warning at minimum, but probably should reject at
+# least some of the certificates involved if this check fails. whether
+# we should reject all the certificates that mismatch or only the
+# children is tricky, as rejecting all could be an invitation to
+# denial of service attacks (bozo-isp intentionally or through
+# incompetence generates bogus uri, arin's validator stops running,
+# oops!), so this may need to be a configurable choice. randy suspects
+# that most mismatches will be due to time skews, for which "retry
+# later" might be a plausible recovery.