author    Rob Austein <sra@hactrn.net>    2006-08-29 02:53:23 +0000
committer Rob Austein <sra@hactrn.net>    2006-08-29 02:53:23 +0000
commit    2652f84d9b22a3b84abd8dfbbd6985b5e1bde14b (patch)
tree      5218fec5e92b09fe9d5441e05398e12cc3bc60e3
parent    1acfd11031c55a029562891e6607b4b458518e6d (diff)
Checkpoint
svn path=/scripts/rcynic-prototype.pl; revision=238
-rw-r--r--  scripts/rcynic-prototype.pl  301
1 file changed, 301 insertions, 0 deletions
diff --git a/scripts/rcynic-prototype.pl b/scripts/rcynic-prototype.pl
new file mode 100644
index 00000000..43690d02
--- /dev/null
+++ b/scripts/rcynic-prototype.pl
@@ -0,0 +1,301 @@
+# $Id$
+
+# This is a PROTOTYPE of rcynic, just to see whether I have the
+# general algorithms and data flow right.
+
+use strict;
+
+my $root = "rcynic-data";
+my $trust_anchor_tree = "$root/trust-anchors";
+my $preaggregated_tree = "$root/preaggregated";
+my $unauthenticated_tree = "$root/unauthenticated";
+my $authenticated_tree = "$root/authenticated";
+my $temporary_tree = "$root/temporary";
+
+# Read config
+
+my @anchors;
+my @preaggregated;
+
+while (<>) {
+ chomp;
+ next if (/^\s*$/ || /^\s*[;\#]/);
+ my @argv = split;
+ if ($argv[0] eq "anchor") {
+ push(@anchors, $argv[1]);
+ } elsif ($argv[0] eq "preaggregated") {
+ push(@preaggregated, $argv[1]);
+ } else {
+ die("Could not parse: $_\n");
+ }
+}
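+
+# For reference, a config file for this prototype is just lines of the
+# form below (values here are hypothetical; see the notes at the end of
+# this file about how trust anchors should really be named):
+#
+#   anchor         repository.example.net/ta.cer
+#   preaggregated  rsync://mirror.example.org/rpki/
+#
+# Blank lines and lines starting with ";" or "#" are ignored.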
+
+sub mkdir_maybe {
+ my $dir = shift;
+ $dir =~ s=[^/]+$==;
+ !system("mkdir", "-p", $dir)
+ or die("Couldn't make $dir\n")
+ unless (-d $dir);
+}
+
+sub uri_to_filename {
+ local $_ = shift;
+ if ($_) {
+ die("Not an rsync URI: $_\n")
+ unless (m=^rsync://=);
+ s=^rsync://==;
+ die("Evil character sequences in URI: $_\n")
+ if (m=^/= || m=^\.\.$= || m=^\.\./= || m=/\.\./= || m=/\.\.$= || m=//=);
+ }
+ return $_;
+}
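+
+# For example (hypothetical URI), uri_to_filename() maps
+#
+#   rsync://repository.example.net/rpki/ca.cer
+#
+# to "repository.example.net/rpki/ca.cer", which can then be used
+# relative to any of the local trees above.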
+
+sub extract_cert_uris {
+ my $uri = shift;
+ my $dir = shift;
+ my $file = uri_to_filename($uri);
+ my %res = (file => $file, uri => $uri);
+ my ($a, $s, $c);
+ local $_;
+ open(F, "-|", qw(openssl x509 -noout -inform DER -text -in), "$dir/$file")
+ or die("Couldn't run openssl x509 on $file: $!\n");
+ while (<F>) {
+ chomp;
+ s{^.+URI:rsync://}{};
+ $a = $. + 1
+ if (/Authority Information Access:/);
+ $s = $. + 1
+ if (/Subject Information Access:/);
+ $c = $. + 1
+ if (/X509v3 CRL Distribution Points:/);
+ $res{aia} = $_
+ if ($a && $. == $a);
+ $res{sia} = $_
+ if ($s && $. == $s);
+ $res{cdp} = $_
+ if ($c && $. == $c);
+ }
+ close(F);
+ if ($res{sia} && $res{sia} !~ m=/$=) {
+ warn("Badly formatted AIA URI, compensating: $res{sia}\n");
+ $res{sia} .= "/";
+ }
+ return \%res;
+}
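+
+# extract_cert_uris() depends on "openssl x509 -text" printing the
+# interesting URI on the line *after* each extension header, roughly as
+# follows (exact formatting varies between OpenSSL versions):
+#
+#   Authority Information Access:
+#       CA Issuers - URI:rsync://host/path/parent.cer
+#   Subject Information Access:
+#       CA Repository - URI:rsync://host/path/
+#   X509v3 CRL Distribution Points:
+#       URI:rsync://host/path/ca.crl
+#
+# The substitution above strips everything up through "URI:rsync://",
+# so the recorded aia/sia/cdp values are already in local filename form.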
+
+sub copy_cert {
+ my $name = shift;
+ my $indir = shift || $unauthenticated_tree;
+ my $outdir = shift || $temporary_tree;
+ mkdir_maybe("$outdir/$name");
+ !system("openssl", "x509", "-inform", "DER", "-in", "$indir/$name", "-outform", "PEM", "-out", "$outdir/$name")
+ or die("Couldn't copy $indir/$name to $outdir/$name\n");
+}
+
+sub copy_crl {
+ my $name = shift;
+ my $indir = shift || $unauthenticated_tree;
+ my $outdir = shift || $authenticated_tree;
+ mkdir_maybe("$outdir/$name");
+ !system("openssl", "crl", "-inform", "DER", "-in", "$indir/$name", "-outform", "PEM", "-out", "$outdir/$name")
+ or die("Couldn't copy $indir/$name to $outdir/$name\n");
+}
+
+# Create any missing directories.  mkdir_maybe() strips the final path
+# component, hence the trailing slash in the argument below.
+
+mkdir_maybe("$_/")
+ foreach (($trust_anchor_tree, $preaggregated_tree, $unauthenticated_tree, $authenticated_tree, $temporary_tree));
+
+# Pull over any pre-aggregated data. We'll still have to check
+# signatures in all of this, it's just a convenience to get us
+# started.
+
+for my $uri (@preaggregated) {
+ my $dir = uri_to_filename($uri);
+ !system("rsync", "-ai", $uri, "$preaggregated_tree/$dir")
+ or die("Couldn't rsync from $uri\n");
+}
+
+# Update our unauthenticated tree from the pre-aggregated data. Will
+# need to pay attention to rsync parameters here to make sure we don't
+# overwrite newer stuff.
+
+!system("rsync", "-ai", "$preaggregated_tree/", "$unauthenticated_tree/")
+ or die("Couldn't rsync $preaggregated_tree/ to $unauthenticated_tree/\n");
+
+# Local trust anchors always win over anything else, so seed our
+# authenticated tree with them.
+
+copy_cert($_, $trust_anchor_tree, $authenticated_tree)
+ foreach (@anchors);
+
+my %certs;
+
+sub check_crl {
+ die "NIY";
+}
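+
+# A sketch of what check_crl() might grow into, under some assumptions:
+# the issuer cert has already been blessed into the authenticated tree
+# in PEM form, and "openssl crl ... -CAfile" reports "verify OK" on
+# success.  Going through the shell to capture stderr is crude, but
+# this is a prototype.
+
+sub check_crl_sketch {
+  my $name = shift;		# CRL name, DER, in unauthenticated tree
+  my $issuer = shift;		# issuer cert name, PEM, in authenticated tree
+  my $result = `openssl crl -inform DER -noout -in $unauthenticated_tree/$name -CAfile $authenticated_tree/$issuer 2>&1`;
+  return 0 unless ($result =~ /verify OK/);
+  copy_crl($name);		# blessed: convert to PEM in authenticated tree
+  return 1;
+}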
+
+# Start with a -verified- cert here (because we start with trust
+# anchors): pull uris, rsync the cdp and check the resulting crl,
+# rsync the sia, then walk through the resulting certs, one at a time,
+# verifying each one and calling this function recursively.
+#
+# $1: cert we're examining
+# &rest: ancestor certs (er, and crls?)
+
+sub check_cert {
+ die "NIY";
+}
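+
+# A sketch of the walk check_cert() is meant to perform, per the
+# comment above.  Everything here is provisional: there is no real
+# path validation (just a single-issuer "openssl verify -CAfile"), no
+# 3779 checking, no limit yet on how many uris we follow, and it
+# assumes a DER copy of the cert is present in the unauthenticated
+# tree (true for everything except, perhaps, the trust anchors).
+
+sub check_cert_sketch {
+  my ($uri, @ancestors) = @_;
+  my $file = uri_to_filename($uri);
+  my $uris = extract_cert_uris($uri, $unauthenticated_tree);
+  if ($uris->{cdp}) {
+    mkdir_maybe("$unauthenticated_tree/$uris->{cdp}");
+    system("rsync", "-ai", "rsync://$uris->{cdp}", "$unauthenticated_tree/$uris->{cdp}");
+    check_crl_sketch($uris->{cdp}, $file);
+  }
+  my $sia = $uris->{sia};
+  return unless ($sia);
+  mkdir_maybe("$unauthenticated_tree/$sia");
+  system("rsync", "-ai", "--recursive", "rsync://$sia", "$unauthenticated_tree/$sia");
+  for my $child (glob("$unauthenticated_tree/$sia*.cer")) {
+    (my $name = $child) =~ s=^\Q$unauthenticated_tree\E/==;
+    copy_cert($name);		# DER -> PEM, into temporary tree
+    my $ok = `openssl verify -CAfile $authenticated_tree/$file $temporary_tree/$name`;
+    next unless ($ok =~ /: OK$/m);
+    copy_cert($name, $unauthenticated_tree, $authenticated_tree);
+    check_cert_sketch("rsync://$name", $uri, @ancestors);
+  }
+}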
+
+# Now start walking the tree, starting with our trust anchors.
+
+for my $anchor (@anchors) {
+ check_cert($anchor);
+}
+
+die "NIY";
+
+# for now we will need to fix up sia uris, as they are missing trailing
+# slashes; have asked about this on rescert.
+
+# walk tree starting from trust anchors, do the validate/fetch cycle
+#
+# still probably easiest to build the chains using the aia uris.
+
+# hmm, may need to have config file tell us the uris associated with
+# our trust anchors, otherwise (a) how do we name them in uri space
+# and (b) how do we check that their children have the right sia uri?
+# taking the children's word for what the parent's uri should be seems
+# wrong. maybe we just insist that our trust anchors have filenames
+# that match our mapping of uris to filenames....
+
+
+################################################################
+#
+# Date: Sat, 19 Aug 2006 02:53:25 -0400
+# From: Rob Austein <sra@hactrn.net>
+# Subject: rcynic design
+# Message-Id: <20060819065325.B4C525C53@thrintun.hactrn.net>
+#
+# overall tasks: collect certificates from publication points, assemble
+# them into a local certificate store, perform validation checks on all
+# of them, discarding the ones that don't pass. output is a valid
+# repository containing a snapshot of all the (valid, accessible)
+# certificates in the rpki system. also want to provide ability for
+# others to synchronize from this repository, so mustn't do anything
+# that precludes serving results via rsync. code should also support
+# building a validated repository purely from locally maintained data.
+#
+# inputs to the process:
+#
+# - a (small) set of trust anchors
+#
+# - zero or more rsync uris for pre-aggregated object collections
+#
+# - a configuration file containing or pointing to the above inputs and
+# whatever other parameters we turn out to need.
+#
+# i was initially arguing for a collection phase followed by a
+# validation phase after fetching all the data. randy convinced me that
+# we don't want to follow uris that didn't come from local config or a
+# cert we've already checked. most paranoid version of this would
+# involve pulling one directory at a time via rsync, but that's wasteful
+# of tcp connections and process forks, so we compromised on allowing
+# rsync of everything under a given uri once we've validated it.
+#
+# so we end up with a two phase model that looks like this:
+#
+# 1) fetch pre-aggregated stuff from zero or more uris specified in
+# config file. listing a uri in this part of the config file is
+# construed as willingness to rsync data from it without further
+# checks. we will validate all of this later, we just don't have to
+# validate it while we're fetching it.
+#
+# 2) walk the tree starting with the trust anchors, checking stuff, and
+# examining uris. optionally follow rsync sia uris from validated
+# certs, fetching more stuff that's missing or stale in our store,
+# applying this process recursively until we run out of new uris to
+# follow or decide that we've followed too many uris ("too many" is a
+# configurable parameter with a relatively high default).
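+#
+# in code, the "too many" guard in (2) could be as simple as (names
+# hypothetical):
+#
+#   my $max_uris = 10000;	# configurable, relatively high default
+#   die("too many uris\n") if (++$uris_followed > $max_uris);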
+#
+# if we don't fetch anything in either phase, this is just a check of a
+# pre-existing tree, which is an operation we want to have anyway.
+#
+# we need to maintain two separate collections:
+#
+# a) everything we got via rsync from whichever parties we were willing
+# to ask, and
+#
+# b) only the stuff we've blessed.
+#
+# there may be transient states in which we have both old and new
+# versions of each of these, although probably not of both at once.
+#
+# we need to perform certain sanity checks on any uris we use
+# (principally checking for "/../" sequences and any other pathnames
+# which are potentially dangerous and which we don't think there's any sane
+# reason for us ever to see), and if possible we want to run rsync
+# inside a chroot jail with restricted permissions and a paranoid set of
+# client options (in particular, we don't want to receive symlinks).
+# the chroot code should be written in such a way that it is easy for a
+# paranoid administrator to verify, and so that it can be omitted if the
+# administrator's paranoia trusts rsync more than they trust our chroot
+# code (which, by definition, has to run as root).
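+#
+# as a starting point for the paranoid client options: simply not
+# passing -l (and not using -a, which implies it) makes rsync skip
+# symlinks entirely, eg (exact flag set is a guess):
+#
+#   system("rsync", "-rti", $uri, "$unauthenticated_tree/" . uri_to_filename($uri));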
+#
+# output of the collection stage is a local disk mirror of all the
+# candidate certificates and crls we could fetch. some may not have
+# been accessible, in which case we may have to fall back to previously
+# fetched data from an earlier pass, if we have any and if it's still
+# valid. if a validation pass finds that we're broken badly enough, we
+# may need to disable distribution of our results to others (ie, disable
+# rsync server), but we may not have a lot of choice about using some of
+# the new data, as clocks will be ticking and old stuff will time out.
+#
+# unless i think of a better way to do it, local store will be organized
+# in approximately the way that wget would organize such a collection: a
+# top level directory, each first level subdirectory of which is named
+# for the hostname portion of the publication uri, second (and lower)
+# level subdirectories track the directory structure at each of the
+# publication points.
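+#
+# eg (hostname hypothetical):
+#
+#   rcynic-data/unauthenticated/
+#     repository.example.net/
+#       rpki/
+#         ca.cer
+#         ca.crl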
+#
+# when validating our candidate set of certificates and crls, we need to
+# walk through them, probably top down, checking each one (signature,
+# revocation, path validation including well-formed 3779 extensions).
+# we build a parallel tree (same directory structure) containing only
+# objects that pass our checks. if we have not already pruned out all
+# non-file, non-directory objects at an earlier stage, we check for this
+# (posix stat() call) before we open any object file.
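+#
+# in perl that last check is cheap, since lstat() doesn't follow
+# symlinks and the "_" filehandle reuses its results:
+#
+#   my @st = lstat($object);
+#   next unless (@st && (-f _ || -d _));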
+#
+# rsync efficiency issue: any changes we make to our local copy to
+# correct a remote problem will be overwritten by the same remote
+# problem the next time we run rsync unless the problem has been
+# corrected. it'd be nice to avoid continually fetching the same
+# mistakes. so we don't want to delete stuff from our raw unvalidated
+# mirror, we just don't copy it to our validated mirror. there may be
+# other ways to deal with this, eg, having three local trees: one
+# maintained by rsync, a second which is a copy of the first with
+# symlinks etc cleaned out, and a third which we've validated.
+#
+# failure mode: can't get new copies of stuff we already had. recovery:
+# reuse old stuff if still valid. we want to use our old unvalidated
+# copies (a) for this, since some time skew problems may have fixed
+# themselves by now and there might be now-valid stuff in our old
+# unvalidated store that didn't pass validation last time.
+#
+# failure mode: pulled new broken copies of stuff for which we had old
+# valid copies. recovery: reuse the old valid copies (b), unless we got
+# to a three step model just to preserve old unvalidated stuff for this
+# case too (probably unnecessary).
+#
+# additional check we should perform: do we get the same answer if we
+# follow the aia uris upwards within our local store as we get when we
+# follow the sia uris downwards? not clear how we should handle this if
+# the answer is "no": warning at minimum, but probably should reject at
+# least some of the certificates involved if this check fails. whether
+# we should reject all the certificates that mismatch or only the
+# children is tricky, as rejecting all could be an invitation to
+# denial of service attacks (bozo-isp intentionally or through
+# incompetence generates bogus uri, arin's validator stops running,
+# oops!), so this may need to be a configurable choice. randy suspects
+# that most mismatches will be due to time skews, for which "retry
+# later" might be a plausible recovery.