diff options
author | Rob Austein <sra@hactrn.net> | 2016-01-20 04:56:20 +0000 |
---|---|---|
committer | Rob Austein <sra@hactrn.net> | 2016-01-20 04:56:20 +0000 |
commit | cf1b6c19525d9a4abec0981f744f8efdf8d99ca3 (patch) | |
tree | 0a1eaf27eb8077a3ad6e73d7d7956a3f4732fc93 /rp/rcynic | |
parent | 565f27716014aed53a98f550e6a28bc7f4da9dfd (diff) |
Tweak cleanup code again.
svn path=/branches/tk705/; revision=6227
Diffstat (limited to 'rp/rcynic')
-rwxr-xr-x | rp/rcynic/rcynicng | 65 |
1 files changed, 54 insertions, 11 deletions
diff --git a/rp/rcynic/rcynicng b/rp/rcynic/rcynicng index 9fa174eb..7e7db7a0 100755 --- a/rp/rcynic/rcynicng +++ b/rp/rcynic/rcynicng @@ -1084,9 +1084,13 @@ class Fetcher(object): else: # So do this the slow way for now, do better later if it turns out to matter. + logger.debug("Starting potentially really slow copy operation") + for obj in old_snapshot.rpkiobject_set.all(): new_snapshot.rpkiobject_set.add(obj) + logger.debug("Finished potentially really slow copy operation") + new_snapshot.save() for retrieval, delta in deltas: @@ -1198,27 +1202,27 @@ def final_report(): def final_cleanup(): - from django.db import transaction + from django.db import transaction, models def report(when): - logger.debug("Database state %s cleanup: %s Authenticated %s RRDPSnapshot %s RPKIObject %s Retrieval", - when, - Authenticated.objects.all().count(), - RRDPSnapshot.objects.all().count(), - RPKIObject.objects.all().count(), - Retrieval.objects.all().count()) + logger.debug("Database %s cleanup: %s Authenticated %s RRDPSnapshot %s RPKIObject %s Retrieval", when, + Authenticated.objects.all().count(), RRDPSnapshot.objects.all().count(), + RPKIObject.objects.all().count(), Retrieval.objects.all().count()) report("before") with transaction.atomic(): - #logger.debug("Flushing old authenticated sets") + logger.debug("Flushing old authenticated sets") q = Authenticated.objects q = q.exclude(id = authenticated.id) q.delete() - #logger.debug("Flushing RRDP snapshots which don't contain anything in the (remaining) authenticated set") + logger.debug("Flushing RRDP snapshots which don't contain anything in the (remaining) authenticated set") + + # We clean the RRDPSnapshot table in two steps. First step is to remove snapshots which aren't related + # to any RPKI object in the current authenticated set. q = RPKIObject.objects q = q.filter(authenticated = authenticated.id) @@ -1229,14 +1233,53 @@ def final_cleanup(): q = RRDPSnapshot.objects.exclude(id__in = q) q.delete() - #logger.debug("Flushing RPKI objects which are in neither current authenticated set nor current RRDP snapshot") + # This is still too many snapshots, because an RPKI object can be (and usually are) present in more than + # one snapshot for a given session. So we still have to do something to prune out old snapshots from + # sessions we're keeping. I don't know a sane way to do this via the ORM as a single SQL query, so + # we do it as a query followed by a loop. If this is too slow, we can try doing it as raw SQL, + # but that has portability issues. + + # well, maybe we want the raw sql interface after all, as the query is fairly simple: + # + # DELETE FROM rcynicdb_rrdpsnapshot + # WHERE id IN + # (SELECT t2.id + # FROM (SELECT session_id, max(serial) AS max_serial FROM rcynicdb_rrdpsnapshot GROUP BY session_id) AS t1 + # JOIN rcynicdb_rrdpsnapshot AS t2 + # ON t1.session_id = t2.session_id + # WHERE serial < max_serial); + # + # Or, equivalently but slightly easier to read: + # + # DELETE FROM rcynicdb_rrdpsnapshot + # WHERE id IN + # (SELECT id + # FROM (SELECT session_id, max(serial) AS max_serial FROM rcynicdb_rrdpsnapshot GROUP BY session_id) AS filter + # NATURAL JOIN rcynicdb_rrdpsnapshot + # WHERE serial < max_serial); + # + # See + # http://stackoverflow.com/questions/2317686/joining-2-sql-select-result-sets-into-one + # https://docs.djangoproject.com/en/1.8/topics/db/sql/#executing-custom-sql-directly + + q = RRDPSnapshot.objects + q = q.values("session_id") + q = q.annotate(max_serial = models.Max("serial")) + q = q.values_list("session_id", "max_serial") + + logger.debug("Annotation query for RRDPSnapshots gave us %r", list(q)) + + for u, s in q: + RRDPSnapshot.objects.filter(session_id = u, serial__lt = s).delete() + + logger.debug("Flushing RPKI objects which are in neither current authenticated set nor current RRDP snapshot") q = RPKIObject.objects q = q.exclude(authenticated = authenticated.id) q = q.filter(snapshot = None) q.delete() - #logger.debug("Flushing retrieval objects which are no longer related to any RPKI objects or RRDP snapshot") + logger.debug("Flushing retrieval objects which are no longer related to any RPKI objects or RRDP snapshot") q = RPKIObject.objects q = q.order_by("retrieved__id") |