# Copyright (C) 2012, 2013, 2016  SPARTA, Inc. a Parsons Company
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND SPARTA DISCLAIMS ALL WARRANTIES WITH
# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
# AND FITNESS.  IN NO EVENT SHALL SPARTA BE LIABLE FOR ANY SPECIAL, DIRECT,
# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.

__version__ = '$Id$'
# The trailing comma matters: without it this is a plain string rather than
# a one-element tuple, and "from <module> import *" would then treat every
# character of the string as a name to import.
__all__ = ('import_routeviews_dump',)

import itertools
import os.path
import subprocess
import time
import logging
import urlparse
import bz2
from urllib import urlretrieve, unquote

from django.db import transaction

from rpki.resource_set import resource_range_ipv4, resource_range_ipv6
from rpki.exceptions import BadIPResource
import rpki.gui.app.timestamp
from rpki.gui.routeview.models import RouteOrigin

# module-level logger, named after this module, shared by all code below
logger = logging.getLogger(__name__)

class ParseError(Exception):
    """Raised by parse_line() when a line of input cannot be parsed."""

class RouteDumpParser(object):
    """Base class for parsing various route dump formats.

    Subclasses are expected to set ``self.input`` to an iterable of input
    lines and to override parse_line().  cleanup() may be overridden to
    release resources once all of the input has been consumed.
    """

    # class whose parse_str() turns a prefix string into a range object
    # with ``min`` and ``max`` attributes (see ins_routes)
    range_class = resource_range_ipv4

    def __init__(self, path, *args, **kwargs):
        """Remember `path` and reset the per-prefix parsing state.

        Any extra positional or keyword arguments are accepted and ignored.
        """
        # parse() issues a single explicit commit at the very end, so
        # autocommit is switched off here.  Note that this class never
        # switches it back on.
        transaction.set_autocommit(False)

        self.path = path
        self.last_prefix = None  # prefix whose origin ASes are being collected
        self.asns = set()        # origin ASes seen so far for last_prefix

    def parse(self):
        """Replace all RouteOrigin rows with the routes read from self.input.

        Lines for which parse_line() raises ParseError are logged and
        skipped.  Any exception raised by cleanup() propagates before the
        transaction is committed.
        """
        RouteOrigin.objects.all().delete()

        logger.info('Adding rows to table...')
        for line in self.input:
            try:
                prefix, origin_as = self.parse_line(line)
            except ParseError as e:
                logger.warning('error while parsing line: %s (%s)', line, e)
                continue

            # a prefix of None means parse_line() wants this line skipped
            # (used when encountering AS sets that we skip over)
            if prefix is None:
                continue

            # the output may contain multiple paths to the same origin.
            # if this is the same prefix as the last entry, we don't need
            # to validate it again.
            #
            # prefixes are sorted, but the origin_as is not, so we keep a
            # set to avoid duplicates, and insert into the db once we've
            # seen all the origin_as values for a given prefix
            if prefix != self.last_prefix:
                self.ins_routes()
                self.last_prefix = prefix
            self.asns.add(origin_as)

        self.ins_routes()  # process data from last line

        self.cleanup()  # allow cleanup function to throw prior to COMMIT

        logger.info('Updating timestamp metadata...')
        rpki.gui.app.timestamp.update('bgp_v4_import')

        # autocommit was disabled in __init__, so nothing done above is made
        # permanent until this explicit commit
        transaction.commit()

    def parse_line(self, row):
        """Parse one line of input. Return a (prefix, origin_as) tuple.

        Subclasses must override this method: the base implementation
        returns None, which parse() cannot unpack into a pair.
        """
        return None

    def cleanup(self):
        """Hook called by parse() after all input is processed, before the commit."""
        pass

    def ins_routes(self):
        """Insert one RouteOrigin row per origin AS collected for last_prefix.

        Does nothing when no prefix has been seen yet.  A prefix rejected
        with BadIPResource is logged and dropped.  The set of collected
        origin ASes is emptied afterwards either way.
        """
        if self.last_prefix is None:
            return

        try:
            rng = self.range_class.parse_str(self.last_prefix)
            for asn in self.asns:
                RouteOrigin.objects.create(asn=asn, prefix_min=rng.min, prefix_max=rng.max)
        except BadIPResource:
            logger.warning('skipping bad prefix: %s', self.last_prefix)
        self.asns = set()  # reset for the next prefix


class TextDumpParser(RouteDumpParser):
    """Parses the RouteViews.org text dump."""

    def __init__(self, *args, **kwargs):
        """Open the dump at self.path, transparently handling ``.bz2`` files."""
        super(TextDumpParser, self).__init__(*args, **kwargs)
        if self.path.endswith('.bz2'):
            logger.info('decompressing bz2 file')
            self.file = bz2.BZ2File(self.path, buffering=4096)
        else:
            self.file = open(self.path, buffering=-1)
        self.input = itertools.islice(self.file, 5, None)  # skip first 5 lines

    def parse_line(self, row):
        """Parse one line of input.

        Returns a (prefix, origin_as) tuple, or (None, None) when the line
        should be skipped silently (origin is an AS set, or the AS number
        is out of the supported range).

        Raises ParseError when the line does not have the expected shape.
        """
        cols = row.split()

        # index -1 is i/e/? for igp/egp, so the origin AS is at index -2
        try:
            if cols[-2][0] == '{' and cols[-2][-1] == '}':
                # skip AS sets
                return None, None
            origin_as = int(cols[-2])
        except IndexError:
            raise ParseError('unexpected format')
        except ValueError:
            raise ParseError('bad AS value')

        # FIXME Django doesn't have a field for positive integers up to 2^32-1
        if origin_as < 0 or origin_as > 2147483647:
            logger.debug('AS value out of range: %d', origin_as)
            return None, None

        prefix = cols[1]

        # validate the prefix since the "sh ip bgp" output is sometimes
        # corrupt by no space between the prefix and the next hop IP
        # address.
        try:
            net, bits = prefix.split('/')
        except ValueError:
            # no "/len" part (or more than one "/"): report it as a parse
            # error so that only this line is skipped, instead of letting
            # the ValueError abort the whole import
            raise ParseError('bad prefix')
        if len(bits) > 2:
            s = ['mask for %s looks fishy...' % prefix]
            prefix = '%s/%s' % (net, bits[0:2])
            s.append('assuming it should be %s' % prefix)
            logger.warning(' '.join(s))

        return prefix, origin_as

    def cleanup(self):
        """Close the input file."""
        self.file.close()


class MrtDumpParser(RouteDumpParser):
    """Parses an MRT dump by reading the output of the ``bgpdump`` program."""

    def __init__(self, *args, **kwargs):
        """Start ``bgpdump`` on self.path and read its stdout as the input."""
        super(MrtDumpParser, self).__init__(*args, **kwargs)
        # filter input through bgpdump
        # bgpdump can decompress bz2 files directly, no need to do it here
        self.pipe = subprocess.Popen(['bgpdump', '-m', '-v', self.path], stdout=subprocess.PIPE, bufsize=-1)
        self.input = self.pipe.stdout

    def parse_line(self, row):
        """Parse one '|'-separated line of bgpdump output.

        Field 5 is taken as the prefix and the last token of field 6 as the
        origin AS.  Returns a (prefix, origin_as) tuple, or (None, None)
        when the line should be skipped silently (origin is an AS set, or
        the AS number is out of the supported range), matching what
        TextDumpParser does.

        Raises ParseError when the line does not have the expected shape.
        """
        fields = row.split('|')
        try:
            prefix = fields[5]
            origin = fields[6].split()[-1]
        except IndexError:
            # too few fields, or the field holding the AS path is empty
            raise ParseError('unexpected format')

        if origin[0] == '{' and origin[-1] == '}':
            # skip AS sets
            return None, None

        try:
            origin_as = int(origin)
        except ValueError:
            raise ParseError('bad AS value')

        # same limit as TextDumpParser: Django doesn't have a field for
        # positive integers up to 2^32-1
        if origin_as < 0 or origin_as > 2147483647:
            logger.debug('AS value out of range: %d', origin_as)
            return None, None

        return prefix, origin_as

    def cleanup(self):
        """Reap the bgpdump child process.

        Raises PipeFailed when bgpdump exited with a non-zero status.
        """
        logger.info('waiting for child process to terminate')
        self.pipe.wait()
        # the child has exited, so our end of the pipe can be released
        self.pipe.stdout.close()
        if self.pipe.returncode:
            raise PipeFailed('bgpdump exited with code %d' % self.pipe.returncode)


class ProgException(Exception):
    """Common base class for UnknownInputType and PipeFailed."""


class UnknownInputType(ProgException):
    """Raised by import_routeviews_dump() for an unrecognized filetype."""


class PipeFailed(ProgException):
    """Raised by MrtDumpParser.cleanup() when bgpdump exits with an error."""


def import_routeviews_dump(filename, filetype='text', download_dir='/var/tmp'):
    """Load a route dump (such as oix-full-snapshot-latest.bz2 from
    routeviews.org) into the rpki.gui.routeview database.

    Arguments:

        filename: the full path to the file to parse, or an http:// or
        https:// URL from which the file is downloaded first

        filetype [optional]: 'text' or 'mrt'

        download_dir [optional]: directory in which a downloaded file is
        stored while it is being parsed; the downloaded file is removed
        again afterwards

    Raises UnknownInputType when filetype is not one of the known types.
    """

    start_time = time.time()
    tmpname = None

    # Resolve the parser class up front, on its own: this way an unknown
    # type is reported before anything is downloaded, and a KeyError raised
    # while parsing is not mistaken for an unknown file type.
    dispatch = {'text': TextDumpParser, 'mrt': MrtDumpParser}
    try:
        parser_class = dispatch[filetype]
    except KeyError:
        raise UnknownInputType('"%s" is an unknown input file type' % filetype)

    try:
        if filename.startswith(('http://', 'https://')):
            # get filename from the basename of the URL
            u = urlparse.urlparse(filename)
            bname = os.path.basename(unquote(u.path))
            tmpname = os.path.join(download_dir, bname)

            logger.info("Downloading %s to %s", filename, tmpname)
            if os.path.exists(tmpname):
                os.remove(tmpname)
            filename, _ = urlretrieve(filename, tmpname)

        parser_class(filename).parse()

    finally:
        # make sure to always clean up the temp download file; it may not
        # exist if the download itself failed, and a failing unlink here
        # would hide the original error
        if tmpname is not None and os.path.exists(tmpname):
            os.unlink(tmpname)

    logger.info('Elapsed time %d secs', (time.time() - start_time))