#!  /usr/local/bin/perl
# 
## Copyright 2000 The Regents of the University of California 
## All Rights Reserved 
## 
## Permission to use, copy, modify and distribute any part of this 
## count_countries_in_bgp.pl for educational, research and non-profit 
## purposes, without fee, and without a written agreement is hereby 
## granted, provided that the above copyright notice, this paragraph 
## and the following paragraphs appear in all copies. 
## 
## Those desiring to incorporate this into commercial products or use for 
## commercial purposes should contact the 
##
##      Technology Transfer Office, University of California, San Diego, 
##      9500 Gilman Drive, La Jolla, CA 92093-0910
##      Ph: (619) 534-5815, FAX: (619) 534-7345. 
## 
## IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
## DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
## LOST PROFITS, ARISING OUT OF THE USE OF THIS ARTSHACK, EVEN IF THE 
## UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
##
## THE COUNT_COUNTRIES_IN_BGP.PL, PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND
## THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, 
## SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. THE UNIVERSITY OF 
## CALIFORNIA MAKES NO REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, 
## EITHER IMPLIED OR EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
## WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT 
## THE USE OF THE ARTSHACK WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER 
## RIGHTS. 
##
## COUNT_COUNTRIES_IN_BGP.PL is developed by Bradley L. Huffaker at the 
## University of California, San Diego under the Cooperative Association 
## for Internet Data Analysis (CAIDA) Program. Support for this effort is
## provided by DARPA cooperative agreement N66001-98-2-8922 and by CAIDA 
## members.
## 
## Written by: Bradley Huffaker <bradley@caida.org> (09/11/2000)
##==========================================================================
## 
## This is a breakdown of Autonomous Systems (AS)s, prefixes, and 
## announced address space from the tables collected from RouteViews 
## (http://www.antc.uoregon.edu/route-views/) on 29 August 2000.  
## We used NetGeo (https://www.caida.org/publications/papers/inet_netgeo/)
## to map each unit to the geographic location of its administratively 
## registered headquarters.  We map prefixes and address space according 
## to the origin AS announcing them in RouteViews.
## 
## USAGE: count_countries_in_bgp.pl [-p | -a | -s] < bgp_table > count.txt
##	The parameters really only select the metric to sort the list by.
##	default is address space.  But you can also use:
##	    p : number of Prefix
##	    s : size of address Space
##	    a : number of AS
##   
use Socket;
use strict;

# NetGeo client; as2country() queries it for ASes missing from the cache.
use CAIDA::NetGeo;
my $netgeo = CAIDA::NetGeo->new;

# AS -> country cache, tied to an on-disk DB_File so that lookups persist
# between runs.  If the file cannot be opened the hash simply stays an
# ordinary in-memory hash: the run still works, but nothing is remembered
# for the next one, so say so instead of failing silently.
use DB_File;
use vars qw( %as2country_db );
tie(%as2country_db, "DB_File", "/usr/home/bhuffake/bgp/as2country_db.db")
    or warn "$0: cannot tie /usr/home/bhuffake/bgp/as2country_db.db: $!\n";

# Usage message and the names of the three metrics.  The metric names are
# used both as keys of the count tables and as the printed column headings.
use vars qw( $USAGE $PREFIX $ADDRESS_SPACE $AS );
$USAGE = "$0 [-p | -s | -a ]\n";
$PREFIX = "prefix";
$ADDRESS_SPACE = "address space";
$AS = "as";
my $type = ParseARGV(@ARGV);

# $type2total{metric}                  : total over all countries
# $type2country2count{metric}{country} : that country's share of the total
# $network2len2as{network}{length}     : origin AS of each announced prefix
# $as{AS}                              : set of all origin ASes seen
use vars qw( %type2total %type2country2count );
use vars qw( %network2len2as %as );

ReadSTDIN();

CountPrefix_Address_space();
CountAS();
PrintCountries($type);

# Parses the BGP table and fills the network2len2as table
sub ReadSTDIN
{
    # Reads a BGP table dump from STDIN and records every route it can parse:
    #   $network2len2as{NETWORK}{LENGTH} = origin AS, where NETWORK is the
    #       prefix's network address converted to a 32-bit integer
    #   $as{AS} = 1 for every origin AS seen
    # Takes no arguments; the return value is not meaningful.  Lines that do
    # not match the route pattern, or whose prefix is not a valid IPv4
    # network/length, are reported on STDERR and skipped.
    while (my $line = <STDIN>)
    {
        my $packed;
        my ($network, $length, $as);
        if ($line =~
    /^\s*\d+\)[^\d]+(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\/(\d+).* ([\d|\{|,|\}]+) [^\d]+$/)
        {
            ($network, $length, $as) = ($1, $2, $3);

            # The pattern admits octets up to 999 and any prefix length.
            # Check both here: inet_aton() also accepts host names, so an
            # out-of-range "address" would otherwise be handed to the
            # resolver, and an undefined result would be stored under a
            # bogus network key.
            if ($length <= 32 && !grep { $_ > 255 } split(/\./, $network))
            {
                $packed = inet_aton($network);
            }
        }

        if (!defined $packed)
        {
            $line =~ s/\n//g;
            print STDERR "parse error couldn't parse `$line'\n";
            next;
        }

        # When the origin is written as an AS set, keep the number that
        # immediately precedes the closing brace.
        if ($as =~ /(\d+)\}/)
        {
            $as = $1;
        }
        # Anything still non-numeric is reported but kept; as2country()
        # maps such values to the unknown country.
        if ($as =~ /[^\d]/)
        {
            print STDERR "parse error unknow as`$as'\n";
        }

        $network2len2as{unpack("N", $packed)}{$length} = $as;
        $as{$as} = 1;
    }
}

# Counts the number of prefixes and the size of the address space
# in each country
sub CountPrefix_Address_space
{
    # Walks %network2len2as in address order (and, for equal networks, from
    # the shortest to the longest prefix length) and fills in, for every
    # prefix's country as reported by as2country():
    #   $type2country2count{$PREFIX}{country}        +1 per prefix
    #   $type2country2count{$ADDRESS_SPACE}{country} addresses attributed
    # together with the matching $type2total{...} entries.
    # Takes no arguments; the return value is not meaningful.
    #
    # Address space is attributed to the most specific announcement:
    #   * a prefix with no covering prefix adds its size to its country and
    #     to the overall total;
    #   * a prefix nested in one of the same country adds nothing, since
    #     the space is already counted;
    #   * a prefix nested in one of a different country moves its size from
    #     the covering prefix's country to its own (total unchanged).

    # Stack of [end edge, country] for the prefixes that cover the current
    # position, outermost first and innermost (most specific) last.
    my @covering;

    foreach my $network (sort { $a <=> $b } keys %network2len2as)
    {
        foreach my $len (sort { $a <=> $b } keys %{$network2len2as{$network}})
        {
            my $size    = 1 << (32 - $len);
            my $country = as2country($network2len2as{$network}{$len});
            my $edge    = $network + $size;

            # Drop every prefix that ends before this one does: given the
            # iteration order it cannot contain the current prefix.  The
            # test must look at the top of the stack, i.e. the entry that
            # pop removes; looking at the bottom entry would leave finished
            # inner prefixes in place and compare against the wrong parent.
            while (@covering && $covering[-1][0] < $edge)
            {
                pop @covering;
            }

            $type2country2count{$PREFIX}{$country}++;
            $type2total{$PREFIX}++;

            if (!@covering)
            {
                # Not covered by anything: brand new address space.
                $type2country2count{$ADDRESS_SPACE}{$country} += $size;
                $type2total{$ADDRESS_SPACE} += $size;
            }
            elsif ($covering[-1][1] ne $country)
            {
                # Carved out of another country's prefix: move the space.
                my $parent_country = $covering[-1][1];
                $type2country2count{$ADDRESS_SPACE}{$country} += $size;
                $type2country2count{$ADDRESS_SPACE}{$parent_country} -= $size;
            }

            push @covering, [$edge, $country];
        }
    }
}

# Count the number of AS
sub CountAS
{
    # Tallies every origin AS recorded in %as under the country reported by
    # as2country(): one is added to $type2country2count{$AS}{country} and
    # to the overall $type2total{$AS} for each AS.
    # Takes no arguments; the return value is not meaningful.
    for my $asn (keys %as)
    {
        my $where = as2country($asn);

        $type2total{$AS} += 1;
        $type2country2count{$AS}{$where} += 1;
    }
}

# Print out a sorted list of Countries
sub PrintCountries
{
    # Prints the per-country table on STDOUT.
    #   $type : the metric ($ADDRESS_SPACE, $PREFIX or $AS) to order by.
    # The first line holds the column headings.  Each following line holds a
    # country, then for every metric its count and its percentage of that
    # metric's total.  Countries are listed by descending count of $type;
    # countries with equal counts are listed alphabetically.
    # The return value is not meaningful.
    my ($type) = @_;

    # Bucket the countries by their count for the ordering metric.
    my %count2country;
    foreach my $country (keys %{$type2country2count{$type}})
    {
        push @{$count2country{$type2country2count{$type}{$country}}}, $country;
    }

    my @types = ( $ADDRESS_SPACE, $PREFIX, $AS );

    # A data cell is the count padded to the width of the total, followed
    # by " (", a six character percentage and "%)": ten characters more
    # than the total itself, hence the heading width.
    print "\t";
    foreach my $column (@types)
    {
        printf "%-*s    ", length($type2total{$column}) + 10, $column;
    }
    print "\n";

    foreach my $count (sort { $b <=> $a } keys %count2country)
    {
        foreach my $country (sort @{$count2country{$count}})
        {
            printf "%s\t", $country;
            foreach my $column (@types)
            {
                my $value = $type2country2count{$column}{$country};
                my $total = $type2total{$column};

                # Never divide by a missing or zero total.
                my $share = $total ? 100 * $value / $total : 0;

                # "%%" is the literal percent sign; a lone "%" before ")"
                # is not a valid printf conversion.
                printf "%*d (%6.3f%%)    ", length($total), $value, $share;
            }
            print "\n";
        }
    }
}

# Parse the ARGV 
sub ParseARGV
{
    # Chooses the metric the output is ordered by.
    #   @_ : the command line arguments (at most one switch).
    # Returns $ADDRESS_SPACE when no argument is given, otherwise the metric
    # selected by the switch: -p => $PREFIX, -s => $ADDRESS_SPACE,
    # -a => $AS.
    # Dies with $USAGE on an unknown switch or on more than one argument.
    my @args = @_;

    return $ADDRESS_SPACE if (@args == 0);

    # Count the arguments themselves; the scalar $ARGV is an unrelated
    # built-in and says nothing about how many arguments were passed.
    die($USAGE) if (@args > 1);

    my %switch2type = (
        "-p" => $PREFIX,
        "-s" => $ADDRESS_SPACE,
        "-a" => $AS,
    );
    my $type = $switch2type{$args[0]};
    die($USAGE) unless (defined $type);

    return $type;
}

# convert AS to countries.  First check the DB hash and if it fails
# check NetGeo.
sub as2country
{
    # Maps an AS to a country.
    #   $as : the origin AS as read from the BGP table.
    # Returns the value cached in %as2country_db when there is one.
    # Otherwise a purely numeric AS is looked up through $netgeo and the
    # record's COUNTRY is used, provided the record has a non-zero LAT or
    # LONG and actually names a country.  In every other case the result is
    # "??".  Whatever is returned is also stored in %as2country_db.
    my ($as) = @_;

    my $cached = $as2country_db{$as};
    return $cached if ($cached);

    my $country = "??";
    if ($as !~ /[^\d]/)
    {
        my $rec = $netgeo->getRecord($as);

        # Require a usable COUNTRY as well as coordinates, so that callers
        # never receive an undefined value to use as a hash key.
        if ($rec
            && ($rec->{ LAT } != 0 || $rec->{ LONG } != 0)
            && defined $rec->{COUNTRY}
            && length $rec->{COUNTRY})
        {
            $country = $rec->{COUNTRY};
        }
    }

    $as2country_db{$as} = $country;
    return $country;
}
