#! /usr/local/bin/perl
#
## Copyright 2000 The Regents of the University of California
## All Rights Reserved
##
## Permission to use, copy, modify and distribute any part of this
## count_countries_in_bgp.pl for educational, research and non-profit
## purposes, without fee, and without a written agreement is hereby
## granted, provided that the above copyright notice, this paragraph
## and the following paragraphs appear in all copies.
##
## Those desiring to incorporate this into commercial products or use for
## commercial purposes should contact the
##
## Technology Transfer Office, University of California, San Diego,
## 9500 Gilman Drive, La Jolla, CA 92093-0910
## Ph: (619) 534-5815, FAX: (619) 534-7345.
##
## IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
## DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
## LOST PROFITS, ARISING OUT OF THE USE OF THIS ARTSHACK, EVEN IF THE
## UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
##
## THE COUNT_COUNTRIES_IN_BGP.PL, PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND
## THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
## SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. THE UNIVERSITY OF
## CALIFORNIA MAKES NO REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND,
## EITHER IMPLIED OR EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
## WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT
## THE USE OF THE ARTSHACK WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER
## RIGHTS.
##
## COUNT_COUNTRIES_IN_BGP.PL is developed by Bradley L. Huffaker at the
## University of California, San Diego under the Cooperative Association
## for Internet Data Analysis (CAIDA) Program. Support for this effort is
## provided by DARPA cooperative agreement N66001-98-2-8922 and by CAIDA
## members.
##
## Written by: Bradley Huffaker (09/11/2000)
##==========================================================================
##
## This is a breakdown of Autonomous Systems (AS)s, prefixes, and
## announced address space from the tables collected from RouteViews
## (http://www.antc.uoregon.edu/route-views/) on 29 August 2000.
## We used NetGeo (http://www.caida.org/publications/papers/inet_netgeo/)
## to map each unit to the geographic location of its administratively
## registered headquarters. We map prefixes and address space according
## to the origin AS announcing them in RouteViews.
##
## USAGE: count_countries_in_bgp.pl [-p | -a | -s] < bgp_table > count.txt
##   The parameters really only select the metric to sort the list by.
##   default is address space. But you can also use:
##      p : number of Prefix
##      s : size of address Space
##      a : number of AS
##

use strict;
use warnings;

use Socket;
use DB_File;
use CAIDA::NetGeo;

# NetGeo client used by as2country() for ASes missing from the local cache.
my $netgeo = CAIDA::NetGeo->new;

# AS -> country cache, backed by a DB_File database. If the database cannot
# be opened the hash still works as a plain in-memory cache, so only warn.
use vars qw( %as2country_db );
tie(%as2country_db, "DB_File", "/usr/home/bhuffake/bgp/as2country_db.db")
    or warn "$0: cannot tie /usr/home/bhuffake/bgp/as2country_db.db: $!\n";

# Usage message and the names of the three metrics (used as hash keys and
# as column headers).
use vars qw( $USAGE $PREFIX $ADDRESS_SPACE $AS );
$USAGE         = "$0 [-p | -s | -a ]\n";
$PREFIX        = "prefix";
$ADDRESS_SPACE = "address space";
$AS            = "as";

my $type = ParseARGV(@ARGV);

# %type2total          : metric -> grand total
# %type2country2count  : metric -> country -> count
# %network2len2as      : network (32-bit integer) -> prefix length -> origin AS
# %as                  : set of every origin AS seen
use vars qw( %type2total %type2country2count );
use vars qw( %network2len2as %as );

ReadSTDIN();
CountPrefix_Address_space();
CountAS();
PrintCountries($type);

# Release the cache explicitly so pending writes reach the database file.
untie %as2country_db;

# Parses the BGP table on STDIN and fills %network2len2as and %as.
# Lines that do not look like a table entry are reported on STDERR and
# skipped.
sub ReadSTDIN {
    while (my $line = <STDIN>) {
        if ($line =~ /^\s*\d+\)[^\d]+(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\/(\d+).* ([\d|\{|,|\}]+) [^\d]+$/) {
            my ($network, $length, $as) = ($1, $2, $3);

            # For an AS set such as {1,2,3} keep the number right before '}'.
            if ($as =~ /(\d+)\}/) {
                $as = $1;
            }
            if ($as =~ /[^\d]/) {
                print STDERR "parse error unknow as`$as'\n";
            }

            # inet_aton() returns undef for an octet above 255, and a prefix
            # length above 32 has no meaningful size; reject both.
            my $packed = inet_aton($network);
            unless (defined $packed && $length <= 32) {
                print STDERR "parse error bad prefix `$network/$length'\n";
                next;
            }

            my $hex = unpack "N", $packed;
            $network2len2as{$hex}{$length} = $as;
            $as{$as} = 1;
        }
        else {
            $line =~ s/\n//g;
            print STDERR "parse error couldn't parse `$line'\n";
        }
    }
}

# Counts the number of prefixes and the size of the address space in each
# country.
#
# Prefixes are visited in order of start address and then from shortest to
# longest mask, so a covering prefix is always seen before the more specific
# prefixes inside it.
sub CountPrefix_Address_space {
    # Stack of prefixes that enclose the current one. Each entry is
    # [ first address past the prefix, country ]; the innermost prefix is
    # the last element.
    my @enclosing;

    foreach my $network (sort { $a <=> $b } keys %network2len2as) {
        foreach my $len (sort { $a <=> $b } keys %{ $network2len2as{$network} }) {
            # 2**n rather than 1<<n: the shift wraps for /0 on 32-bit perls.
            my $size    = 2 ** (32 - $len);
            my $country = as2country($network2len2as{$network}{$len});
            my $edge    = $network + $size;

            # Drop every stacked prefix that ends before the current one
            # does: CIDR blocks either nest or are disjoint, so such a
            # prefix cannot contain the current one.
            while (@enclosing && $enclosing[-1][0] < $edge) {
                pop @enclosing;
            }

            $type2country2count{$PREFIX}{$country}++;
            $type2total{$PREFIX}++;

            # Embedded prefixes: if the more specific prefix belongs to the
            # same country as its immediate parent its space is already
            # counted. If the country differs, move the space from the
            # parent's country to this one (the total is unchanged).
            if (!@enclosing) {
                $type2country2count{$ADDRESS_SPACE}{$country} += $size;
                $type2total{$ADDRESS_SPACE}                   += $size;
            }
            elsif ($enclosing[-1][1] ne $country) {
                my $parent_country = $enclosing[-1][1];
                $type2country2count{$ADDRESS_SPACE}{$country}        += $size;
                $type2country2count{$ADDRESS_SPACE}{$parent_country} -= $size;
            }

            push @enclosing, [ $edge, $country ];
        }
    }
}

# Counts the number of origin ASes per country.
sub CountAS {
    foreach my $as (keys %as) {
        my $country = as2country($as);
        $type2country2count{$AS}{$country}++;
        $type2total{$AS}++;
    }
}

# Prints one row per country with its count and share of the total for all
# three metrics. Rows are ordered by descending count of the metric $type;
# countries with equal counts are listed alphabetically.
sub PrintCountries {
    my ($type) = @_;

    my %count2country;
    foreach my $country (keys %{ $type2country2count{$type} }) {
        my $count = $type2country2count{$type}{$country};
        push @{ $count2country{$count} }, $country;
    }

    my @types = ($ADDRESS_SPACE, $PREFIX, $AS);

    # Header row; each column is as wide as the data cells printed below.
    print "\t";
    foreach my $column (@types) {
        my $total = defined $type2total{$column} ? $type2total{$column} : 0;
        printf "%-" . (length($total) + 10) . "s ", $column;
    }
    print "\n";

    foreach my $count (sort { $b <=> $a } keys %count2country) {
        foreach my $country (sort @{ $count2country{$count} }) {
            printf "%s\t", $country;
            foreach my $column (@types) {
                # A country may be absent from a metric; treat that as zero.
                my $value = $type2country2count{$column}{$country};
                $value = 0 unless defined $value;
                my $total = defined $type2total{$column} ? $type2total{$column} : 0;
                my $share = $total ? 100 * $value / $total : 0;
                # %% prints a literal percent sign.
                printf "%" . length($total) . "d (%6.3f%%) ", $value, $share;
            }
            print "\n";
        }
    }
}

# Parses the command line arguments and returns the metric to sort by.
# No argument selects the address space; anything other than exactly one of
# -p, -s or -a dies with the usage message.
sub ParseARGV {
    my @args = @_;

    return $ADDRESS_SPACE unless @args;
    die($USAGE) if @args > 1;

    my %metric_for = (
        "-p" => $PREFIX,
        "-s" => $ADDRESS_SPACE,
        "-a" => $AS,
    );
    my $metric = $metric_for{ $args[0] };
    die($USAGE) unless defined $metric;
    return $metric;
}

# Converts an AS to a country. The cache hash is checked first; on a miss
# NetGeo is queried and the answer (or "??" when no usable answer exists)
# is stored in the cache.
sub as2country {
    my ($as) = @_;

    my $cached = $as2country_db{$as};
    return $cached if $cached;

    my $country = "??";
    if ($as !~ /[^\d]/) {
        my $rec = $netgeo->getRecord($as);
        # A record with both coordinates at zero is treated as "no
        # location", as is a failed lookup or an empty country field.
        if (   ref $rec
            && (($rec->{LAT} || 0) != 0 || ($rec->{LONG} || 0) != 0)
            && defined $rec->{COUNTRY}
            && length $rec->{COUNTRY})
        {
            $country = $rec->{COUNTRY};
        }
    }

    $as2country_db{$as} = $country;
    return $country;
}