#!/bin/perl
############################################################################
#
# Copyright 2001 The Regents of the University of California
# All Rights Reserved
#
# Permission to use, copy, modify and distribute any part of this skitterTrace
# for educational, research and non-profit purposes, without fee, and without
# a written agreement is hereby granted, provided that the above copyright
# notice, this paragraph and the following paragraphs appear in all copies.
#
# Those desiring to incorporate this into commercial products or use for
# commercial purposes should contact the
#
#      Technology Transfer Office, University of California, San Diego,
#      9500 Gilman Drive, La Jolla, CA 92093-0910
#      Ph: (619) 534-5815, FAX: (619) 534-7345.
#
# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
# DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
# LOST PROFITS, ARISING OUT OF THE USE OF THIS create_data.pl, EVEN IF THE
# UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# THE create_data.pl, PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE
# UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
# UPDATES, ENHANCEMENTS, OR MODIFICATIONS. THE UNIVERSITY OF CALIFORNIA MAKES
# NO REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF THE
# create_data.pl WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
#
# create_data.pl is developed by Bradley L. Huffaker at the University of
# California, San Diego under the Cooperative Association for
# Internet Data Analysis (CAIDA) Program. Support for this effort is
# provided by DARPA grant N66001-98-2-8922 and by CAIDA members.
############################################################################
use strict;

# ----- Output / auxiliary file names.
my $output_file = "data.txt";
my $error_file = "error.txt";
my $country2abbr = "country2abbr.txt";

 # ----- Setting up NetGeo;
use CAIDA::NetGeo;
my $netgeo = CAIDA::NetGeo->new;

# Input files, parsed in this order.  ParseInput derives the ISP type from
# the file name ("inc.txt"/"i.txt" give type "I", anything else gives "R").
my @file = ("inc.txt", "fed.txt");


# ----- Shared state filled by the Parse*/Add* subs and read by the Print* subs.
my %clouds;                  # cloud name -> input file -> line number
my %isps;                    # isp name   -> input file -> "line,line,..."

my %oldcity2latlong;         # normalized location -> "lat\0long" (from file)

my %city2seen;               # normalized location -> 1 once looked up
my %city2index;              # normalized location -> index in the output
my %city2latlong;            # normalized location -> "lat\0long"
my %city2filename2linenum;   # unresolved location -> file -> "line, line"

my %isp2link2bandwidth;      # isp -> "type\0from\0to\0number" -> bandwidth -> 1
my %isp2index;               # isp -> index in the output
my %isp2type;                # isp -> "I" or "R"

my %isp2peer2city;           # isp -> peer -> normalized location -> 1

open(ERR, '>', $error_file) || die("Unable to open $error_file:$!");
open(OUT, '>', $output_file) || die("Unable to open $output_file:$!");

ParseOldCity2LatLong("newloc2latlong.txt");
foreach my $input_file (@file) {
    ParseInput($input_file);
}
PrintHeader();
PrintCityLatLong();
PrintLink_Peer();

# Buffered write errors only surface at close, so check the data file.
close(OUT) || die("Unable to close $output_file:$!");
close(ERR) || warn("Unable to close $error_file:$!");
# PrintHeader()
#   Writes the fixed comment legend that describes each record type of the
#   data file to the OUT filehandle.  Takes no arguments.
sub PrintHeader {
  my $legend = <<EOP;
#
#  Data file for Mapnet
# --------------------------
# written Ying
#
# T^num_cities
#       (total number of nodes)
#       num_cities - the number of cities (int)
#
# t^num_clouds
#       (total number of clouds)
#       num_clouds - the number of clouds (int)
#
# R^num_isps
#       (total number of isps)
#       num_isps - the number of isps
#
# C^city_index^lat^long^city_name
#       (city)
#       city_index - unque index for each city (int)
#       lat - latidtie (float)
#       long - longitude (float)
#       city_name - the name of the city (string)
#
# c^cloud_index^cloud_name
#       (cloud)
#       cloud_index - unque index for each cloud (int)
#       cloud_name - name of the cloud (string)
#
# I^isp_index^isp2type^isp_name
#       (I isp with link data add to list)
#       (i isp without link data do not add to list)
#       isp_index - unque index for each isp (int)
#       isp_name - the name of the isp (string)
#       type - R or I (research or Indusrty)
#
# E^isp_name
#       (end of isp
#       marks the end of a isp
#
# P^city_index1^city_index2^bandwidth
#       (pipe)
#       city_index1 - the index of the to city (int)
#       city_index2 - the index of the from city (int)
#       bandwidth - bandwidth of the connection (float)
#
# L^cloud_index^city_index^bandwidth
#       (link)
#       cloud- the index of the to cloud (int)
#       city_index - the index of the to city (int)
#       bandwidth - bandwidth of the connection (float)
#
# p^node_index^isp_index^isp_index
#       (peer)
#       node_index - where
EOP
  print OUT $legend;
}
 
# PrintCityLatLong()
#   Writes the T/t/R summary records and one C record per city in
#   %city2latlong (sorted by name) to OUT, recording each city's position in
#   %city2index.  Afterwards every location left in %city2filename2linenum is
#   reported through printError as a TYPE22 error, once per input file.
sub PrintCityLatLong {
    my @city_names  = sort keys %city2latlong;
    my @cloud_names = sort keys %clouds;
    my @isp_names   = sort keys %isps;

    # NOTE(review): T and R are written as (count + 1) while t is the exact
    # count -- confirm which one the reader of this file expects.
    print OUT "T^", scalar(@city_names) + 1, "\n";
    print OUT "t^", scalar(@cloud_names), "\n";
    print OUT "R^", scalar(@isp_names) + 1, "\n";

    my $position = 0;
    for my $name (@city_names) {
        my ($lat, $long) = split /\0/, $city2latlong{$name};
        $city2index{$name} = $position;

        print OUT "C^$position^$lat^$long^$name\n";
        $position++;
    }

    for my $loc (sort keys %city2filename2linenum) {
        my $lines_by_file = $city2filename2linenum{$loc};
        my $message = "TYPE22::no lat/long found for `$loc' ";
        for my $filename (sort keys %$lines_by_file) {
            printError($filename, $lines_by_file->{$filename}, $message);
        }
    }
}
##----------sub PrintLink_Peer
# PrintLink_Peer()
#   Writes the per-ISP link section and then the peering section to OUT.
#
#   For every ISP in %isp2link2bandwidth (sorted by name) it emits an
#   "I" record, then for each link key "type\0from\0to\0number" and each
#   bandwidth either
#     - a "c" record the first time a cloud is seen plus an "L" record
#       (type "cloud"; "from" is the cloud name), or
#     - a "P" record, emitted once per (city pair, number, bandwidth) in
#       either direction,
#   and finally an "E" record.  ISP indices are stored in %isp2index.
#
#   For every ISP/peer/city in %isp2peer2city it emits an "i" record for
#   any ISP that has no index yet and a "p" record once per unordered
#   (isp, peer) pair and city.
sub PrintLink_Peer {
    my @ips = sort keys %isp2link2bandwidth;
    my %cloud2index;
    my $cloud_index      = 0;
    my $isp_index_global = 0;

    foreach my $isp (@ips) {
        my $isp_index = $isp_index_global++;
        $isp2index{$isp} = $isp_index;
        my $type = $isp2type{$isp};
        my %linkfound;

        print OUT "I^$isp_index^$type^$isp\n";

        foreach my $link (sort keys %{ $isp2link2bandwidth{$isp} }) {
            my ($link_type, $fr, $to, $number) = split /\0/, $link;
            my ($city_index1, $city_index2) =
                ($city2index{$fr}, $city2index{$to});

            foreach my $bandwidth
                (sort keys %{ $isp2link2bandwidth{$isp}{$link} }) {
                if ($link_type eq "cloud") {
                    # The cloud name is the "from" field of the link key.
                    # (No regex capture is set in this sub, so reading $1
                    # here merged every cloud into one unnamed cloud.)
                    my $cloud = $fr;

                    unless (defined $cloud2index{$cloud}) {
                        print OUT "c^$cloud_index^$cloud\n";
                        $cloud2index{$cloud} = $cloud_index++;
                    }

                    printf OUT "L^%s^%s^%s\n",
                        $cloud2index{$cloud}, $city_index2, $bandwidth;
                } else {
                    # Mark both directions so a reversed duplicate of the
                    # same link is not printed a second time.
                    my $link_key =
                        "$city_index1^$city_index2^$number^$bandwidth";
                    unless (defined $linkfound{$link_key}) {
                        print OUT "P^$city_index1^$city_index2^$bandwidth\n";
                        $linkfound{$link_key} = 1;
                        $link_key =
                            "$city_index2^$city_index1^$number^$bandwidth";
                        $linkfound{$link_key} = 1;
                    }
                    # Diagnostic on STDOUT when the "from" city has no index.
                    unless (defined $city_index1) {
                        print "$isp $fr $city_index1\n";
                    }
                }
            }
        }
        print OUT "E^$isp\n\n";
    }

    my %peerFound;
    foreach my $isp (sort keys %isp2peer2city) {
        # ISPs that only appear in peering data get an "i" record.
        unless (defined $isp2index{$isp}) {
            print OUT "i^$isp_index_global^$isp\n";
            $isp2index{$isp} = $isp_index_global++;
        }
        my $isp_index = $isp2index{$isp};
        foreach my $peer (sort keys %{ $isp2peer2city{$isp} }) {
            unless (defined $isp2index{$peer}) {
                print OUT "i^$isp_index_global^$peer\n";
                $isp2index{$peer} = $isp_index_global++;
            }
            my $peer_index = $isp2index{$peer};
            foreach my $city (sort keys %{ $isp2peer2city{$isp}{$peer} }) {
                my $city_index = $city2index{$city};
                my $peer_key = "$isp_index\0$peer_index\0$city_index";
                unless ($peerFound{$peer_key}) {
                    print OUT "p^$city_index^$isp_index^$peer_index\n";
                    $peerFound{$peer_key} = 1;
                    # Also mark the mirrored pair as already printed.
                    $peer_key = "$peer_index\0$isp_index\0$city_index";
                    $peerFound{$peer_key} = 1;
                }
            }
        }
    }
}

    # ParseOldCity2LatLong($input_file)
    #   Loads known coordinates into %oldcity2latlong.  Every line matching
    #   "<a>, <b>, <c>  <lat> <long>" contributes one entry keyed by the
    #   StripLoc-normalized location, with the value "lat\0long".  Lines that
    #   do not match are ignored.  Dies if the file cannot be opened.
    sub ParseOldCity2LatLong {
        my ($input_file) = @_;

        # Three-argument open with a lexical handle: the file name is never
        # interpreted as a mode, and no global handle or $_ is clobbered.
        open(my $in, '<', $input_file)
            or die("Unable to open `$input_file':$!");
        while (my $line = <$in>) {
            next unless $line =~ /(.*?,\s.*?,\s.*?)\s+(\S.+$)/;
            my ($city, $latlong) = ($1, $2);
            my ($lat, $long) = split /\s+/, $latlong;
            my ($loc) = StripLoc($city);
            $oldcity2latlong{$loc} = "$lat\0$long";
        }
        close $in;
    }

##----------sub ParseInput

# ParseInput($input_file)
#   Reads one ISP description file and fills the shared tables.
#   After stripping "#" comments each line is classified, in this order:
#     1. a line containing "::::::::::" starts an ISP block: the next line
#        is the ISP name and the line after that is skipped;
#     2. "<peer> - <cities>"              peering -> %isp2peer2city;
#     3. "<bandwidth> <city, xx> <cities>" link    -> %isp2link2bandwidth;
#     4. "<digits> %<cloud>% <cities>"     cloud   -> %isp2link2bandwidth;
#     5. a remaining "??? - ..." line is ignored;
#     6. any other non-blank line is reported as a TYPE11 error.
#   <cities> is a "~"-separated list.  The ISP type is "I" for the files
#   "inc.txt" and "i.txt" and "R" for every other file.
#   Dies if the file cannot be opened.
sub ParseInput {

    my ($input_file) = @_;

    open(my $in, '<', $input_file)
        or die("Unable to open $input_file:$!");

    my $linenum = 0;
    my $isp_name;
    while (<$in>) {
        $linenum++;
        # chomp, not chop: a final line without a newline must keep its
        # last character.
        chomp;

        s/\#.*$//g;
        if (/::::::::::/) {
            $isp_name = <$in>;
            $linenum++;
            $isp_name =~ s/\s+$//;
            scalar <$in>;          # skip the line following the ISP name
            $linenum++;
            AddIsp($isp_name, $input_file, $linenum);
            if ($input_file eq "inc.txt" || $input_file eq "i.txt") {
                $isp2type{$isp_name} = "I";
            } else {
                $isp2type{$isp_name} = "R";
            }
        }

        # for peer ??? isp -
        elsif ((/^(\?+)\s+?-\s+?(\S.+$)/)||(/^(.+?)\s+-\s+?(\S.+$)/))  {
            my ($peer, $cities) = ($1, $2);
            AddIsp($peer, $input_file, $linenum);

            foreach my $raw_city (split /\~\s*/, $cities) {
                # AddCity removes any remaining "~" and returns an empty
                # list when no coordinates are known for the city.
                my ($city) = AddCity($raw_city, $input_file, $linenum);
                if (defined $city) {
                    $isp2peer2city{$isp_name}{$peer}{$city} = 1;
                }
            }
        }

        # for city  consider -1
        elsif (/(.+?)\s+?(.+?,\s\w+)\s+(.*)/) {
            my ($bandwidth, $raw_from, $cities) = ($1, $2, $3);
            my @to_cities = split /\~\s*/, $cities;

            my ($from) = AddCity($raw_from, $input_file, $linenum);

            foreach my $raw_city (@to_cities) {
                my ($city, $number) =
                    AddCity($raw_city, $input_file, $linenum);
                if (defined $from && defined $city) {
                    $isp2link2bandwidth{$isp_name}
                        {"link\0$from\0$city\0$number"}{$bandwidth} = 1;
                }
            }
        }
        # for cloud
        elsif (/(\d+?)\s+?\%(.+)\%\s+?(\S.+)/) {
            my ($bandwidth, $raw_cloud, $cities) = ($1, $2, $3);
            my @to_cities = split /\~\s*/, $cities;

            my ($from) = AddCloud($raw_cloud, $input_file, $linenum);
            foreach my $raw_city (@to_cities) {
                my ($city, $number) =
                    AddCity($raw_city, $input_file, $linenum);
                if (defined $from && defined $city) {
                    $isp2link2bandwidth{$isp_name}
                        {"cloud\0$from\0$city\0$number"}{$bandwidth} = 1;
                }
            }
        }
        #elsif (/\d\?+\s+-\s+(.*)/)
        elsif (/\?+\s+-\s+(.*)/) {
            # "???" line that did not match the peer pattern: ignored.
        }
        elsif (!(/^\s*$/)) {
            my $message = "TYPE11:: No match <" . $_ . ">";
            printError($input_file, $linenum, $message);
        }
    }                 # ----- end of while

    close $in;
}

##--------start of sub addCity
    # AddCity($loc, $input_file, $linenum)
    #   Resolves a location string to coordinates.
    #   - "~" characters are removed; a trailing run of digits (when no digit
    #     occurs earlier) is split off as $number, which otherwise is 1.
    #   - The location is normalized with StripLoc.  On first sight its
    #     coordinates are taken from %oldcity2latlong or, failing that, from
    #     $netgeo->getLatLongFromLoc; a 0/0 answer is not stored.
    #   Returns (normalized location, number) when coordinates are known.
    #   Otherwise appends $linenum to %city2filename2linenum for this
    #   location and file, and returns the empty list.
    sub AddCity {
        my ($loc, $input_file, $linenum) = @_;

        $loc =~ s/~//g;
        my $number = 1;
        ($loc, $number) = ($1, $2) if $loc =~ /^([^\d]+)(\d+)\s*$/;
        my ($key) = StripLoc($loc);

        if (!defined $city2seen{$key}) {
            $city2seen{$key} = 1;

            if (defined $oldcity2latlong{$key}) {
                $city2latlong{$key} = $oldcity2latlong{$key};
            }
            else {
                my ($city, $state, $country) = split /,\s*/, $key;
                my ($lat, $long) =
                    $netgeo->getLatLongFromLoc($city, $state, $country);
                $city2latlong{$key} = "$lat\0$long"
                    if $lat != 0 || $long != 0;
            }
        }

        return ($key, $number) if defined $city2latlong{$key};

        # No coordinates: remember where the location was mentioned.
        my $earlier = $city2filename2linenum{$loc}{$input_file};
        $city2filename2linenum{$loc}{$input_file} =
            $earlier ? $earlier . ", " . $linenum : $linenum;
        return ();
    }

    # StripLoc($loc)
    #   Normalizes a "city, state, country" string: trims surrounding
    #   whitespace, splits on comma + one whitespace character, defaults a
    #   missing third field to "US", re-joins the first three fields with
    #   ", " and lowercases A-Z.  Returns the normalized string.
    sub StripLoc {
        my ($loc) = @_;
        for ($loc) {
            s/^\s+//g;
            s/\s+$//g;
        }
        # Limit 4 mirrors a split assigned to three scalars.
        my @fields = split /\,\s/, $loc, 4;
        $fields[2] = "US" unless defined $fields[2];
        my $normalized = join(", ", @fields[0 .. 2]);
        $normalized =~ tr/A-Z/a-z/;
        return ($normalized);
    }
##---------end of subs AddCity / StripLoc

##--------start of sub addCloud
    # AddCloud($cloud, $input_file, $linenum)
    #   Registers a cloud.  The name is trimmed of surrounding whitespace and
    #   the line number of its first mention in $input_file is stored in
    #   %clouds.
    #   Returns the trimmed cloud name.  Callers use the return value as the
    #   "from" part of a cloud link key, so it has to be the name; without
    #   an explicit return they received the line number on the first call
    #   and an empty string afterwards.
    sub AddCloud {
        my ($cloud, $input_file, $linenum) = @_;

        $cloud =~ s/^\s+//;
        $cloud =~ s/\s+$//;
        if (!$clouds{$cloud}{$input_file}) {
            $clouds{$cloud}{$input_file} = $linenum;
        }
        return $cloud;
    }

    # AddIsp($isp, $input_file, $linenum)
    #   Records that the ISP (name trimmed of surrounding whitespace) was
    #   mentioned on $linenum of $input_file.  %isps keeps, per ISP and file,
    #   a comma-separated list of those line numbers.
    sub AddIsp {
        my ($isp, $input_file, $linenum) = @_;

        for ($isp) {
            s/^\s+//g;
            s/\s+$//g;
        }
        my $lines = \$isps{$isp}{$input_file};
        $$lines .= "," if defined $$lines;
        $$lines .= $linenum;
    }

##------- start of sub printError
    # printError($data_file, $linenum, $message)
    #   Writes one line of the form "ERROR::<file>[<line>]<message>" to the
    #   ERR filehandle.
    sub printError {
        my ($data_file, $linenum, $message) = @_;
        printf ERR "ERROR::%s[%s]%s\n", $data_file, $linenum, $message;
    }
