[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[or-cvs] r15497: Add a perl script that automatically queries http, https, an (website/trunk)
Author: ioerror
Date: 2008-06-27 06:35:38 -0400 (Fri, 27 Jun 2008)
New Revision: 15497
Added:
website/trunk/update-mirrors.pl
Log:
Add a Perl script that automatically queries each mirror over HTTP, HTTPS, or FTP for the date of its last update. The script also records much more metadata about each mirror. It isn't ready to be used yet: the main mirror page first needs to be updated to incorporate the new fields, many of which are often blank.
Added: website/trunk/update-mirrors.pl
===================================================================
--- website/trunk/update-mirrors.pl (rev 0)
+++ website/trunk/update-mirrors.pl 2008-06-27 10:35:38 UTC (rev 15497)
@@ -0,0 +1,646 @@
+#!/usr/bin/perl -w
+use warnings;
+use strict;
+use LWP::Simple;
+use LWP;
+use Date::Parse;
+
+# Report the LWP version in use, then build the one user agent ($lua) that
+# FetchDate uses for every request.
+printf "Creating LWP agent (%s)...\n", $LWP::VERSION;
+my %agent_options = (
+    agent      => "Tor MirrorCheck Agent",
+    timeout    => 15,
+    keep_alive => 1,
+);
+my $lua = LWP::UserAgent->new(%agent_options);
+
+# Remove every character that is not on the whitelist (ASCII letters,
+# digits, '-', ':', '+' and the space character) from a string.
+#
+# Takes:   one string, e.g. the body of a fetched trace file.
+# Returns: a copy of the string with all non-whitelisted characters deleted.
+sub sanitize {
+    my ($raw) = @_;
+
+    # Edit a copy. The leading '-' inside the class is a literal hyphen.
+    ( my $clean = $raw ) =~ s/[^-a-zA-Z0-9: +]//g;
+
+    return $clean;
+}
+
+# Fetch the last-update timestamp a mirror publishes in its trace file.
+#
+# Appends the trace-file path to the mirror's base URL, downloads it with the
+# file-level $lua user agent, cleans the body with sanitize() and hands the
+# result to str2time().
+#
+# Takes:   the base URL of the mirror, with or without a trailing slash.
+# Returns: the value str2time() produced for the trace file, or the string
+#          "Unknown" when the request fails, the sanitized body is empty, or
+#          the body cannot be parsed as a date.
+sub FetchDate {
+    my $url   = shift;                               # Base url for mirror
+    my $trace = "project/trace/www.torproject.org";  # Location of recent update info
+
+    # Some base URLs are listed without a trailing slash; add one so the
+    # trace path is not glued directly onto the host name.
+    $url .= "/" unless $url =~ m{/\z};
+    $url .= $trace;
+
+    print "Fetching possible date from: $url\n";
+
+    my $request = HTTP::Request->new( GET => $url );
+    my $result  = $lua->request($request);
+
+    unless ( $result->is_success ) {
+        print "Our request failed, we had no result.\n";
+        return "Unknown";
+    }
+
+    my $content = sanitize( $result->content );
+    unless ($content) {
+        print "Unable to fetch date, empty content returned.\n";
+        return "Unknown";
+    }
+
+    my $date = str2time($content);
+    unless ($date) {
+        print "We haven't fetched a date.\n";
+        return "Unknown";
+    }
+
+    print "We've fetched a date $date.\n";
+    return $date;
+}
+
+# This is the list of all known Tor mirrors
+# Add new mirrors to the bottom!
+#
+# Every key must be unique: a repeated key silently replaces the earlier
+# entry, so give each new mirror the next unused mirrorNNN number.
+# updateDate starts out empty and is filled in at run time.
+my %m = (
+    mirror000 => {
+        orgName => "cypherpunks.at",
+        isoCC => "AT",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.cypherpunks.at/",
+        rsyncWebsiteMirror => "rsync://tor.cypherpunks.at/tor",
+        httpDistMirror => "http://tor.cypherpunks.at/dist/",
+        rsyncDistMirror => "rsync: tor.cypherpunks.at::tor/dist/",
+        updateDate => "",
+    },
+
+    mirror001 => {
+        orgName => "depthstrike.com",
+        isoCC => "CA",
+        subRegion => "NS",
+        region => "North America",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.depthstrike.com/",
+        ftpWebsiteMirror => "",
+        rsyncWebsiteMirror => "",
+        httpDistMirror => "http://tor.depthstrike.com/dist/",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror002 => {
+        orgName => "hermetix.org",
+        isoCC => "CA",
+        subRegion => "QC",
+        region => "North America",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.hermetix.org/",
+        rsyncWebsiteMirror => "",
+        httpDistMirror => "http://tor.hermetix.org/dist/",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror003 => {
+        orgName => "Boinc.ch",
+        isoCC => "CH",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.boinc.ch/",
+        ftpWebsiteMirror => "",
+        rsyncWebsiteMirror => "",
+        httpDistMirror => "http://tor.boinc.ch/dist/",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror004 => {
+        orgName => "anonymity.cn",
+        isoCC => "CN",
+        subRegion => "",
+        region => "Asia",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.anonymity.cn/",
+        ftpWebsiteMirror => "",
+        rsyncWebsiteMirror => "",
+        httpDistMirror => "http://tor.anonymity.cn/dist/",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror005 => {
+        orgName => "bbs",
+        isoCC => "DE",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.blingblingsquad.net/",
+        ftpWebsiteMirror => "",
+        rsyncWebsiteMirror => "",
+        httpDistMirror => "http://tor.blingblingsquad.net/dist/",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror006 => {
+        orgName => "Berapla",
+        isoCC => "DE",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://download.berapla.de/mirrors/tor/",
+        ftpWebsiteMirror => "",
+        rsyncWebsiteMirror => "",
+        httpDistMirror => "",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror007 => {
+        orgName => "cybermirror",
+        isoCC => "DE",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.cybermirror.org/",
+        ftpWebsiteMirror => "",
+        rsyncWebsiteMirror => "",
+        httpDistMirror => "http://tor.cybermirror.org/dist/",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror008 => {
+        orgName => "Spline",
+        isoCC => "DE",
+        subRegion => "FU",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://rem.spline.de/tor/",
+        ftpWebsiteMirror => "",
+        rsyncWebsiteMirror => "",
+        httpDistMirror => "",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror009 => {
+        orgName => "mirror.bsdhost.eu",
+        isoCC => "DE",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://mirror.bsdhost.eu/www.torproject.org/",
+        ftpWebsiteMirror => "",
+        rsyncWebsiteMirror => "",
+        httpDistMirror => "http://mirror.bsdhost.eu/www.torproject.org/dist/",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror010 => {
+        orgName => "onionland",
+        isoCC => "DE",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://mirror.onionland.org/",
+        ftpWebsiteMirror => "",
+        rsyncWebsiteMirror => "rsync: mirror.onionland.org::tor/",
+        httpDistMirror => "http://mirror.onionland.org/dist/",
+        rsyncDistMirror => "rsync: mirror.onionland.org::tor/dist/",
+        updateDate => "",
+    },
+
+    mirror011 => {
+        orgName => "plentyfact",
+        isoCC => "DE",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.plentyfact.net/",
+        ftpWebsiteMirror => "",
+        httpsWebsiteMirror => "https://tor.plentyfact.net/",
+        rsyncWebsiteMirror => "",
+        httpDistMirror => "http://tor.plentyfact.net/dist/",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror012 => {
+        orgName => "loxal.net",
+        isoCC => "DE",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor-anonymizer.mirror.loxal.net/",
+        ftpWebsiteMirror => "",
+        rsyncWebsiteMirror => "",
+        httpDistMirror => "http://tor-anonymizer.mirror.loxal.net/dist/",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror013 => {
+        orgName => "centervenus.com",
+        isoCC => "DE",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "",
+        ftpWebsiteMirror => "",
+        rsyncWebsiteMirror => "",
+        httpDistMirror => "http://www.centervenus.com/mirrors/tor/dist/",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror014 => {
+        orgName => "zdg-gmbh.eu",
+        isoCC => "DK",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.zdg-gmbh.eu/",
+        ftpWebsiteMirror => "",
+        rsyncWebsiteMirror => "",
+        httpDistMirror => "http://tor.zdg-gmbh.eu/dist/",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror015 => {
+        orgName => "CRAN",
+        isoCC => "FR",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.miroir-francais.fr/",
+        rsyncWebsiteMirror => "rsync: miroir-francais.fr::tor",
+        ftpWebsiteMirror => "ftp://miroir-francais.fr/pub/tor/",
+        httpDistMirror => "",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror016 => {
+        orgName => "tor.newworldorder.com.es",
+        isoCC => "HU",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.newworldorder.com.es/",
+        rsyncWebsiteMirror => "",
+        ftpWebsiteMirror => "",
+        httpDistMirror => "",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror017 => {
+        orgName => "amorphis.eu",
+        isoCC => "NL",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.amorphis.eu/",
+        rsyncWebsiteMirror => "",
+        ftpWebsiteMirror => "",
+        httpDistMirror => "http://tor.amorphis.eu/dist/",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror018 => {
+        orgName => "BIT BV",
+        isoCC => "NL",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://ftp.bit.nl/mirror/tor/",
+        rsyncWebsiteMirror => "",
+        ftpWebsiteMirror => "ftp://ftp.bit.nl/mirror/tor/",
+        httpDistMirror => "",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror019 => {
+        orgName => "CCC",
+        isoCC => "NL",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.ccc.de/",
+        rsyncWebsiteMirror => "",
+        ftpWebsiteMirror => "",
+        httpDistMirror => "http://tor.ccc.de/dist/",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror020 => {
+        orgName => "kamagurka.org",
+        isoCC => "NL",
+        subRegion => "Haarlem",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.kamagurka.org/",
+        rsyncWebsiteMirror => "",
+        ftpWebsiteMirror => "",
+        httpDistMirror => "http://tor.kamagurka.org/dist/",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror021 => {
+        orgName => "OS Mirror",
+        isoCC => "NL",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.osmirror.nl/",
+        rsyncWebsiteMirror => "rsync: rsync.osmirror.nl::tor/",
+        ftpWebsiteMirror => "ftp://ftp.osmirror.nl/pub/tor/",
+        httpDistMirror => "",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror022 => {
+        orgName => "Meulie.net",
+        isoCC => "NO",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.meulie.net/",
+        rsyncWebsiteMirror => "",
+        ftpWebsiteMirror => "",
+        httpDistMirror => "",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror023 => {
+        orgName => "Swedish Linux Society",
+        isoCC => "SE",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://ftp.se.linux.org/crypto/tor/",
+        rsyncWebsiteMirror => "",
+        ftpWebsiteMirror => "ftp://ftp.se.linux.org/pub/crypto/tor/",
+        httpDistMirror => "",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror024 => {
+        orgName => "Ghirai.com",
+        isoCC => "UK",
+        subRegion => "London",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://www.ghirai.com/tor/",
+        rsyncWebsiteMirror => "",
+        ftpWebsiteMirror => "",
+        httpDistMirror => "",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror025 => {
+        orgName => "BJWOnline.com",
+        isoCC => "US",
+        subRegion => "California",
+        region => "North America",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://mirror.bjwonline.com/tor/",
+        rsyncWebsiteMirror => "",
+        ftpWebsiteMirror => "",
+        httpDistMirror => "",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror026 => {
+        orgName => "Libertarian Action Network",
+        isoCC => "",
+        subRegion => "",
+        region => "North America",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "",
+        rsyncWebsiteMirror => "",
+        ftpWebsiteMirror => "ftp://libertarianactivism.com/tor.eff.org/dist/",
+        httpDistMirror => "",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror027 => {
+        orgName => "TheOnionRouter.com",
+        isoCC => "US",
+        subRegion => "Texas",
+        region => "North America",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://www.theonionrouter.com/",
+        rsyncWebsiteMirror => "",
+        ftpWebsiteMirror => "",
+        httpDistMirror => "http://www.theonionrouter.com/dist/",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror028 => {
+        orgName => "Site2nd.org",
+        isoCC => "US",
+        subRegion => "Texas",
+        region => "North America",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.site2nd.org/",
+        rsyncWebsiteMirror => "",
+        ftpWebsiteMirror => "",
+        httpDistMirror => "",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror029 => {
+        adminContact => "jeroen\@unfix.org",
+        orgName => "unfix",
+        isoCC => "CH",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "True",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.unfix.org/",
+        rsyncWebsiteMirror => "",
+        ftpWebsiteMirror => "",
+        httpDistMirror => "",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror030 => {
+        adminContact => "jeroen\@unfix.org",
+        orgName => "sixx",
+        isoCC => "",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "True",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.sixxs.net/",
+        rsyncWebsiteMirror => "",
+        ftpWebsiteMirror => "",
+        httpDistMirror => "",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+);
+
+# Main program: ask every mirror for its last-update time, print a report to
+# STDOUT, and write one HTML table row per mirror to
+# include/mirrors-table.wmi.
+
+my $count = scalar keys %m;
+print "We have a total of $count mirrors\n";
+print "Fetching the last updated date for each mirror.\n";
+
+# URL fields to try, in order of preference, with the label used in the log.
+my @probes = (
+    [ httpWebsiteMirror  => "HTTP" ],
+    [ httpsWebsiteMirror => "HTTPS" ],
+    [ ftpWebsiteMirror   => "FTP" ],
+);
+
+foreach my $server ( sort keys %m ) {
+    my $mirror = $m{$server};
+
+    print "Attempting to fetch from $mirror->{'orgName'}\n";
+
+    # Use the first URL field that is actually filled in for this mirror.
+    my ($probe) = grep { $mirror->{ $_->[0] } } @probes;
+
+    # Nothing to query: say so and move on, rather than also claiming that
+    # something was fetched and stored.
+    unless ($probe) {
+        print "We were unable to fetch or store anything. We still have the following: $mirror->{'updateDate'}\n";
+        next;
+    }
+
+    my ( $field, $label ) = @{$probe};
+    print "Attempt to fetch via $label.\n";
+    $mirror->{'updateDate'} = FetchDate( $mirror->{$field} );
+
+    print "We fetched and stored the following: $mirror->{'updateDate'}\n";
+}
+
+# updateDate is now a number, the string "Unknown", or still empty. Map
+# anything non-numeric to 0 so the numeric comparison below does not warn;
+# such mirrors sort after every mirror with a positive timestamp.
+my %sort_key;
+foreach my $server ( keys %m ) {
+    my $stamp = $m{$server}{'updateDate'};
+    $sort_key{$server} =
+        ( defined $stamp && $stamp =~ /\A-?\d+(?:\.\d+)?\z/ ) ? $stamp : 0;
+}
+
+# Most recently updated first. Ties are broken by key so that the report and
+# the generated table always list mirrors in the same order.
+my @by_date = sort { $sort_key{$b} <=> $sort_key{$a} or $a cmp $b } keys %m;
+
+print "We sorted the following mirrors by their date of last update: \n";
+foreach my $server (@by_date) {
+
+    print "\n";
+    print "Mirror $m{$server}{'orgName'}: \n";
+
+    foreach my $attrib ( sort keys %{ $m{$server} } ) {
+        print "$attrib = $m{$server}{$attrib}\n";
+    }
+}
+
+my $outFile = "include/mirrors-table.wmi";
+open( my $out, '>', $outFile ) or die "Can't open $outFile: $!";
+
+# Here's where we open a file and print some wml include goodness
+# This is sorted from last known recent update to unknown update times
+foreach my $server (@by_date) {
+    my $mirror = $m{$server};
+
+    # Columns: country, organisation, last update, then one cell per mirror URL
+    print {$out} "\n<tr>\n";
+    print {$out} " <td>$mirror->{'isoCC'}</td>\n";
+    print {$out} " <td>$mirror->{'orgName'}</td>\n";
+    print {$out} " <td>$mirror->{'updateDate'}</td>\n";
+
+    foreach my $precious ( "httpWebsiteMirror", "ftpWebsiteMirror",
+                           "rsyncWebsiteMirror", "httpDistMirror",
+                           "rsyncDistMirror" )
+    {
+        if ( $mirror->{$precious} ) {
+            print {$out} " <td><a href=\"$mirror->{$precious}\">$mirror->{$precious}</a></td>\n";
+        }
+        else {
+            print {$out} " <td> - </td>\n";
+        }
+    }
+
+    print {$out} "</tr>\n";
+}
+
+# Buffered write errors only surface at close, so check it.
+close($out) or die "Can't close $outFile: $!";
Property changes on: website/trunk/update-mirrors.pl
___________________________________________________________________
Name: svn:executable
+ *