freebsd-ports/www/webglimpse/files/patch-af
Andrey A. Chernov 0a1257cfcd autosense localization from env vars
always put site cache locally
1998-11-04 04:17:03 +00:00

183 lines
5.1 KiB
Plaintext

--- makenh.orig Tue Jul 28 03:21:30 1998
+++ makenh Wed Nov 4 07:05:47 1998
@@ -68,6 +68,7 @@
$SITE_RE = '[^:]+:\/\/([^\/]+)\/.*';
$NumLocalCollected = 0;
$NumRemoteCollected = 0;
+$max_redir = 6; # maximum number of redirects followed for one fetch before giving up
# LOGFILE, ERRFILE -- files for logging
### *TO CHANGE TRAVERSAL*
@@ -105,6 +106,7 @@
$LOGFILENAME = ".wg_log";
# $STARTFILE = ".wgstart";
$WGADDSEARCH = ".wgfilter-box";
+$SITECACHE = ".wgsitecache"; # site cache file name; prefixed with $archivepwd further down
$ROBOTNAME = "HTTPGET";
@@ -187,22 +189,22 @@
# Initialize variables to avoid warnings
($title, $urlpath, $traverse_type, $explicit_only, $numhops,
- $nhhops, $local_limit, $remote_limit, $addboxes, $vhost, $usemaxmem) =
- ('','','','','','','','','','','');
+ $nhhops, $local_limit, $remote_limit, $addboxes, $vhost, $usemaxmem, $locale, $charset) =
+ ('','','','','','','','','','','','','');
($title, $urlpath, $traverse_type, $explicit_only, $numhops,
- $nhhops, $local_limit, $remote_limit, $addboxes, $vhost, $usemaxmem, @urllist) = ReadConfig($archivepwd);
+ $nhhops, $local_limit, $remote_limit, $addboxes, $vhost, $usemaxmem, $locale, $charset, @urllist) = ReadConfig($archivepwd);
# open logs
&open_logs();
print LOGFILE "From Configuration:\n";
my(@configlist) = qw(title urlpath traverse_type explicit_only numhops
- nhhops local_limit remote_limit addboxes vhost usemaxmem ) ;
+ nhhops local_limit remote_limit addboxes vhost usemaxmem locale charset) ;
foreach $item (@configlist)
{
$value = '';
- eval "$value = \$$item";
+ eval "\$value = \$$item"; # escape $value so the eval'd code assigns to it instead of interpolating its current (empty) contents
print LOGFILE " $item: $value\n";
}
print LOGFILE " urllist: @urllist\n\n";
@@ -231,6 +233,7 @@
$MAPFILE = "$archivepwd/$MAPFILE";
$TEMPROBOTFILE = "$archivepwd/$TEMPROBOTFILE";
$WGADDSEARCH = "$archivepwd/$WGADDSEARCH";
+$SITECACHE = "$archivepwd/$SITECACHE"; # place the site cache under $archivepwd, like the other per-archive files above
($archiveprot, $archivehost, $archiveport, $archivepath) =
&url::parse_url($archiveurl);
@@ -252,7 +255,7 @@
# read in the site configuration
&siteconf::ReadConf($vhost);
-&siteconf::LoadCache();
+&siteconf::LoadCache("$SITECACHE"); # hand the cache path computed above to LoadCache explicitly
###############
### PHASE 1 ###
@@ -398,7 +401,7 @@
&close_logs();
# remove the robots file
-system("rm -rf $TEMPROBOTFILE");
+unlink($TEMPROBOTFILE); # delete without a shell; note that unlike rm -rf this does not remove a directory
#----------------------
#change the dir back
@@ -751,7 +754,7 @@
my($prot, $host, $port, $path) = &url::parse_url($url);
# if the protocol isn't http, assume it's good
- if($prot!~/http/i){
+ if(!defined($prot) || $prot!~/http/i){ # an undefined $prot is treated like non-http, and is never pattern-matched
return 1;
}
@@ -800,6 +803,7 @@
my($output);
my($olddata, $newdata);
my($newprot, $newhost, $newport, $newpath, $url);
+ my($redcount)=0; # redirects followed so far while fetching this robots.txt
# make the $url
$url = "http://$host:$port/robots.txt";
@@ -815,6 +819,7 @@
while($output ne ""){
# more for error?
if($output=~/^error/i){
+ truncate($TEMPROBOTFILE,0); # empty the temp robots file when the fetch reported an error
print ERRFILE "Error with getting $url\n";
# print LOGFILE "Error with getting $url\n";
last;
@@ -822,7 +827,13 @@
# look at output for redirect -- store redirects in file, too
if($output=~/^Redirect: (.*)$/){
- print LOGFILE "Redirected to: $1...";
+ if ($redcount >= $max_redir) { # redirect limit reached: stop following
+ truncate($TEMPROBOTFILE,0); # leave an empty temp robots file behind
+ print ERRFILE "Too many redirections with $url\n";
+ last;
+ }
+ $redcount++; # count this redirect before following it
+ print LOGFILE "Redirected to: $1...\n";
# see if we have the redirected server
($newprot, $newhost, $newport, $newpath) = &url::parse_url($1);
@@ -843,6 +854,7 @@
}
}else{
# we've got it, or there's an error...
+ truncate($TEMPROBOTFILE,0); # NOTE(review): also empties the file on the "we've got it" case named above -- confirm non-empty, non-redirect output always means failure
last;
}
}
@@ -894,6 +906,7 @@
sub geturl2file{
my($url) = @_;
my($output, $link, $file, $oldfile, @aliases);
+ my($redcount)=0; # redirects followed so far for this call
# check if we have that in stock (we know it's not local)
if (defined($URL2FILE{$url})) {
@@ -930,6 +943,7 @@
while($output ne ""){
# more for error?
if($output=~/^error/i){
+ truncate($file,0); # empty the output file when the fetch reported an error
print ERRFILE "Error with getting $url: $output\n";
# print LOGFILE "Error with getting $url\n";
last;
@@ -937,6 +951,12 @@
# look at output for redirect -- store redirects in file, too
if($output=~/^Redirect: (.*)$/){
+ if ($redcount >= $max_redir) { # redirect limit reached: stop following
+ truncate($file,0); # leave an empty output file behind
+ print ERRFILE "Too many redirections with $url\n";
+ last;
+ }
+ $redcount++; # count this redirect before following it
&ungetnewname(); # rewind the name counter
# The next get will overwrite the unnecessary file
@@ -970,6 +990,7 @@
}
}else{
# we've got it, or there's an error...
+ truncate($file,0); # NOTE(review): also empties the file on the "we've got it" case named above -- confirm non-empty, non-redirect output always means failure
last;
}
}
@@ -1159,6 +1180,15 @@
($prot, $host, $port, $path) = &url::parse_url($url);
#print "URL after parsing: $prot://$host:$port$path\n";
+ next if !defined($prot); # skip URLs for which parse_url returned no protocol
+ if (!defined($port) ||
+ ($port eq '80' && $prot =~ /^https?$/) || # NOTE(review): 80 is treated as the default for https too (the standard https default is 443) -- confirm against what parse_url returns
+ ($port eq '21' && $prot eq 'ftp')) {
+ $port = ''; # missing or default port: leave it out of the normalized URL
+ } else {
+ $port = ":$port"; # keep the ":" with the port so the URL can be built as $host$port
+ }
+
# make sure the path has a preceding /
$path = "/$path" if $path!~/^\//;
@@ -1177,7 +1207,7 @@
# $host = "$a.$b.$c.$d";
# }
- $url = "$prot://$host:$port$path";
+ $url = "$prot://$host$port$path"; # $port is either empty or already starts with ":" at this point
#print "URL after normalization: $url\n";
# strip off any #text