[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Misc
Hi, Itīs again me from Sweden
I saw that post concerning the wanted X11 tutorial stuff. Paul/Anni, can
you put up that "Articles wanted" page? I hacked some simple news2html
script (attached). Unfortunately I couldnt test it, mainly because I
couldnt find that ("/#/(Ī&) Net::NNTP module on my CPAN cd. Anyway, if
you want to use it right now, here it is. If not, a simple page just
listing the topics will be enough for now.
Christian
#!/usr/bin/perl -w
#
# Read thread from newsgroup and wrap all the messages into a HTML page
# The messages are also stored in a DB (Key is the msgid) to avoid costly
# refetching of already known ones.
#
# Actually two databases are maintained: n2h-meta (containing statistics
# about the newsgroups and subjects already processed) and n2h-messages
# (containing the known messages)
#
# To be run locally (no CGI)
#
use strict;
use DB_File;
use Net::NNTP; # CPAN Module !!
# Some internal flags ------------------------------------------------
my $Debug = 0; # print debugging info ?
my $Verbose = 0; # print process info ?
my $Version = "0.1";
# Operation parameters, configurable --------------------------------
my $Server = "sunsite.auc.dk"; # newsserver to contact
my $Group = "sunsite.linux.linuxgames"; # newsgroup to work on
my $Fetch = 1; # fetch messages ?
my $Write = 1; # write HTML ?
my $FetchAll = 0; # fetch even already known msgs?
my $DBase = "~/n2h"; # database basename
my $Output = ""; # output file name
my $Subject = "foobar"; # subject to search for
# Some runtime vars -------------------------------------------------
my %Messages = (); # known messages. tied to the database.
my %Metainfo = (); # misc persistent data. tied to database.
# Contents of Metainfo:
#
# <groupname> => Number of last message read
# <subject> => MsgIDs of matching messages (seperated by newline)
#
#
# ================================= main =====================================
#
GetOpts ();
InitDB ();
FetchMessages () if $Fetch;
WriteHTML () if $Write;
CloseDB ();
#
# ============================== subroutines =================================
#
######################
#
# Pack the stuff into a HTML page
#
sub WriteHTML
{
my $OldHandle;
my $MsgID;
my $Article;
my $Header;
my $Body;
my $From;
my $Date;
if ($Output)
{
open (OUTPUT, ">$Output")
or die "Can't open output file $Output: $!\n";
$OldHandle = select (OUTPUT);
}
WriteHTMLHeader ();
foreach $MsgID (split /\n/ $Metainfo {$Subject})
{
chomp $MsgID;
$Verbose && print STDERR "HTMLizing message $MsgID\n";
$Article = $Messages {$MsgID};
$Article =~ /(.+?)\n\n(.+)/s;
$Header = $1;
$Body = $2;
print "<hr>";
$Header =~ m/^From: (.+)$/m;
$From = QuoteMeta ($1);
print "<b>From: $From </b> <br>";
$Header =~ m/^Date: (.+)$/m;
$Date = QuoteMeta ($1);
print "<b>Date: $Date </b> <br>";
print "<br><pre>";
print $Body;
print "</pre><br>";
}
WriteHTMLFooter ();
if ($Output) select ($OldHandle);
$Verbose && print STDERR "Done writing HTML\n\n";
}
######################
#
# Quote HTML special chars
#
# Parameter1: String to process
# Returns: processed string
#
sub Quotemeta
{
shift;
# replace angled brackets with proper html codes
s#<#<#g;
s#>#>#g;
return $_;
}
######################
#
# Write HTML Header
#
sub WriteHTMLHeader
{
print "<html>";
print "<head><title>Thread listing for subject $Subject</title></head>";
print "<body>";
print "<h1>Thread listing for subject $Subject</h1>";
print "<b>Newsgroup :</b> $Group <br>";
print "<b>Newsserver:</b> $Group <br>";
print "<br>";
}
######################
#
# Write HTML Footer
#
sub WriteHTMLFooter
{
print "<br><br>";
print "<font size=-2>Generated with mail2html V $Version</font>";
print "</body>";
print "</html>\n";
}
######################
#
# Fetch Messages
# [inspired by Perl Cookbook Recipe 18.4]
#
sub FetchMessages
{
my $ArticleCount;
my $First;
my $Last;
my @Header;
my $Article;
my $TheServer;
my $MsgID;
my $i;
$TheServer = Net::NNTP->new ($Server)
or die "Can't connect to server $Server: $!\n";
$Verbose && print STDERR "Connected to $Server\n";
($ArticleCount, $First, $Last, undef) = $TheServer->group ($Group)
or die "Can't access newsgroup $Group: $!\n";
$Verbose && print STDERR "Accessing newsgroup $Group\n";
if (($Last <= $Metainfo {$Group}) && (!$FetchAll))
{
$Verbose && print STDERR "No new articles\n";
return; # all articles are already known
}
if (!$FetchAll && ($First < $Metainfo {$Group}))
{
$First = $Metainfo {$Group};
}
$Verbose && print STDERR "Preparing to fetch ", $Last - $First, " articles\n";
for ($i=$First ; $i <= $Last ; $i++)
{
$Verbose && print STDERR "Fetching Message Nr. $i\n";
if (!(@Header = $TheServer->head ($i)))
{
$Metainfo {$Group} = $i - 1;
die "Couldn't read message # $i: $!\n";
}
if (defined ($MsgID = MatchHeader (@Header)))
{
$Verbose && print STDERR " Match! MsgID = $MsgID\n";
$Article = join "\n", $TheServer->article ($i);
$Messages {$MsgID} = $Article;
$Metainfo {$Subject} .= "$MsgID\n";
}
}
$Metainfo {$Group} = $Last;
$Verbose && print STDERR "Done fetching articles\n\n";
}
######################
#
# Match message header against Subject pattern
#
# Parameter1: message header as array of lines
# Returns: messageID if matched, else undef
#
sub MatchHeader
{
my $MsgID = undef;
my $Matched = 0;
while (shift)
{
if (/^Subject: .*$Subject/i)
{ # Cool, a match!
return $MsgID if $MsgID; # do we already have it?
$Matched = 1; # Uh, no
}
elsif (/^Message-Id: (.+)$/i)
{
$MsgID = $1;
chomp $MsgID;
return $MsgID if $Matched;
}
}
return undef; # no match :(
}
######################
#
# Initialize Database
# [inspired by Perl Cookbook Recipe 14.1]
#
sub InitDB
{
$DBase = ExpandTilde ($DBase);
tie (%Metainfo, "DB_File", $DBase."-meta.db")
or die "Can't open database ${DBase}-meta.db: $!\n";
tie (%Messages, "DB_File", $DBase."-messages.db")
or die "Can't open database ${DBase}-messages.db: $!\n";
if ((!defined $Metainfo {$Group}) or ($FetchAll))
$Metainfo {$Group} = 0;
}
######################
#
# Close Database
#
sub CloseDB
{
untie %Messages;
untie %Metainfo;
}
######################
#
# Expand '~' to proper home dir
# [Perl Cookbook Recipe 7.3]
#
# Parameter1 : String to process
# Returns : Processed string
#
sub ExpandTilde
{
my $TheString = shift;
$TheString =~ s#^~([^/]*)#
$1 ? (getpwnam ($1)) [7] :
($ENV {HOME} || $ENV {LOGDIR} || (getpwuid ($>)) [7])
#ex;
return $TheString;
}
}
######################
#
# parse cmdline
#
sub GetOpts
{
my ($Arg) = "";
my ($SPos) = 0;
while ($Arg = shift (@ARGV))
{
if ($Arg =~ /^-[sgadofwvhV]{2,}/)
{
for ($SPos = length ($Arg) ; $SPos > 1 ; $SPos--)
{
unshift (@ARGV, "-" . substr ($Arg, $SPos-1, 1));
$ODebug && warn "Option -".substr ($Arg, $SPos-1, 1)." found in multi\n";
}
}
elsif (($Arg eq "-s") or ($Arg eq "--server"))
{
$Server = shift (@ARGV);
}
elsif (($Arg eq "-g") or ($Arg eq "--group"))
{
$Group = shift (@ARGV);
}
elsif (($Arg eq "-a") or ($Arg eq "--all"))
{
$FetchAll = 1;
}
elsif (($Arg eq "-d") or ($Arg eq "--database"))
{
$DBase = shift (@ARGV);
}
elsif (($Arg eq "-o") or ($Arg eq "--outputfile"))
{
$Output = shift (@ARGV);
}
elsif (($Arg eq "-f") or ($Arg eq "--fetchonly"))
{
$Fetch = 1;
$Write = 0;
}
elsif (($Arg eq "-w") or ($Arg eq "--convertonly"))
{
$Fetch = 0;
$Write = 1;
}
elsif (($Arg eq "-v") or ($Arg eq "--verbose"))
{
$Verbose = 1;
}
elsif (($Arg eq "-h") or ($Arg eq "--help"))
{
Usage ();
die;
}
elsif (($Arg eq "-V") or ($Arg eq "--version"))
{
Version ();
die;
}
else
{
$Subject = $Arg;
}
}
}
######################
#
# Display version info
#
sub Version
{
my $OldHandle = select (STDERR);
print "news2html V $Version (C) Christian Reiniger\n";
print " may be redistributed/changed under the terms of the GPL\n";^
print "\n";
select ($OldHandle);
}
######################
#
# Display usage info
#
sub Usage
{
my $OldHandle = select (STDERR);
Version ();
print <<'USAGEEND';
Usage:
news2html [options] "Thread subject"
Options (short | long : argument (default value)):
-s | --server : string ("sunsite.auc.dk")
Newsserver to connect to
-g | --group : string ("sunsite.linux.linuxgames")
Newsgroup to browse
-a | --all : none
Fetch complete thread, incl. already known msgs
-d | --database : string ("~/n2h")
Basename to use for the database
-o | --outputfile : string (STDOUT)
Name of output file (HTML)
-f | --fetchonly : none
Only fetch messages, do not generate HTML
-w | --convertonly : none
Only generate HTML, do not fetch messages
-v | --verbose : none
Print some process info
-h | --help : none
This info
-V | --version : none
Print version info
USAGEEND
select ($OldHandle);
}