[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [kidsgames] wordnet
>dict-wn...8.5 M download.... 1hour+ download, hope it's worth it....
>
>Now how do I use this thing?
I wrote a CGI script. I sent it in to Princeton, but I see no mention
of it there. I'll paste it at the end of this message.
>
>-->Here is the copyright notice:
>-->
>--> <-snip->
>-->
>I think this passes the DFSG, but we should probably ask some experts
>maybe Eric Raymond at OSI and the debian people (what is that mailing
>list), and perhaps Richard Stallman at gnu.org.
That it is in the Debian distribution already answers that question, no?
>--> <-snip->
Paul Kienzle
pkienzle@kienzle.powernet.co.uk
--- wn2html.pl ---
#!/usr/bin/perl
#
# CGI script for displaying definitions from Word Net 1.6
# Modify the WORDNET and ISPELL paths for your environment.
# Place the script in your CGI bin directory, point your
# browser to it, and away you go.
#
# Copyright 1998, Paul Kienzle
#
# Do whatever you want with it, but keep the copyright notice.
#
# TODO:
# rewrite in C accessing the Word Net directly --- hopefully
# that will improve synonym/antonym relations.
# include even more info from database, such as sentence frames,
# part/whole and isa relations.
# check for security problems beyond the shell characters: `;&|\
#
# Build the search word and the external command lines that use it.
#
# The word comes from the command-line arguments (the CGI query words),
# joined with "_".  It is untrusted, and it is later interpolated inside
# double quotes into command strings that are run through the shell, so a
# blacklist of a few metacharacters is not enough: a double quote, "$(...)",
# "$VAR" or a newline would still reach the shell.  Only a whitelist of
# characters is allowed through instead.

# Return a copy of the argument in which every character other than an
# ASCII letter, digit, "_", ".", "'", "/" or "-" is replaced by "-".
# None of the permitted characters is special inside a double-quoted
# shell string.  An undefined argument yields the empty string.
sub _sanitize_word {
    my ($raw) = @_;
    $raw = '' unless defined $raw;
    # /c complements the search list; the single replacement character is
    # used for every character that is not whitelisted.
    $raw =~ tr{a-zA-Z0-9_.'/-}{-}c;
    return $raw;
}

$word = _sanitize_word(join('_', @ARGV));
# Command line for the Word Net browser, with the search word embedded
$WORDNET = "/usr/local/bin/wn \"$word\"";
# Command line for ispell in pipe (-a) mode
$ISPELL = "/usr/bin/ispell -a -S";
# Convert a comma separated list of entries into a comma separated list
# of HTML references.
#
# Argument: a string of entries separated by ", ".
# Returns:  a string in which each entry has become
#             <a href="SCRIPT?entry">entry</a>
#           where SCRIPT is $ENV{SCRIPT_NAME}; the links are joined with
#           ",\n".  Any parenthesised text is removed from an entry,
#           trailing whitespace (including a newline) is stripped, and
#           spaces are replaced by "+" in the href only.  An empty or
#           undefined argument gives the empty string.
sub reflist {
    my ($entries) = @_;
    return '' unless defined $entries;

    my @names = split /, /, $entries;
    my @links;
    foreach my $name (@names) {
        $name =~ s/[(].*[)]//g;     # drop parenthesised markers
        $name =~ s/\s*$//;          # drop trailing blanks / newline
        my $query = $name;
        $query =~ tr/ /+/;          # spaces are written as + in the query
        push @links, "<a href=\"$ENV{SCRIPT_NAME}?$query\">$name</a>";
    }
    return join(",\n", @links);
}
# Set up html document header and search form.
# The empty line after Content-type is significant: it ends the CGI
# response headers, and everything after it is the document body.
print <<"END_HEADER";
Content-type: text/html

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<HTML><HEAD><TITLE>Word Net</TITLE></HEAD>
<BODY><H1>Word Net Dictionary Lookup</H1><ISINDEX>
END_HEADER
# Ask Word Net for the word definition.
# NOTE(review): $WORDNET is a complete shell command line with the search
# word already embedded, so the pipe opens below are only as safe as the
# sanitising applied to the word where $WORDNET is built.
open(DEFN, "$WORDNET -over |") || die("wn error</BODY>");

# Word Net part-of-speech name => suffix letter of the -syns/-ants options
%POS = ('noun','n','verb','v','adj','a','adv','r');

# Run Word Net with the given extra options and return the related words
# it reports as a list of references (see reflist), joined with ",\n".
# Only output lines of the form "  => words" or "INDIRECT ... -> words"
# are used.  Returns the empty string when nothing is reported or the
# command cannot be started.
sub _related_refs {
    my ($options) = @_;
    my @refs;
    open(RELATED, "$WORDNET $options |") || return '';
    while (my $line = <RELATED>) {
        next unless $line =~ m/^(?:\s*=>|INDIRECT.*->) (.*)/;
        my $refs = reflist($1);
        push @refs, $refs if $refs ne '';
    }
    close(RELATED);
    return join(",\n", @refs);
}

# Convert definition text to HTML.
# $inlist is true once a part-of-speech section (and its <dl>) has been
# opened; it is read again after this loop.
$inlist = 0;
$POStag = '';
while (my $line = <DEFN>) {
    if ($line =~ m/^The/) {
        #
        # wordnet: The partofspeech word has n senses (x from tagged text)
        # html: word (partofspeech)
        #
        print "</dl>\n\n<hr>" if $inlist;
        $inlist = 1;
        if ($line =~ m/^The ([a-z]*) (.*) has [0-9]* sense/) {
            $POStag = defined $POS{$1} ? $POS{$1} : '';
            print "<h2>$2 ($1)</h2>\n<dl>\n";
        }
        else {
            # Unexpected header format: pass the line through unchanged.
            $POStag = '';
            print $line;
        }
    }
    elsif ($line =~ m/^[0-9]/) {
        #
        # wordnet: idx. synonym1, ... -- (gloss; example1; ...; exampleq)
        # html: idx. <a href=script?synonym1>synonym1</a>, ...
        # gloss
        # example1
        # ...
        # exampleq
        #
        # First, split into idx., synonym list, gloss list.  The pieces are
        # taken straight from the captures so that a "--" inside the gloss
        # cannot be mistaken for the separator.  Lines that do not have
        # this shape are skipped; this also guarantees $idx is all digits
        # before it is used on a command line.
        chomp $line;
        next unless $line =~ m/^([0-9]+)\. (.*) -- [(](.*)[)]$/;
        my ($idx, $list, $gloss) = ($1, $2, $3);
        # Next, convert each synonym to <a href=syn>syn</a>, with spaces
        # replaced by + in the reference
        $list = reflist($list);
        # Then chop up the gloss with line breaks at each : or ;
        $gloss =~ s/[:;]/<br>\n/g;
        # Collect synonyms
        my $syns = _related_refs("-n$idx -syns$POStag");
        $syns = "<BR>\n<strong>Syn:</strong> <cite>\n$syns\n</cite>" if $syns ne '';
        # Collect antonyms
        my $ants = _related_refs("-n$idx -ants$POStag");
        $ants = "<BR>\n<strong>Ant:</strong> <cite>\n$ants\n</cite>" if $ants ne '';
        # Finally, print it all out.
        print "<dt><strong>$idx.</strong> <cite>\n$list\n</cite><dd>\n$gloss\n$syns$ants\n";
    }
}
close(DEFN);
print "</dl>\n" if $inlist;
# If word was not in dictionary, use ispell to suggest alternates.
# $inlist is still false here only when Word Net printed no
# part-of-speech section for the word.
if (!$inlist) {
    open(ALT, "echo \"$word\" | $ISPELL |") || die("ispell error</BODY>");
    while (my $line = <ALT>) {
        #
        # ispell: & misspelling x n: alt1, ...
        # html: misspelling: <a href=script?alt1>alt1</a>, ...
        #
        # Only the "&" lines (misspelling with suggestions) are of interest.
        next unless $line =~ m/^& ([-a-zA-Z_']+) .*: (.*)/;
        my ($misspelling, $alternates) = ($1, $2);
        my @links;
        foreach my $alt (split /, /, $alternates) {
            # Only link alternates made of plain word characters, so
            # nothing unexpected ends up inside the href attribute.
            next unless $alt =~ m/^[-a-zA-Z_' ]+$/;
            my $query = $alt;
            $query =~ tr/ /+/;      # every space becomes + in the query
            push @links, "<a href=\"$ENV{SCRIPT_NAME}?$query\">$alt</a>";
        }
        print "<br>$misspelling: Try ", join(' ', @links), " \n";
    }
    close(ALT);
}
# end html document
print "</BODY></HTML>\n";
-
kidsgames@smluc.org -- To get off this list send "unsubscribe" in the
body of a message to majordomo@smluc.org