[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [kidsgames] wordnet



>dict-wn...8.5 M download.... 1hour+ download, hope it's worth it....
>
>Now how do I use this thing?

I wrote a CGI script.  I sent it in to Princeton, but I see no mention 
of it there.  I'll paste it at the end of this message.

>
>-->Here is the copyright notice:
>-->
>--> <-snip->
>-->
>I think this passes the DFSG, but we should probably ask some experts
>maybe Eric Raymond at OSI and the debian people (what is that mailing
>list), and perhaps Richard Stallman at gnu.org.

That it is in the Debian distribution already answers that question, no?

>--> <-snip->

Paul Kienzle
pkienzle@kienzle.powernet.co.uk

--- wn2html.pl ---


#!/usr/bin/perl
#
# CGI script for displaying definitions from Word Net 1.6
# Modify the WORDNET and ISPELL paths for your environment.
# Place the script in your CGI bin directory, point your
# browser to it, and away you go.
#
# Copyright 1998, Paul Kienzle
#
# Do whatever you want with it, but keep the copyright notice.
#
# TODO: 
# rewrite in C accessing the Word Net directly --- hopefully
#      that will improve synonym/antonym relations.
# include even more info from database, such as sentence frames,
#      part/whole and isa relations.
# check for security problems beyond the shell characters: `;&|\ 
#

$word = join('_',@ARGV);
$word =~ tr/`\\&;|/-----/;
$WORDNET="/usr/local/bin/wn \"$word\"";
$ISPELL="/usr/bin/ispell -a -S";

# Convert a comma separated list of entries into a comma separated list
# of references
sub reflist {
    my($list,@list,$nospace);
    @list = split /, /, @_[0];
    foreach $list (@list) {
	$list =~ s/[(].*[)]//g;
	$list =~ s/\s*$//;
	$nospace = $list; 
	$nospace =~ tr/ /+/;
	$list = "<a href=\"$ENV{SCRIPT_NAME}?$nospace\">$list</a>";
    }
    join(",\n",@list);
}

# Set up html document header and search form
print <<"END_HEADER";
Content-type: text/html

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final/EN">
<HTML><HEAD><TITLE>Word Net</TITLE></HEAD>
<BODY><H1>Word Net Dictionary Lookup</H1><ISINDEX>
END_HEADER

# Ask Word Net for the word definition
open(DEFN, "$WORDNET -over |") || die("wn error</BODY>");

%POS = ('noun','n','verb','v','adj','a','adv','r');
# Convert definition text to HTML
$inlist=0;
while(<DEFN>) {
    if (m/^The/) { 
	#
        # wordnet: The partofspeech word has n senses (x from tagged text)
	# html:    word (partofspeech)
	#
	print "</dl>\n\n<hr>" if $inlist;
	$inlist=1;
        s/^The ([a-z]*) (.*) has ([0-9]* sense.*)$/<h2>\2 (\1)<\/h2>\n<dl>/;
	$POStag = $POS{$1};
	print;
    }
    elsif (m/^[0-9]/) {
	#
	# wordnet: idx. synonym1, ... -- (gloss; example1; ...; exampleq)
	# html:    idx. <a href=script?synonym1>synonym1</a>, ...
	#                  gloss
	#                  example1
        #                  ...
        #                  exampleq
	#

	# First, split into idx., synonym list, gloss list
	s/^([0-9]*). (.*) -- [(](.*)[)]$/\1--\2--\3/;
	($idx, $list, $gloss) = split /--/;

	# Next, convert each synonym to <a href=syn>syn</a>, with spaces
	# replaced by + in the reference
	$list = reflist($list);

	# Then chop up the gloss with line breaks at each : or ;
	$gloss =~ s/(:|;)/<br>\n/g;

	# Collect synonyms
	open(SYNS, "$WORDNET -n$idx -syns$POStag |");
	$syns = "";
	while (<SYNS>) {
	    next if ! m/^(\s*=>|INDIRECT.*->) /;
	    $syns = $syns . ",\n" if $syns;
	    $syns = $syns . reflist($');
	}
	$syns = "<BR>\n<strong>Syn:</strong> <cite>\n$syns\n</cite>" if $syns;
	close(<SYNS>);

	# Collect antonyms
	open(ANTS, "$WORDNET -n$idx -ants$POStag |");
	$ants = "";
	while (<ANTS>) {
	    next if ! m/^(\s*=>|INDIRECT.*->) /;
	    $ants = $ants . ",\n" if $ants;
	    $ants = $ants . reflist($');
	}
	print STDERR "End: $ants\n";
	$ants = "<BR>\n<strong>Ant:</strong> <cite>\n$ants\n</cite>" if $ants;
	print STDERR "Ant: $ants\n";
	close(<ANTS>);

	# Finally, print it all out.
	print "<dt><strong>$idx.</strong> <cite>\n$list\n</cite><dd>\n$gloss$syns$ants\n";
    }
}
close(DEFN);
print "</dl>\n" if $inlist;

# If word was not in dictionary, use ispell to suggest alternates
if (!$inlist) { 
    open(ALT, "echo \"$word\" | $ISPELL |") || die("ispell error</BODY>");
    while(<ALT>) {
	next if ! m/^&/;
	#
	# ispell: & misspelling x n: alt1, ...
	# html:   misspelling: <a href=script?alt1>alt1</a>, ...
	#
	s/^& ([-a-zA-Z_]*) .*: /<br>\1: Try , /;
	s/, ([-a-zA-Z_ ]*)/<a href="$ENV{SCRIPT_NAME}?\1">\1<\/a> /g;
	s/[?]([^"]*) /?\1+/g;
	print;
    }
    close(ALT);
}

# end html document
print "</BODY></HTML>\n";


-
kidsgames@smluc.org  -- To get off this list send "unsubscribe" in the
body of a message to majordomo@smluc.org