#!/usr/local/bin/perl

# (C) Copyright 1996 by John J. Chew, III
# All rights reserved

# ddtag.pl - tag text according to the dictionaries in which words are to
#   be found, e.g. prior to submission to a crossword game mailing list.
#
# Every purely alphabetic token of two or more letters is looked up in the
# TWL, OSW and (partial) Chambers DAWG files and, where appropriate, gets
# one of the tags <$>, <#> or <*> appended.  All other text is copied
# through unchanged, and a short explanatory footer is appended at the end
# of each input file.

use strict;
use warnings;

# Getopt::Std is the core replacement for the Perl 4 'getopts.pl' library,
# which is no longer shipped with perl; it sets the same $opt_X globals.
use Getopt::Std;

# dawg.pl is a local library expected under ~/lib/perl.
unshift(@INC, "$ENV{HOME}/lib/perl");
require 'dawg.pl';

# Option variables filled in by getopts().
our ($opt_1, $opt_i, $opt_C, $opt_N, $opt_O, $opt_T);

my $gVersion = "2.11";

# edit the following lines to point at default local DAWG files
$opt_C = "$ENV{'HOME'}/scrabble/tcd/words.dawg";
$opt_N = "$ENV{'HOME'}/scrabble/tcd/nonwords.dawg";
$opt_O = "$ENV{'HOME'}/scrabble/osw/words.dawg";
$opt_T = "$ENV{'HOME'}/scrabble/ospd/words.dawg";

# Words already handled, keyed by lowercased form; only used with -1.
my %seen;

# Usage - print the command-line synopsis on STDERR and terminate.
sub Usage {
    die "Usage: $0 [-1] [-i] [-C chambers.dawg] [-N non-tcd.dawg] [-O osw.dawg] [-T twl.dawg] [file...]\n"
      . " -1 tag only first occurrence of each word\n"
      . " -i alter file in place, for use as a Pine alternate editor\n"
      . " -C chambers.dawg DAWG file containing partial Chambers lexicon\n"
      . " -N non-tcd.dawg DAWG file containing list of known non-Chambers words\n"
      . " -O osw.dawg DAWG file containing OSW lexicon\n"
      . " -T twl.dawg DAWG file containing TWL lexicon\n";
}

# Hook consulted by Getopt::Std when it sees --help: show our own usage
# text (and stop) instead of its generic message.
sub HELP_MESSAGE { Usage() }

# TagFor - decide which tag, if any, belongs after a word.
#
#   $lc      the word, already lowercased
#
# Returns:
#   ''       the word is in TWL and also in OSW or Chambers, or it is
#            longer than 9 letters, absent from OSW/Chambers and not
#            listed in the non-Chambers DAWG
#   '<#>'    the word is in OSW or Chambers but not in TWL
#   '<$>'    the word is in TWL only
#   '<*>'    the word is in none of TWL, OSW or Chambers
#
# NOTE(review): words longer than 9 letters are tagged <$>/<*> only when
# the non-Chambers list confirms them, presumably because the partial
# Chambers lexicon is not trusted beyond that length - confirm.
sub TagFor {
    my ($lc) = @_;

    my $in_twl     = dawg::check(*TWL, $lc);
    my $in_british = dawg::check(*OSW, $lc) || dawg::check(*TCD, $lc);

    return ''    if $in_twl && $in_british;
    return '<#>' if $in_british;

    my $confirmed = length($lc) <= 9 || dawg::check(*NTCD, $lc);
    return '' unless $confirmed;

    return $in_twl ? '<$>' : '<*>';
}

getopts('1C:O:N:T:i') or Usage();

# $^I holds the in-place-edit backup suffix, so with -i the original of
# FILE is kept as FILE1.
# NOTE(review): if no backup copy is wanted, '' would be the value to use.
$^I = 1 if $opt_i;

dawg::open(*TWL,  $opt_T) or die "Can't open $opt_T\n";
dawg::open(*OSW,  $opt_O) or die "Can't open $opt_O\n";
dawg::open(*TCD,  $opt_C) or die "Can't open $opt_C\n";
dawg::open(*NTCD, $opt_N) or die "Can't open $opt_N\n";

while (<>) {
    # Consume the line from the left, alternating between runs of
    # whitespace (copied verbatim) and whitespace-delimited tokens.
    while (length) {
        print $1 if s/^(\s+)//;
        next unless s/^(\S+)//;

        my $word = $1;
        my ($pre, $post, $tag) = ('', '', '');

        # Peel leading brackets/quotes and trailing punctuation off the
        # token so that only the bare word is looked up.
        if ($word =~ s/^([([{'"`]+)//)      { $pre  = $1; }
        if ($word =~ s/([]}).,:;"'!?]+)$//) { $post = $1; }

        # Only purely alphabetic words of two or more letters are tagged.
        if (length($word) >= 2 && $word !~ /[^a-zA-Z]/) {
            my $lc = lc $word;

            # With -1, tag a word only the first time it is met.  The
            # lowercased form is the key, matching the case-insensitive
            # dictionary lookup, so "Zax" and "zax" count as one word.
            unless ($opt_1 && $seen{$lc}++) {
                $tag = TagFor($lc);
            }
        }

        print "$pre$word$tag$post";
    }

    # After the last line of each input file, append the explanatory footer.
    if (eof(ARGV)) {
        print "--\n",
              "This message has been automatically tagged by ddtag $gVersion, a Perl script\n",
              "that tags words according to the lexica in which they are found:\n",
              " <\$> TWL only <#> OSW only <*> neither TWL nor OSW\n",
              "ddtag is available at http://sushi.st.usm.edu/~jjchew/software/ddtag.pl.\n";
    }
}

dawg::close(*NTCD);
dawg::close(*TCD);
dawg::close(*OSW);
dawg::close(*TWL);

# Final value of the file, kept from the original script.
0;