#!/pkg/bin/perl
#--------------------------------------------*
# Hindi Morpheme Tagger                      *
# Under development                          *
# By Vasu Renganathan vasur@ccat.sas.upenn.edu*
#--------------------------------------------*
# last modified on 2/27/97
#
# CGI script: takes a sentence from QUERY_STRING, tags every
# whitespace-separated word, prints the tags as the HTTP response and
# appends the sentence and its tags to $mypath/temp.txt.
#
# State is shared through package globals (@nouns, @vbentries, @nptag,
# @senttag, $retval, ...).  parword, compoundword and vp, and the arrays
# @parwrd, @comptag and @tensetag, are not defined in the code reviewed
# here.
# NOTE(review): presumably they come from mainverb.pl / ambiguity.pl and
# share these globals -- confirm before enabling "use strict".

$mypath = '/yourpath';

@environment_variables = keys(%ENV);

# --- Decode the query string ------------------------------------------
$data = defined $ENV{QUERY_STRING} ? $ENV{QUERY_STRING} : '';
$data =~ s/\+/ /g;         # Replace pluses with spaces
$data =~ s/%2F/\//g;       # Replace %2F with /
$data =~ s/%3F/?/g;        # Replace %3F with question mark
$data =~ s/%3A/:/g;        # Replace %3A with colon
$data =~ s/%2C/,/g;        # Replace %2C with comma
$data =~ s/%27/'/g;        # Replace %27 with apostrophe
$data =~ s/%22/"/g;        # Replace %22 with double quote
$data =~ s/%21/!/g;        # Replace %21 with exclamation mark
$data =~ s/%0D%0A/ /g;     # Replace an encoded CR LF pair with a space

# Strip form residue.
# NOTE(review): "message", "Guest" and "Tag" look like the form's field
# names / submit label -- confirm against the HTML form.
$data =~ s/message//g;
$data =~ s/Guest//g;
$data =~ s/=//g;
$data =~ s/-/ /g;
$data =~ s/&//g;
$data =~ s/Tag$//g;

$inpline = $data;

require("mainverb.pl");
require("ambiguity.pl");

# --- Load the word lists ----------------------------------------------
unless ( open(VERBFILE, '<', "verbs.dat") ) { die("I can't find verbs.dat"); }
unless ( open(NOUNFILE, '<', "nouns.dat") ) { die("I can't find nouns.dat"); }

# chomp, not chop: chop would eat a real character from a final line
# that has no trailing newline.
@nounentry = <NOUNFILE>;
chomp(@nounentry);
push(@nouns, @nounentry);

@vbentry = <VERBFILE>;
chomp(@vbentry);
push(@vbentries, @vbentry);

# --- Tag every word of the sentence -----------------------------------
# split(' ') collapses runs of whitespace, so consecutive spaces (which
# the substitutions above can create) no longer yield empty fields, and
# foreach does not stop early on a word such as "0".
@inpsent = split(' ', $inpline);
@senttag = ();

foreach $line (@inpsent) {
    # The first analyser that recognises the word wins; each one leaves
    # its result in the array that is pushed here.
    if    ( parword($line)      eq "true" )  { push(@senttag, @parwrd); }
    elsif ( compoundword($line) ne "false" ) { push(@senttag, @comptag); }
    elsif ( empn($line)         ne "false" ) { push(@senttag, @nptag); }
    elsif ( konp($line)         eq "true" )  { push(@senttag, @nptag); }
    elsif ( np($line)           ne "false" ) { push(@senttag, @nptag); }
    elsif ( vp($line)           eq "true" )  { push(@senttag, @tensetag); }
    elsif ( pluralnoun($line)   ne "false" ) { push(@senttag, @nptag); }
    else                                     { push(@senttag, "($line nul)"); }
}

printhtml();
writemyfile();

# Append the decoded sentence and its tags to $mypath/temp.txt.
# Runs after printhtml, which has already closed the standard streams.
sub writemyfile {
    # NOTE(review): one-argument open takes the file name from the
    # scalars $STDIN / $STDOUT / $STDERR, which this script never sets --
    # confirm what these three calls were meant to achieve.
    open(STDIN);
    open(STDOUT);
    open(STDERR);

    # The log is best-effort: the response has already been sent, so a
    # failure to open it is not reported.
    my $log;
    open($log, '>>', "$mypath/temp.txt") or return;

    print $log ("\n\n$data\n\n");
    foreach my $tag (@senttag) {
        print $log ("$tag ");
    }

    # NOTE(review): this goes to STDOUT, which printhtml closed -- it may
    # have been intended for the log file; confirm.
    print("\n\n");

    close($log);
}

# Return TEXT with the HTML metacharacters & < > " replaced by entities.
sub escapehtml {
    my ($text) = @_;
    $text = '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Print the HTTP response (the sentence followed by its tags), then
# close STDIN, STDOUT and STDERR.
# The sentence comes straight from the query string, so it and the tags
# derived from it are escaped before being written into the page.
# NOTE(review): the literal strings carry no markup although the
# response is declared text/html -- confirm against the deployed copy.
sub printhtml {
    my $shown = escapehtml($data);

    print "Content-type: text/html\n\n";
    print(" Your Sentence:\n\n");
    print("\n");
    print("$shown\n\n");
    print("\nTagged output is:\n\n");
    print("\n");
    foreach my $tag (@senttag) {
        print(escapehtml($tag) . " ");
    }

    close(STDIN);
    close(STDOUT);
    close(STDERR);
}

# Emphatic forms: rewrite or strip "hiin" / "iin" / "ii" and look the
# result up with np.  On success @nptag holds the np tag with
# " emphatic" appended.  Sets the global $retval and returns it
# ("true" or "false").
sub empn {
    my ($myword) = @_;
    my $hold  = $myword;
    my $mytag = " emphatic";    # hamen ?
    @nptag = ();

    # && binds tighter than ||, so the "nahiin" guard covers only the
    # first substitution.
    # NOTE(review): the last two patterns are not anchored to the end of
    # the word -- confirm that is intended.
    my $changed = ( $myword ne "nahiin" && $myword =~ s/hiin$/haan/ )
        || $myword =~ s/iin//
        || $myword =~ s/ii//;

    if ( !$changed ) {
        $retval = "false";
    }
    elsif ( np($myword) ne "false" ) {
        insertnptag($mytag);
        $retval = "true";
    }
    elsif ( $hold ne "nahiin" && $hold =~ s/hiin$// && np($hold) ne "false" ) {
        # Second attempt: drop "hiin" altogether instead of rewriting it.
        insertnptag($mytag);
        $retval = "true";
    }
    else {
        $retval = "false";
    }
    return $retval;
}

# Report whether MYWORD is one of the entries read from verbs.dat.
# Sets the global $retval and returns it ("true" or "false").
sub foundword {
    my ($myword) = @_;

    $retval = "false";
    foreach my $entry (@vbentries) {
        if ( $entry eq $myword ) {
            $retval = "true";
            last;
        }
    }
    return $retval;
}

# Helper for pluralnoun: look STEM up with np and, when it is known,
# leave "(SRCWRD STEM tag LABEL)" in @nptag.
# Returns "true" or "false"; on failure @nptag is empty (np cleared it).
sub pluralnountag {
    my ($srcwrd, $stem, $label) = @_;

    my $tag = np($stem);
    return "false" if $tag eq "false";

    @nptag = ("($srcwrd $stem $tag $label)");
    return "true";
}

# Plural / feminine forms.  Rules included are:
#   kamraa > kamre, laTkiyon/yaan > laTki,
#   makaanon/en > makaan, laTkaa > laTke
# Only the first rule whose ending matches is tried.
# Returns "true" (tag left in @nptag) or "false".
sub pluralnoun {
    my ($word) = @_;
    my $srcwrd = $word;
    my $hold   = $word;
    @nptag = ();

    # Oblique plural.  This branch used to fall off the end when the
    # lookup failed, so the sub did not return "false" and the caller
    # pushed an empty @nptag, dropping the word from the output.
    if ( $word =~ s/yon$/i/ || $word =~ s/yaan$/i/ ) {
        return pluralnountag($srcwrd, $word, "plural");
    }

    if ( $word =~ s/on$// || $word =~ s/en$// ) {
        return "true" if pluralnountag($srcwrd, $word, "plural") eq "true";
        # Second attempt for -on only: the singular may end in -aa.
        return pluralnountag($srcwrd, $hold, "plural") if $hold =~ s/on$/aa/;
        return "false";
    }

    if ( $word =~ s/e$/aa/ ) {
        return pluralnountag($srcwrd, $word, "plural");
    }

    # ve > vah, ye > yah
    # NOTE(review): a word ending in "e" is consumed by the rule above,
    # so "ve" never reaches this one -- confirm the intended order.
    if ( $word =~ s/aa$/ah/ ) {
        return pluralnountag($srcwrd, $word, "plural");
    }

    if ( $word =~ s/ii$/aa/ ) {
        return pluralnountag($srcwrd, $word, "fem");
    }

    return "false";
}

# pronoun + ko or ne
# Strips one ending at a time and looks the remainder up with np.  The
# substitutions accumulate on $word, so a later rule sees what an
# earlier, unsuccessful rule left behind.  On success @nptag holds the
# np tag with " dative" or " agentive" appended.  Sets the global
# $retval and returns it ("true" or "false").
sub konp {
    my ($word) = @_;
    my $tagd = " dative";
    @nptag = ();

    if ( $word ne "ko" && $word =~ s/ko$// && np($word) ne "false" ) {
        insertnptag($tagd);
        $retval = "true";
    }
    elsif ( $word ne "ne" && $word =~ s/ne$// && np($word) ne "false" ) {
        insertnptag(" agentive");
        $retval = "true";
    }
    elsif ( $word =~ s/e$// && np($word) ne "false" ) {
        insertnptag($tagd);
        $retval = "true";
    }
    elsif ( $word =~ s/hen$// && np($word) ne "false" ) {
        insertnptag($tagd);
        $retval = "true";
    }
    elsif ( $word =~ s/en$// && np($word) ne "false" ) {
        insertnptag($tagd);
        $retval = "true";
    }
    else {
        $retval = "false";
    }
    return $retval;
}

# Append MYTAG just before the closing parenthesis of the tag in @nptag
# and leave the result as the only element of @nptag.
sub insertnptag {
    my ($mytag) = @_;

    # The separator used to be the result of matching / / against $_.
    my $temp = join(" ", @nptag);
    $temp =~ s/\)$/$mytag)/;
    @nptag = ($temp);
    return;
}

# Look WORDS up in @nouns.  The value returned for a match is the entry
# that follows it in the list.
# NOTE(review): presumably nouns.dat alternates word and tag lines --
# confirm.
# On success pushes "(WORDS tag)" onto a cleared @nptag, stores the tag
# in the global $ret and returns it; otherwise returns "false".  A tag
# of "nul" counts as not found.
sub np {
    my ($words) = @_;
    @nptag = ();

    # Stop one short of the last entry so $nouns[$index + 1] always
    # exists.  The old bound ($index <= @nouns) ran past the end, where
    # an empty WORDS compared equal to the missing entry.
    for ( my $index = 0; $index < $#nouns; $index++ ) {
        next unless $nouns[$index] eq $words;

        my $tag = $nouns[$index + 1];
        return "false" if $tag eq "nul";

        push(@nptag, "($words $tag)");
        $ret = $tag;
        return $tag;
    }
    return "false";
}