#!/usr/bin/perl -w use utf8; ######################################################################### #### Author: Eckhard Bick 2003,2008; contact: eckhard.bick@mail.dk ###### ######################################################################### # # use: eval_cg file1 file2 # # compares two cg files, either a gold file with a test file, or simply two different runs on the same input. # # (1) Input format can either be niceline or cohort format # (a) niceline: word [base] <...> ... POS MORF ... @FUNC ... # optional extra tags on the same line: either §SEM .. or %ERROR ... # (b) cohort: "" # "base" <...> ... POS MORF ... @FUNC # "base" <...> ... POS MORF ... @FUNC # ... # However, only for syntatic tags (@) and extra tags (% or §) is ambiguity allowed - cohort format will be truncated to one morphological line per cohort (the first). Output format will be "niceline". # # (2) rewrites testfile with difference markers (3a-d), followed by file1 line number and file1 tag (in parentheses). For syntax, the parenthesis will also contain a hit-out-of count, e.g. 1/2. # # (3) At the end of the rewritten file2, eval_cg will output file difference as an evaluation metrics, providing recall, precision and F-score for # (a) base form *B # (b) part of speech *P # (c) morphology *M # (d) syntax *S # (e) extra *E (could e.g. be §SEM, or % markers for spell/grammarchecking or komma # (f) extra_exist *X (i.e. whether or not there is a %/§-tag at all - useful for e.g. komma-evaluation) ... on the line, *Xover marks a precision error, *Xunder a recall error # # (4) The program has no special alignment needs - it will tolerate some tokenisation difference (e.g. regarding polylexicals). Tokenisation mismatches will be marked in the rewritten file2 as *T_missing, *T_extra and *T_mismatch followed by the corresponding line number in file1, in the case of many to many mismatches also with an unti-n/m indicator, where n and m are the respective line numbers of the point where alignment was reestablished # # Set $no_meta to 1 to provide for meta-markup mismatches (<..> lines ignored) # # Set $match_only to 1 to evaluate pos and morf only for matching words # # Set $sta_adj to 1 if you want to evaluate STA participles as adjectives # ######################################################################### $no_meta =0; $match_only =0; $sta_adj =1; $goldfile =$ARGV[0]; $testfile =$ARGV[1]; $i=0; open(FH, "<$goldfile"); while () { $j++; s/\t.* PU .*\@PU[^%§\n]*/ /; # punctuation stripped of its tags s/^\$([^ \t]+)[ \t]([%§\n])/\$$1\t \[$1\] PU \@PU $2/; s/^\$([^ \t]+)\n/\$$1\t \[$1\] PU \@PU \n/; # if (/,/) {print "--gold: $_\n";} if (/^\"<(.*)>\"/) { $token =$1; # if ($i>0 && $goldline[$i-1] && $goldline[$i-1] =~ /\[/) {$goldline[$i-1] .= "\n";} $i++; $jgold[$i]=$j; $token_gold[$i] =$token; $goldline[$i] =$token; } elsif (/^[ \t]+\"(.*?)\"(.*)/) { $goldline[$i].= "\t\[$1\]$2"; $words++; } elsif ($no_meta && /^/) {s/^(.)/\u$1/;} s/(\@.*?)[ \t]+[\"\[].*/$1/; # only one morphological reading; s/ (\@[^ ]+)( .*| )\1([ \n])/$2$1$3/; # erase identical syntax readings; s/(\@[^ ]+ )\1/$1/; # print "$_"; } } $i=0; open(FH, "<$testfile"); while () { s/\t.* PU .*\@PU[^%§\n]*/ /; # punctuation stripped of its tags s/^\$([^ \t]+)[ \t]+([%§\n])/\$$1\t \[$1\] PU \@PU $2/; s/^\$([^ \t]+)\n/\$$1\t \[$1\] PU \@PU \n/; # if (/,/) {print "--$_\n";} if (/^\"<(.*)>\"/) { $token =$1; # if ($i>0 && $testline[$i-1] && $testline[$i-1] =~ /\[/) {$testline[$i-1] .= "\n";} $i++; $token_test[$i] =$token; $testline[$i] =$token; } elsif (/^[ \t]+\"(.*?)\"(.*)/) { $testline[$i].= "\t\[$1\]$2"; if (! /\@/) {$testline[$i] .= " \@X";} # add dummy syntax where missing } elsif ($no_meta && /^/) {s/^(.)/\u$1/;} s/(\@.*?)[ \t]+[\"\[].*/$1/; # only one morphological reading; s/ (\@[^ ]+)( .*| )\1([ \n])/$2$1$3/; # erase identical syntax readings; s/(\@[^ ]+ )\1/$1/; # print "--$_"; } } $num =$i; $delta_gold =0; $delta_test =0; $eyeold =0; for ($i=1; $i <=$num - $delta_test; $i++) { match: $sem_gold =""; $sem_test =""; $goldeye = $jgold[$i+$delta_gold]; if ($goldline[$i+$delta_gold] && $testline[$i+$delta_test] && $goldline[$i+$delta_gold] =~ /\[.*\].* [A-Z]+ .*\@/ && $testline[$i+$delta_test] =~ /\[.*\].* [A-Z]+ .*\@/) { # print "---i=" . ($i+$delta_gold) . " goldline=" . $goldline[$i+$delta_gold]; # print "---i=" . ($i+$delta_test) . " testline=" . $testline[$i+$delta_test]; ($word_gold,$base_gold,$pos_gold,$morf_gold,$syn_gold) = $goldline[$i+$delta_gold] =~ /^(.*?)[\t ]+[\"\[](.*?)[\"\]].*? ([A-Z]+)( [A-Za-z0-9\/\- ]*).*?(\@[\w\-<>\@ ]+?) *[\#\n%&£]/; # print "--$word_gold,$base_gold,$pos_gold,$morf_gold,$syn_gold\n"; ($word_test,$base_test,$pos_test,$morf_test,$syn_test) = $testline[$i+$delta_test] =~ /^(.*?)[\t ]+[\"\[](.*?)[\"\]].*? ([A-Z]+)( [A-Za-z0-9\/\- ]*).*?(\@[\w\-<>\@ ]+?) *([\#\n%&£]|\*[A-Z])/; # cave: there might already be a *T added to the line - if gold had an extra token here # if (! $word_test ) {print "problem: formal error in line " . ($i+$delta_test) . " " . $testline[$i+$delta_test] . "\n";} # print "---$word_gold $word_test ---, $pos_gold $pos_test\n"; $syn_gold =~ s/ $//; $syn_test =~ s/ $//; if ($goldline[$i+$delta_gold] =~ / ([%§][\w\-<>%§ ]+)/) { $sem_gold = $1; $sem_gold =~ s/ $//; # print "--sem_gold=$sem_gold\n"; } if ($testline[$i+$delta_test] =~ / ([%§][\w\-<>%§ ]+)/) { $sem_test = $1; $sem_test =~ s/ $//; # print "--sem_test=$sem_test ### $testline[$i+$delta_test]\n"; } # if (! $syn_test) {print "--no syn for " . ($i + $delta_test) . " -- " . $testline[$i+$delta_test] . "\n";} if ($word_gold && $word_test && ($word_gold eq $word_test || $pos_gold eq $pos_test)) { # move here from higher up, to allow counting of pos and morf (for the current word even in mismatches, i.e. for instance the first part of token-mismatched MWE, but also simple one-word typo mismatches. Syn counts were already unaffected before the change, because they had their own gold count (due to allowed ambiguity) $word_match++; $morf_gold =~ s/^ +//; $morf_gold =~ s/ +$//; if ($morf_test) { $morf_test =~ s/^ +//; $morf_test =~ s/ +$//; } else { # print "no morf_test in line $i\n"; } # print "$word_test, $pos_test / $pos_gold, $morf_test / $morf_gold, $syn_test\n"; if ($pos_gold eq $pos_test) {$pos_match++;} elsif ($sta_adj && ($pos_gold eq "ADJ" && $testline[$i+$delta_test] =~ /(STA|PCP1.*\@>N)/ || $pos_test eq "ADJ" && $goldline[$i+$delta_gold] =~ /(STA|PCP1.*\@>N)/)) {$pos_match++;} elsif ($pos_gold eq "KP" && $testline[$i+$delta_test] =~ //) {$pos_match++;} else { $testline[$i+$delta_test] =~ s/$/ *P$goldeye($pos_gold)/; } if ($base_gold eq $base_test) {$base_match++;} else { $testline[$i+$delta_test] =~ s/$/ *B$goldeye($base_gold)/; } if ($morf_gold eq $morf_test) {$morf_match++;} else { $testline[$i+$delta_test] =~ s/$/ *M$goldeye($morf_gold)/; } ### comparing syns @syns = split / /, $syn_test; $synerror =0; $synyes =0; @syngolds = split / /, $syn_gold; $syn_ct_gold += @syngolds; foreach (@syns) { $syn_ct++; $syn_statitem = $_; # print "---------$syn_statitem -- $syn_gold --------\n"; if ($syn_gold =~ /$syn_statitem( .*)?$/) { $syn_match++; $synyes++; } else { $synerror++; } } if ($synerror) { $testline[$i+$delta_test] =~ s/$/ *S$goldeye($synyes\/$synerror:$syn_gold)/; } ### comparing sems, if any (*E extras) if ($sem_gold) { @semgolds = split / /, $sem_gold; $sem_ct_gold += @semgolds; $sem_exist_ct_gold++; if (! $sem_test) { $testline[$i+$delta_test] =~ s/$/ *E$goldeye(0\/0:$sem_gold) *Xunder/; } } if ($sem_test) { @sems = split / /, $sem_test; $semerror =0; $semyes =0; foreach (@sems) { $sem_ct++; $sem_statitem = $_; # print "---------$sem_statitem -- $sem_gold --------\n"; if ($sem_gold && $sem_gold =~ /$sem_statitem( .*)?$/) { $sem_match++; $semyes++; } else { $semerror++; } } if ($semerror) { if (! $sem_gold) {$sem_gold = "NONE";} $testline[$i+$delta_test] =~ s/$/ *E$goldeye($semyes\/$semerror:$sem_gold)/; } $sem_exist_ct++; if (! $sem_gold || $sem_gold =~ /NONE/) { # $sem_exist_error++; $testline[$i+$delta_test] =~ s/$/ *Xover/; } else { $sem_exist_match++; # print "--sem_exist_match=$sem_exist_match --- sem_exist_ct_gold=$sem_exist_ct_gold ---sem_exist_error=$sem_exist_error\n"; } } } elsif ($goldline[$i+$delta_gold] =~ /\[.*\].* [A-Z]+ .*\@/ && $testline[$i+$delta_test+1] =~ /\[.*\].* [A-Z]+ .*\@/ && $token_gold[$i+$delta_gold] eq $token_test[$i+$delta_test+1]) { # print "problem: extra word in test at line " . ($i+$delta_test) . "\n"; $testline[$i+$delta_test] =~ s/$/ *T_extra$goldeye/; $delta_test++; goto match; } elsif ($goldline[$i+$delta_gold+1] =~ /\[.*\].* [A-Z]+ .*\@/ && $testline[$i+$delta_test] =~ /\[.*\].* [A-Z]+ .*\@/ && $token_gold[$i+$delta_gold+1] eq $token_test[$i+$delta_test]) { # print "problem: extra word in gold at line " . ($i+$delta_gold) . "\n"; $testline[$i+$delta_test] =~ s/$/ *T_missing$goldeye/; $delta_gold++; goto match; } elsif ($goldline[$i+$delta_gold+1] =~ /\[.*\].* [A-Z]+ .*\@/ && $testline[$i+$delta_test+1] =~ /\[.*\].* [A-Z]+ .*\@/ && $token_gold[$i+$delta_gold+1] eq $token_test[$i+$delta_test+1]) { # print "problem: non-matching words at line " . ($i+$delta_test) . "/" . ($i+$delta_gold) . "\n"; $testline[$i+$delta_test] =~ s/$/ *T_mismatch$goldeye/; $delta_test++; $delta_gold++; goto match; } elsif ($token_test[$i+$delta_test+1] eq $token_gold[$i+$delta_gold+1]) { # print "problem: non-matching word in test at line " . ($i+$delta_gold) . " / " . ($i+$delta_test) . "\n"; $testline[$i+$delta_test] =~ s/$/ *T_mismatch$goldeye/; $delta_gold++; $delta_test++; next; } else { for ($z=0; $z <=10; $z++) { for ($n=2; $n <=2+$z; $n++) { for ($y=0; $y <=$z; $y++) { for ($dir=-1; $dir <=1; $dir =$dir+2) { $m =$n + $y*$dir; if ($m >=2 && $m <= 2+$z) { if ($token_test[$i+$delta_test+$n] eq $token_gold[$i+$delta_gold+$m] && $token_test[$i+$delta_test+$n+1] eq $token_gold[$i+$delta_gold+$m+1]) { $testline[$i+$delta_test] =~ s/$/ *T_mismatch$goldeye\_until_$n\/$m/; $delta_test += $n; $delta_gold += $m; # print "problem: non-matching word in test at line " . ($i+$delta_gold) . " / " . ($i+$delta_test) . " - rematching at +$n/+$m\n"; goto match; } } } } } } print "--problem: serious multi-line mismatch at line " . ($i+$delta_test) . " - exiting\n"; # next; return; } } elsif (! $goldline[$i+$delta_gold]) { $goldshort++; print "-golddata shorter at thest $i + $delta_gold\n"; } elsif (! $testline[$i+$delta_test]) { $testshort++; # print "-testdata shorter\n"; } # print "--$eyeold $testline[$i+$delta_test]\n"; for ($eye=$eyeold+1; $eye<=$i+$delta_test; $eye++) { if ($testline[$eye]) {print $testline[$eye];} } $eyeold =$eye-1; } if ($goldshort) {print "--golddata short by $goldshort lines\n";} if ($testshort) {print "--testdata short by $testshort lines\n";} if ($syn_match) { $recall_syn = int (1000 * ($syn_match/$syn_ct_gold) + 0.5) / 10; $precision_syn = int (1000 * ($syn_match/$syn_ct)+ 0.5) / 10; $f_syn = int (10 * (2 * $recall_syn * $precision_syn) / ($recall_syn + $precision_syn + 0.1)+ 0.5) / 10; if ($match_only) {$pos_ct =$word_match; $pos_words =$word_match;} else {$pos_ct =$words; $pos_words =$words;} $recall_pos = int (1000 * ($pos_match/$pos_words)+ 0.5) / 10; $precision_pos = int (1000 * ($pos_match/$pos_ct)+ 0.5) / 10; $f_pos = int (10 * (2 * $recall_pos * $precision_pos) / ($recall_pos + $precision_pos)+ 0.5) / 10; if ($match_only) {$morf_ct =$word_match; $morf_words =$word_match;} else {$morf_ct =$words; $morf_words =$words;} $recall_morf = int (1000 * ($morf_match/$morf_words)+ 0.5) / 10; $precision_morf = int (1000 * ($morf_match/$morf_ct)+ 0.5) / 10; $f_morf = int (10 * (2 * $recall_morf * $precision_morf) / ($recall_morf + $precision_morf)+ 0.5) / 10; $base_ct =$words; $recall_base = int (1000 * ($base_match/$words)+ 0.5) / 10; $precision_base = int (1000 * ($base_match/$base_ct)+ 0.5) / 10; $f_base = int (10 * (2 * $recall_base * $precision_base) / ($recall_base + $precision_base)+ 0.5) / 10; print "$num tokens, $words real words\n"; print "pos_match =$pos_match, pos_words =$pos_words, pos_ct =$pos_ct\n"; print "morf_match =$morf_match, morf_words =$morf_words, morf_ct =$morf_ct\n\n"; print "syntax: recall=$recall_syn precision=$precision_syn F-score=$f_syn\n"; print "pos: recall=$recall_pos precision=$precision_pos F-score=$f_pos\n"; print "morphology: recall=$recall_morf precision=$precision_morf F-score=$f_morf\n"; print "base form: recall=$recall_base precision=$precision_base F-score=$f_base\n"; if ($sem_ct_gold) { $recall_sem = int (1000 * ($sem_match/$sem_ct_gold) + 0.5) / 10; $precision_sem = int (1000 * ($sem_match/$sem_ct)+ 0.5) / 10; $f_sem = int (10 * (2 * $recall_sem * $precision_sem) / ($recall_sem + $precision_sem + 0.1)+ 0.5) / 10; print "extras: recall=$recall_sem precision=$precision_sem F-score=$f_sem\n"; ### evaluate only existance of extra tag, not its type $recall_sem_exist = int (1000 * ($sem_exist_match/$sem_exist_ct_gold) + 0.5) / 10; $precision_sem_exist = int (1000 * ($sem_exist_match/$sem_exist_ct)+ 0.5) / 10; $f_sem_exist = int (10 * (2 * $recall_sem_exist * $precision_sem_exist) / ($recall_sem_exist + $precision_sem_exist + 0.1)+ 0.5) / 10; print "--sem_exist_match=$sem_exist_match, sem_exist_ct_gold=$sem_exist_ct_gold, sem_exist_ct=$sem_exist_ct\n"; print "extras_exist: recall=$recall_sem_exist precision=$precision_sem_exist F-score=$f_sem_exist\n"; } }