blob: d54147087ac14ffb2bd684453e1677af59104c97 [file] [log] [blame]
#!/usr/bin/env python
# A given source side can have multiple target sides, and these pairs
# can have the same scores. This results in nondeterminism in the
# output, since any of the equally-scored derivations might be
# selected based on low-level architectural and library details. This
# in turn makes testing difficult, because the results seem to fail.
# This script removes all but one of equivalent target sides.
import sys
skipped = 0
lastsource = ''
lastscores = []
for line in sys.stdin:
lhs, source, target, scores = line.rstrip().split(' ||| ')
if source == lastsource:
if scores in lastscores:
skipped += 1
# sys.stderr.write('SKIPPING(%s)\n' % (line.rstrip()))
continue
lastscores.append(scores)
else:
lastsource = source
lastscores = [scores]
print line,
sys.stderr.write('skipped %d ambiguous translations\n' % (skipped))