blob: e1b20a86013e3b1e99f5b88c7fdd2d6ecf4b521e [file] [log] [blame]
#!/usr/bin/env python
"""
Removes labels (if present) from features.
e.g.,
[X] ||| le ||| the ||| e_given_f_lex=1
becomes
[X] ||| le ||| the ||| 1
"""
import re
import sys
import codecs
# Matches one feature label: a run of non-space characters ending in '='
# that begins either at the start of the feature field or right after a
# space.  The boundary is captured (group 1) and re-emitted so the spacing
# between the remaining values is preserved.  Anchoring on the start of the
# field is required because splitting on ' ||| ' consumes the space that
# would otherwise precede the first feature.
_FEATURE_LABEL = re.compile(r'(^| )\S*=')


def remove_labels(line):
    """Return `line` with the labels removed from its feature field.

    The line is split on ' ||| ' and the fourth field is treated as the
    space-separated feature list; every `label=value` entry in it is
    reduced to `value`.  All other fields, and any trailing newline, are
    left exactly as they were.

    Lines with fewer than four fields (e.g. blank lines) are returned
    unchanged rather than raising IndexError.
    """
    fields = line.split(' ||| ')
    if len(fields) > 3:
        fields[3] = _FEATURE_LABEL.sub(r'\1', fields[3])
    return ' ||| '.join(fields)


def main():
    """Copy stdin to stdout as UTF-8, stripping feature labels line by line."""
    if sys.version_info[0] < 3:
        # Python 2: byte-oriented std streams need explicit UTF-8 codecs.
        reload(sys)  # makes sys.setdefaultencoding reachable again
        sys.setdefaultencoding('utf-8')
        sys.stdin = codecs.getreader('utf-8')(sys.stdin)
        sys.stdout = codecs.getwriter('utf-8')(sys.stdout)
        sys.stdout.encoding = 'utf-8'
    else:
        # Python 3: wrap the underlying byte streams so that UTF-8 is used
        # regardless of the locale's preferred encoding.
        sys.stdin = codecs.getreader('utf-8')(sys.stdin.buffer)
        sys.stdout = codecs.getwriter('utf-8')(sys.stdout.buffer)
    for line in sys.stdin:
        # Each line still carries its own newline, so write it verbatim.
        sys.stdout.write(remove_labels(line))


if __name__ == '__main__':
    main()