blob: f6f6a92e2540c4c039c2cf1859ded5b43b677fe2 [file] [log] [blame]
#!/usr/bin/env python
"""
Removes labels (if present) from features.
e.g.,
[X] ||| le ||| the ||| e_given_f_lex=1
becomes
[X] ||| le ||| the ||| 1
"""
import re
import sys
import codecs
# A feature label together with its '=' sign, e.g. "e_given_f_lex=".
LABEL_RE = re.compile(r'\S*=')
# Separator between the fields of a rule line.
FIELD_SEP = ' ||| '
# Zero-based position of the feature field within a rule line.
FEATURE_FIELD = 3


def strip_feature_labels(line):
    """Return line with the labels removed from its feature field.

    The line is split on ' ||| '. In the fourth field, every run of
    non-whitespace characters that ends in '=' is deleted, so
    'e_given_f_lex=1' becomes '1'. All other fields, and any line
    terminator, are returned untouched.

    A line with fewer than four fields has no feature field; it is
    returned unchanged instead of raising IndexError.
    """
    fields = line.split(FIELD_SEP)
    if len(fields) > FEATURE_FIELD:
        fields[FEATURE_FIELD] = LABEL_RE.sub('', fields[FEATURE_FIELD])
    return FIELD_SEP.join(fields)


def main():
    """Copy rules from stdin to stdout as UTF-8, stripping feature labels."""
    # Python 2 only: reload(sys) brings back setdefaultencoding so that
    # implicit str/unicode conversions use UTF-8 instead of ASCII.
    reload(sys)
    sys.setdefaultencoding('utf-8')
    # Encode everything written to stdout as UTF-8.
    sys.stdout = codecs.getwriter('utf-8')(sys.stdout)
    sys.stdout.encoding = 'utf-8'
    # Iterate the raw byte stream and decode line by line. Iterating a
    # codecs StreamReader would also break lines at Unicode line boundaries
    # (U+2028, U+0085, ...) that may occur inside a rule, cutting the rule
    # into fragments; the byte stream only breaks at '\n'.
    for raw_line in sys.stdin:
        # Each line keeps its own terminator, so write() emits it verbatim
        # without the separator handling of the print statement.
        sys.stdout.write(strip_feature_labels(raw_line.decode('utf-8')))


if __name__ == '__main__':
    main()