blob: bbf9d6af536bf13e662cf9a766eb6e1d26f31886 [file] [log] [blame]
#!/bin/env python
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Allows a file to be queried against a Joshua HTTP server. The file should be tokenized
and normalized, with one sentence per line. This script takes that file, packages it up
into blocks of 100 sentences (changeable with -b), and sends each block to the server. The JSON output
is dumped to STDOUT. If you wish to only dump the "curl" commands instead of calling them,
add "--dry-run".
Usage:
query_http.py --dry-run -s localhost -p 5674 /path/to/corpus
"""
import sys
import urllib
import argparse
import subprocess
# Command-line interface: where the server lives, how many sentences go into
# one request, whether to only print the curl commands, and the input file.
parser = argparse.ArgumentParser(description='Send a (tokenized) test set to a Joshua HTTP server')
parser.add_argument('-s', '--server', dest='server', default='localhost', help='server host')
parser.add_argument('-p', '--port', dest='port', type=int, default=5674, help='server port')
parser.add_argument('-b', '--blocksize', dest='size', type=int, default=100,
                    help='number of sentences at a time')
# default=None is kept as-is: dry_run is None (falsy) when the flag is absent.
parser.add_argument('--dry-run', default=None, action='store_true',
                    help='print curl commands only (don\'t run)')
parser.add_argument('test_file', help='the (tokenized) test file')
# Parse the command line once, at module level; process() and the read loop
# below take the server, port, block size, dry-run flag and input path from here.
args = parser.parse_args()
try:
    # Python 3 keeps urlencode in urllib.parse.
    from urllib.parse import urlencode
except ImportError:
    # Python 2 fallback (the location this script originally used).
    from urllib import urlencode

# Pending ('q', sentence) pairs for the block currently being assembled.
sentences = []


def process(sentence=None):
    """Queue one sentence, sending the pending block to the server when due.

    The pending block is sent (or, when ``args.dry_run`` is set, the
    equivalent curl command is printed) if it already holds ``args.size``
    sentences, or if this function is called without a sentence, which
    flushes whatever is left. A block with no sentences is never sent.

    Args:
        sentence: a sentence to add to the next block; trailing whitespace
            is stripped. ``None`` (the default) only flushes.
    """
    global sentences

    flush_due = sentence is None or len(sentences) == args.size
    # Guard on a non-empty buffer so that an empty input file (or a repeated
    # flush) does not fire a request with an empty query string.
    if flush_due and sentences:
        urlstr = '{}:{}/translate?{}'.format(args.server, args.port, urlencode(sentences))
        if args.dry_run:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('curl -s "{}"'.format(urlstr))
        else:
            # Pass an argument list rather than a shell string so the URL
            # reaches curl verbatim, with no shell interpretation.
            subprocess.call(['curl', '-s', urlstr])
        sentences = []

    if sentence is not None:
        sentences.append(('q', sentence.rstrip()))
# Stream the test file one sentence per line; process() buffers the sentences
# and sends a block each time one fills up. The context manager closes the
# file handle once reading is done.
with open(args.test_file) as test_file:
    for line in test_file:
        process(line.rstrip())

# Flush the final, possibly partial, block.
process()