joshua-core/src/test/java/org/apache/joshua/system/StructuredOutputTest.java - joshua - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *  http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */
 package org.apache.joshua.system;

 import static com.typesafe.config.ConfigFactory.parseResources;
 import static com.typesafe.config.ConfigValueFactory.fromAnyRef;

 import java.util.Arrays;
 import java.util.List;

 import org.apache.joshua.decoder.Decoder;
 import org.apache.joshua.decoder.Translation;
 import org.apache.joshua.decoder.segment_file.Sentence;
 import org.testng.Assert;
 import org.testng.annotations.AfterMethod;
 import org.testng.annotations.BeforeMethod;
 import org.testng.annotations.Test;

 import com.typesafe.config.Config;

 /**
  * Integration test for the complete Joshua decoder using a toy grammar that translates
  * a bunch of capital letters to lowercase letters. Rules in the test grammar
  * drop and generate additional words and simulate reordering of rules, so that
  * proper extraction of word alignments can be tested.
  *
  * @author fhieber
  */
 public class StructuredOutputTest {

   /** Name of the flag that switches the decoder between plain and structured output. */
   private static final String STRUCTURED_OUTPUT_FLAG = "use_structured_output";

   private Decoder decoder = null;
   private Translation translation = null;

   /** Source sentence fed to the decoder. */
   private static final String input = "A K B1 U Z1 Z2 B2 C";

   /** Target string the decoder is expected to produce for {@link #input}. */
   private static final String expectedTranslation = "a b n1 u z c1 k1 k2 k3 n1 n2 n3 c2";

   /** Alignment pairs expected after the {@code " | "} separator in the plain output. */
   private static final String expectedWordAlignmentString = "0-0 2-1 6-1 3-3 4-4 5-4 7-5 1-6 1-7 1-8 7-12";

   /**
    * List form of {@link #expectedWordAlignmentString}: entry {@code i} holds the first
    * components of all pairs whose second component is {@code i}; indices without a pair
    * get an empty list. There are 13 entries, one per token of {@link #expectedTranslation}.
    */
   private static final List<List<Integer>> expectedWordAlignment = Arrays.asList(
       Arrays.asList(0),
       Arrays.asList(2, 6),
       Arrays.asList(),
       Arrays.asList(3),
       Arrays.asList(4, 5),
       Arrays.asList(7),
       Arrays.asList(1),
       Arrays.asList(1),
       Arrays.asList(1),
       Arrays.asList(),
       Arrays.asList(),
       Arrays.asList(),
       Arrays.asList(7));

   /** Score expected for the first structured translation. */
   private static final double expectedScore = -17.0;

   /**
    * Creates a fresh decoder before every test method. Flags are read from the
    * {@code StructuredOutputTest.conf} resource of this class, falling back to the
    * decoder's default flags.
    */
   @BeforeMethod
   public void setUp() throws Exception {
     Config defaults = Decoder.getDefaultFlags();
     Config testFlags = parseResources(this.getClass(), "StructuredOutputTest.conf");
     decoder = new Decoder(testFlags.withFallback(defaults));
   }

   /** Cleans up the decoder and drops the references held by this test instance. */
   @AfterMethod
   public void tearDown() throws Exception {
     decoder.cleanUp();
     decoder = null;
     translation = null;
   }

   /**
    * Wraps {@code text} in a {@link Sentence} with id 0 and the given flags, then decodes it.
    *
    * @param text source sentence to translate
    * @param config flags handed to the sentence
    * @return the translation returned by the decoder
    */
   private Translation decode(String text, Config config) {
     return decoder.decode(new Sentence(text, 0, config));
   }

   /**
    * Returns the decoder's flags with the structured-output switch overridden.
    *
    * @param enabled value to set for {@code use_structured_output}
    */
   private Config structuredOutputFlags(boolean enabled) {
     return decoder.getFlags().withValue(STRUCTURED_OUTPUT_FLAG, fromAnyRef(enabled));
   }

   /**
    * Decodes the same input twice: once with structured output disabled, checking the
    * trimmed string form of the translation, and once with it enabled, checking the
    * string, tokens, score and word alignments of the first structured translation.
    */
   @Test
   public void test() {

     // test standard output
     Config plainFlags = structuredOutputFlags(false);
     translation = decode(input, plainFlags);
     String expectedPlainOutput = expectedTranslation + " | " + expectedWordAlignmentString;
     Assert.assertEquals(translation.toString().trim(), expectedPlainOutput);

     // test structured output
     Config structuredFlags = structuredOutputFlags(true);
     translation = decode(input, structuredFlags);

     Assert.assertEquals(
         translation.getStructuredTranslations().get(0).getTranslationString(),
         expectedTranslation);

     List<String> expectedTokens = Arrays.asList(expectedTranslation.split("\\s+"));
     Assert.assertEquals(
         translation.getStructuredTranslations().get(0).getTranslationTokens(),
         expectedTokens);

     Assert.assertEquals(
         translation.getStructuredTranslations().get(0).getTranslationScore(),
         expectedScore,
         0.00001);

     Assert.assertEquals(
         translation.getStructuredTranslations().get(0).getTranslationWordAlignments(),
         expectedWordAlignment);

     // there must be exactly one alignment entry per output token
     int alignmentCount =
         translation.getStructuredTranslations().get(0).getTranslationWordAlignments().size();
     int tokenCount =
         translation.getStructuredTranslations().get(0).getTranslationTokens().size();
     Assert.assertEquals(alignmentCount, tokenCount);
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/
	package org.apache.joshua.system;

	import static com.typesafe.config.ConfigFactory.parseResources;
	import static com.typesafe.config.ConfigValueFactory.fromAnyRef;

	import java.util.Arrays;
	import java.util.List;

	import org.apache.joshua.decoder.Decoder;
	import org.apache.joshua.decoder.Translation;
	import org.apache.joshua.decoder.segment_file.Sentence;
	import org.testng.Assert;
	import org.testng.annotations.AfterMethod;
	import org.testng.annotations.BeforeMethod;
	import org.testng.annotations.Test;

	import com.typesafe.config.Config;

	/**
	* Integration test for the complete Joshua decoder using a toy grammar that translates
	* a bunch of capital letters to lowercase letters. Rules in the test grammar
	* drop and generate additional words and simulate reordering of rules, so that
	* proper extraction of word alignments can be tested.
	*
	* @author fhieber
	*/
	public class StructuredOutputTest {

	private Decoder decoder = null;
	private Translation translation = null;
	private static final String input = "A K B1 U Z1 Z2 B2 C";
	private static final String expectedTranslation = "a b n1 u z c1 k1 k2 k3 n1 n2 n3 c2";
	private static final String expectedWordAlignmentString = "0-0 2-1 6-1 3-3 4-4 5-4 7-5 1-6 1-7 1-8 7-12";
	private static final List<List<Integer>> expectedWordAlignment = Arrays.asList(
	Arrays.asList(0), Arrays.asList(2, 6), Arrays.asList(), Arrays.asList(3),
	Arrays.asList(4, 5), Arrays.asList(7), Arrays.asList(1),
	Arrays.asList(1), Arrays.asList(1), Arrays.asList(), Arrays.asList(),
	Arrays.asList(), Arrays.asList(7));
	private static final double expectedScore = -17.0;

	@BeforeMethod
	public void setUp() throws Exception {
	Config flags = parseResources(this.getClass(), "StructuredOutputTest.conf")
	.withFallback(Decoder.getDefaultFlags());
	decoder = new Decoder(flags);
	}

	@AfterMethod
	public void tearDown() throws Exception {
	decoder.cleanUp();
	decoder = null;
	translation = null;
	}

	private Translation decode(String input, Config flags) {
	Sentence sentence = new Sentence(input, 0, flags);
	return decoder.decode(sentence);
	}

	@Test
	public void test() {

	// test standard output
	translation = decode(input,
	decoder.getFlags().withValue("use_structured_output", fromAnyRef(false)));
	Assert.assertEquals(translation.toString().trim(), expectedTranslation + " \| " + expectedWordAlignmentString);

	// test structured output
	translation = decode(input, decoder.getFlags().withValue("use_structured_output", fromAnyRef(true)));
	Assert.assertEquals(translation.getStructuredTranslations().get(0).getTranslationString(), expectedTranslation);
	Assert.assertEquals(translation.getStructuredTranslations().get(0).getTranslationTokens(), Arrays.asList(expectedTranslation.split("\\s+")));
	Assert.assertEquals(translation.getStructuredTranslations().get(0).getTranslationScore(), expectedScore, 0.00001);
	Assert.assertEquals(translation.getStructuredTranslations().get(0).getTranslationWordAlignments(), expectedWordAlignment);
	Assert.assertEquals(translation.getStructuredTranslations().get(0).getTranslationWordAlignments().size(), translation
	.getStructuredTranslations().get(0).getTranslationTokens().size());
	}
	}